From 038a108d9f9fc783a821bc98092b04d766d7982f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se>
Date: Tue, 30 Jan 2024 18:43:33 +0100
Subject: [PATCH] ppc64: Add a nop _aes_invert, to get decrypt subkeys
 compatible with vncipher.

---
 aes-invert-internal.c                   |  7 +++
 configure.ac                            |  5 +-
 fat-ppc.c                               | 10 ++++
 fat-setup.h                             |  1 +
 powerpc64/fat/aes-invert-internal-2.asm | 37 ++++++++++++++
 powerpc64/p8/aes-decrypt-internal.asm   | 23 ++-------
 powerpc64/p8/aes-invert-internal.asm    | 64 +++++++++++++++++++++++++
 7 files changed, 128 insertions(+), 19 deletions(-)
 create mode 100644 powerpc64/fat/aes-invert-internal-2.asm
 create mode 100644 powerpc64/p8/aes-invert-internal.asm

diff --git a/aes-invert-internal.c b/aes-invert-internal.c
index 7364616c..00d1ef6f 100644
--- a/aes-invert-internal.c
+++ b/aes-invert-internal.c
@@ -42,6 +42,13 @@
 
 #include "macros.h"
 
+/* For fat builds with a native version: this file uses the name _nettle_aes_invert_c. */
+#if HAVE_NATIVE_aes_invert
+void
+_nettle_aes_invert_c(unsigned rounds, uint32_t *dst, const uint32_t *src);
+#define _nettle_aes_invert _nettle_aes_invert_c
+#endif
+
 /* NOTE: We don't include rotated versions of the table. */
 static const uint32_t mtable[0x100] =
 {
diff --git a/configure.ac b/configure.ac
index d3145da9..98b6cac3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -574,6 +574,7 @@ fi
 # Files which replace a C source file (or otherwise don't correspond
 # to a new object file).
 asm_replace_list="aes-encrypt-internal.asm aes-decrypt-internal.asm \
+		aes-invert-internal.asm \
 		aes128-set-encrypt-key.asm aes128-set-decrypt-key.asm \
 		aes128-encrypt.asm aes128-decrypt.asm \
 		aes192-set-encrypt-key.asm aes192-set-decrypt-key.asm \
@@ -594,7 +595,8 @@ asm_replace_list="aes-encrypt-internal.asm aes-decrypt-internal.asm \
 
 # Assembler files which generate additional object files if they are used.
 asm_nettle_optional_list="cpuid.asm cpu-facility.asm \
-  aes-encrypt-internal-2.asm aes-decrypt-internal-2.asm memxor-2.asm memxor3-2.asm \
+  memxor-2.asm memxor3-2.asm \
+  aes-encrypt-internal-2.asm aes-decrypt-internal-2.asm aes-invert-internal-2.asm \
   aes128-set-encrypt-key-2.asm aes128-set-decrypt-key-2.asm \
   aes128-encrypt-2.asm aes128-decrypt-2.asm \
   aes192-set-encrypt-key-2.asm aes192-set-decrypt-key-2.asm \
@@ -713,6 +715,7 @@ AH_VERBATIM([HAVE_NATIVE],
 #undef HAVE_NATIVE_memxor3
 #undef HAVE_NATIVE_aes_decrypt
 #undef HAVE_NATIVE_aes_encrypt
+#undef HAVE_NATIVE_aes_invert
 #undef HAVE_NATIVE_aes128_decrypt
 #undef HAVE_NATIVE_aes128_encrypt
 #undef HAVE_NATIVE_aes128_invert_key
diff --git a/fat-ppc.c b/fat-ppc.c
index b95365f6..cd76f7a1 100644
--- a/fat-ppc.c
+++ b/fat-ppc.c
@@ -163,6 +163,10 @@ DECLARE_FAT_FUNC(_nettle_aes_decrypt, aes_crypt_internal_func)
 DECLARE_FAT_FUNC_VAR(aes_decrypt, aes_crypt_internal_func, c)
 DECLARE_FAT_FUNC_VAR(aes_decrypt, aes_crypt_internal_func, ppc64)
 
+DECLARE_FAT_FUNC(_nettle_aes_invert, aes_invert_internal_func)
+DECLARE_FAT_FUNC_VAR(aes_invert, aes_invert_internal_func, c)
+DECLARE_FAT_FUNC_VAR(aes_invert, aes_invert_internal_func, ppc64)
+
 DECLARE_FAT_FUNC(_nettle_ghash_set_key, ghash_set_key_func)
 DECLARE_FAT_FUNC_VAR(ghash_set_key, ghash_set_key_func, c)
 DECLARE_FAT_FUNC_VAR(ghash_set_key, ghash_set_key_func, ppc64)
@@ -219,6 +223,7 @@ fat_init (void)
 	fprintf (stderr, "libnettle: enabling arch 2.07 code.\n");
       _nettle_aes_encrypt_vec = _nettle_aes_encrypt_ppc64;
       _nettle_aes_decrypt_vec = _nettle_aes_decrypt_ppc64;
+      _nettle_aes_invert_vec = _nettle_aes_invert_ppc64;
 
       /* Make sure _nettle_ghash_set_key_vec function is compatible
          with _nettle_ghash_update_vec function e.g. _nettle_ghash_key_c()
@@ -231,6 +236,7 @@ fat_init (void)
     {
       _nettle_aes_encrypt_vec = _nettle_aes_encrypt_c;
       _nettle_aes_decrypt_vec = _nettle_aes_decrypt_c;
+      _nettle_aes_invert_vec = _nettle_aes_invert_c;
       _nettle_ghash_set_key_vec = _nettle_ghash_set_key_c;
       _nettle_ghash_update_vec = _nettle_ghash_update_c;
     }
@@ -281,6 +287,10 @@ DEFINE_FAT_FUNC(_nettle_aes_decrypt, void,
  const uint8_t *src),
  (rounds, keys, T, length, dst, src))
 
+DEFINE_FAT_FUNC(_nettle_aes_invert, void,
+ (unsigned rounds, uint32_t *dst, const uint32_t *src),
+ (rounds, dst, src))
+
 DEFINE_FAT_FUNC(_nettle_ghash_set_key, void,
 		(struct gcm_key *ctx, const union nettle_block16 *key),
 		(ctx, key))
diff --git a/fat-setup.h b/fat-setup.h
index 6bf3e2fa..dc6fd20a 100644
--- a/fat-setup.h
+++ b/fat-setup.h
@@ -162,6 +162,7 @@ typedef void aes_crypt_internal_func (unsigned rounds, const uint32_t *keys,
 				      const struct aes_table *T,
 				      size_t length, uint8_t *dst,
 				      const uint8_t *src);
+typedef void aes_invert_internal_func (unsigned rounds, uint32_t *dst, const uint32_t *src);
 
 struct gcm_key;
 typedef void ghash_set_key_func (struct gcm_key *ctx, const union nettle_block16 *key);
diff --git a/powerpc64/fat/aes-invert-internal-2.asm b/powerpc64/fat/aes-invert-internal-2.asm
new file mode 100644
index 00000000..885a3dc6
--- /dev/null
+++ b/powerpc64/fat/aes-invert-internal-2.asm
@@ -0,0 +1,37 @@
+C powerpc64/fat/aes-invert-internal-2.asm
+
+
+ifelse(`
+   Copyright (C) 2020 Mamone Tarsha
+
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+')
+
+dnl PROLOGUE(_nettle_aes_invert) picked up by configure
+dnl NOTE(review): fat_transform presumably gives the included symbol its _ppc64 suffix; confirm.
+define(`fat_transform', `$1_ppc64')
+include_src(`powerpc64/p8/aes-invert-internal.asm')
diff --git a/powerpc64/p8/aes-decrypt-internal.asm b/powerpc64/p8/aes-decrypt-internal.asm
index d2a07a38..e03baab6 100644
--- a/powerpc64/p8/aes-decrypt-internal.asm
+++ b/powerpc64/p8/aes-decrypt-internal.asm
@@ -53,12 +53,6 @@ define(`S5', `v7')
 define(`S6', `v8')
 define(`S7', `v9')
 
-C ZERO vector register is used in place of RoundKey
-C for vncipher instruction because the order of InvMixColumns
-C and Xor processes are flipped in that instruction.
-C The Xor process with RoundKey is executed afterward.
-define(`ZERO', `v10')
-
 .file "aes-decrypt-internal.asm"
 
 .text
@@ -70,8 +64,6 @@ define(`ZERO', `v10')
 
 define(`FUNC_ALIGN', `5')
 PROLOGUE(_nettle_aes_decrypt)
- vxor ZERO,ZERO,ZERO
-
  DATA_LOAD_VEC(SWAP_MASK,.swap_mask,r5)
 
  subi ROUNDS,ROUNDS,1
@@ -121,8 +113,7 @@ IF_LE(`OPN_XXXY(vperm, SWAP_MASK, S0,S1,S2,S3,S4,S5,S6,S7)')
 L8x_round_loop:
  lxvd2x VSR(K),r9,KEYS
  vperm   K,K,K,SWAP_MASK
- OPN_XXY(vncipher, ZERO, S0, S1, S2, S3, S4, S5, S6, S7)
- OPN_XXY(vxor, K, S0, S1, S2, S3, S4, S5, S6, S7)
+ OPN_XXY(vncipher, K, S0, S1, S2, S3, S4, S5, S6, S7)
  subi r9,r9,0x10
  bdnz L8x_round_loop
 
@@ -177,8 +168,7 @@ IF_LE(`OPN_XXXY(vperm, SWAP_MASK, S0,S1,S2,S3)')
 L4x_round_loop:
  lxvd2x VSR(K),r9,KEYS
  vperm  K,K,K,SWAP_MASK
- OPN_XXY(vncipher, ZERO, S0, S1, S2, S3)
- OPN_XXY(vxor, K, S0, S1, S2, S3)
+ OPN_XXY(vncipher, K, S0, S1, S2, S3)
  subi   r9,r9,0x10
  bdnz  L4x_round_loop
 
@@ -221,10 +211,8 @@ IF_LE(`vperm S0,S0,S0,SWAP_MASK
 L2x_round_loop:
  lxvd2x VSR(K),r9,KEYS
  vperm  K,K,K,SWAP_MASK
- vncipher S0,S0,ZERO
- vncipher S1,S1,ZERO
- vxor  S0,S0,K
- vxor  S1,S1,K
+ vncipher S0,S0,K
+ vncipher S1,S1,K
  subi   r9,r9,0x10
  bdnz   L2x_round_loop
 
@@ -263,8 +251,7 @@ IF_LE(`vperm S0,S0,S0,SWAP_MASK')
 L1x_round_loop:
  lxvd2x VSR(K),r9,KEYS
  vperm  K,K,K,SWAP_MASK
- vncipher S0,S0,ZERO
- vxor   S0,S0,K
+ vncipher S0,S0,K
  subi   r9,r9,0x10
  bdnz   L1x_round_loop
 
diff --git a/powerpc64/p8/aes-invert-internal.asm b/powerpc64/p8/aes-invert-internal.asm
new file mode 100644
index 00000000..0158d5f3
--- /dev/null
+++ b/powerpc64/p8/aes-invert-internal.asm
@@ -0,0 +1,64 @@
+C powerpc64/p8/aes-invert-internal.asm
+
+ifelse(`
+   Copyright (C) 2024 Niels Möller
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+')
+
+C Register usage:
+
+define(`SP', `r1')
+define(`TOCP', `r2')
+
+define(`ROUNDS', `r3')
+define(`DST', `r4')
+define(`SRC', `r5')
+
+define(`KEY', `v1')
+
+.file "aes-invert-internal.asm"
+
+.text
+
+ C void _nettle_aes_invert(unsigned rounds, uint32_t *dst, const uint32_t *src)
+
+define(`FUNC_ALIGN', `5')
+PROLOGUE(_nettle_aes_invert)
+	C Since decrypt wants the same subkeys, just copy them (rounds + 1
+	C subkeys of 16 bytes, last one first), or do nothing if SRC == DST.
+	cmpld	SRC, DST
+	beq	.Ldone
+
+	sldi	ROUNDS, ROUNDS, 4
+.Loop:
+	lxvd2x	VSR(KEY),ROUNDS,SRC
+	stxvd2x	VSR(KEY),ROUNDS,DST
+	subic.	ROUNDS, ROUNDS, 0x10
+	bge	.Loop
+.Ldone:
+	blr
+EPILOGUE(_nettle_aes_invert)
-- 
GitLab