From 4471e5a47676feaf265c3ed3ce8447a4a2c803a3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se>
Date: Sat, 13 Sep 2008 14:13:28 +0200
Subject: [PATCH] * x86/aes-encrypt-internal.asm: Updated use of
 AES_SUBST_BYTE. * x86/aes-decrypt-internal.asm: Likewise. * x86/aes.m4
 (BYTEREG): New macro. (AES_SUBST_BYTE): Take state registers as argument. Use
 BYTEREG to get the corresponding byte register.

Rev: nettle/x86/aes-decrypt-internal.asm:1.2
Rev: nettle/x86/aes-encrypt-internal.asm:1.2
Rev: nettle/x86/aes.m4:1.2
---
 x86/aes-decrypt-internal.asm |  2 +-
 x86/aes-encrypt-internal.asm |  2 +-
 x86/aes.m4                   | 57 ++++++++++++++++++++++++------------
 3 files changed, 41 insertions(+), 20 deletions(-)

diff --git a/x86/aes-decrypt-internal.asm b/x86/aes-decrypt-internal.asm
index f97fa093..ff94fa8f 100644
--- a/x86/aes-decrypt-internal.asm
+++ b/x86/aes-decrypt-internal.asm
@@ -139,7 +139,7 @@ PROLOGUE(_nettle_aes_decrypt)
 	C Inverse S-box substitution
 	mov	$4,TMP
 .Lsubst:
-	AES_SUBST_BYTE(T, KEY)
+	AES_SUBST_BYTE(SA,SB,SC,SD,T, KEY)
 
 	decl	TMP
 	jnz	.Lsubst
diff --git a/x86/aes-encrypt-internal.asm b/x86/aes-encrypt-internal.asm
index 8fe041f0..dc573948 100644
--- a/x86/aes-encrypt-internal.asm
+++ b/x86/aes-encrypt-internal.asm
@@ -139,7 +139,7 @@ PROLOGUE(_nettle_aes_encrypt)
 	C S-box substitution
 	mov	$4,TMP
 .Lsubst:
-	AES_SUBST_BYTE(T, KEY)
+	AES_SUBST_BYTE(SA,SB,SC,SD, T, KEY)
 
 	decl	TMP
 	jnz	.Lsubst
diff --git a/x86/aes.m4 b/x86/aes.m4
index 3c770a73..97cb03af 100644
--- a/x86/aes.m4
+++ b/x86/aes.m4
@@ -67,28 +67,49 @@ define(<AES_FINAL_ROUND>, <
 	andl	<$>0xff000000,$6
 	orl	$6, $5>)dnl
 
-dnl AES_SUBST_BYTE(table, tmp)
+dnl BYTEREG(reg) gives the 8-bit register for a 32-bit register; used by AES_SUBST_BYTE
+dnl on both x86 and x86_64. Entries from %esi onwards exist on x86_64 only. Keep dnl out
+dnl of the ifelse list: text following it would retain its leading tab and never match.
+define(<BYTEREG>,<ifelse(
+	$1, %eax, %al,
+	$1, %ebx, %bl,
+	$1, %ecx, %cl,
+	$1, %edx, %dl,
+	$1, %esi, %sil,
+	$1, %edi, %dil,
+	$1, %ebp, %bpl,
+	$1, %esp, %spl,
+	$1, %r8d, %r8b,
+	$1, %r9d, %r9b,
+	$1, %r10d, %r10b,
+	$1, %r11d, %r11b,
+	$1, %r12d, %r12b,
+	$1, %r13d, %r13b,
+	$1, %r14d, %r14b,
+	$1, %r15d, %r15b)>)dnl
+
+dnl AES_SUBST_BYTE(A, B, C, D, table, tmp), with A-D the 32-bit state registers.
 dnl Substitutes the least significant byte of
 dnl each of eax, ebx, ecx and edx, and also rotates
 dnl the words one byte to the left.
-dnl FIXME: AES_SBOX is zero. Any win by deleting the offset?
+dnl Relies on AES_SBOX == 0: the lookups below use no displacement into the table.
 define(<AES_SUBST_BYTE>, <
-	movl	%eax,$2
-	andl	<$>0x000000ff,$2
-	movb	AES_SBOX ($1, $2),%al
-	roll	<$>8,%eax
+	movl	$1,$6
+	andl	<$>0x000000ff,$6
+	movb	($5, $6),BYTEREG($1)
+	roll	<$>8,$1
 
-	movl	%ebx,$2
-	andl	<$>0x000000ff,$2
-	movb	AES_SBOX ($1, $2),%bl
-	roll	<$>8,%ebx
+	movl	$2,$6
+	andl	<$>0x000000ff,$6
+	movb	($5, $6),BYTEREG($2)
+	roll	<$>8,$2
 
-	movl	%ecx,$2
-	andl	<$>0x000000ff,$2
-	movb	AES_SBOX ($1, $2),%cl
-	roll	<$>8,%ecx
+	movl	$3,$6
+	andl	<$>0x000000ff,$6
+	movb	($5, $6),BYTEREG($3)
+	roll	<$>8,$3
 
-	movl	%edx,$2
-	andl	<$>0x000000ff,$2
-	movb	AES_SBOX ($1, $2),%dl
-	roll	<$>8,%edx>)dnl
+	movl	$4,$6
+	andl	<$>0x000000ff,$6
+	movb	($5, $6),BYTEREG($4)
+	roll	<$>8,$4>)dnl
-- 
GitLab