diff --git a/x86/aes-decrypt-internal.asm b/x86/aes-decrypt-internal.asm
index f97fa093498836d3064a6fbc5b77d6d3d3ab50a6..ff94fa8f993ca0db48474a2dcfb954ba6e031609 100644
--- a/x86/aes-decrypt-internal.asm
+++ b/x86/aes-decrypt-internal.asm
@@ -139,7 +139,8 @@ PROLOGUE(_nettle_aes_decrypt)
 	C Inverse S-box substitution
 	mov	$4,TMP
 .Lsubst:
-	AES_SUBST_BYTE(T, KEY)
+	AES_SUBST_BYTE(SA,SB,SC,SD, T, KEY)
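+	C Each pass substitutes the low byte of every word; four passes cover all bytes.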
 
 	decl	TMP
 	jnz	.Lsubst
diff --git a/x86/aes-encrypt-internal.asm b/x86/aes-encrypt-internal.asm
index 8fe041f0f393e27d8454fbb53c828698f90b0169..dc573948e511b53ac4adfbbb375df2b60ff0eb7e 100644
--- a/x86/aes-encrypt-internal.asm
+++ b/x86/aes-encrypt-internal.asm
@@ -139,7 +139,8 @@ PROLOGUE(_nettle_aes_encrypt)
 	C S-box substitution
 	mov	$4,TMP
 .Lsubst:
-	AES_SUBST_BYTE(T, KEY)
+	AES_SUBST_BYTE(SA,SB,SC,SD, T, KEY)
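+	C Each pass substitutes the low byte of every word; four passes cover all bytes.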
 
 	decl	TMP
 	jnz	.Lsubst
diff --git a/x86/aes.m4 b/x86/aes.m4
index 3c770a737c72b6f48c44eba065051715f0755636..97cb03af40dcd9b0e991efdd5020b896e4784c52 100644
--- a/x86/aes.m4
+++ b/x86/aes.m4
@@ -67,28 +67,56 @@ define(<AES_FINAL_ROUND>, <
 	andl	<$>0xff000000,$6
 	orl	$6, $5>)dnl
 
-dnl AES_SUBST_BYTE(table, tmp)
+dnl BYTEREG(reg) gives the 8-bit register corresponding to the given 32-bit register.
+dnl Used in AES_SUBST_BYTE below, by both the x86 and the x86_64 assembly code.
+define(<BYTEREG>,<ifelse(
+	$1, %eax, %al,
+	$1, %ebx, %bl,
+	$1, %ecx, %cl,
+	$1, %edx, %dl,
+	dnl The rest are x86_64 only
+	$1, %esi, %sil,
+	$1, %edi, %dil,
+	$1, %ebp, %bpl,
+	$1, %esp, %spl,
+	$1, %r8d, %r8b,
+	$1, %r9d, %r9b,
+	$1, %r10d, %r10b,
+	$1, %r11d, %r11b,
+	$1, %r12d, %r12b,
+	$1, %r13d, %r13b,
+	$1, %r14d, %r14b,
+	$1, %r15d, %r15b)>)dnl
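+dnl For example, BYTEREG(%eax) expands to %al, and BYTEREG(%r8d) to %r8b.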
+
+dnl AES_SUBST_BYTE(A, B, C, D, table, tmp)
 dnl Substitutes the least significant byte of
-dnl each of eax, ebx, ecx and edx, and also rotates
-dnl the words one byte to the left.
+dnl each of the register arguments A, B, C and D, and also
+dnl rotates the words one byte to the left.
-dnl FIXME: AES_SBOX is zero. Any win by deleting the offset?
+dnl Uses that AES_SBOX == 0
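+dnl so the bare (table, index) addressing below needs no displacement.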
 define(<AES_SUBST_BYTE>, <
-	movl	%eax,$2
-	andl	<$>0x000000ff,$2
-	movb	AES_SBOX ($1, $2),%al
-	roll	<$>8,%eax
+	movl	$1,$6
+	andl	<$>0x000000ff,$6
+	movb	($5, $6),BYTEREG($1)
+	roll	<$>8,$1
 
-	movl	%ebx,$2
-	andl	<$>0x000000ff,$2
-	movb	AES_SBOX ($1, $2),%bl
-	roll	<$>8,%ebx
+	movl	$2,$6
+	andl	<$>0x000000ff,$6
+	movb	($5, $6),BYTEREG($2)
+	roll	<$>8,$2
 
-	movl	%ecx,$2
-	andl	<$>0x000000ff,$2
-	movb	AES_SBOX ($1, $2),%cl
-	roll	<$>8,%ecx
+	movl	$3,$6
+	andl	<$>0x000000ff,$6
+	movb	($5, $6),BYTEREG($3)
+	roll	<$>8,$3
 
-	movl	%edx,$2
-	andl	<$>0x000000ff,$2
-	movb	AES_SBOX ($1, $2),%dl
-	roll	<$>8,%edx>)dnl
+	movl	$4,$6
+	andl	<$>0x000000ff,$6
+	movb	($5, $6),BYTEREG($4)
+	roll	<$>8,$4>)dnl
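+
+dnl For example, with A = 0x01020304 one expansion replaces the low byte
+dnl by table[0x04] and rotates, leaving A = 0x0203xx01 (xx the substituted
+dnl byte); the callers loop four times, so every byte is substituted and
+dnl each word returns to its original byte order.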