diff --git a/x86_64/aes-encrypt-internal.asm b/x86_64/aes-encrypt-internal.asm
index d4d0ecd1ec2fbc43f6f689e7a5990a4e95277e48..19630ca9afef47a43cf5a1e532412e54bdda3984 100644
--- a/x86_64/aes-encrypt-internal.asm
+++ b/x86_64/aes-encrypt-internal.asm
@@ -18,9 +18,7 @@ C along with the nettle library; see the file COPYING.LIB.  If not, write to
 C the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 C MA 02111-1307, USA.
 
-C Use same macros as for plain x86. FIXME: AES_SUBST_BYTE uses
-C hardcoded registers. 
-include_src(<x86/aes.m4>)
+include_src(<x86_64/aes.m4>)
 
 C Register usage:
 
@@ -47,9 +45,9 @@ define(<COUNT>,	<%r15d>)
 C Put the outer loop counter on the stack, and reuse the LENGTH
 C register as a temporary. 
 	
-define(<FRAME_COUNT>,	<(%esp)>)
-define(<TMP>,<%edx>)
-define(<TMPPTR>,<%rdx>)
+define(<FRAME_COUNT>,	<(%rsp)>)
+define(<TMP>,<%rdx>)
+
 	.file "aes-encrypt-internal.asm"
 	
 	C _aes_encrypt(struct aes_context *ctx, 
@@ -76,41 +74,41 @@ PROLOGUE(_nettle_aes_encrypt)
 	shrl	$4, LENGTH
 	movl	LENGTH, FRAME_COUNT
 .Lblock_loop:
-	movl	CTX,KEY
+	mov	CTX,KEY
 	
 	AES_LOAD(SA, SB, SC, SD, SRC, KEY)
-	addl	$16, SRC	C Increment src pointer
+	add	$16, SRC	C Increment src pointer
 
 	C  get number of rounds to do from ctx struct	
 	movl	AES_NROUNDS (CTX), COUNT
 	shrl	$1, COUNT
 	subl	$1, COUNT
 
-	addl	$16,KEY		C  point to next key
+	add	$16,KEY		C  point to next key
 	ALIGN(4)
 .Lround_loop:
-	AES_ROUND(TABLE, SA,SB,SC,SD, TA, TMPPTR)
+	AES_ROUND(TABLE, SA,SB,SC,SD, TA, TMP)
 	xorl	(KEY), TA
 
-	AES_ROUND(TABLE, SB,SC,SD,SA, TB, TMPPTR)
+	AES_ROUND(TABLE, SB,SC,SD,SA, TB, TMP)
 	xorl	4(KEY),TB
 
-	AES_ROUND(TABLE, SC,SD,SA,SB, TC, TMPPTR)
+	AES_ROUND(TABLE, SC,SD,SA,SB, TC, TMP)
 	xorl	8(KEY),TC
 
-	AES_ROUND(TABLE, SD,SA,SB,SC, TD, TMPPTR)
+	AES_ROUND(TABLE, SD,SA,SB,SC, TD, TMP)
 	xorl	12(KEY),TD
 
-	AES_ROUND(TABLE, TA,TB,TC,TD, SA, TMPPTR)
+	AES_ROUND(TABLE, TA,TB,TC,TD, SA, TMP)
 	xorl	16(KEY), SA
 
-	AES_ROUND(TABLE, TB,TC,TD,TA, SB, TMPPTR)
+	AES_ROUND(TABLE, TB,TC,TD,TA, SB, TMP)
 	xorl	20(KEY),SB
 
-	AES_ROUND(TABLE, TC,TD,TA,TB, SC, TMPPTR)
+	AES_ROUND(TABLE, TC,TD,TA,TB, SC, TMP)
 	xorl	24(KEY),SC
 
-	AES_ROUND(TABLE, TD,TA,TB,TC, SD, TMPPTR)
+	AES_ROUND(TABLE, TD,TA,TB,TC, SD, TMP)
 	xorl	28(KEY),SD
 	
-	addl	$32,KEY	C  point to next key
+	add	$32,KEY	C  advance past the two subkeys used above
@@ -127,7 +125,7 @@ PROLOGUE(_nettle_aes_encrypt)
 	C S-box substitution
 	mov	$3, COUNT
 .Lsubst:
-	AES_SUBST_BYTE(TA,TB,TC,TD, TABLE, TMPPTR)
+	AES_SUBST_BYTE(TA,TB,TC,TD, TABLE, TMP)
 
 	decl	COUNT
 	jnz	.Lsubst
@@ -135,7 +133,7 @@ PROLOGUE(_nettle_aes_encrypt)
 	C Add last subkey, and store encrypted data
 	AES_STORE(TA,TB,TC,TD, KEY, DST)
 	
-	addl	$16, DST
+	add	$16, DST
 	decl	FRAME_COUNT
 
 	jnz	.Lblock_loop
diff --git a/x86_64/aes.m4 b/x86_64/aes.m4
index f9a85de56ac0ceb053c3342cde130d6b5fb2003b..8edf6aa6b2a0f9cf0a1af2dfb2883b91c433eacf 100644
--- a/x86_64/aes.m4
+++ b/x86_64/aes.m4
@@ -70,34 +70,38 @@ define(<AES_STORE>, <
 dnl AES_ROUND(table,a,b,c,d,out,ptr)
 dnl Computes one word of the AES round. Leaves result in $6.
+dnl Table 0 is indexed by the low byte of a, table 1 by bits 8-15 of b,
+dnl table 2 by bits 16-23 of c, and table 3 by bits 24-31 of d.
+dnl ptr is overwritten; it serves as the table index register.
 define(<AES_ROUND>, <
-	movzbl	LREG($2), $7
+	movzb	LREG($2), $7
 	movl	AES_TABLE0 ($1, $7, 4),$6
 	movl	$3, XREG($7)
-	shrl	<$>8,$7
-	andl	<$>0xff,$7
+	shr	<$>8,$7
+	and	<$>0xff,$7
 	xorl	AES_TABLE1 ($1, $7, 4),$6
 	movl	$4,XREG($7)
-	shrl	<$>16,$7
-	andl	<$>0xff,$7
+	shr	<$>16,$7
+	and	<$>0xff,$7
 	xorl	AES_TABLE2 ($1, $7, 4),$6
 	movl	$5,XREG($7)
+	shr	<$>24,$7
 	xorl	AES_TABLE3 ($1, $7, 4),$6>)dnl
 
-dnl AES_FINAL_ROUND(a, b, c, d, table out, tmp)
-dnl Computes one word of the final round. Leaves result in %edi.
+dnl AES_FINAL_ROUND(a, b, c, d, table, out, tmp)
+dnl Computes one word of the final round. Leaves result in $6.
 dnl Note that we have to quote $ in constants.
 define(<AES_FINAL_ROUND>, <
-	movzb	LREG($1),$6
-	movzbl	($5, $6), $6
-	movl	$2,$7
-	andl	<$>0x0000ff00,$7
-	orl	$7, $6
-	movl	$3,$7
-	andl	<$>0x00ff0000,$7
-	orl	$7, $6
-	movl	$4,$7
-	andl	<$>0xff000000,$7
-	orl	$7, $6
+	movzb	LREG($1),$7
+	movzbl	($5, $7), $6
+	movl	$2,XREG($7)
+	andl	<$>0x0000ff00,XREG($7)
+	orl	XREG($7), $6
+	movl	$3,XREG($7)
+	andl	<$>0x00ff0000,XREG($7)
+	orl	XREG($7), $6
+	movl	$4,XREG($7)
+	andl	<$>0xff000000,XREG($7)
+	orl	XREG($7), $6
 	roll	<$>8, $6>)dnl
 
 dnl AES_SUBST_BYTE(A, B, C, D, table, tmp)
@@ -110,14 +114,14 @@ define(<AES_SUBST_BYTE>, <
 	movb	($5, $6),LREG($1)
 	roll	<$>8,$1
 
-	movzbl  LREG($2),$6
+	movzb  LREG($2),$6
 	movb	($5, $6),LREG($2)
 	roll	<$>8,$2
 
-	movzbl  LREG($3),$6
+	movzb  LREG($3),$6
 	movb	($5, $6),LREG($3)
 	roll	<$>8,$3
 
-	movzbl  LREG($4),$6
+	movzb  LREG($4),$6
 	movb	($5, $6),LREG($4)
 	roll	<$>8,$4>)dnl