From 4e764ed6fae11a08e95fe7ce6cd163eb670c3abf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se>
Date: Fri, 6 Mar 2009 19:13:07 +0100
Subject: [PATCH] * x86_64/aes.m4 (MOVE_HREG): Deleted, no longer needed.
 (AES_STORE): Reduced offsets. (AES_ROUND): Use HREG directly, not MOVE_HREG.

Rev: nettle/x86_64/aes.m4:1.7
---
 x86_64/aes.m4 | 27 ++++++---------------------
 1 file changed, 6 insertions(+), 21 deletions(-)

diff --git a/x86_64/aes.m4 b/x86_64/aes.m4
index 9f251c50..d0f0be57 100644
--- a/x86_64/aes.m4
+++ b/x86_64/aes.m4
@@ -24,21 +24,6 @@ define(<HREG>,<ifelse(
 	$1, %edx, %dh,
 	error)>)
 
-dnl MOVE_HREG(src, dst)
-define(<MOVE_HREG>, <ifelse(
-	$1, %eax, <movzb	%ah, $2
-	>,
-	$1, %ebx, <movzb	%bh, $2
-	>,
-	$1, %ecx, <movzb	%ch, $2
-	>,
-	$1, %edx, <movzb	%dh, $2
-	>,
-	<movl	$1, $2
-	shr	<$>8, $2
-	and	<$>0xff, $2
-	>)>)
-
 define(<XREG>,<ifelse(
 	$1, %rax, %eax,
 	$1, %rbx, %ebx,
@@ -79,10 +64,10 @@ dnl and stores the result in the area pointed to by dst.
 dnl Note that x86 allows unaligned accesses.
 dnl Would it be preferable to interleave the loads and stores?
 define(<AES_STORE>, <
-	xorl	16($5),$1
-	xorl	20($5),$2
-	xorl	24($5),$3
-	xorl	28($5),$4
+	xorl	($5),$1
+	xorl	4($5),$2
+	xorl	8($5),$3
+	xorl	12($5),$4
 
 	movl	$1,($6)
 	movl	$2,4($6)
@@ -94,7 +79,7 @@ dnl Computes one word of the AES round. Leaves result in $6.
 define(<AES_ROUND>, <
 	movzb	LREG($2), $7
 	movl	AES_TABLE0 ($1, $7, 4),$6
-	MOVE_HREG($3, XREG($7))
+	movzb	HREG($3), XREG($7)
 	xorl	AES_TABLE1 ($1, $7, 4),$6
 	movl	$4,XREG($7)
 	shr	<$>16,$7
@@ -105,7 +90,7 @@ define(<AES_ROUND>, <
 	xorl	AES_TABLE3 ($1, $7, 4),$6>)dnl
 
 dnl AES_FINAL_ROUND(a, b, c, d, table, out, tmp)
-dnl Computes one word of the final round. Leaves result in %edi.
+dnl Computes one word of the final round. Leaves result in $6.
 dnl Note that we have to quote $ in constants.
 define(<AES_FINAL_ROUND>, <
 	movzb	LREG($1),$7
-- 
GitLab