diff --git a/x86_64/aes.m4 b/x86_64/aes.m4 index 9f251c50d2e8ba8ed609e5d752d7a126b8815900..d0f0be5733f58dfa173bb6705b77766315b46a72 100644 --- a/x86_64/aes.m4 +++ b/x86_64/aes.m4 @@ -24,21 +24,6 @@ define(<HREG>,<ifelse( $1, %edx, %dh, error)>) -dnl MOVE_HREG(src, dst) -define(<MOVE_HREG>, <ifelse( - $1, %eax, <movzb %ah, $2 - >, - $1, %ebx, <movzb %bh, $2 - >, - $1, %ecx, <movzb %ch, $2 - >, - $1, %edx, <movzb %dh, $2 - >, - <movl $1, $2 - shr <$>8, $2 - and <$>0xff, $2 - >)>) - define(<XREG>,<ifelse( $1, %rax, %eax, $1, %rbx, %ebx, @@ -79,10 +64,10 @@ dnl and stores the result in the area pointed to by dst. dnl Note that x86 allows unaligned accesses. dnl Would it be preferable to interleave the loads and stores? define(<AES_STORE>, < - xorl 16($5),$1 - xorl 20($5),$2 - xorl 24($5),$3 - xorl 28($5),$4 + xorl ($5),$1 + xorl 4($5),$2 + xorl 8($5),$3 + xorl 12($5),$4 movl $1,($6) movl $2,4($6) @@ -94,7 +79,7 @@ dnl Computes one word of the AES round. Leaves result in $6. define(<AES_ROUND>, < movzb LREG($2), $7 movl AES_TABLE0 ($1, $7, 4),$6 - MOVE_HREG($3, XREG($7)) + movzb HREG($3), XREG($7) xorl AES_TABLE1 ($1, $7, 4),$6 movl $4,XREG($7) shr <$>16,$7 @@ -105,7 +90,7 @@ define(<AES_ROUND>, < xorl AES_TABLE3 ($1, $7, 4),$6>)dnl dnl AES_FINAL_ROUND(a, b, c, d, table, out, tmp) -dnl Computes one word of the final round. Leaves result in %edi. +dnl Computes one word of the final round. Leaves result in $6. dnl Note that we have to quote $ in constants. define(<AES_FINAL_ROUND>, < movzb LREG($1),$7