diff --git a/x86_64/aes-encrypt-internal.asm b/x86_64/aes-encrypt-internal.asm
index d4d0ecd1ec2fbc43f6f689e7a5990a4e95277e48..19630ca9afef47a43cf5a1e532412e54bdda3984 100644
--- a/x86_64/aes-encrypt-internal.asm
+++ b/x86_64/aes-encrypt-internal.asm
@@ -18,9 +18,7 @@ C along with the nettle library; see the file COPYING.LIB. If not, write to
 C the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 C MA 02111-1307, USA.
 
-C Use same macros as for plain x86. FIXME: AES_SUBST_BYTE uses
-C hardcoded registers.
-include_src(<x86/aes.m4>)
+include_src(<x86_64/aes.m4>)
 
 C Register usage:
 
@@ -47,9 +45,9 @@ define(<COUNT>, <%r15d>)
 
 C Put the outer loop counter on the stack, and reuse the LENGTH
 C register as a temporary.
-define(<FRAME_COUNT>, <(%esp)>)
-define(<TMP>,<%edx>)
-define(<TMPPTR>,<%rdx>)
+define(<FRAME_COUNT>, <(%rsp)>)
+define(<TMP>,<%rdx>)
+
 	.file "aes-encrypt-internal.asm"
 
 C _aes_encrypt(struct aes_context *ctx,
@@ -76,41 +74,41 @@ PROLOGUE(_nettle_aes_encrypt)
 	shrl	$4, LENGTH
 	movl	LENGTH, FRAME_COUNT
 .Lblock_loop:
-	movl	CTX,KEY
+	mov	CTX,KEY
 	AES_LOAD(SA, SB, SC, SD, SRC, KEY)
-	addl	$16, SRC	C Increment src pointer
+	add	$16, SRC	C Increment src pointer
 
 	C get number of rounds to do from ctx struct
 	movl	AES_NROUNDS (CTX), COUNT
 	shrl	$1, COUNT
 	subl	$1, COUNT
 
-	addl	$16,KEY	C point to next key
+	add	$16,KEY	C point to next key
 
 	ALIGN(4)
 .Lround_loop:
-	AES_ROUND(TABLE, SA,SB,SC,SD, TA, TMPPTR)
+	AES_ROUND(TABLE, SA,SB,SC,SD, TA, TMP)
 	xorl	(KEY), TA
 
-	AES_ROUND(TABLE, SB,SC,SD,SA, TB, TMPPTR)
+	AES_ROUND(TABLE, SB,SC,SD,SA, TB, TMP)
 	xorl	4(KEY),TB
 
-	AES_ROUND(TABLE, SC,SD,SA,SB, TC, TMPPTR)
+	AES_ROUND(TABLE, SC,SD,SA,SB, TC, TMP)
 	xorl	8(KEY),TC
 
-	AES_ROUND(TABLE, SD,SA,SB,SC, TD, TMPPTR)
+	AES_ROUND(TABLE, SD,SA,SB,SC, TD, TMP)
 	xorl	12(KEY),TD
 
-	AES_ROUND(TABLE, TA,TB,TC,TD, SA, TMPPTR)
+	AES_ROUND(TABLE, TA,TB,TC,TD, SA, TMP)
 	xorl	16(KEY), SA
 
-	AES_ROUND(TABLE, TB,TC,TD,TA, SB, TMPPTR)
+	AES_ROUND(TABLE, TB,TC,TD,TA, SB, TMP)
 	xorl	20(KEY),SB
 
-	AES_ROUND(TABLE, TC,TD,TA,TB, SC, TMPPTR)
+	AES_ROUND(TABLE, TC,TD,TA,TB, SC, TMP)
 	xorl	24(KEY),SC
 
-	AES_ROUND(TABLE, TD,TA,TB,TC, SD, TMPPTR)
+	AES_ROUND(TABLE, TD,TA,TB,TC, SD, TMP)
 	xorl	28(KEY),SD
 
 	addl	$32,KEY	C point to next key
@@ -127,7 +125,7 @@ PROLOGUE(_nettle_aes_encrypt)
 	C S-box substitution
 	mov	$3, COUNT
 .Lsubst:
-	AES_SUBST_BYTE(TA,TB,TC,TD, TABLE, TMPPTR)
+	AES_SUBST_BYTE(TA,TB,TC,TD, TABLE, TMP)
 	decl	COUNT
 	jnz	.Lsubst
 
@@ -135,7 +133,7 @@ PROLOGUE(_nettle_aes_encrypt)
 
 	C Add last subkey, and store encrypted data
 	AES_STORE(TA,TB,TC,TD, KEY, DST)
-	addl	$16, DST
+	add	$16, DST
 	decl	FRAME_COUNT
 
 	jnz	.Lblock_loop
diff --git a/x86_64/aes.m4 b/x86_64/aes.m4
index f9a85de56ac0ceb053c3342cde130d6b5fb2003b..8edf6aa6b2a0f9cf0a1af2dfb2883b91c433eacf 100644
--- a/x86_64/aes.m4
+++ b/x86_64/aes.m4
@@ -70,34 +70,34 @@ define(<AES_STORE>, <
 dnl AES_ROUND(table,a,b,c,d,out,ptr)
 dnl Computes one word of the AES round. Leaves result in $6.
 define(<AES_ROUND>, <
-	movzbl	LREG($2), $7
+	movzb	LREG($2), $7
 	movl	AES_TABLE0 ($1, $7, 4),$6
 	movl	$3, XREG($7)
-	shrl	<$>8,$7
-	andl	<$>0xff,$7
+	shr	<$>8,$7
+	and	<$>0xff,$7
 	xorl	AES_TABLE1 ($1, $7, 4),$6
 	movl	$4,XREG($7)
-	shrl	<$>16,$7
-	andl	<$>0xff,$7
+	shr	<$>16,$7
+	and	<$>0xff,$7
 	xorl	AES_TABLE2 ($1, $7, 4),$6
 	movl	$5,XREG($7)
 	xorl	AES_TABLE3 ($1, $7, 4),$6>)dnl
 
-dnl AES_FINAL_ROUND(a, b, c, d, table out, tmp)
+dnl AES_FINAL_ROUND(a, b, c, d, table, out, tmp)
 dnl Computes one word of the final round. Leaves result in %edi.
 dnl Note that we have to quote $ in constants.
 define(<AES_FINAL_ROUND>, <
-	movzb	LREG($1),$6
-	movzbl	($5, $6), $6
-	movl	$2,$7
-	andl	<$>0x0000ff00,$7
-	orl	$7, $6
-	movl	$3,$7
-	andl	<$>0x00ff0000,$7
-	orl	$7, $6
-	movl	$4,$7
-	andl	<$>0xff000000,$7
-	orl	$7, $6
+	movzb	LREG($1),$7
+	movzbl	($5, $7), $6
+	movl	$2,XREG($7)
+	andl	<$>0x0000ff00,XREG($7)
+	orl	XREG($7), $6
+	movl	$3,XREG($7)
+	andl	<$>0x00ff0000,XREG($7)
+	orl	XREG($7), $6
+	movl	$4,XREG($7)
+	andl	<$>0xff000000,XREG($7)
+	orl	XREG($7), $6
 	roll	<$>8, $6>)dnl
 
 dnl AES_SUBST_BYTE(A, B, C, D, table, tmp)
@@ -110,14 +110,14 @@ define(<AES_SUBST_BYTE>, <
 	movb	($5, $6),LREG($1)
 	roll	<$>8,$1
 
-	movzbl	LREG($2),$6
+	movzb	LREG($2),$6
 	movb	($5, $6),LREG($2)
 	roll	<$>8,$2
 
-	movzbl	LREG($3),$6
+	movzb	LREG($3),$6
 	movb	($5, $6),LREG($3)
 	roll	<$>8,$3
 
-	movzbl	LREG($4),$6
+	movzb	LREG($4),$6
 	movb	($5, $6),LREG($4)
 	roll	<$>8,$4>)dnl
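For reference, the round-loop call AES_ROUND(TABLE, SA,SB,SC,SD, TA, TMP) now expands roughly as sketched below once TMP is bound to the 64-bit %rdx (so XREG(TMP) is %edx). This is an illustrative sketch only: SA..SD, TA, TABLE and the LREG()/AES_TABLEn helpers are defined outside the hunks above and are kept symbolic here, so it is not literal assembler output.

	C Illustrative expansion, not assembler output
	movzb	LREG(SA), %rdx			C low byte of SA, zero-extended into %rdx
	movl	AES_TABLE0 (TABLE, %rdx, 4), TA	C %rdx serves directly as the index register
	movl	SB, %edx			C a 32-bit write to %edx clears the upper half of %rdx
	shr	$8, %rdx			C 64-bit shift and mask, hence the unsuffixed
	and	$0xff, %rdx			C shr/and instead of the old shrl/andl
	xorl	AES_TABLE1 (TABLE, %rdx, 4), TA
	C ... the AES_TABLE2 and AES_TABLE3 lookups follow the same pattern ...

This is also why the macro switches from movzbl/shrl/andl to the unsuffixed movzb/shr/and: the l-suffixed forms only accept 32-bit operands, while the tmp argument is now the 64-bit register itself, with XREG() providing its 32-bit alias where a 32-bit move is wanted.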