diff --git a/x86/aes-decrypt-internal.asm b/x86/aes-decrypt-internal.asm
index f97fa093498836d3064a6fbc5b77d6d3d3ab50a6..ff94fa8f993ca0db48474a2dcfb954ba6e031609 100644
--- a/x86/aes-decrypt-internal.asm
+++ b/x86/aes-decrypt-internal.asm
@@ -139,7 +139,7 @@ PROLOGUE(_nettle_aes_decrypt)
 	C Inverse S-box substitution
 	mov	$4,TMP
 .Lsubst:
-	AES_SUBST_BYTE(T, KEY)
+	AES_SUBST_BYTE(SA,SB,SC,SD, T, KEY)
 
 	decl	TMP
 	jnz	.Lsubst
diff --git a/x86/aes-encrypt-internal.asm b/x86/aes-encrypt-internal.asm
index 8fe041f0f393e27d8454fbb53c828698f90b0169..dc573948e511b53ac4adfbbb375df2b60ff0eb7e 100644
--- a/x86/aes-encrypt-internal.asm
+++ b/x86/aes-encrypt-internal.asm
@@ -139,7 +139,7 @@ PROLOGUE(_nettle_aes_encrypt)
 	C S-box substitution
 	mov	$4,TMP
 .Lsubst:
-	AES_SUBST_BYTE(T, KEY)
+	AES_SUBST_BYTE(SA,SB,SC,SD, T, KEY)
 
 	decl	TMP
 	jnz	.Lsubst
diff --git a/x86/aes.m4 b/x86/aes.m4
index 3c770a737c72b6f48c44eba065051715f0755636..97cb03af40dcd9b0e991efdd5020b896e4784c52 100644
--- a/x86/aes.m4
+++ b/x86/aes.m4
@@ -67,28 +67,49 @@ define(<AES_FINAL_ROUND>, <
 	andl	<$>0xff000000,$6
 	orl	$6, $5>)dnl
 
-dnl AES_SUBST_BYTE(table, tmp)
+dnl BYTEREG(reg) gives the 8-bit register corresponding to the given 32-bit register.
+dnl Used in AES_SUBST_BYTE below, and by both the x86 and the x86_64 assembler.
+define(<BYTEREG>,<ifelse(
+	$1, %eax, %al,
+	$1, %ebx, %bl,
+	$1, %ecx, %cl,
+	$1, %edx, %dl,
+	dnl The rest are x86_64 only
+	$1, %esi, %sil,
+	$1, %edi, %dil,
+	$1, %ebp, %bpl,
+	$1, %esp, %spl,
+	$1, %r8d, %r8b,
+	$1, %r9d, %r9b,
+	$1, %r10d, %r10b,
+	$1, %r11d, %r11b,
+	$1, %r12d, %r12b,
+	$1, %r13d, %r13b,
+	$1, %r14d, %r14b,
+	$1, %r15d, %r15b)>)dnl
+
+dnl AES_SUBST_BYTE(A, B, C, D, table, tmp)
 dnl Substitutes the least significant byte of
 dnl each of eax, ebx, ecx and edx, and also rotates
 dnl the words one byte to the left.
-dnl FIXME: AES_SBOX is zero. Any win by deleting the offset?
+dnl Uses the fact that AES_SBOX == 0.
 define(<AES_SUBST_BYTE>, <
-	movl	%eax,$2
-	andl	<$>0x000000ff,$2
-	movb	AES_SBOX ($1, $2),%al
-	roll	<$>8,%eax
+	movl	$1,$6
+	andl	<$>0x000000ff,$6
+	movb	($5, $6),BYTEREG($1)
+	roll	<$>8,$1
 
-	movl	%ebx,$2
-	andl	<$>0x000000ff,$2
-	movb	AES_SBOX ($1, $2),%bl
-	roll	<$>8,%ebx
+	movl	$2,$6
+	andl	<$>0x000000ff,$6
+	movb	($5, $6),BYTEREG($2)
+	roll	<$>8,$2
 
-	movl	%ecx,$2
-	andl	<$>0x000000ff,$2
-	movb	AES_SBOX ($1, $2),%cl
-	roll	<$>8,%ecx
+	movl	$3,$6
+	andl	<$>0x000000ff,$6
+	movb	($5, $6),BYTEREG($3)
+	roll	<$>8,$3
 
-	movl	%edx,$2
-	andl	<$>0x000000ff,$2
-	movb	AES_SBOX ($1, $2),%dl
-	roll	<$>8,%edx>)dnl
+	movl	$4,$6
+	andl	<$>0x000000ff,$6
+	movb	($5, $6),BYTEREG($4)
+	roll	<$>8,$4>)dnl
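
For illustration, here is a sketch of what the first quarter of the rewritten AES_SUBST_BYTE expands to after m4 processing. The concrete register assignments are assumptions made for this example only (SA as %eax, the table pointer T as %edi, and KEY reused as the scratch register in %ebp); with those, BYTEREG(SA) resolves to %al:

	C Hypothetical expansion of the first block of
	C AES_SUBST_BYTE(SA,SB,SC,SD, T, KEY),
	C assuming SA = %eax, T = %edi, KEY = %ebp.
	movl	%eax,%ebp		C copy the state word to the scratch register
	andl	$0x000000ff,%ebp	C keep only its least significant byte
	movb	(%edi,%ebp),%al		C table lookup; no AES_SBOX offset, since AES_SBOX == 0
	roll	$8,%eax			C rotate so the next byte becomes the LSB

The same four instructions repeat for SB, SC and SD, so each pass through the .Lsubst loop substitutes and rotates one byte of every state word.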