diff --git a/x86/machine.m4 b/x86/machine.m4
index 3296482c6c70b59a00d896d3f2805a1a083ae872..7f12e383e272feba24a72602512be31d5b1d92ab 100644
--- a/x86/machine.m4
+++ b/x86/machine.m4
@@ -1,96 +1,96 @@
-dnl AES_LOAD(key, src)
-dnl Loads the next block of data from src, and add the subkey pointed
-dnl to by key.
+dnl AES_LOAD(a, b, c, d, src, key)
+dnl Loads the next block of data from src into a, b, c, d, and adds
+dnl (xors) the subkey pointed to by key.
 dnl Note that x86 allows unaligned accesses.
 dnl Would it be preferable to interleave the loads and stores?
 define(<AES_LOAD>, <
-	movl	($2),%eax
-	movl	4($2),%ebx
-	movl	8($2),%ecx
-	movl	12($2),%edx
+	movl	($5),$1
+	movl	4($5),$2
+	movl	8($5),$3
+	movl	12($5),$4
 
-	xorl	($1),%eax
-	xorl	4($1),%ebx
-	xorl	8($1),%ecx
-	xorl	12($1),%edx>)dnl
+	xorl	($6),$1
+	xorl	4($6),$2
+	xorl	8($6),$3
+	xorl	12($6),$4>)dnl
 
-dnl AES_STORE(key, dst)
-dnl Adds the subkey pointed to by %esi to %eax-%edx,
-dnl and stores the result in the area pointed to by %edi.
+dnl AES_STORE(a, b, c, d, key, dst)
+dnl Adds (xors) the subkey pointed to by key to a, b, c, d,
+dnl and stores the result in the area pointed to by dst.
 dnl Note that x86 allows unaligned accesses.
 dnl Would it be preferable to interleave the loads and stores?
 define(<AES_STORE>, <
-	xorl	($1),%eax
-	xorl	4($1),%ebx
-	xorl	8($1),%ecx
-	xorl	12($1),%edx
+	xorl	($5),$1
+	xorl	4($5),$2
+	xorl	8($5),$3
+	xorl	12($5),$4
 
-	movl	%eax,($2)
-	movl	%ebx,4($2)
-	movl	%ecx,8($2)
-	movl	%edx,12($2)>)dnl
+	movl	$1,($6)
+	movl	$2,4($6)
+	movl	$3,8($6)
+	movl	$4,12($6)>)dnl
 
-dnl AES_ROUND(table,a,b,c,d)
-dnl Computes one word of the AES round. Leaves result in %edi.
+dnl AES_ROUND(table,a,b,c,d,out,tmp)
+dnl Computes one word of the AES round. Leaves result in out; clobbers tmp.
 define(<AES_ROUND>, <
-	movl	%e<>$2<>x, %esi
-	andl	<$>0xff, %esi
-	shll	<$>2,%esi		C index in table
-	movl	AES_TABLE0 + $1 (%esi),%edi
-	movl	%e<>$3<>x, %esi
-	shrl	<$>6,%esi
-	andl	<$>0x000003fc,%esi	C clear all but offset bytes
-	xorl	AES_TABLE1 + $1 (%esi),%edi
-	movl	%e<>$4<>x,%esi		C third one
-	shrl	<$>14,%esi
-	andl	<$>0x000003fc,%esi
-	xorl	AES_TABLE2 + $1 (%esi),%edi
-	movl	%e<>$5<>x,%esi		C fourth one
-	shrl	<$>22,%esi
-	andl	<$>0x000003fc,%esi
-	xorl	AES_TABLE3 + $1 (%esi),%edi>)dnl
+	movl	$2, $7
+	andl	<$>0xff, $7	C tmp = byte 0 of a
+	movl	AES_TABLE0 ($1, $7,4),$6	C table is the base register, tmp the index scaled by 4
+	movl	$3, $7
+	shrl	<$>6,$7
+	andl	<$>0x000003fc,$7	C tmp = 4 * byte 1 of b, a ready-made table offset
+	xorl	AES_TABLE1 ($1, $7),$6
+	movl	$4,$7		C third word: tmp = 4 * byte 2 of c after shift and mask
+	shrl	<$>14,$7
+	andl	<$>0x000003fc,$7
+	xorl	AES_TABLE2 ($1, $7),$6
+	movl	$5,$7		C fourth word: tmp = 4 * byte 3 of d after shift and mask
+	shrl	<$>22,$7
+	andl	<$>0x000003fc,$7
+	xorl	AES_TABLE3 ($1, $7),$6>)dnl
 
-dnl AES_FINAL_ROUND(a, b, c, d)
-dnl Computes one word of the final round. Leaves result in %edi.
+dnl AES_FINAL_ROUND(a, b, c, d, out, tmp)
+dnl Computes one word of the final round. Leaves result in out; clobbers tmp.
 dnl Note that we have to quote $ in constants.
 define(<AES_FINAL_ROUND>, <
 	C FIXME: Perform substitution on least significant byte here,
 	C to save work later.
-	movl	%e<>$1<>x,%edi
-	andl	<$>0x000000ff,%edi
-	movl	%e<>$2<>x,%ebp
-	andl	<$>0x0000ff00,%ebp
-	orl	%ebp,%edi
-	movl	%e<>$3<>x,%ebp
-	andl	<$>0x00ff0000,%ebp
-	orl	%ebp,%edi
-	movl	%e<>$4<>x,%ebp
-	andl	<$>0xff000000,%ebp
-	orl	%ebp,%edi>)dnl
+	movl	$1,$5
+	andl	<$>0x000000ff,$5	C out = byte 0 of a
+	movl	$2,$6
+	andl	<$>0x0000ff00,$6	C tmp = byte 1 of b, kept in place
+	orl	$6, $5
+	movl	$3,$6
+	andl	<$>0x00ff0000,$6	C tmp = byte 2 of c, kept in place
+	orl	$6, $5
+	movl	$4,$6
+	andl	<$>0xff000000,$6	C tmp = byte 3 of d, kept in place
+	orl	$6, $5>)dnl
 
-dnl AES_SUBST_BYTE(table)
+dnl AES_SUBST_BYTE(table, tmp)
 dnl Substitutes the least significant byte of
 dnl each of eax, ebx, ecx and edx, and also rotates
-dnl the words one byte to the left.
+dnl the words one byte to the left. Clobbers tmp.
+dnl FIXME: AES_SBOX is zero. Any win by deleting the offset?
 define(<AES_SUBST_BYTE>, <
-	movl	%eax,%ebp
-	andl	<$>0x000000ff,%ebp
-	movb	AES_SBOX + $1 (%ebp),%al
+	movl	%eax,$2
+	andl	<$>0x000000ff,$2	C tmp = low byte of eax
+	movb	AES_SBOX ($1, $2),%al	C replace al with the byte at offset tmp from table
 	roll	<$>8,%eax
-	movl	%ebx,%ebp
-	andl	<$>0x000000ff,%ebp
-	movb	AES_SBOX + $1 (%ebp),%bl
+	movl	%ebx,$2
+	andl	<$>0x000000ff,$2	C tmp = low byte of ebx
+	movb	AES_SBOX ($1, $2),%bl	C same lookup for bl
 	roll	<$>8,%ebx
-	movl	%ecx,%ebp
-	andl	<$>0x000000ff,%ebp
-	movb	AES_SBOX + $1 (%ebp),%cl
+	movl	%ecx,$2
+	andl	<$>0x000000ff,$2	C tmp = low byte of ecx
+	movb	AES_SBOX ($1, $2),%cl	C same lookup for cl
 	roll	<$>8,%ecx
-	movl	%edx,%ebp
-	andl	<$>0x000000ff,%ebp
-	movb	AES_SBOX + $1 (%ebp),%dl
+	movl	%edx,$2
+	andl	<$>0x000000ff,$2	C tmp = low byte of edx
+	movb	AES_SBOX ($1, $2),%dl	C same lookup for dl
 	roll	<$>8,%edx>)dnl
 
 C OFFSET(i)