diff --git a/sparc/machine.m4 b/sparc/machine.m4
index 7d22a2d5c5285fdafd02d07d4d626d83237c6f35..615e4f49c9270a450d0bdb508d11ab09f1f2ad06 100644
--- a/sparc/machine.m4
+++ b/sparc/machine.m4
@@ -4,53 +4,100 @@ C FIXME: How much can we rely on the assembler to be able to
 C understand arithmetic expressions? Mayby we don't need to use m4
 C eval.
 
-C AES_LOAD(i, src, key, res, t1, t2)
+C Used as temporaries by the AES macros. The macros below overwrite
+C them freely, so callers must not keep live values in these registers.
+define(<TMP1>, <%o0>)
+define(<TMP2>, <%o1>)
+
+C AES_LOAD(i, src, key, res)
+C Builds a word from the four octets at src + 4*i, least significant
+C octet first, xors in subkey word i and leaves the result in res.
+C Clobbers TMP1 and TMP2.
 define(<AES_LOAD>, <
 	ldub	[$2 + eval(4*$1)], $4
-	ldub	[$2 + eval(4*$1 + 1)], $5
-	ldub	[$2 + eval(4*$1 + 2)], $6
-	sll	$5, 8, $5
+	ldub	[$2 + eval(4*$1 + 1)], TMP1
+	ldub	[$2 + eval(4*$1 + 2)], TMP2
+	sll	TMP1, 8, TMP1
 
-	or	$4, $5, $4	! U
-	ldub	[$2 + eval(4*$1+3)], $5
-	sll	$6, 16, $6
-	or	$4, $6, $4
+	or	$4, TMP1, $4
+	ldub	[$2 + eval(4*$1+3)], TMP1
+	sll	TMP2, 16, TMP2
+	or	$4, TMP2, $4
 
-	sll	$5, 24, $5
-	! Get subkey
-	ld	[$3 + eval(4*$1)], $6
-	or	$4, $5, $4
-	xor	$4, $6, $4>)dnl
+	sll	TMP1, 24, TMP1
+	C Get subkey
+	ld	[$3 + eval(4*$1)], TMP2
+	or	$4, TMP1, $4
+	xor	$4, TMP2, $4>)dnl
 
-C AES_ROUND(i, T, a, b, c, d, key, res, t)
+C AES_ROUND(i, T, a, b, c, d, key, res)
 C Computes one word of the AES round
 C FIXME: Could use registers pointing directly to the four tables
 C FIXME: Needs better instruction scheduling, and perhaps more temporaries
 C Alternatively, we can use a single table and some rotations
+C Clobbers TMP1. res is written before b, c and d are read, so it
+C must not be the same register as any of them.
 define(<AES_ROUND>, <
-	and	$3, 0xff, $9
-	sll	$9, 2, $9
-	add	$9, AES_TABLE0, $9
-	ld	[$2 + $9], $8
-
-	srl	$4, 6, $9
-	and	$9, 0x3fc, $9
-	add	$9, AES_TABLE1, $9
-	ld	[$2 + $9], $9
-	xor	$9, $8
-
-	srl	$5, 14, $9
-	and	$9, 0x3fc, $9
-	add	$9, AES_TABLE2, $9
-	ld	[$2 + $9], $9
-	xor	$9, $8
-
-	srl	$4, 22, $9
-	and	$9, 0x3fc, $9
-	add	$9, AES_TABLE3, $9
-	ld	[$2 + $9], $9
-	xor	$9, $8
-
-	ld	[$7 + eval(4*$1)], $9
-	xor	$9, $8>)dnl
+	and	$3, 0xff, TMP1
+	sll	TMP1, 2, TMP1
+	add	TMP1, AES_TABLE0, TMP1
+	ld	[$2 + TMP1], $8
+
+	C Shifting by 6 and masking with 0x3fc scales octet 1 of b by four
+	srl	$4, 6, TMP1
+	and	TMP1, 0x3fc, TMP1
+	add	TMP1, AES_TABLE1, TMP1
+	ld	[$2 + TMP1], TMP1
+	xor	$8, TMP1, $8
+
+	srl	$5, 14, TMP1
+	and	TMP1, 0x3fc, TMP1
+	add	TMP1, AES_TABLE2, TMP1
+	ld	[$2 + TMP1], TMP1
+	xor	$8, TMP1, $8
+
+	C Octet 3 of d scaled by four
+	srl	$6, 22, TMP1
+	and	TMP1, 0x3fc, TMP1
+	add	TMP1, AES_TABLE3, TMP1
+	ld	[$2 + TMP1], TMP1
+	xor	$8, TMP1, $8
+
+	C Add the round subkey
+	ld	[$7 + eval(4*$1)], TMP1
+	xor	$8, TMP1, $8>)dnl
+
+C AES_FINAL_ROUND(i, T, a, b, c, d, key, dst)
+C Compute one word in the final round function. Output is converted to
+C octets and stored at dst. Relies on AES_SBOX being zero.
+C Clobbers TMP1 and TMP2.
+define(<AES_FINAL_ROUND>, <
+	C Load subkey
+	ld	[$7 + eval(4*$1)], TMP1
+
+	and	$3, 0xff, TMP2
+	ldub	[$2 + TMP2], TMP2
+	xor	TMP1, TMP2, TMP2
+	stb	TMP2, [$8 + eval(4*$1)]
+
+	srl	$4, 8, TMP2
+	and	TMP2, 0xff, TMP2
+	ldub	[$2 + TMP2], TMP2
+	srl	TMP1, 8, TMP1
+	xor	TMP1, TMP2, TMP2
+	stb	TMP2, [$8 + eval(4*$1 + 1)]
+
+	srl	$5, 16, TMP2
+	and	TMP2, 0xff, TMP2
+	ldub	[$2 + TMP2], TMP2
+	srl	TMP1, 8, TMP1
+	xor	TMP1, TMP2, TMP2
+	stb	TMP2, [$8 + eval(4*$1 + 2)]
+
+	C No mask needed since the shift leaves only the top octet
+	srl	$6, 24, TMP2
+	ldub	[$2 + TMP2], TMP2
+	srl	TMP1, 8, TMP1
+	xor	TMP1, TMP2, TMP2
+	stb	TMP2, [$8 + eval(4*$1 + 3)]>)
 