diff --git a/sparc/aes-encrypt-internal.asm b/sparc/aes-encrypt-internal.asm index e5951ad19857d7b18ef9669c70029beb97d3c8b3..20e2610168cc55ae55cd68de0678fc3d8e9895f9 100644 --- a/sparc/aes-encrypt-internal.asm +++ b/sparc/aes-encrypt-internal.asm @@ -19,14 +19,14 @@ C the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, C MA 02111-1307, USA. -C Arguments +C Arguments define(<CTX>, <%i0>) define(<T>, <%i1>) define(<LENGTH>,<%i2>) define(<DST>, <%i3>) define(<SRC>, <%i4>) -C AES state, two copies for unrolling +C AES state, two copies for unrolling define(<W0>, <%l0>) define(<W1>, <%l1>) @@ -38,10 +38,13 @@ define(<X1>, <%l5>) define(<X2>, <%l6>) define(<X3>, <%l7>) -C %o0 and %01 are TMP1 and TMP2 +C %o0-%o3 are used for loop invariants T0-T3 define(<KEY>, <%o4>) define(<ROUND>, <%o5>) +C %g1 and %g2 are TMP1 and TMP2 + + C Registers %g1-%g3 and %o0 - %o5 are free to use. C The sparc32 stack frame looks like @@ -69,7 +72,12 @@ PROLOGUE(_nettle_aes_encrypt) save %sp, -FRAME_SIZE, %sp cmp LENGTH, 0 be .Lend - nop + + C Loop invariants + add T, AES_TABLE0, T0 + add T, AES_TABLE1, T1 + add T, AES_TABLE2, T2 + add T, AES_TABLE3, T3 .Lblock_loop: C Read src, and add initial subkey @@ -79,16 +87,16 @@ PROLOGUE(_nettle_aes_encrypt) AES_LOAD(2, SRC, KEY, W2) AES_LOAD(3, SRC, KEY, W3) + C Must be even, and includes the final round + ld [AES_NROUNDS + CTX], ROUND add SRC, 16, SRC add KEY, 16, KEY - C Must be even, and includes the final round - ld [AES_NROUNDS + CTX], ROUND - nop srl ROUND, 1, ROUND C Last two rounds handled specially sub ROUND, 1, ROUND .Lround_loop: + C The AES_ROUND macro uses T0,... T3 C Transform W -> X AES_ROUND(0, T, W0, W1, W2, W3, KEY, X0) AES_ROUND(1, T, W1, W2, W3, W0, KEY, X1) diff --git a/sparc/machine.m4 b/sparc/machine.m4 index eb9e334b40ef4ec927f5408f628865d89598823d..d9e05659bf6118a599f6bd45dded87e0e4577252 100644 --- a/sparc/machine.m4 +++ b/sparc/machine.m4 @@ -5,8 +5,14 @@ C understand arithmetic expressions? 
Maybe we don't need to use m4 C eval. C Used as temporaries by the AES macros -define(<TMP1>, <%o0>) -define(<TMP2>, <%o1>) +define(<TMP1>, <%g1>) +define(<TMP2>, <%g2>) + +C Loop invariants used by AES_ROUND +define(<T0>, <%o0>) +define(<T1>, <%o1>) +define(<T2>, <%o2>) +define(<T3>, <%o3>) C AES_LOAD(i, src, key, res) define(<AES_LOAD>, < @@ -36,23 +42,19 @@ define(<AES_ROUND>, < srl $4, 6, TMP2 C 1 sll TMP1, 2, TMP1 C 0 and TMP2, 0x3fc, TMP2 C 1 - add TMP1, AES_TABLE0, TMP1 C 0 - add TMP2, AES_TABLE1, TMP2 C 1 - ld [$2 + TMP1], $8 C 0 E0 + ld [T0 + TMP1], $8 C 0 E0 srl $5, 14, TMP1 C 2 - ld [$2 + TMP2], TMP2 C 1 + ld [T1 + TMP2], TMP2 C 1 and TMP1, 0x3fc, TMP1 C 2 xor $8, TMP2, $8 C 1 E1 srl $6, 22, TMP2 C 3 - add TMP1, AES_TABLE2, TMP1 C 2 + ld [T2 + TMP1], TMP1 C 2 and TMP2, 0x3fc, TMP2 C 3 - ld [$2 + TMP1], TMP1 C 2 - add TMP2, AES_TABLE3, TMP2 C 3 xor $8, TMP1, $8 C 2 E2 ld [$7 + eval(4*$1)], TMP1 C 4 - ld [$2 + TMP2], TMP2 C 3 - xor $8, TMP1, $8 C 4 - xor $8, TMP2, $8 C 3 + ld [T3 + TMP2], TMP2 C 3 + xor $8, TMP1, $8 C 4 E4 + xor $8, TMP2, $8 C 3 E3 >)dnl C AES_FINAL_ROUND(i, T, a, b, c, d, key, dst)