diff --git a/sparc/aes.asm b/sparc/aes.asm index c6282cf067d1b000eacc03e30e3de98387970a40..e0cd39ffd9ff7d3046496a85e53f14363648d0aa 100644 --- a/sparc/aes.asm +++ b/sparc/aes.asm @@ -23,7 +23,7 @@ define(src, %i4) define(wtxt, %l0) define(tmp, %l1) define(diff, %l2) -define(nround, %l3) +define(nrounds, %l3) ! Loop variables define(round, %l4) ! Should perhaps be 16 * round @@ -46,10 +46,14 @@ _aes_crypt: ! wtxt add %fp, -24, wtxt add %fp, -40, tmp + + ld [ctx + AES_NROUNDS], nrounds + ! Compute xor, so that we can swap efficiently. xor wtxt, tmp, diff - - ld [ctx + AES_NROUNDS], nround + + ! The loop variable will be multiplied by 16. + sll nrounds, 4, nrounds .Lblock_loop: ! Read src, and add initial subkey @@ -102,12 +106,11 @@ _aes_crypt: ! bleu .Lsource_loop ! add %o3, 4, %o3 - mov 1, round + mov 16, round - ! 4*i: i - ! This instruction copied to the delay slot of the branch here. - mov 0, i .Lround_loop: + ! 4*i + mov 0, i add T, AES_SIDX3, idx .Linner_loop: ! The comments mark which j in T->table[j][ Bj(wtxt[IDXi(i)]) ] @@ -159,8 +162,7 @@ _aes_crypt: add idx, 4, idx ! Fetch roundkey - sll round, 4, t1 - add t1, ctx, t1 + add round, ctx, t1 ld [t1+i], t1 xor t0, t3, t0 ! 0, 1, 2, 3 @@ -173,35 +175,18 @@ _aes_crypt: bleu .Linner_loop add i, 4, i -! sll round, 4, %g2 -! add %g2, ctx, %o0 -! mov 0, i -! -! .Lroundkey_loop: -! sll i, 2, %g2 -! ld [%o0], %o5 -! add i, 1, i -! ld [tmp+%g2], %g3 -! cmp i, 3 -! xor %g3, %o5, %g3 -! ! st %g3, [wtxt+%g2] -! st %g3, [tmp+%g2] -! bleu .Lroundkey_loop -! add %o0, 4, %o0 -! ! switch roles for tmp and wtxt xor wtxt, diff, wtxt - xor tmp, diff, tmp - add round, 1, round - cmp round, nround + add round, 16, round + cmp round, nrounds blu .Lround_loop - mov 0, i + xor tmp, diff, tmp - sll round, 4, %g2 + ! sll round, 4, %g2 ! final round - add %g2, ctx, %o7 + add round, ctx, %o7 mov 0, i add T, 288, %g4 .Lfinal_loop: