# Patch to sparc/aes.asm, all hunks inside or next to _aes_crypt. Summary:
#  * adds `ld [ctx + AES_NROUNDS], nrounds` right after `add %fp, -40, tmp`;
#  * replaces the .Linner_loop back-branch by four straight-line copies of the
#    loop body (marked "C i = 0" .. "C i = 3"); in every copy the
#    `cmp i, 8` / `bleu .Linner_loop` pair is commented out with `C`, while the
#    `add i, 4, i` that followed the branch is kept;
#  * comments out `add %sp, FRAME_SIZE, %fp` ahead of `ret` / `restore` at .Lend.
# NOTE(review): instructions are commented out rather than deleted, so this
# looks like work in progress - confirm before merging.
# NOTE(review): the line breaks of this patch have been lost in this copy; it
# must be re-wrapped from the original before it can be applied.
diff --git a/sparc/aes.asm b/sparc/aes.asm index 7a4f0816465285c1b74348d4ff5a3063a5871772..d1c55e84b338a71699dded5ae2d1aab71c5de73a 100644 --- a/sparc/aes.asm +++ b/sparc/aes.asm @@ -83,6 +83,7 @@ _aes_crypt: add %fp, -24, wtxt add %fp, -40, tmp + ld [ctx + AES_NROUNDS], nrounds ! Compute xor, so that we can swap efficiently. xor wtxt, tmp, diff @@ -142,7 +143,46 @@ _aes_crypt: ! ! The code uses the register %o[j], aka tj, as the primary ! register for that sub-expression. True for j==1,3. + + C i = 0 + ld [IDX1+i], t1 ! 1 + + ! IDX2(j) = j XOR 2 + xor i, 8, t2 + add wtxt, t1, t1 ! 1 + ldub [t1+2], t1 ! 1 + ld [IDX3+i], t3 ! 3 + + sll t1, 2, t1 ! 1 + ld [wtxt+i], t0 ! 0 + lduh [wtxt+t2], t2 ! 2 + and t0, 255, t0 ! 0 + + ldub [wtxt+t3], t3 ! 3 + sll t0, 2, t0 ! 0 + ld [T0+t0], t0 ! 0 + and t2, 255, t2 ! 2 + + ld [T1+t1], t1 ! 1 + sll t2, 2, t2 ! 2 + ld [T2+t2], t2 ! 2 + sll t3, 2, t3 ! 3 + + ld [T3+t3], t3 ! 3 + xor t0, t1, t0 ! 0, 1 + xor t0, t2, t0 ! 0, 1, 2 + ! Fetch roundkey + ld [key+i], t1 + + xor t0, t3, t0 ! 0, 1, 2, 3 + xor t0, t1, t0 + st t0, [tmp+i] + C cmp i, 8 + C bleu .Linner_loop + add i, 4, i + + C i = 1 ld [IDX1+i], t1 ! 1 ! IDX2(j) = j XOR 2 @@ -175,10 +215,89 @@ _aes_crypt: xor t0, t3, t0 ! 0, 1, 2, 3 xor t0, t1, t0 st t0, [tmp+i] - cmp i, 8 + C cmp i, 8 - bleu .Linner_loop + C bleu .Linner_loop add i, 4, i + + C i = 2 + ld [IDX1+i], t1 ! 1 + + ! IDX2(j) = j XOR 2 + xor i, 8, t2 + add wtxt, t1, t1 ! 1 + ldub [t1+2], t1 ! 1 + ld [IDX3+i], t3 ! 3 + + sll t1, 2, t1 ! 1 + ld [wtxt+i], t0 ! 0 + lduh [wtxt+t2], t2 ! 2 + and t0, 255, t0 ! 0 + + ldub [wtxt+t3], t3 ! 3 + sll t0, 2, t0 ! 0 + ld [T0+t0], t0 ! 0 + and t2, 255, t2 ! 2 + + ld [T1+t1], t1 ! 1 + sll t2, 2, t2 ! 2 + ld [T2+t2], t2 ! 2 + sll t3, 2, t3 ! 3 + + ld [T3+t3], t3 ! 3 + xor t0, t1, t0 ! 0, 1 + xor t0, t2, t0 ! 0, 1, 2 + ! Fetch roundkey + ld [key+i], t1 + + xor t0, t3, t0 ! 
0, 1, 2, 3 + xor t0, t1, t0 + st t0, [tmp+i] + C cmp i, 8 + + C bleu .Linner_loop + add i, 4, i + + C i = 3 + ld [IDX1+i], t1 ! 1 + + ! IDX2(j) = j XOR 2 + xor i, 8, t2 + add wtxt, t1, t1 ! 1 + ldub [t1+2], t1 ! 1 + ld [IDX3+i], t3 ! 3 + + sll t1, 2, t1 ! 1 + ld [wtxt+i], t0 ! 0 + lduh [wtxt+t2], t2 ! 2 + and t0, 255, t0 ! 0 + + ldub [wtxt+t3], t3 ! 3 + sll t0, 2, t0 ! 0 + ld [T0+t0], t0 ! 0 + and t2, 255, t2 ! 2 + + ld [T1+t1], t1 ! 1 + sll t2, 2, t2 ! 2 + ld [T2+t2], t2 ! 2 + sll t3, 2, t3 ! 3 + + ld [T3+t3], t3 ! 3 + xor t0, t1, t0 ! 0, 1 + xor t0, t2, t0 ! 0, 1, 2 + ! Fetch roundkey + ld [key+i], t1 + + xor t0, t3, t0 ! 0, 1, 2, 3 + xor t0, t1, t0 + st t0, [tmp+i] + C cmp i, 8 + + C bleu .Linner_loop + add i, 4, i + + C End loop + ! switch roles for tmp and wtxt xor wtxt, diff, wtxt xor tmp, diff, tmp @@ -247,7 +366,7 @@ define(i, round) sub wtxt, src, %g3 .Lend: - add %sp, FRAME_SIZE, %fp + C add %sp, FRAME_SIZE, %fp ret restore .LLFE1: