diff --git a/sparc/aes.asm b/sparc/aes.asm
index ec1283af5e72f1839fdb3b429865775f254c1a27..63cdb31cfc15f6d02cb283aed5656d595d74fccd 100644
--- a/sparc/aes.asm
+++ b/sparc/aes.asm
@@ -103,7 +103,53 @@ define(<AES_ROUND>, <
 
 	xor	t0, t3, t0		! 0, 1, 2, 3
 	xor	t0, t1, t0
 	st	t0, [tmp + $1]>)
+
+C AES_FINAL_ROUND(i)
+C Compute one word in the final round function.
+C Input in wtxt, key word at key + i; output stored to dst[0..3], dst += 4.
+C
+C The comments mark which j in T->table[j][ Bj(wtxt[IDXi(i)]) ]
+C the instruction is a part of.
+C
+C The code uses the register %o[j], aka tj, as the primary
+C register for that sub-expression. True for j==1,3.
+define(<AES_FINAL_ROUND>, <
+	ld	[IDX1+$1], t1 	! 1
+	ldub	[wtxt+t1], t1	! 1
+
+	ld	[wtxt+$1], t0	! 0
+	! IDX2(j) = j XOR 2, byte offset i XOR 8
+	lduh	[wtxt+eval($1 ^ 8)], t2	! 2
+	and	t0, 255, t0	! 0
+	ld	[IDX3 + $1], t3	! 3
+	and	t2, 255, t2	! 2
+	ldub	[T+t1], t1	! 1
+	ldub	[T+t0], t0	! 0
+	sll	t1, 8, t1	! 1
+
+	ldub	[wtxt+t3], t3	! 3
+	or	t0, t1, t0	! 0, 1
+	ldub	[T+t2], t2	! 2
+	ldub	[T+t3], t3	! 3
+
+	sll	t2, 16, t2	! 2
+	or	t0, t2, t0	! 0, 1, 2
+	ld	[key + $1], t2
+	sll	t3, 24, t3	! 3
+
+	or	t0, t3, t0	! 0, 1, 2, 3
+	xor	t0, t2, t0
+
+	srl	t0, 24, t3
+	srl	t0, 16, t2
+	srl	t0, 8, t1
+	stb	t1, [dst+1]	! byte 1
+
+	stb	t3, [dst+3]	! byte 3
+	stb	t2, [dst+2]	! byte 2
+	stb	t0, [dst]	! byte 0
+	add	dst, 4, dst>)
 
 C The stack frame looks like
 C
@@ -200,42 +246,43 @@ _aes_crypt:
 	C Unrolled final loop begins
 
 	C i = 0
-	ld	[IDX1+0], t1 	! 1
-	ldub	[wtxt+t1], t1	! 1
-
-	ld	[wtxt+0], t0	! 0
-	! IDX2(j) = j XOR 2
-	lduh	[wtxt+8], t2	! 2
-	and	t0, 255, t0	! 0
-	ld	[IDX3 + 0], t3	! 3
-
-	and	t2, 255, t2	! 2
-	ldub	[T+t1], t1	! 1
-	ldub	[T+t0], t0	! 0
-	sll	t1, 8, t1	! 1
-
-	ldub	[wtxt+t3], t3	! 3
-	or	t0, t1, t0	! 0, 1
-	ldub	[T+t2], t2	! 2
-	ldub	[T+t3], t3	! 3
-
-	sll	t2, 16, t2	! 2
-	or	t0, t2, t0	! 0, 1, 2
-	ld	[key + 0], t2
-	sll	t3, 24, t3	! 3
-
-	or	t0, t3, t0	! 0, 1, 2, 3
-	xor	t0, t2, t0
-
-	srl	t0, 24, t3
-	srl	t0, 16, t2
-	srl	t0, 8, t1
-	stb	t1, [dst+1]
-
-	stb	t3, [dst+3]
-	stb	t2, [dst+2]
-	stb	t0, [dst]
-	add	dst, 4, dst
+	AES_FINAL_ROUND(0)
+C	ld	[IDX1+0], t1 	! 1
+C	ldub	[wtxt+t1], t1	! 1
+C
+C	ld	[wtxt+0], t0	! 0
+C	! IDX2(j) = j XOR 2
+C	lduh	[wtxt+8], t2	! 2
+C	and	t0, 255, t0	! 0
+C	ld	[IDX3 + 0], t3	! 3
+C
+C	and	t2, 255, t2	! 2
+C	ldub	[T+t1], t1	! 1
+C	ldub	[T+t0], t0	! 0
+C	sll	t1, 8, t1	! 1
+C
+C	ldub	[wtxt+t3], t3	! 3
+C	or	t0, t1, t0	! 0, 1
+C	ldub	[T+t2], t2	! 2
+C	ldub	[T+t3], t3	! 3
+C
+C	sll	t2, 16, t2	! 2
+C	or	t0, t2, t0	! 0, 1, 2
+C	ld	[key + 0], t2
+C	sll	t3, 24, t3	! 3
+C
+C	or	t0, t3, t0	! 0, 1, 2, 3
+C	xor	t0, t2, t0
+C
+C	srl	t0, 24, t3
+C	srl	t0, 16, t2
+C	srl	t0, 8, t1
+C	stb	t1, [dst+1]
+C
+C	stb	t3, [dst+3]
+C	stb	t2, [dst+2]
+C	stb	t0, [dst]
+C	add	dst, 4, dst
 
 	C i = 1
 	ld	[IDX1+4], t1 	! 1