Commit 578b7985 authored by Niels Möller
Browse files

* sparc/aes.asm (_aes_crypt): Unrolled final loop, preparing for
optimizations.

Rev: src/nettle/sparc/aes.asm:1.86
parent 51047b57
......@@ -58,6 +58,7 @@ define(T1, %l7)
define(T2, %g5)
define(T3, %o7)
define(IDX1, %i5)
C define(IDX1, <T + AES_SIDX1 >)
define(IDX3, %o5)
! Temporaries
......@@ -141,12 +142,14 @@ _aes_crypt:
! The code uses the register %o[j], aka tj, as the primary
! register for that sub-expression. True for j==1,3.
C Unrolled inner loop begins
C i = 0
ld [IDX1], t1 ! 1
ld [IDX1+0], t1 ! 1
add wtxt, t1, t1 ! 1
ldub [t1+2], t1 ! 1
ld [IDX3], t3 ! 3
ld [IDX3+0], t3 ! 3
sll t1, 2, t1 ! 1
ld [wtxt], t0 ! 0
......@@ -273,7 +276,7 @@ _aes_crypt:
xor t0, t1, t0
st t0, [tmp+12]
C End of unrolled loop
C Unrolled inner loop ends
! switch roles for tmp and wtxt
xor wtxt, diff, wtxt
......@@ -290,9 +293,151 @@ define(i, round)
! Comments mark which j in T->sbox[Bj(wtxt[IDXj(i)])]
! the instruction is part of
C Unrolled final loop begins
! NOTE: First instruction duplicated in delay slot
C i = 0
ld [IDX1+i], t1 ! 1
C .Lfinal_loop:
! IDX2(j) = j XOR 2
xor i, 8, t2
add wtxt, t1, t1 ! 1
ldub [t1+2], t1 ! 1
ld [wtxt+i], t0 ! 0
lduh [wtxt+t2], t2 ! 2
and t0, 255, t0 ! 0
ld [IDX3 + i], t3 ! 3
and t2, 255, t2 ! 2
ldub [T+t1], t1 ! 1
ldub [T+t0], t0 ! 0
sll t1, 8, t1 ! 1
ldub [wtxt+t3], t3 ! 3
or t0, t1, t0 ! 0, 1
ldub [T+t2], t2 ! 2
ldub [T+t3], t3 ! 3
sll t2, 16, t2 ! 2
or t0, t2, t0 ! 0, 1, 2
ld [key + i], t2
sll t3, 24, t3 ! 3
or t0, t3, t0 ! 0, 1, 2, 3
xor t0, t2, t0
add i, 4, i
cmp i, 12
srl t0, 24, t3
srl t0, 16, t2
srl t0, 8, t1
stb t1, [dst+1]
stb t3, [dst+3]
stb t2, [dst+2]
stb t0, [dst]
add dst, 4, dst
C bleu .Lfinal_loop
ld [IDX1+i], t1 ! 1
C i = 1
ld [IDX1+i], t1 ! 1
C .Lfinal_loop:
! IDX2(j) = j XOR 2
xor i, 8, t2
add wtxt, t1, t1 ! 1
ldub [t1+2], t1 ! 1
ld [wtxt+i], t0 ! 0
lduh [wtxt+t2], t2 ! 2
and t0, 255, t0 ! 0
ld [IDX3 + i], t3 ! 3
and t2, 255, t2 ! 2
ldub [T+t1], t1 ! 1
ldub [T+t0], t0 ! 0
sll t1, 8, t1 ! 1
ldub [wtxt+t3], t3 ! 3
or t0, t1, t0 ! 0, 1
ldub [T+t2], t2 ! 2
ldub [T+t3], t3 ! 3
sll t2, 16, t2 ! 2
or t0, t2, t0 ! 0, 1, 2
ld [key + i], t2
sll t3, 24, t3 ! 3
or t0, t3, t0 ! 0, 1, 2, 3
xor t0, t2, t0
add i, 4, i
cmp i, 12
srl t0, 24, t3
srl t0, 16, t2
srl t0, 8, t1
stb t1, [dst+1]
stb t3, [dst+3]
stb t2, [dst+2]
stb t0, [dst]
add dst, 4, dst
C bleu .Lfinal_loop
ld [IDX1+i], t1 ! 1
C i = 2
ld [IDX1+i], t1 ! 1
C .Lfinal_loop:
! IDX2(j) = j XOR 2
xor i, 8, t2
add wtxt, t1, t1 ! 1
ldub [t1+2], t1 ! 1
ld [wtxt+i], t0 ! 0
lduh [wtxt+t2], t2 ! 2
and t0, 255, t0 ! 0
ld [IDX3 + i], t3 ! 3
and t2, 255, t2 ! 2
ldub [T+t1], t1 ! 1
ldub [T+t0], t0 ! 0
sll t1, 8, t1 ! 1
ldub [wtxt+t3], t3 ! 3
or t0, t1, t0 ! 0, 1
ldub [T+t2], t2 ! 2
ldub [T+t3], t3 ! 3
sll t2, 16, t2 ! 2
or t0, t2, t0 ! 0, 1, 2
ld [key + i], t2
sll t3, 24, t3 ! 3
or t0, t3, t0 ! 0, 1, 2, 3
xor t0, t2, t0
add i, 4, i
cmp i, 12
srl t0, 24, t3
srl t0, 16, t2
srl t0, 8, t1
stb t1, [dst+1]
stb t3, [dst+3]
stb t2, [dst+2]
stb t0, [dst]
add dst, 4, dst
C bleu .Lfinal_loop
ld [IDX1+i], t1 ! 1
.Lfinal_loop:
C i = 3
ld [IDX1+i], t1 ! 1
C .Lfinal_loop:
! IDX2(j) = j XOR 2
xor i, 8, t2
add wtxt, t1, t1 ! 1
......@@ -333,8 +478,11 @@ define(i, round)
stb t0, [dst]
add dst, 4, dst
bleu .Lfinal_loop
C bleu .Lfinal_loop
ld [IDX1+i], t1 ! 1
C Unrolled final loop ends
addcc length, -16, length
sub ctx, src, %g2
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment