Commit 35a46d53 authored by Niels Möller
Browse files

* sparc/aes.asm (_aes_crypt): Unrolled final loop, preparing for
optimizations.
(_aes_crypt): Eliminated i from first copy of the loop. Some
cleanup.

Rev: src/nettle/sparc/aes.asm:1.87
parent 578b7985
...@@ -298,17 +298,16 @@ define(i, round) ...@@ -298,17 +298,16 @@ define(i, round)
! NOTE: First instruction duplicated in delay slot ! NOTE: First instruction duplicated in delay slot
C i = 0 C i = 0
ld [IDX1+i], t1 ! 1 ld [IDX1+0], t1 ! 1
C .Lfinal_loop: C xor i, 8, t2
! IDX2(j) = j XOR 2
xor i, 8, t2
add wtxt, t1, t1 ! 1 add wtxt, t1, t1 ! 1
ldub [t1+2], t1 ! 1 ldub [t1+2], t1 ! 1
ld [wtxt+i], t0 ! 0 ld [wtxt+0], t0 ! 0
lduh [wtxt+t2], t2 ! 2 ! IDX2(j) = j XOR 2
lduh [wtxt+8], t2 ! 2
and t0, 255, t0 ! 0 and t0, 255, t0 ! 0
ld [IDX3 + i], t3 ! 3 ld [IDX3 + 0], t3 ! 3
and t2, 255, t2 ! 2 and t2, 255, t2 ! 2
ldub [T+t1], t1 ! 1 ldub [T+t1], t1 ! 1
...@@ -322,13 +321,12 @@ C .Lfinal_loop: ...@@ -322,13 +321,12 @@ C .Lfinal_loop:
sll t2, 16, t2 ! 2 sll t2, 16, t2 ! 2
or t0, t2, t0 ! 0, 1, 2 or t0, t2, t0 ! 0, 1, 2
ld [key + i], t2 ld [key + 0], t2
sll t3, 24, t3 ! 3 sll t3, 24, t3 ! 3
or t0, t3, t0 ! 0, 1, 2, 3 or t0, t3, t0 ! 0, 1, 2, 3
xor t0, t2, t0 xor t0, t2, t0
add i, 4, i add i, 4, i
cmp i, 12
srl t0, 24, t3 srl t0, 24, t3
srl t0, 16, t2 srl t0, 16, t2
...@@ -340,12 +338,11 @@ C .Lfinal_loop: ...@@ -340,12 +338,11 @@ C .Lfinal_loop:
stb t0, [dst] stb t0, [dst]
add dst, 4, dst add dst, 4, dst
C bleu .Lfinal_loop
ld [IDX1+i], t1 ! 1 ld [IDX1+i], t1 ! 1
C i = 1 C i = 1
ld [IDX1+i], t1 ! 1 ld [IDX1+i], t1 ! 1
C .Lfinal_loop:
! IDX2(j) = j XOR 2 ! IDX2(j) = j XOR 2
xor i, 8, t2 xor i, 8, t2
add wtxt, t1, t1 ! 1 add wtxt, t1, t1 ! 1
...@@ -374,7 +371,6 @@ C .Lfinal_loop: ...@@ -374,7 +371,6 @@ C .Lfinal_loop:
or t0, t3, t0 ! 0, 1, 2, 3 or t0, t3, t0 ! 0, 1, 2, 3
xor t0, t2, t0 xor t0, t2, t0
add i, 4, i add i, 4, i
cmp i, 12
srl t0, 24, t3 srl t0, 24, t3
srl t0, 16, t2 srl t0, 16, t2
...@@ -386,12 +382,11 @@ C .Lfinal_loop: ...@@ -386,12 +382,11 @@ C .Lfinal_loop:
stb t0, [dst] stb t0, [dst]
add dst, 4, dst add dst, 4, dst
C bleu .Lfinal_loop
ld [IDX1+i], t1 ! 1 ld [IDX1+i], t1 ! 1
C i = 2 C i = 2
ld [IDX1+i], t1 ! 1 ld [IDX1+i], t1 ! 1
C .Lfinal_loop:
! IDX2(j) = j XOR 2 ! IDX2(j) = j XOR 2
xor i, 8, t2 xor i, 8, t2
add wtxt, t1, t1 ! 1 add wtxt, t1, t1 ! 1
...@@ -420,7 +415,6 @@ C .Lfinal_loop: ...@@ -420,7 +415,6 @@ C .Lfinal_loop:
or t0, t3, t0 ! 0, 1, 2, 3 or t0, t3, t0 ! 0, 1, 2, 3
xor t0, t2, t0 xor t0, t2, t0
add i, 4, i add i, 4, i
cmp i, 12
srl t0, 24, t3 srl t0, 24, t3
srl t0, 16, t2 srl t0, 16, t2
...@@ -432,12 +426,11 @@ C .Lfinal_loop: ...@@ -432,12 +426,11 @@ C .Lfinal_loop:
stb t0, [dst] stb t0, [dst]
add dst, 4, dst add dst, 4, dst
C bleu .Lfinal_loop
ld [IDX1+i], t1 ! 1 ld [IDX1+i], t1 ! 1
C i = 3 C i = 3
ld [IDX1+i], t1 ! 1 ld [IDX1+i], t1 ! 1
C .Lfinal_loop:
! IDX2(j) = j XOR 2 ! IDX2(j) = j XOR 2
xor i, 8, t2 xor i, 8, t2
add wtxt, t1, t1 ! 1 add wtxt, t1, t1 ! 1
...@@ -466,7 +459,6 @@ C .Lfinal_loop: ...@@ -466,7 +459,6 @@ C .Lfinal_loop:
or t0, t3, t0 ! 0, 1, 2, 3 or t0, t3, t0 ! 0, 1, 2, 3
xor t0, t2, t0 xor t0, t2, t0
add i, 4, i add i, 4, i
cmp i, 12
srl t0, 24, t3 srl t0, 24, t3
srl t0, 16, t2 srl t0, 16, t2
...@@ -478,7 +470,6 @@ C .Lfinal_loop: ...@@ -478,7 +470,6 @@ C .Lfinal_loop:
stb t0, [dst] stb t0, [dst]
add dst, 4, dst add dst, 4, dst
C bleu .Lfinal_loop
ld [IDX1+i], t1 ! 1 ld [IDX1+i], t1 ! 1
C Unrolled final loop ends C Unrolled final loop ends
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment