Commit 4980578a authored by Niels Möller

Instruction alignment tweaks.

Rev: src/nettle/sparc/aes.asm:1.73
parent acb2beac
...@@ -82,13 +82,13 @@ _aes_crypt: ...@@ -82,13 +82,13 @@ _aes_crypt:
! The loop variable will be multiplied by 16. ! The loop variable will be multiplied by 16.
! More loop invariants ! More loop invariants
add T, AES_TABLE0, T0 add T, AES_TABLE0, T0
add T, AES_TABLE1, T1 add T, AES_TABLE1, T1
add T, AES_TABLE2, T2 add T, AES_TABLE2, T2
add T, AES_TABLE3, T3 add T, AES_TABLE3, T3
add T, AES_SIDX1, IDX1 add T, AES_SIDX1, IDX1
add T, AES_SIDX3, IDX3 add T, AES_SIDX3, IDX3
! Read src, and add initial subkey ! Read src, and add initial subkey
! Difference between ctx and src. ! Difference between ctx and src.
! NOTE: This instruction is duplicated in the delay slot, ! NOTE: This instruction is duplicated in the delay slot,
...@@ -100,7 +100,8 @@ _aes_crypt: ...@@ -100,7 +100,8 @@ _aes_crypt:
! For stop condition. Note that src is incremented in the ! For stop condition. Note that src is incremented in the
! delay slot ! delay slot
add src, 8, %g1 add src, 8, %g1
nop
.Lsource_loop: .Lsource_loop:
ldub [src+3], t3 ldub [src+3], t3
ldub [src+2], t2 ldub [src+2], t2
...@@ -125,7 +126,7 @@ _aes_crypt: ...@@ -125,7 +126,7 @@ _aes_crypt:
sub nrounds, 1, round sub nrounds, 1, round
add ctx, 16, key add ctx, 16, key
.Lround_loop: nop
! 4*i ! 4*i
! NOTE: Instruction duplicated in delay slot ! NOTE: Instruction duplicated in delay slot
mov 0, i mov 0, i
...@@ -137,55 +138,53 @@ _aes_crypt: ...@@ -137,55 +138,53 @@ _aes_crypt:
! register for that sub-expression. True for j==1,3. ! register for that sub-expression. True for j==1,3.
ld [IDX1+i], t1 ! 1 ld [IDX1+i], t1 ! 1
! IDX2(j) = j XOR 2 ! IDX2(j) = j XOR 2
xor i, 8, t2 xor i, 8, t2
add wtxt, t1, t1 ! 1 add wtxt, t1, t1 ! 1
ldub [t1+2], t1 ! 1 ldub [t1+2], t1 ! 1
ld [IDX3+i], t3 ! 3
ld [IDX3+i], t3 ! 3
sll t1, 2, t1 ! 1 sll t1, 2, t1 ! 1
ld [wtxt+i], t0 ! 0 ld [wtxt+i], t0 ! 0
lduh [wtxt+t2], t2 ! 2 lduh [wtxt+t2], t2 ! 2
and t0, 255, t0 ! 0
and t0, 255, t0 ! 0
ldub [wtxt+t3], t3 ! 3 ldub [wtxt+t3], t3 ! 3
sll t0, 2, t0 ! 0 sll t0, 2, t0 ! 0
ld [T0+t0], t0 ! 0 ld [T0+t0], t0 ! 0
and t2, 255, t2 ! 2
and t2, 255, t2 ! 2
ld [T1+t1], t1 ! 1 ld [T1+t1], t1 ! 1
sll t2, 2, t2 ! 2 sll t2, 2, t2 ! 2
ld [T2+t2], t2 ! 2 ld [T2+t2], t2 ! 2
sll t3, 2, t3 ! 3
sll t3, 2, t3 ! 3
ld [T3+t3], t3 ! 3 ld [T3+t3], t3 ! 3
xor t0, t1, t0 ! 0, 1 xor t0, t1, t0 ! 0, 1
xor t0, t2, t0 ! 0, 1, 2 xor t0, t2, t0 ! 0, 1, 2
! Fetch roundkey ! Fetch roundkey
ld [key+i], t1 ld [key+i], t1
xor t0, t3, t0 ! 0, 1, 2, 3 xor t0, t3, t0 ! 0, 1, 2, 3
xor t0, t1, t0 xor t0, t1, t0
st t0, [tmp+i] st t0, [tmp+i]
cmp i, 8
cmp i, 8
bleu .Linner_loop bleu .Linner_loop
add i, 4, i add i, 4, i
! switch roles for tmp and wtxt ! switch roles for tmp and wtxt
xor wtxt, diff, wtxt xor wtxt, diff, wtxt
xor tmp, diff, tmp xor tmp, diff, tmp
subcc round, 1, round subcc round, 1, round
add key, 16, key add key, 16, key
bne .Linner_loop bne .Linner_loop
mov 0, i mov 0, i
! final round ! final round
! Use round as the loop variable, as it's already zero ! Use round as the loop variable, as it's already zero
undefine(<i>) undefine(<i>)
define(i, round) define(i, round)
! Comments mark which j in T->sbox[Bj(wtxt[IDXj(i)])] ! Comments mark which j in T->sbox[Bj(wtxt[IDXj(i)])]
! the instruction is part of ! the instruction is part of
! NOTE: First instruction duplicated in delay slot ! NOTE: First instruction duplicated in delay slot
...@@ -193,7 +192,6 @@ define(i, round) ...@@ -193,7 +192,6 @@ define(i, round)
.Lfinal_loop: .Lfinal_loop:
! IDX2(j) = j XOR 2 ! IDX2(j) = j XOR 2
xor i, 8, t2 xor i, 8, t2
! ld [idx-16], t2 ! 2
add wtxt, t1, t1 ! 1 add wtxt, t1, t1 ! 1
ldub [t1+2], t1 ! 1 ldub [t1+2], t1 ! 1
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment