Skip to content
Snippets Groups Projects
Commit 0b767327 authored by Niels Möller
Browse files

* sparc/machine.m4 (AES_ROUND): New loop invariants T0-T3, to avoid the additions of the AES_TABLEx constants in the inner loop.

Rev: src/nettle/sparc/aes-encrypt-internal.asm:1.8
Rev: src/nettle/sparc/machine.m4:1.8
parent f576927f
No related branches found
No related tags found
No related merge requests found
...@@ -19,14 +19,14 @@ C the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, ...@@ -19,14 +19,14 @@ C the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
C MA 02111-1307, USA. C MA 02111-1307, USA.
C Arguments C Arguments
define(<CTX>, <%i0>) define(<CTX>, <%i0>)
define(<T>, <%i1>) define(<T>, <%i1>)
define(<LENGTH>,<%i2>) define(<LENGTH>,<%i2>)
define(<DST>, <%i3>) define(<DST>, <%i3>)
define(<SRC>, <%i4>) define(<SRC>, <%i4>)
C AES state, two copies for unrolling C AES state, two copies for unrolling
define(<W0>, <%l0>) define(<W0>, <%l0>)
define(<W1>, <%l1>) define(<W1>, <%l1>)
...@@ -38,10 +38,13 @@ define(<X1>, <%l5>) ...@@ -38,10 +38,13 @@ define(<X1>, <%l5>)
define(<X2>, <%l6>) define(<X2>, <%l6>)
define(<X3>, <%l7>) define(<X3>, <%l7>)
C %o0 and %o1 are TMP1 and TMP2 C %o0-%o3 are used for loop invariants T0-T3
define(<KEY>, <%o4>) define(<KEY>, <%o4>)
define(<ROUND>, <%o5>) define(<ROUND>, <%o5>)
C %g1 and %g2 are TMP1 and TMP2
C Registers %g1-%g3 and %o0 - %o5 are free to use. C Registers %g1-%g3 and %o0 - %o5 are free to use.
C The sparc32 stack frame looks like C The sparc32 stack frame looks like
...@@ -69,7 +72,12 @@ PROLOGUE(_nettle_aes_encrypt) ...@@ -69,7 +72,12 @@ PROLOGUE(_nettle_aes_encrypt)
save %sp, -FRAME_SIZE, %sp save %sp, -FRAME_SIZE, %sp
cmp LENGTH, 0 cmp LENGTH, 0
be .Lend be .Lend
nop
C Loop invariants
add T, AES_TABLE0, T0
add T, AES_TABLE1, T1
add T, AES_TABLE2, T2
add T, AES_TABLE3, T3
.Lblock_loop: .Lblock_loop:
C Read src, and add initial subkey C Read src, and add initial subkey
...@@ -79,16 +87,16 @@ PROLOGUE(_nettle_aes_encrypt) ...@@ -79,16 +87,16 @@ PROLOGUE(_nettle_aes_encrypt)
AES_LOAD(2, SRC, KEY, W2) AES_LOAD(2, SRC, KEY, W2)
AES_LOAD(3, SRC, KEY, W3) AES_LOAD(3, SRC, KEY, W3)
C Must be even, and includes the final round
ld [AES_NROUNDS + CTX], ROUND
add SRC, 16, SRC add SRC, 16, SRC
add KEY, 16, KEY add KEY, 16, KEY
C Must be even, and includes the final round
ld [AES_NROUNDS + CTX], ROUND
nop
srl ROUND, 1, ROUND srl ROUND, 1, ROUND
C Last two rounds handled specially C Last two rounds handled specially
sub ROUND, 1, ROUND sub ROUND, 1, ROUND
.Lround_loop: .Lround_loop:
C The AES_ROUND macro uses T0, ..., T3
C Transform W -> X C Transform W -> X
AES_ROUND(0, T, W0, W1, W2, W3, KEY, X0) AES_ROUND(0, T, W0, W1, W2, W3, KEY, X0)
AES_ROUND(1, T, W1, W2, W3, W0, KEY, X1) AES_ROUND(1, T, W1, W2, W3, W0, KEY, X1)
......
...@@ -5,8 +5,14 @@ C understand arithmetic expressions? Maybe we don't need to use m4 ...@@ -5,8 +5,14 @@ C understand arithmetic expressions? Maybe we don't need to use m4
C eval. C eval.
C Used as temporaries by the AES macros C Used as temporaries by the AES macros
define(<TMP1>, <%o0>) define(<TMP1>, <%g1>)
define(<TMP2>, <%o1>) define(<TMP2>, <%g2>)
C Loop invariants used by AES_ROUND
define(<T0>, <%o0>)
define(<T1>, <%o1>)
define(<T2>, <%o2>)
define(<T3>, <%o3>)
C AES_LOAD(i, src, key, res) C AES_LOAD(i, src, key, res)
define(<AES_LOAD>, < define(<AES_LOAD>, <
...@@ -36,23 +42,19 @@ define(<AES_ROUND>, < ...@@ -36,23 +42,19 @@ define(<AES_ROUND>, <
srl $4, 6, TMP2 C 1 srl $4, 6, TMP2 C 1
sll TMP1, 2, TMP1 C 0 sll TMP1, 2, TMP1 C 0
and TMP2, 0x3fc, TMP2 C 1 and TMP2, 0x3fc, TMP2 C 1
add TMP1, AES_TABLE0, TMP1 C 0 ld [T0 + TMP1], $8 C 0 E0
add TMP2, AES_TABLE1, TMP2 C 1
ld [$2 + TMP1], $8 C 0 E0
srl $5, 14, TMP1 C 2 srl $5, 14, TMP1 C 2
ld [$2 + TMP2], TMP2 C 1 ld [T1 + TMP2], TMP2 C 1
and TMP1, 0x3fc, TMP1 C 2 and TMP1, 0x3fc, TMP1 C 2
xor $8, TMP2, $8 C 1 E1 xor $8, TMP2, $8 C 1 E1
srl $6, 22, TMP2 C 3 srl $6, 22, TMP2 C 3
add TMP1, AES_TABLE2, TMP1 C 2 ld [T2 + TMP1], TMP1 C 2
and TMP2, 0x3fc, TMP2 C 3 and TMP2, 0x3fc, TMP2 C 3
ld [$2 + TMP1], TMP1 C 2
add TMP2, AES_TABLE3, TMP2 C 3
xor $8, TMP1, $8 C 2 E2 xor $8, TMP1, $8 C 2 E2
ld [$7 + eval(4*$1)], TMP1 C 4 ld [$7 + eval(4*$1)], TMP1 C 4
ld [$2 + TMP2], TMP2 C 3 ld [T3 + TMP2], TMP2 C 3
xor $8, TMP1, $8 C 4 xor $8, TMP1, $8 C 4 E4
xor $8, TMP2, $8 C 3 xor $8, TMP2, $8 C 3 E3
>)dnl >)dnl
C AES_FINAL_ROUND(i, T, a, b, c, d, key, dst) C AES_FINAL_ROUND(i, T, a, b, c, d, key, dst)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment