Skip to content
Snippets Groups Projects
Commit 1f42276a authored by Niels Möller
Browse files

* sparc/machine.m4 (AES_ROUND): Better scheduling, by
interleaving independent operations.

Rev: src/nettle/sparc/aes-encrypt-internal.asm:1.7
Rev: src/nettle/sparc/machine.m4:1.7
parent a6961580
No related branches found
No related tags found
No related merge requests found
...@@ -133,10 +133,11 @@ C A: nettle-1.13 C-code ...@@ -133,10 +133,11 @@ C A: nettle-1.13 C-code
C B: nettle-1.13 assembler C B: nettle-1.13 assembler
C C: New C-code C C: New C-code
C D: New assembler, first correct version C D: New assembler, first correct version
C E: New assembler, with basic scheduling of AES_ROUND.
C MB/s cycles/block C MB/s cycles/block
C A 1.2 1107 C A 1.2 1107
C B 2.3 572 C B 2.3 572
C C 2.1 627 C C 2.1 627
C D 1.8 722 C D 1.8 722
C E 2.6 496
...@@ -33,34 +33,27 @@ C FIXME: Needs better instruction scheduling, and perhaps more temporaries ...@@ -33,34 +33,27 @@ C FIXME: Needs better instruction scheduling, and perhaps more temporaries
C Alternatively, we can use a single table and some rotations C Alternatively, we can use a single table and some rotations
define(<AES_ROUND>, < define(<AES_ROUND>, <
and $3, 0xff, TMP1 C 0 and $3, 0xff, TMP1 C 0
sll TMP1, 2, TMP1 C 0
add TMP1, AES_TABLE0, TMP1 C 0
ld [$2 + TMP1], $8 C 0
srl $4, 6, TMP2 C 1 srl $4, 6, TMP2 C 1
sll TMP1, 2, TMP1 C 0
and TMP2, 0x3fc, TMP2 C 1 and TMP2, 0x3fc, TMP2 C 1
add TMP1, AES_TABLE0, TMP1 C 0
add TMP2, AES_TABLE1, TMP2 C 1 add TMP2, AES_TABLE1, TMP2 C 1
ld [$2 + TMP2], TMP2 C 1 ld [$2 + TMP1], $8 C 0 E0
nop
xor $8, TMP2, $8 C 1
srl $5, 14, TMP1 C 2 srl $5, 14, TMP1 C 2
ld [$2 + TMP2], TMP2 C 1
and TMP1, 0x3fc, TMP1 C 2 and TMP1, 0x3fc, TMP1 C 2
add TMP1, AES_TABLE2, TMP1 C 2 xor $8, TMP2, $8 C 1 E1
ld [$2 + TMP1], TMP1 C 2
nop
xor $8, TMP1, $8 C 2
srl $6, 22, TMP2 C 3 srl $6, 22, TMP2 C 3
add TMP1, AES_TABLE2, TMP1 C 2
and TMP2, 0x3fc, TMP2 C 3 and TMP2, 0x3fc, TMP2 C 3
ld [$2 + TMP1], TMP1 C 2
add TMP2, AES_TABLE3, TMP2 C 3 add TMP2, AES_TABLE3, TMP2 C 3
xor $8, TMP1, $8 C 2 E2
ld [$7 + eval(4*$1)], TMP1 C 4
ld [$2 + TMP2], TMP2 C 3 ld [$2 + TMP2], TMP2 C 3
nop xor $8, TMP1, $8 C 4
xor $8, TMP2, $8 C 3 xor $8, TMP2, $8 C 3
>)dnl
ld [$7 + eval(4*$1)], TMP2 C 4
nop
xor $8, TMP2, $8>)dnl C 4
C AES_FINAL_ROUND(i, T, a, b, c, d, key, dst) C AES_FINAL_ROUND(i, T, a, b, c, d, key, dst)
C Compute one word in the final round function. Output is converted to C Compute one word in the final round function. Output is converted to
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment