Commit 1f42276a authored by Niels Möller
Browse files

* sparc/machine.m4 (AES_ROUND): Better scheduling, by
interleaving independent operations.

Rev: src/nettle/sparc/aes-encrypt-internal.asm:1.7
Rev: src/nettle/sparc/machine.m4:1.7
parent a6961580
......@@ -133,10 +133,11 @@ C A: nettle-1.13 C-code
C B: nettle-1.13 assembler
C C: New C-code
C D: New assembler, first correct version
C E: New assembler, with basic scheduling of AES_ROUND.
C MB/s cycles/block
C A 1.2 1107
C B 2.3 572
C C 2.1 627
C D 1.8 722
C E 2.6 496
......@@ -33,34 +33,27 @@ C FIXME: Needs better instruction scheduling, and perhaps more temporaries
C Alternatively, we can use a single table and some rotations
define(<AES_ROUND>, <
and $3, 0xff, TMP1 C 0
sll TMP1, 2, TMP1 C 0
add TMP1, AES_TABLE0, TMP1 C 0
ld [$2 + TMP1], $8 C 0
srl $4, 6, TMP2 C 1
sll TMP1, 2, TMP1 C 0
and TMP2, 0x3fc, TMP2 C 1
add TMP1, AES_TABLE0, TMP1 C 0
add TMP2, AES_TABLE1, TMP2 C 1
ld [$2 + TMP2], TMP2 C 1
nop
xor $8, TMP2, $8 C 1
ld [$2 + TMP1], $8 C 0 E0
srl $5, 14, TMP1 C 2
ld [$2 + TMP2], TMP2 C 1
and TMP1, 0x3fc, TMP1 C 2
add TMP1, AES_TABLE2, TMP1 C 2
ld [$2 + TMP1], TMP1 C 2
nop
xor $8, TMP1, $8 C 2
xor $8, TMP2, $8 C 1 E1
srl $6, 22, TMP2 C 3
add TMP1, AES_TABLE2, TMP1 C 2
and TMP2, 0x3fc, TMP2 C 3
ld [$2 + TMP1], TMP1 C 2
add TMP2, AES_TABLE3, TMP2 C 3
xor $8, TMP1, $8 C 2 E2
ld [$7 + eval(4*$1)], TMP1 C 4
ld [$2 + TMP2], TMP2 C 3
nop
xor $8, TMP1, $8 C 4
xor $8, TMP2, $8 C 3
ld [$7 + eval(4*$1)], TMP2 C 4
nop
xor $8, TMP2, $8>)dnl C 4
>)dnl
C AES_FINAL_ROUND(i, T, a, b, c, d, key, dst)
C Compute one word in the final round function. Output is converted to
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment