Skip to content
Snippets Groups Projects
Commit 2ddcc226 authored by Niels Möller's avatar Niels Möller
Browse files

* sparc/machine.m4 (AES_FINAL_ROUND): Better scheduling, by

interleaving independent operations.

Rev: src/nettle/sparc/aes-encrypt-internal.asm:1.10
Rev: src/nettle/sparc/machine.m4:1.11
parent 433d2b95
No related branches found
No related tags found
No related merge requests found
......@@ -137,15 +137,19 @@ EPILOGUE(_nettle_aes_encrypt)
C Some stats from adriana.lysator.liu.se (SS1000$, 85 MHz), for AES 128
C A: nettle-1.13 C-code
C B: nettle-1.13 assembler
C C: New C-code
C D: New assembler, first correct version
C E: New assembler, with basic scheduling of AES_ROUND.
C 1: nettle-1.13 C-code
C 2: nettle-1.13 assembler
C 3: New C-code
C 4: New assembler, first correct version
C 5: New assembler, with basic scheduling of AES_ROUND.
C 6: New assembler, with loop invariants T0-T3.
C 7: New assembler, with basic scheduling also of AES_FINAL_ROUND.
C MB/s cycles/block
C A 1.2 1107
C B 2.3 572
C C 2.1 627
C D 1.8 722
C E 2.6 496
C 1 1.2 1107
C 2 2.3 572
C 3 2.1 627
C 4 1.8 722
C 5 2.6 496
C 6 3.0 437
C 7 3.1 415
......@@ -66,29 +66,25 @@ define(<AES_FINAL_ROUND>, <
ld [$7 + eval(4*$1)], TMP3
and $3, 0xff, TMP1 C 0
ldub [T + TMP1], TMP1 C 0
nop
xor TMP3, TMP1, TMP1 C 0
stb TMP1, [$8 + eval(4*$1)] C 0
srl $4, 8, TMP2 C 1
ldub [T + TMP1], TMP1 C 0
and TMP2, 0xff, TMP2 C 1
xor TMP3, TMP1, TMP1 C 0
ldub [T + TMP2], TMP2 C 1
srl TMP3, 8, TMP3 C 1
xor TMP3, TMP2, TMP2 C 1
stb TMP2, [$8 + eval(4*$1 + 1)] C 1
stb TMP1, [$8 + eval(4*$1)] C 0 E0
srl $5, 16, TMP1 C 2
srl TMP3, 8, TMP3 C 1
and TMP1, 0xff, TMP1 C 2
xor TMP3, TMP2, TMP2 C 1
ldub [T + TMP1], TMP1 C 2
srl TMP3, 8, TMP3 C 2
xor TMP3, TMP1, TMP1 C 2
stb TMP1, [$8 + eval(4*$1 + 2)] C 2
stb TMP2, [$8 + eval(4*$1 + 1)] C 1 E1
srl $6, 24, TMP2 C 3
srl TMP3, 8, TMP3 C 2
ldub [T + TMP2], TMP2 C 3
xor TMP3, TMP1, TMP1 C 2
srl TMP3, 8, TMP3 C 3
stb TMP1, [$8 + eval(4*$1 + 2)] C 2 E2
xor TMP3, TMP2, TMP2 C 3
stb TMP2, [$8 + eval(4*$1 + 3)] C 3
stb TMP2, [$8 + eval(4*$1 + 3)] C 3 E3
>)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment