From 1f42276abb1da2c2a8793c4b4c3e38aa1c1edce5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se> Date: Sun, 16 Oct 2005 12:27:41 +0200 Subject: [PATCH] * sparc/machine.m4 (AES_ROUND): Better scheduling, by interleaving independent operations. Rev: src/nettle/sparc/aes-encrypt-internal.asm:1.7 Rev: src/nettle/sparc/machine.m4:1.7 --- sparc/aes-encrypt-internal.asm | 5 +++-- sparc/machine.m4 | 29 +++++++++++------------------ 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/sparc/aes-encrypt-internal.asm b/sparc/aes-encrypt-internal.asm index 8e06e03a..e5951ad1 100644 --- a/sparc/aes-encrypt-internal.asm +++ b/sparc/aes-encrypt-internal.asm @@ -133,10 +133,11 @@ C A: nettle-1.13 C-code C B: nettle-1.13 assembler C C: New C-code C D: New assembler, first correct version - +C E: New assembler, with basic scheduling of AES_ROUND. + C MB/s cycles/block C A 1.2 1107 C B 2.3 572 C C 2.1 627 C D 1.8 722 - +C E 2.6 496 diff --git a/sparc/machine.m4 b/sparc/machine.m4 index 5f3731a0..eb9e334b 100644 --- a/sparc/machine.m4 +++ b/sparc/machine.m4 @@ -33,34 +33,27 @@ C FIXME: Needs better instruction scheduling, and perhaps more temporaries C Alternatively, we can use a single table and some rotations define(<AES_ROUND>, < and $3, 0xff, TMP1 C 0 - sll TMP1, 2, TMP1 C 0 - add TMP1, AES_TABLE0, TMP1 C 0 - ld [$2 + TMP1], $8 C 0 - srl $4, 6, TMP2 C 1 + sll TMP1, 2, TMP1 C 0 and TMP2, 0x3fc, TMP2 C 1 + add TMP1, AES_TABLE0, TMP1 C 0 add TMP2, AES_TABLE1, TMP2 C 1 - ld [$2 + TMP2], TMP2 C 1 - nop - xor $8, TMP2, $8 C 1 - + ld [$2 + TMP1], $8 C 0 E0 srl $5, 14, TMP1 C 2 + ld [$2 + TMP2], TMP2 C 1 and TMP1, 0x3fc, TMP1 C 2 - add TMP1, AES_TABLE2, TMP1 C 2 - ld [$2 + TMP1], TMP1 C 2 - nop - xor $8, TMP1, $8 C 2 - + xor $8, TMP2, $8 C 1 E1 srl $6, 22, TMP2 C 3 + add TMP1, AES_TABLE2, TMP1 C 2 and TMP2, 0x3fc, TMP2 C 3 + ld [$2 + TMP1], TMP1 C 2 add TMP2, AES_TABLE3, TMP2 C 3 + xor $8, TMP1, $8 C 2 E2 + ld [$7 + eval(4*$1)], TMP1 C 4 ld [$2 + TMP2], TMP2 C 3 - nop + xor $8, TMP1, $8 C 4 xor $8, TMP2, $8 C 3 - - ld [$7 + eval(4*$1)], TMP2 C 4 - nop - xor $8, TMP2, $8>)dnl C 4 +>)dnl C AES_FINAL_ROUND(i, T, a, b, c, d, key, dst) C Compute one word in the final round function. Output is converted to -- GitLab