From bc5482c2dd0dfa9369be22462496cd858150bc8c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se>
Date: Mon, 11 Feb 2002 22:54:25 +0100
Subject: [PATCH] Now almost as fast as the C code.

Rev: src/nettle/sparc/aes.asm:1.5
---
Review notes (not part of the original commit message):
 * This copy of the patch had been flattened onto one line. Line breaks
   and tab indentation are restored here; the positions of
   whitespace-only context lines are a best guess chosen to match the
   hunk headers.
 * The added loop test is "cmp %o3, 8" instead of "cmp %o3, 12". The
   compare runs before "add %o3, 4, %o3", so with 12 the "bleu" is still
   taken after byte offset 12 and the loop makes a fifth pass that loads
   txt[4] and keys[4] and stores to out[4]. The loop being replaced made
   exactly four passes.
 * The post-image blob hash on the "index" line was not recomputed.

 sparc/aes.asm | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/sparc/aes.asm b/sparc/aes.asm
index 8860acb0..30029552 100644
--- a/sparc/aes.asm
+++ b/sparc/aes.asm
@@ -59,7 +59,8 @@ key_addition_8to32:
 	retl
 	nop
 
-	! FIXME: Inline, unroll?
+! key_addition32(const uint32_t *txt, const uint32_t *keys, uint32_t *out)
+
 	.size	key_addition_8to32,.LLfe1-key_addition_8to32
 	.align 4
 	.type	key_addition32,#function
@@ -67,27 +68,25 @@ key_addition_8to32:
 key_addition32:
 	! Use %g2 and %g3 as temporaries, %o3 as counter
 	mov	0, %o3
+	! Decrement out, so we can increment the counter earlier in the loop
+	sub	%o2, 4, %o2
 .LL26:
-	! Get *txt++
-	ld	[%o0], %g2
-	add	%o0, 4, %o0
-	! Get *keys++
-	ld	[%o1], %g3
-	add	%o1, 4, %o1
+	! Get txt[i]
+	ld	[%o0+%o3], %g2
+	! Get keys[i]
+	ld	[%o1+%o3], %g3
+	cmp	%o3, 8		! tested before the increment: loop again after offsets 0, 4, 8
+	add	%o3, 4, %o3
+
 	xor	%g2, %g3, %g3
-	st	%g3, [%o2]
-	! Incrementing %o2 in the delay slot
-	add	%o3, 1, %o3
-	! FIXME: Unroll or inline?
-	cmp	%o3, 3
 	bleu	.LL26
-	add	%o2, 4, %o2
+	st	%g3, [%o2+%o3]	! delay slot: %o2 is out-4 and %o3 is already advanced, so this is out[i]
 	retl
 	nop
 
 
-!
 	! And three more times
+
 !
 ! 	mov	%o0, %o4
 ! 	mov	0, %o3
-- 
GitLab