From 7aceb0b13bf574b371017f95ee9b9c36fa39cc5d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se>
Date: Fri, 15 Feb 2002 22:47:34 +0100
Subject: [PATCH] (_aes_crypt): Consistently use %l4, aka i, as the variable
 for the innermost loops. Preparations for double buffering.

Rev: src/nettle/sparc/aes.asm:1.30
---
 sparc/aes.asm | 71 ++++++++++++++++++++++++++++++---------------------
 1 file changed, 42 insertions(+), 29 deletions(-)

diff --git a/sparc/aes.asm b/sparc/aes.asm
index 7a317f2d..8abec406 100644
--- a/sparc/aes.asm
+++ b/sparc/aes.asm
@@ -22,31 +22,32 @@ define(src, %i4)
 	! Loop invariants
 define(wtxt, %l0)
 define(tmp, %l1)
-define(nround, %l2)
+define(diff, %l2)
+define(nround, %l3)
 
 	! Loop variables
-define(round, %l3)	! Should perhaps be 16 * round
-define(i, %l4)
+define(round, %l4)	! Should perhaps be 16 * round
+define(i, %l5)
 
 _aes_crypt:
 	! Why -136?
 	save	%sp, -136, %sp
 
-! Why this moving around of the input parameters?
 	cmp	length, 0
 	be	.Lend
 	! wtxt
 	add	%fp, -24, wtxt
 
 	add	%fp, -40, tmp
-	
+	! Compute xor, so that we can swap efficiently.
+	xor	wtxt, tmp, diff
 .Lblock_loop:
 	! Read src, and add initial subkey
-	mov	-4, %o4
+	mov	-4, i
 .Lsource_loop:
-	add	%o4, 4, %o4
+	add	i, 4, i
 
-	add	%o4, src, %o5
+	add	i, src, %o5
 
 	ldub	[%o5+3], %g2
 	ldub	[%o5+2], %g3
@@ -54,16 +55,16 @@ _aes_crypt:
 	ldub	[%o5+1], %o0
 	sll	%g3, 16, %g3
 	or	%g2, %g3, %g2
-	ldub	[src+%o4], %o5
+	ldub	[src+i], %o5
 	sll	%o0, 8, %o0
-	ld	[ctx+%o4], %g3
+	ld	[ctx+i], %g3
 	or	%g2, %o0, %g2
 	or	%g2, %o5, %g2
 	xor	%g2, %g3, %g2
-	cmp	%o4, 12
+	cmp	i, 12
 	bleu	.Lsource_loop
-	st	%g2, [wtxt+%o4]
+	st	%g2, [wtxt+i]
 
 !	! Read a little-endian word
 !	ldub	[src+3], %g2
@@ -99,13 +100,14 @@ _aes_crypt:
 	! wtxt
 	mov	wtxt, %g4
 
-	! 4*i: %o3
-	mov	0, %o3
+	! 4*i: i
+	! This instruction copied to the delay slot of the branch here.
+	mov	0, i
 .Lround_loop:
 	add	T, AES_SIDX3, %o2
 .Linner_loop:
 	! The comments mark which T->table[0][ B0(wtxt[IDX0(j)]) ]
-	! the isntruction is a part of.
+	! the instruction is a part of.
 
 	! AES_SIDX1
 	ld	[%o2-32], %g3		! 1
@@ -121,7 +123,7 @@ _aes_crypt:
 	sll	%o0, 2, %o0		! 1
 
 	! wtxt[i]
-	ld	[%g4+%o3], %o5		! 0
+	ld	[%g4+i], %o5		! 0
 
 	! wtxt[IDX2...]
 	lduh	[%g4+%o4], %g3		! 2
@@ -148,34 +150,45 @@ _aes_crypt:
 	xor	%g2, %o0, %g2		! 0, 1, 2
 
 	add	%o2, 4, %o2
-	
+
+!	! Fetch roundkey
+!	sll	round, 4, %o5
+!	add	%o5, ctx, %o5
+!	ld	[%o5], %o5
+
 	xor	%g2, %g3, %g2		! 0, 1, 2, 3
-	st	%g2, [tmp+%o3]
-	cmp	%o3, 8
+!	xor	%g2, %o5, %g2
+	st	%g2, [tmp+i]
+
+	cmp	i, 8
 	bleu	.Linner_loop
-	add	%o3, 4, %o3
+	add	i, 4, i
 
 	sll	round, 4, %g2
 	add	%g2, ctx, %o0
-	mov	0, %i5
-	mov	wtxt, %o3
-	mov	tmp, %o4
+	mov	0, i
+
 .Lroundkey_loop:
-	sll	%i5, 2, %g2
+	sll	i, 2, %g2
 	ld	[%o0], %o5
-	add	%i5, 1, %i5
-	ld	[%o4+%g2], %g3
-	cmp	%i5, 3
+	add	i, 1, i
+	ld	[tmp+%g2], %g3
+	cmp	i, 3
 	xor	%g3, %o5, %g3
-	st	%g3, [%o3+%g2]
+	st	%g3, [wtxt+%g2]
 	bleu	.Lroundkey_loop
 	add	%o0, 4, %o0
+
+	! switch roles for tmp and wtxt
+!	xor	wtxt, diff, wtxt
+!	xor	tmp, diff, tmp
+
 	add	round, 1, round
 	cmp	round, nround
 	blu	.Lround_loop
-	mov	0, %o3
+	mov	0, i
 
 	sll	round, 4, %g2
-- 
GitLab
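
Editor's note on the technique: the new "diff" register (%l2) and the still commented-out "xor wtxt, diff, wtxt" / "xor tmp, diff, tmp" lines prepare an XOR-based swap of the two state buffers, i.e. the double buffering mentioned in the subject. Once diff = wtxt ^ tmp is computed before the block loop, XOR-ing each pointer with diff exchanges the roles of the buffers with no spare register. Below is a minimal C sketch of that trick only; it is not part of the patch, and the names state/scratch are illustrative, not taken from aes.asm.

    /* XOR pointer-swap sketch: after computing diff once, each xor with
     * diff flips a pointer between the two buffers. */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
      uint32_t buf_a[4], buf_b[4];
      uint32_t *state = buf_a;   /* plays the role of wtxt */
      uint32_t *scratch = buf_b; /* plays the role of tmp  */

      /* Loop invariant, computed once before the block loop. */
      uintptr_t diff = (uintptr_t) state ^ (uintptr_t) scratch;

      /* Per round: swap which buffer is "current" and which is "next". */
      state   = (uint32_t *) ((uintptr_t) state   ^ diff);
      scratch = (uint32_t *) ((uintptr_t) scratch ^ diff);

      printf("state -> buf_b: %d\n", state == buf_b);     /* prints 1 */
      printf("scratch -> buf_a: %d\n", scratch == buf_a); /* prints 1 */
      return 0;
    }

This mirrors what the assembly will do with %l0 (wtxt), %l1 (tmp) and %l2 (diff) once the commented-out xor instructions at the end of the round loop are enabled.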