From 7767751692ab8079c0fb43577ca0fa63e7ea2d3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se> Date: Sun, 16 Oct 2005 00:58:32 +0200 Subject: [PATCH] Implemented. Not yet working, and not optimized. Rev: src/nettle/sparc/aes-encrypt-internal.asm:1.3 --- sparc/aes-encrypt-internal.asm | 87 ++++++++++++++++++++++++++++------ 1 file changed, 72 insertions(+), 15 deletions(-) diff --git a/sparc/aes-encrypt-internal.asm b/sparc/aes-encrypt-internal.asm index 8f87c42f..65a2350a 100644 --- a/sparc/aes-encrypt-internal.asm +++ b/sparc/aes-encrypt-internal.asm @@ -20,23 +20,27 @@ C MA 02111-1307, USA. C Arguments -define(CTX, %i0) -define(T, %i1) -define(LENGTH, %i2) -define(DST, %i3) -define(SRC, %i4) +define(<CTX>, <%i0>) +define(<T>, <%i1>) +define(<LENGTH>,<%i2>) +define(<DST>, <%i3>) +define(<SRC>, <%i4>) C AES state, two copies for unrolling -define(W0, %l0) -define(W1, %l1) -define(W2, %l2) -define(W3, %l3) +define(<W0>, <%l0>) +define(<W1>, <%l1>) +define(<W2>, <%l2>) +define(<W3>, <%l3>) -define(T0, %l4) -define(T1, %l5) -define(T2, %l6) -define(T3, %l7) +define(<T0>, <%l4>) +define(<T1>, <%l5>) +define(<T2>, <%l6>) +define(<T3>, <%l7>) + +C %o0 and %o1 are TMP1 and TMP2 +define(<KEY>, <%o4>) +define(<ROUND>, <%o5>) C Registers %g1-%g3 and %o0 - %o5 are free to use. @@ -49,6 +53,13 @@ C %fp - 40: wtxt, uint32_t[4] C %fp - 136: OS register save area. 
define(<FRAME_SIZE>, 136) + .file "aes-encrypt-internal.asm" + + C _aes_encrypt(struct aes_context *ctx, + C const struct aes_table *T, + C unsigned length, uint8_t *dst, + C uint8_t *src) + .section ".text" .align 16 .proc 020 @@ -56,13 +67,59 @@ define(<FRAME_SIZE>, 136) PROLOGUE(_nettle_aes_encrypt) save %sp, -FRAME_SIZE, %sp - cmp length, 0 + cmp LENGTH, 0 be .Lend + nop .Lblock_loop: C Read src, and add initial subkey + add CTX, AES_KEYS, KEY + AES_LOAD(0, SRC, KEY, W0) + AES_LOAD(1, SRC, KEY, W1) + AES_LOAD(2, SRC, KEY, W2) + AES_LOAD(3, SRC, KEY, W3) + + add SRC, 16, SRC + add KEY, 16, KEY + + C Must be even, and includes the final round + ld [AES_NROUNDS + CTX], ROUND + srl ROUND, 1, ROUND + +.Lround_loop: + C Transform W -> T + AES_ROUND(0, T, W0, W1, W2, W3, KEY, T0) + AES_ROUND(1, T, W1, W2, W3, W0, KEY, T1) + AES_ROUND(2, T, W2, W3, W0, W1, KEY, T2) + AES_ROUND(3, T, W3, W0, W1, W2, KEY, T3) + + C Transform T -> W + AES_ROUND(4, T, T0, T1, T2, T3, KEY, W0) + AES_ROUND(5, T, T1, T2, T3, T0, KEY, W1) + AES_ROUND(6, T, T2, T3, T0, T1, KEY, W2) + AES_ROUND(7, T, T3, T0, T1, T2, KEY, W3) + + subcc ROUND, 1, ROUND + bne .Lround_loop + add KEY, 32, KEY + + C Penultimate round + AES_ROUND(0, T, W0, W1, W2, W3, KEY, T0) + AES_ROUND(1, T, W1, W2, W3, W0, KEY, T1) + AES_ROUND(2, T, W2, W3, W0, W1, KEY, T2) + AES_ROUND(3, T, W3, W0, W1, W2, KEY, T3) + + add KEY, 16, KEY + C Final round + AES_ROUND(0, T, T0, T1, T2, T3, KEY, DST) + AES_ROUND(1, T, T1, T2, T3, T0, KEY, DST) + AES_ROUND(2, T, T2, T3, T0, T1, KEY, DST) + AES_ROUND(3, T, T3, T0, T1, T2, KEY, DST) + + subcc LENGTH, 16, LENGTH + bne .Lblock_loop + add DST, 16, DST - .Lend: ret restore -- GitLab