From 7767751692ab8079c0fb43577ca0fa63e7ea2d3a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se>
Date: Sun, 16 Oct 2005 00:58:32 +0200
Subject: [PATCH] Implemented. Not yet working, and not optimized.

Rev: src/nettle/sparc/aes-encrypt-internal.asm:1.3
---
 sparc/aes-encrypt-internal.asm | 87 ++++++++++++++++++++++++++++------
 1 file changed, 72 insertions(+), 15 deletions(-)

diff --git a/sparc/aes-encrypt-internal.asm b/sparc/aes-encrypt-internal.asm
index 8f87c42f..65a2350a 100644
--- a/sparc/aes-encrypt-internal.asm
+++ b/sparc/aes-encrypt-internal.asm
@@ -20,23 +20,27 @@ C MA 02111-1307, USA.
 
 
 C Arguments
-define(CTX,	%i0)
-define(T,	%i1)
-define(LENGTH,	%i2)
-define(DST,	%i3)
-define(SRC,	%i4)
+define(<CTX>,	<%i0>)
+define(<T>,	<%i1>)
+define(<LENGTH>,<%i2>)
+define(<DST>,	<%i3>)
+define(<SRC>,	<%i4>)
 
 C AES state, two copies for unrolling
 
-define(W0,	%l0)
-define(W1,	%l1)
-define(W2,	%l2)
-define(W3,	%l3)
+define(<W0>,	<%l0>)
+define(<W1>,	<%l1>)
+define(<W2>,	<%l2>)
+define(<W3>,	<%l3>)
 
-define(T0,	%l4)
-define(T1,	%l5)
-define(T2,	%l6)
-define(T3,	%l7)
+define(<T0>,	<%l4>)
+define(<T1>,	<%l5>)
+define(<T2>,	<%l6>)
+define(<T3>,	<%l7>)
+
+C %o0 and %o1 are TMP1 and TMP2
+define(<KEY>,	<%o4>)
+define(<ROUND>, <%o5>)
 
 C Registers %g1-%g3 and %o0 - %o5 are free to use.
 
@@ -49,6 +53,13 @@ C %fp -  40: wtxt, uint32_t[4]
 C %fp - 136: OS register save area. 
 define(<FRAME_SIZE>, 136)
 
+	.file "aes-encrypt-internal.asm"
+
+	C _aes_encrypt(struct aes_context *ctx, 
+	C	       const struct aes_table *T,
+	C	       unsigned length, uint8_t *dst,
+	C	       uint8_t *src)
+
 	.section	".text"
 	.align 16
 	.proc	020
@@ -56,13 +67,69 @@ define(<FRAME_SIZE>, 136)
 PROLOGUE(_nettle_aes_encrypt)
 
 	save	%sp, -FRAME_SIZE, %sp
-	cmp	length, 0
+	C	Skip everything when LENGTH is zero
+	cmp	LENGTH, 0
 	be	.Lend
+	C	Branch delay slot
+	nop
 
 .Lblock_loop:
 	C  Read src, and add initial subkey
+	add	CTX, AES_KEYS, KEY
+	AES_LOAD(0, SRC, KEY, W0)
+	AES_LOAD(1, SRC, KEY, W1)
+	AES_LOAD(2, SRC, KEY, W2)
+	AES_LOAD(3, SRC, KEY, W3)
+
+	add	SRC, 16, SRC
+	add	KEY, 16, KEY
+
+	C	Must be even, and includes the final round
+	ld	[AES_NROUNDS + CTX], ROUND
+	srl	ROUND, 1, ROUND
+	C	Each loop iteration does two rounds. The last two rounds are
+	C	handled separately after the loop, so iterate nrounds/2 - 1 times.
+	sub	ROUND, 1, ROUND
+
+.Lround_loop:
+	C	Transform W -> T
+	AES_ROUND(0, T, W0, W1, W2, W3, KEY, T0)
+	AES_ROUND(1, T, W1, W2, W3, W0, KEY, T1)
+	AES_ROUND(2, T, W2, W3, W0, W1, KEY, T2)
+	AES_ROUND(3, T, W3, W0, W1, W2, KEY, T3)
+
+	C	Transform T -> W
+	AES_ROUND(4, T, T0, T1, T2, T3, KEY, W0)
+	AES_ROUND(5, T, T1, T2, T3, T0, KEY, W1)
+	AES_ROUND(6, T, T2, T3, T0, T1, KEY, W2)
+	AES_ROUND(7, T, T3, T0, T1, T2, KEY, W3)
+
+	subcc	ROUND, 1, ROUND
+	bne	.Lround_loop
+	C	Delay slot, executed on every iteration: advance KEY by 32 bytes
+	add	KEY, 32, KEY
+
+	C	Penultimate round
+	AES_ROUND(0, T, W0, W1, W2, W3, KEY, T0)
+	AES_ROUND(1, T, W1, W2, W3, W0, KEY, T1)
+	AES_ROUND(2, T, W2, W3, W0, W1, KEY, T2)
+	AES_ROUND(3, T, W3, W0, W1, W2, KEY, T3)
+
+	add	KEY, 16, KEY
+	C	Final round
+	C	FIXME(review): these invocations pass DST, the output pointer, as
+	C	the AES_ROUND result operand. Presumably a dedicated final-round
+	C	macro that stores the block at DST is needed; confirm in machine.m4.
+	AES_ROUND(0, T, T0, T1, T2, T3, KEY, DST)
+	AES_ROUND(1, T, T1, T2, T3, T0, KEY, DST)
+	AES_ROUND(2, T, T2, T3, T0, T1, KEY, DST)
+	AES_ROUND(3, T, T3, T0, T1, T2, KEY, DST)
+
+	subcc	LENGTH, 16, LENGTH
+	bne	.Lblock_loop
+	C	Delay slot, executed on every iteration: advance DST by one block
+	add	DST, 16, DST
 
-	
 .Lend:
 	ret
 	restore
-- 
GitLab