From b076a849beeffa1fcb5fd6183f44d26c910fc8ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se>
Date: Thu, 16 May 2002 01:23:58 +0200
Subject: [PATCH] (_aes_crypt): Unrolled source loop.

Rev: src/nettle/sparc/aes.asm:1.111
---
 sparc/aes.asm | 98 +++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 95 insertions(+), 3 deletions(-)

diff --git a/sparc/aes.asm b/sparc/aes.asm
index 318b5ce0..e7d00774 100644
--- a/sparc/aes.asm
+++ b/sparc/aes.asm
@@ -63,6 +63,24 @@ define(t1, %o1)
 define(t2, %o2)
 define(t3, %o3)
 
+C AES_LOAD(i)
+C Read the little-endian input word at src + i, XOR it with the first subkey word at key + i, store the result at wtxt + i. Clobbers t0, t1 and t2.
+define(<AES_LOAD>, <
+	ldub	[src + $1 + 1], t1	! byte 1
+	ldub	[src + $1 + 2], t2	! byte 2
+	sll	t1, 8, t1
+	ldub	[src + $1], t0		! byte 0, least significant
+	sll	t2, 16, t2
+	or	t0, t1, t0
+	ldub	[src + $1 + 3], t1	! byte 3, most significant
+	or	t0, t2, t0
+	ld	[key + $1], t2		! subkey word
+	sll	t1, 24, t1
+	or	t0, t1, t0
+	xor	t0, t2, t0
+	st	t0, [wtxt + $1]
+	>)dnl
+
 C AES_ROUND(i)
 C Compute one word in the round function. 
 C Input in wtxt, output stored in tmp + i.
@@ -186,8 +204,80 @@ _aes_crypt:
 	! For stop condition. Note that src is incremented in the
 	! delay slot
 	add	src, 8, %g1
+
+	C AES_LOAD(0)	! i = 0
+	C AES_LOAD(4)	! i = 1
+	C AES_LOAD(8)	! i = 2
+	C AES_LOAD(12)	! i = 3
+	C add	src, 16, src
+			
+C .Lsource_loop:
+	C Begin loop
+	C i = 0
+	ldub	[src+3], t3
+	ldub	[src+2], t2
+	sll	t3, 24, t3
+	ldub	[src+1], t1
+	
+	sll	t2, 16, t2
+	or	t3, t2, t3
+	ldub	[src], t0
+	sll	t1, 8, t1
+	
+	! Get subkey
+	ld	[src+%g2], t2
+	or	t3, t1, t3
+	or	t3, t0, t3
+	xor	t3, t2, t3
+	
+	C cmp	src, %g1
+	st	t3, [src+%g3]
+	C bleu	.Lsource_loop
+	add	src, 4, src
+
+	C i = 1
+	ldub	[src+3], t3
+	ldub	[src+2], t2
+	sll	t3, 24, t3
+	ldub	[src+1], t1
+	
+	sll	t2, 16, t2
+	or	t3, t2, t3
+	ldub	[src], t0
+	sll	t1, 8, t1
+	
+	! Get subkey
+	ld	[src+%g2], t2
+	or	t3, t1, t3
+	or	t3, t0, t3
+	xor	t3, t2, t3
+	
+	C cmp	src, %g1
+	st	t3, [src+%g3]
+	C bleu	.Lsource_loop
+	add	src, 4, src
+	C i = 2
+	ldub	[src+3], t3
+	ldub	[src+2], t2
+	sll	t3, 24, t3
+	ldub	[src+1], t1
+	
+	sll	t2, 16, t2
+	or	t3, t2, t3
+	ldub	[src], t0
+	sll	t1, 8, t1
 	
-.Lsource_loop:
+	! Get subkey
+	ld	[src+%g2], t2
+	or	t3, t1, t3
+	or	t3, t0, t3
+	xor	t3, t2, t3
+	
+	C cmp	src, %g1
+	st	t3, [src+%g3]
+	C bleu	.Lsource_loop
+	add	src, 4, src
+	C i = 3
 	ldub	[src+3], t3
 	ldub	[src+2], t2
 	sll	t3, 24, t3
@@ -204,10 +294,12 @@ _aes_crypt:
 	or	t3, t0, t3
 	xor	t3, t2, t3
 	
-	cmp	src, %g1
+	C cmp	src, %g1
 	st	t3, [src+%g3]
-	bleu	.Lsource_loop
+	C bleu	.Lsource_loop
 	add	src, 4, src
+
+	C End loop
 	
 	sub	nrounds, 1, round
 	add	ctx, 16, key
-- 
GitLab