From 35a46d53bec9efeec923dcee95fde38323020772 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se>
Date: Wed, 15 May 2002 22:32:26 +0200
Subject: [PATCH] * sparc/aes.asm (_aes_crypt): Unrolled final loop, preparing
 for optimizations. (_aes_crypt): Eliminated i from first copy of the loop.
 Some cleanup.

Rev: src/nettle/sparc/aes.asm:1.87
---
 sparc/aes.asm | 29 ++++++++++-------------------
 1 file changed, 10 insertions(+), 19 deletions(-)

diff --git a/sparc/aes.asm b/sparc/aes.asm
index 6731c259..c2bd6764 100644
--- a/sparc/aes.asm
+++ b/sparc/aes.asm
@@ -298,17 +298,16 @@ define(i, round)
 	! NOTE: First instruction duplicated in delay slot
 
 	C i = 0
-	ld	[IDX1+i], t1 	! 1
-C .Lfinal_loop:
-	! IDX2(j) = j XOR 2
-	xor	i, 8, t2
+	ld	[IDX1+0], t1 	! 1
+	C xor	i, 8, t2
 	add	wtxt, t1, t1	! 1
 	ldub	[t1+2], t1	! 1
 
-	ld	[wtxt+i], t0	! 0
-	lduh	[wtxt+t2], t2	! 2
+	ld	[wtxt+0], t0	! 0
+	! IDX2(j) = j XOR 2
+	lduh	[wtxt+8], t2	! 2
 	and	t0, 255, t0	! 0
-	ld	[IDX3 + i], t3	! 3
+	ld	[IDX3 + 0], t3	! 3
 	
 	and	t2, 255, t2	! 2
 	ldub	[T+t1], t1	! 1
@@ -322,13 +321,12 @@ C .Lfinal_loop:
 	
 	sll	t2, 16, t2	! 2
 	or	t0, t2, t0	! 0, 1, 2
-	ld	[key + i], t2
+	ld	[key + 0], t2
 	sll	t3, 24, t3	! 3
 	
 	or	t0, t3, t0	! 0, 1, 2, 3
 	xor	t0, t2, t0
 	add	i, 4, i
-	cmp	i, 12
 	
 	srl	t0, 24, t3
 	srl	t0, 16, t2
@@ -340,12 +338,11 @@ C .Lfinal_loop:
 	stb	t0, [dst]
 	add	dst, 4, dst
 	
-	C bleu	.Lfinal_loop
 	ld	[IDX1+i], t1 	! 1
 
 	C i = 1
 	ld	[IDX1+i], t1 	! 1
-C .Lfinal_loop:
+
 	! IDX2(j) = j XOR 2
 	xor	i, 8, t2
 	add	wtxt, t1, t1	! 1
@@ -374,7 +371,6 @@ C .Lfinal_loop:
 	or	t0, t3, t0	! 0, 1, 2, 3
 	xor	t0, t2, t0
 	add	i, 4, i
-	cmp	i, 12
 	
 	srl	t0, 24, t3
 	srl	t0, 16, t2
@@ -386,12 +382,11 @@ C .Lfinal_loop:
 	stb	t0, [dst]
 	add	dst, 4, dst
 	
-	C bleu	.Lfinal_loop
 	ld	[IDX1+i], t1 	! 1
 
 	C i = 2
 	ld	[IDX1+i], t1 	! 1
-C .Lfinal_loop:
+	
 	! IDX2(j) = j XOR 2
 	xor	i, 8, t2
 	add	wtxt, t1, t1	! 1
@@ -420,7 +415,6 @@ C .Lfinal_loop:
 	or	t0, t3, t0	! 0, 1, 2, 3
 	xor	t0, t2, t0
 	add	i, 4, i
-	cmp	i, 12
 	
 	srl	t0, 24, t3
 	srl	t0, 16, t2
@@ -432,12 +426,11 @@ C .Lfinal_loop:
 	stb	t0, [dst]
 	add	dst, 4, dst
 	
-	C bleu	.Lfinal_loop
 	ld	[IDX1+i], t1 	! 1
 
 	C i = 3
 	ld	[IDX1+i], t1 	! 1
-C .Lfinal_loop:
+
 	! IDX2(j) = j XOR 2
 	xor	i, 8, t2
 	add	wtxt, t1, t1	! 1
@@ -466,7 +459,6 @@ C .Lfinal_loop:
 	or	t0, t3, t0	! 0, 1, 2, 3
 	xor	t0, t2, t0
 	add	i, 4, i
-	cmp	i, 12
 	
 	srl	t0, 24, t3
 	srl	t0, 16, t2
@@ -478,7 +470,6 @@ C .Lfinal_loop:
 	stb	t0, [dst]
 	add	dst, 4, dst
 	
-	C bleu	.Lfinal_loop
 	ld	[IDX1+i], t1 	! 1
 			
 	C Unrolled final loop ends
-- 
GitLab