From d5b7d9fab394a0aa522e0dac9ae5202b7a5b97e3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se>
Date: Mon, 17 Oct 2005 21:50:17 +0200
Subject: [PATCH] * sparc/arcfour-crypt.asm: Improved instruction scheduling.

Rev: src/nettle/sparc/arcfour-crypt.asm:1.3
---
 sparc/arcfour-crypt.asm | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/sparc/arcfour-crypt.asm b/sparc/arcfour-crypt.asm
index 9252a1a9..9cc19f7d 100644
--- a/sparc/arcfour-crypt.asm
+++ b/sparc/arcfour-crypt.asm
@@ -60,19 +60,19 @@ PROLOGUE(nettle_arcfour_crypt)
 	add	I, 1, I
 	and	I, 0xff, I
 	ldub	[CTX + I], SI
+	subcc	LENGTH,1,LENGTH
+	ldub	[SRC], TMP
 	add	J, SI, J
 	and	J, 0xff, J
 	ldub	[CTX + J], SJ
-	stb	SJ, [CTX + I]
+	add	SRC, 1, SRC
 	stb	SI, [CTX + J]
 	add	SI, SJ, SI
 	and	SI, 0xff, SI
+	stb	SJ, [CTX + I]	C Finish the swap before the lookup, the index SI may equal I
 	ldub	[CTX + SI], SI
-	ldub	[SRC], TMP
 	xor	TMP, SI, TMP
 	stb	TMP, [DST]
-	subcc	LENGTH,1,LENGTH
-	add	SRC, 1, SRC
 	bne	.Loop
 	add	DST, 1, DST
 
@@ -91,7 +91,11 @@ C Some stats from adriana.lysator.liu.se (SS1000$, 85 MHz), for AES 128
 
 C 1:	nettle-1.13 C-code
 C 2:	First working version of the assembler code
-	
+C 3:	Moved load of source byte
+C 4:	Better instruction scheduling
+
 C	MB/s	cycles/byte	Code size (bytes)
 C 1:	6.6	12.4		132
 C 2:	5.6	14.5		116
+C 3:	6.0	13.5		116
+C 4:	6.5	12.4		116
-- 
GitLab