From b58a7f6647813d516659a26459c77f5a12158017 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se>
Date: Wed, 20 Oct 2004 16:30:39 +0200
Subject: [PATCH] (nettle_arcfour_crypt): Replaced addb -> addl + andl $0xff,
 improving speed on PPro by another 15%.

Rev: src/nettle/x86/arcfour-crypt.asm:1.8
---
 x86/arcfour-crypt.asm | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/x86/arcfour-crypt.asm b/x86/arcfour-crypt.asm
index 6b99cc57..7056c50b 100644
--- a/x86/arcfour-crypt.asm
+++ b/x86/arcfour-crypt.asm
@@ -33,11 +33,21 @@ nettle_arcfour_crypt:
 	pushl	%esi		C  4(%esp)
 	pushl	%edi		C  0(%esp)
 
+C Input arguments:
 	C ctx = 20(%esp)
 	C length = 24(%esp)
 	C dst = 28(%esp)
 	C src = 32(%esp)
-
+C Register usage:
+	C %ebp = ctx
+	C %esi = src (updated throughout loop)
+	C %edi = dst (updated throughout loop)
+	C %edx = src + length (end of source area)
+	C %eax = i
+	C %ebx = j
+	C %cl  = si
+	C %ch  = sj
+	
 	movl	24(%esp), %edx		C  length
 	testl	%edx,%edx
 	jz	.Lend
@@ -50,13 +60,16 @@ nettle_arcfour_crypt:
 	movzbl  256(%ebp), %eax		C  i
 	movzbl  257(%ebp), %ebx		C  j
 .Lloop:
+C	incb	%al
 	incl	%eax
 	andl	$0xff, %eax
 	movzbl  (%ebp, %eax), %ecx	C  si. Clears high bytes
-	addb    %cl, %bl
+C	addb    %cl, %bl
+	addl	%ecx, %ebx
+	andl	$0xff, %ebx
 	movb    (%ebp, %ebx), %ch	C  sj
 	movb    %ch, (%ebp, %eax)	C  S[i] = sj
-	movb	%cl, (%ebp, %ebx)	C  C[j] = si
+	movb	%cl, (%ebp, %ebx)	C  S[j] = si
 	addb    %ch, %cl
 	xorb    %ch, %ch		C  Clear, so it can be used
 					C  for indexing.
-- 
GitLab