From 280e0fda9f47b51dbda08730bb4eee27be73f50f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se>
Date: Sun, 24 Oct 2004 22:54:38 +0200
Subject: [PATCH] Reverted the latest two changes; update both src and dst
 pointers in the loop, and use plain addb when updating j. These two previous
 changes slowed the code down on AMD Duron.

Rev: src/nettle/x86/arcfour-crypt.asm:1.10
---
 x86/arcfour-crypt.asm | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/x86/arcfour-crypt.asm b/x86/arcfour-crypt.asm
index 43e4f7d3..74dd7763 100644
--- a/x86/arcfour-crypt.asm
+++ b/x86/arcfour-crypt.asm
@@ -47,7 +47,7 @@ C Register usage:
 C	%ebx = j
 C	%cl = si
 C	%ch = sj
-	
+
 	movl	24(%esp), %edx	C  length
 	testl	%edx,%edx
 	jz	.Lend
@@ -59,15 +59,15 @@ C Register usage:
 
 	movzbl	256(%ebp), %eax		C  i
 	movzbl	257(%ebp), %ebx		C  j
-	subl	%esi, %edi
 .Lloop:
 C	incb	%al
 	incl	%eax
 	andl	$0xff, %eax
 	movzbl	(%ebp, %eax), %ecx	C  si. Clears high bytes
-C	addb	%cl, %bl
-	addl	%ecx, %ebx
-	andl	$0xff, %ebx
+	addb	%cl, %bl
+C	The addl andl is preferable on PPro and PII, but slows things down on AMD Duron.
+C	addl	%ecx, %ebx
+C	andl	$0xff, %ebx
 	movb	(%ebp, %ebx), %ch	C  sj
 	movb	%ch, (%ebp, %eax)	C  S[i] = sj
 	movb	%cl, (%ebp, %ebx)	C  S[j] = si
@@ -76,8 +76,9 @@ C	addb	%cl, %bl
 C	for indexing.
 	movb	(%ebp, %ecx), %cl
 	xorb	(%esi), %cl
-	movb	%cl, (%esi,%edi)
 	incl	%esi
+	movb	%cl, (%edi)
+	incl	%edi
 	cmpl	%esi, %edx
 	jne	.Lloop
 
-- 
GitLab
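
For readers following the tradeoff in the commit message, below is a minimal C sketch of the byte-update loop the assembly implements. It assumes the context layout the asm relies on (a 256-byte S array followed by the i and j indices, matching the 256(%ebp)/257(%ebp) loads); the struct and function names here are illustrative, not nettle's exact API.

/* Minimal sketch of the arcfour (RC4) crypt loop above.
   rc4_sketch_ctx and rc4_sketch_crypt are hypothetical names
   used only for illustration. */

#include <stdint.h>
#include <stddef.h>

struct rc4_sketch_ctx {
  uint8_t S[256];   /* state array, at offset 0 in the asm's %ebp */
  uint8_t i;        /* loaded by movzbl 256(%ebp), %eax */
  uint8_t j;        /* loaded by movzbl 257(%ebp), %ebx */
};

static void
rc4_sketch_crypt(struct rc4_sketch_ctx *ctx, size_t length,
                 uint8_t *dst, const uint8_t *src)
{
  uint8_t i = ctx->i;
  uint8_t j = ctx->j;

  for (size_t n = 0; n < length; n++)
    {
      i++;                      /* incl %eax; andl $0xff, %eax */
      uint8_t si = ctx->S[i];   /* movzbl (%ebp, %eax), %ecx */
      j += si;                  /* the plain addb %cl, %bl variant */
      uint8_t sj = ctx->S[j];   /* movb (%ebp, %ebx), %ch */
      ctx->S[i] = sj;           /* swap S[i] and S[j] */
      ctx->S[j] = si;
      dst[n] = src[n] ^ ctx->S[(uint8_t)(si + sj)];
    }
  ctx->i = i;
  ctx->j = j;
}

The `j += si` on a uint8_t is exactly what `addb %cl, %bl` does; the reverted variant instead computed j in a full 32-bit register, `j = (j + si) & 0xff` (the addl/andl pair), which avoids a partial-register stall on PPro/PII but, per the commit message, measured slower on AMD Duron. Likewise, the reverted `subl %esi, %edi` trick kept dst as an offset from src so only one pointer had to be incremented per byte (`movb %cl, (%esi,%edi)`); this commit returns to incrementing both pointers, as in the C sketch's dst[n]/src[n] indexing.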