Skip to content
Snippets Groups Projects
Commit 280e0fda authored by Niels Möller's avatar Niels Möller
Browse files

Reverted the latest two changes; update

both src and dst pointers in the loop, and use plain addb when
updating j. These two previous changes slowed the code down on AMD
Duron.

Rev: src/nettle/x86/arcfour-crypt.asm:1.10
parent 5fbaf49c
No related branches found
No related tags found
No related merge requests found
...@@ -59,15 +59,15 @@ C Register usage: ...@@ -59,15 +59,15 @@ C Register usage:
movzbl 256(%ebp), %eax C i movzbl 256(%ebp), %eax C i
movzbl 257(%ebp), %ebx C j movzbl 257(%ebp), %ebx C j
subl %esi, %edi
.Lloop: .Lloop:
C incb %al C incb %al
incl %eax incl %eax
andl $0xff, %eax andl $0xff, %eax
movzbl (%ebp, %eax), %ecx C si. Clears high bytes movzbl (%ebp, %eax), %ecx C si. Clears high bytes
C addb %cl, %bl addb %cl, %bl
addl %ecx, %ebx C The addl andl is preferable on PPro and PII, but slows things down on AMD Duron.
andl $0xff, %ebx C addl %ecx, %ebx
C andl $0xff, %ebx
movb (%ebp, %ebx), %ch C sj movb (%ebp, %ebx), %ch C sj
movb %ch, (%ebp, %eax) C S[i] = sj movb %ch, (%ebp, %eax) C S[i] = sj
movb %cl, (%ebp, %ebx) C S[j] = si movb %cl, (%ebp, %ebx) C S[j] = si
...@@ -76,8 +76,9 @@ C addb %cl, %bl ...@@ -76,8 +76,9 @@ C addb %cl, %bl
C for indexing. C for indexing.
movb (%ebp, %ecx), %cl movb (%ebp, %ecx), %cl
xorb (%esi), %cl xorb (%esi), %cl
movb %cl, (%esi,%edi)
incl %esi incl %esi
movb %cl, (%edi)
incl %edi
cmpl %esi, %edx cmpl %esi, %edx
jne .Lloop jne .Lloop
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment