From d4494c3e79b25cf63c2256a030da112f3413ed2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se> Date: Wed, 19 Oct 2005 10:00:06 +0200 Subject: [PATCH] Fixed bug, spotted by Mikael Kalms. We must order the store at [CTX+I] before the load of [CTX+SI+SJ]. Rev: src/nettle/sparc/arcfour-crypt.asm:1.6 --- sparc/arcfour-crypt.asm | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/sparc/arcfour-crypt.asm b/sparc/arcfour-crypt.asm index 0dd9e363..0c24d254 100644 --- a/sparc/arcfour-crypt.asm +++ b/sparc/arcfour-crypt.asm @@ -21,7 +21,7 @@ C MA 02111-1307, USA. C Define to YES, to enable the complex code to special case SRC C and DST with compatible alignment. -define(<WITH_ALIGN>, <NO>) +define(<WITH_ALIGN>, <YES>) C Registers @@ -54,8 +54,8 @@ $2: stb SI, [CTX + J] add SI, SJ, SI and SI, 0xff, SI - ldub [CTX + SI], SI stb SJ, [CTX + I] + ldub [CTX + SI], SI xor TMP, SI, TMP stb TMP, [DST] bne $2 @@ -77,8 +77,8 @@ $2: stb SI, [CTX + J] add SI, SJ, SI and SI, 0xff, SI - ldub [CTX + SI], TMP stb SJ, [CTX + I] + ldub [CTX + SI], TMP add I, 1, I and I, 0xff, I @@ -90,9 +90,9 @@ $2: stb SI, [CTX + J] add SI, SJ, SI and SI, 0xff, SI + stb SJ, [CTX + I] ldub [CTX + SI], SI sll TMP, 8, TMP - stb SJ, [CTX + I] or TMP, SI, TMP add I, 1, I @@ -105,9 +105,9 @@ $2: stb SI, [CTX + J] add SI, SJ, SI and SI, 0xff, SI + stb SJ, [CTX + I] ldub [CTX + SI], SI sll TMP, 8, TMP - stb SJ, [CTX + I] or TMP, SI, TMP add I, 1, I @@ -120,9 +120,9 @@ $2: stb SI, [CTX + J] add SI, SJ, SI and SI, 0xff, SI + stb SJ, [CTX + I] ldub [CTX + SI], SI sll TMP, 8, TMP - stb SJ, [CTX + I] or TMP, SI, TMP xor WORD, TMP, WORD st WORD, [DST] @@ -211,6 +211,7 @@ C 2: First working version of the assembler code C 3: Moved load of source byte C 4: Better instruction scheduling C 5: Special case SRC and DST with compatible alignment +C 6: After bugfix (reorder of ld [CTX+SI+SJ] and st [CTX + I]) C MB/s cycles/byte Code size (bytes) C 1: 6.6 12.4 132 @@ 
-218,3 +219,4 @@ C 2: 5.6 14.5 116 C 3: 6.0 13.5 116 C 4: 6.5 12.4 116 C 5: 7.9 10.4 496 +C 6: 8.3 9.7 496 -- GitLab