From 73bdcbe9d3b55c97f54820c1174ef8666c83be45 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se>
Date: Sun, 7 Aug 2022 20:34:12 +0200
Subject: [PATCH] Delete all arcfour assembly code

---
 ChangeLog                 |  10 ++
 Makefile.in               |   2 +-
 arcfour-crypt.c           |  61 ----------
 arcfour.c                 |  21 ++++
 asm.m4                    |   6 -
 configure.ac              |   2 +-
 sparc32/arcfour-crypt.asm | 242 --------------------------------------
 sparc64/arcfour-crypt.asm | 229 ------------------------------------
 x86/arcfour-crypt.asm     | 125 --------------------
 9 files changed, 33 insertions(+), 665 deletions(-)
 delete mode 100644 arcfour-crypt.c
 delete mode 100644 sparc32/arcfour-crypt.asm
 delete mode 100644 sparc64/arcfour-crypt.asm
 delete mode 100644 x86/arcfour-crypt.asm

diff --git a/ChangeLog b/ChangeLog
index e7dbb612..7ce9d354 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2022-08-07  Niels Möller  <nisse@lysator.liu.se>
+
+	Delete all arcfour assembly code.
+	* arcfour.c (arcfour_crypt): Moved function here, from...
+	* arcfour-crypt.c: ... deleted file.
+	* sparc32/arcfour-crypt.asm, sparc64/arcfour-crypt.asm,
+	x86/arcfour-crypt.asm: Deleted.
+	* asm.m4: Delete arcfour structure offsets.
+	* Makefile.in, configure.ac: Drop references to deleted files.
+
 2022-08-05  Niels Möller  <nisse@lysator.liu.se>
 
 	* nettle-internal.h: Include stdlib.h, fix alloca warnings on BSD.
diff --git a/Makefile.in b/Makefile.in
index 11c88114..4b4672fa 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -81,7 +81,7 @@ nettle_SOURCES = aes-decrypt-internal.c aes-decrypt.c aes-decrypt-table.c \
 		 aes256-set-encrypt-key.c aes256-set-decrypt-key.c \
 		 aes256-meta.c \
 		 nist-keywrap.c \
-		 arcfour.c arcfour-crypt.c \
+		 arcfour.c \
 		 arctwo.c arctwo-meta.c blowfish.c blowfish-bcrypt.c \
 		 base16-encode.c base16-decode.c base16-meta.c \
 		 base64-encode.c base64-decode.c base64-meta.c \
diff --git a/arcfour-crypt.c b/arcfour-crypt.c
deleted file mode 100644
index 215c556a..00000000
--- a/arcfour-crypt.c
+++ /dev/null
@@ -1,61 +0,0 @@
-/* arcfour-crypt.c
-
-   The arcfour/rc4 stream cipher.
-
-   Copyright (C) 2001, 2004 Niels Möller
-
-   This file is part of GNU Nettle.
-
-   GNU Nettle is free software: you can redistribute it and/or
-   modify it under the terms of either:
-
-     * the GNU Lesser General Public License as published by the Free
-       Software Foundation; either version 3 of the License, or (at your
-       option) any later version.
-
-   or
-
-     * the GNU General Public License as published by the Free
-       Software Foundation; either version 2 of the License, or (at your
-       option) any later version.
-
-   or both in parallel, as here.
-
-   GNU Nettle is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received copies of the GNU General Public License and
-   the GNU Lesser General Public License along with this program.  If
-   not, see http://www.gnu.org/licenses/.
-*/
-
-#if HAVE_CONFIG_H
-# include "config.h"
-#endif
-
-#include <assert.h>
-
-#include "arcfour.h"
-
-void
-arcfour_crypt(struct arcfour_ctx *ctx,
-	      size_t length, uint8_t *dst,
-	      const uint8_t *src)
-{
-  register uint8_t i, j;
-  register int si, sj;
-
-  i = ctx->i; j = ctx->j;
-  while(length--)
-    {
-      i++; i &= 0xff;
-      si = ctx->S[i];
-      j += si; j &= 0xff;
-      sj = ctx->S[i] = ctx->S[j];
-      ctx->S[j] = si;
-      *dst++ = *src++ ^ ctx->S[ (si + sj) & 0xff ];
-    }
-  ctx->i = i; ctx->j = j;
-}
diff --git a/arcfour.c b/arcfour.c
index 87f4959f..795a2d77 100644
--- a/arcfour.c
+++ b/arcfour.c
@@ -69,3 +69,24 @@ arcfour128_set_key(struct arcfour_ctx *ctx, const uint8_t *key)
 {
   arcfour_set_key (ctx, ARCFOUR128_KEY_SIZE, key);
 }
+
+void
+arcfour_crypt(struct arcfour_ctx *ctx,   /* cipher state: S, i and j are updated */
+	      size_t length, uint8_t *dst,  /* byte count; output buffer */
+	      const uint8_t *src)           /* input buffer */
+{ /* XOR LENGTH bytes of SRC with the generated key stream, storing to DST. */
+  register uint8_t i, j;   /* working copies of the two state indices */
+  register int si, sj;     /* the S[i] and S[j] values being exchanged */
+
+  i = ctx->i; j = ctx->j;  /* resume where the previous call stopped */
+  while(length--)          /* one key stream byte per input byte */
+    {
+      i++; i &= 0xff;              /* advance i; mask is a no-op for uint8_t */
+      si = ctx->S[i];
+      j += si; j &= 0xff;          /* j = (j + S[i]) mod 256 */
+      sj = ctx->S[i] = ctx->S[j];  /* swap S[i] and S[j], keeping both values */
+      ctx->S[j] = si;
+      *dst++ = *src++ ^ ctx->S[ (si + sj) & 0xff ];  /* key byte is S[(si + sj) mod 256] */
+    }
+  ctx->i = i; ctx->j = j;  /* save the indices for the next call */
+}
diff --git a/asm.m4 b/asm.m4
index 60c66c25..2bb1068c 100644
--- a/asm.m4
+++ b/asm.m4
@@ -73,12 +73,6 @@ define(`UCHAR', `STRUCT(`$1', 1)')dnl
 dnl UNSIGNED(name)
 define(`UNSIGNED', `STRUCT(`$1', 4)')dnl
 
-dnl Offsets in arcfour_ctx
-STRUCTURE(ARCFOUR)
-  STRUCT(S, 256)
-  UCHAR(I)
-  UCHAR(J)
-
 dnl Offsets in aes_table
 define(AES_SBOX_SIZE,	256)dnl
 define(AES_TABLE_SIZE,	1024)dnl
diff --git a/configure.ac b/configure.ac
index 7a046f11..73ce5764 100644
--- a/configure.ac
+++ b/configure.ac
@@ -591,7 +591,7 @@ asm_replace_list="aes-encrypt-internal.asm aes-decrypt-internal.asm \
 		aes256-encrypt.asm aes256-decrypt.asm \
 		cbc-aes128-encrypt.asm cbc-aes192-encrypt.asm \
 		cbc-aes256-encrypt.asm \
-		arcfour-crypt.asm camellia-crypt-internal.asm \
+		camellia-crypt-internal.asm \
 		md5-compress.asm memxor.asm memxor3.asm \
 		ghash-set-key.asm ghash-update.asm \
 		poly1305-internal.asm \
diff --git a/sparc32/arcfour-crypt.asm b/sparc32/arcfour-crypt.asm
deleted file mode 100644
index 0093e550..00000000
--- a/sparc32/arcfour-crypt.asm
+++ /dev/null
@@ -1,242 +0,0 @@
-C sparc32/arcfour-crypt.asm
-
-ifelse(`
-   Copyright (C) 2002, 2005 Niels Möller
-
-   This file is part of GNU Nettle.
-
-   GNU Nettle is free software: you can redistribute it and/or
-   modify it under the terms of either:
-
-     * the GNU Lesser General Public License as published by the Free
-       Software Foundation; either version 3 of the License, or (at your
-       option) any later version.
-
-   or
-
-     * the GNU General Public License as published by the Free
-       Software Foundation; either version 2 of the License, or (at your
-       option) any later version.
-
-   or both in parallel, as here.
-
-   GNU Nettle is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received copies of the GNU General Public License and
-   the GNU Lesser General Public License along with this program.  If
-   not, see http://www.gnu.org/licenses/.
-')
-
-C	Define to YES, to enable the complex code to special case SRC
-C	and DST with compatible alignment.
-	
-define(`WITH_ALIGN', `YES')
-
-C	Registers
-
-define(`CTX',	`%i0')
-define(`LENGTH',`%i1')
-define(`DST',	`%i2')
-define(`SRC',	`%i3')
-
-define(`I1',	`%i4')
-define(`I2',	`%i5')
-define(`J',	`%g1')
-define(`SI',	`%g2')
-define(`SJ',	`%g3')
-define(`TMP',	`%o0')
-define(`TMP2',	`%o1')
-define(`N',	`%o2')
-define(`DATA',	`%o3')
-
-C	Computes the next byte of the key stream. As input, i must
-C	already point to the index for the current access, the index
-C	for the next access is stored in ni. The resulting key byte is
-C	stored in res.
-C	ARCFOUR_BYTE(i, ni, res)
-define(`ARCFOUR_BYTE', `
-	ldub	[CTX + $1], SI
-	add	$1, 1, $2
-	add	J, SI, J
-	and	J, 0xff, J
-	ldub	[CTX + J], SJ
-	and	$2, 0xff, $2
-	stb	SI, [CTX + J]
-	add	SI, SJ, SI
-	and	SI, 0xff, SI
-	stb	SJ, [CTX + $1]
-	ldub	[CTX + SI], $3
-')dnl
-			
-C	FIXME: Consider using the callers window
-define(`FRAME_SIZE', 104)
-
-	.file "arcfour-crypt.asm"
-
-	C arcfour_crypt(struct arcfour_ctx *ctx,
-	C               size_t length, uint8_t *dst,
-	C               const uint8_t *src)
-
-	.section	".text"
-	.align 16
-	.proc	020
-	
-PROLOGUE(nettle_arcfour_crypt)
-
-	save	%sp, -FRAME_SIZE, %sp
-	cmp	LENGTH, 0
-	be	.Lend
-	nop
-	
-	C	Load both I and J
-	lduh	[CTX + ARCFOUR_I], I1
-	and	I1, 0xff, J
-	srl	I1, 8, I1
-
-	C	We want an even address for DST
-	andcc	DST, 1, %g0
-	add	I1, 1 ,I1
-	beq	.Laligned2
-	and	I1, 0xff, I1
-
-	mov	I1, I2
-	ldub	[SRC], DATA
-	ARCFOUR_BYTE(I2, I1, TMP)
-	subcc	LENGTH, 1, LENGTH
-	add	SRC, 1, SRC
-	xor	DATA, TMP, DATA
-	stb	DATA, [DST]
-	beq	.Ldone
-	add	DST, 1, DST
-
-.Laligned2:
-
-	cmp	LENGTH, 2
-	blu	.Lfinal1
-	C	Harmless delay slot instruction	
-	andcc	DST, 2, %g0
-	beq	.Laligned4
-	nop
-
-	ldub	[SRC], DATA
-	ARCFOUR_BYTE(I1, I2, TMP)
-	ldub	[SRC + 1], TMP2
-	add	SRC, 2, SRC
-	xor	DATA, TMP, DATA
-	sll	DATA, 8, DATA	
-
-	ARCFOUR_BYTE(I2, I1, TMP)
-	xor	TMP2, TMP, TMP
-	subcc	LENGTH, 2, LENGTH
-	or	DATA, TMP, DATA
-
-	sth	DATA, [DST]
-	beq	.Ldone
-	add	DST, 2, DST
-	
-.Laligned4:
-	cmp	LENGTH, 4
-	blu	.Lfinal2
-	C	Harmless delay slot instruction
-	srl	LENGTH, 2, N
-	
-.Loop:
-	C	Main loop, with aligned writes
-	
-	C	FIXME: Could check if SRC is aligned, and
-	C	use 32-bit reads in that case.
-
-	ldub	[SRC], DATA
-	ARCFOUR_BYTE(I1, I2, TMP)
-	ldub	[SRC + 1], TMP2
-	xor	TMP, DATA, DATA
-	sll	DATA, 8, DATA
-
-	ARCFOUR_BYTE(I2, I1, TMP)
-	xor	TMP2, TMP, TMP
-	ldub	[SRC + 2], TMP2
-	or	TMP, DATA, DATA
-	sll	DATA, 8, DATA
-
-	ARCFOUR_BYTE(I1, I2, TMP)
-	xor	TMP2, TMP, TMP
-	ldub	[SRC + 3], TMP2
-	or	TMP, DATA, DATA
-	sll	DATA, 8, DATA
-
-	ARCFOUR_BYTE(I2, I1, TMP)
-	xor	TMP2, TMP, TMP
-	or	TMP, DATA, DATA
-	subcc	N, 1, N
-	add	SRC, 4, SRC
-	st	DATA, [DST]
-	bne	.Loop
-	add	DST, 4, DST
-	
-	andcc	LENGTH, 3, LENGTH
-	beq	.Ldone
-	nop
-
-.Lfinal2:
-	C	DST address must be 2-aligned
-	cmp	LENGTH, 2
-	blu	.Lfinal1
-	nop
-
-	ldub	[SRC], DATA
-	ARCFOUR_BYTE(I1, I2, TMP)
-	ldub	[SRC + 1], TMP2
-	add	SRC, 2, SRC
-	xor	DATA, TMP, DATA
-	sll	DATA, 8, DATA	
-
-	ARCFOUR_BYTE(I2, I1, TMP)
-	xor	TMP2, TMP, TMP
-	or	DATA, TMP, DATA
-
-	sth	DATA, [DST]
-	beq	.Ldone
-	add	DST, 2, DST
-
-.Lfinal1:
-	mov	I1, I2
-	ldub	[SRC], DATA
-	ARCFOUR_BYTE(I2, I1, TMP)
-	xor	DATA, TMP, DATA
-	stb	DATA, [DST]
-
-.Ldone:
-	C	Save back I and J
-	sll	I2, 8, I2
-	or	I2, J, I2
-	stuh	I2, [CTX + ARCFOUR_I]
-
-.Lend:
-	ret
-	restore
-
-EPILOGUE(nettle_arcfour_crypt)
-
-C Some stats from adriana.lysator.liu.se (SS1000E, 85 MHz), for AES 128
-
-C 1:	nettle-1.13 C-code
-C 2:	First working version of the assembler code
-C 3:	Moved load of source byte
-C 4:	Better instruction scheduling
-C 5:	Special case SRC and DST with compatible alignment
-C 6:	After bugfix (reorder of ld [CTX+SI+SJ] and st [CTX + SI])
-C 7:	Unrolled only twice, with byte-accesses
-C 8:	Unrolled, using 8-bit reads and aligned 32-bit writes.
-
-C	MB/s	cycles/byte	Code size (bytes)
-C 1:	6.6	12.4		132
-C 2:	5.6	14.5		116
-C 3:	6.0	13.5		116
-C 4:	6.5	12.4		116
-C 5:	7.9	10.4		496
-C 6:	8.3	9.7		496
-C 7:	6.7	12.1		268
-C 8:	8.3	9.8		768
diff --git a/sparc64/arcfour-crypt.asm b/sparc64/arcfour-crypt.asm
deleted file mode 100644
index ffb5edf4..00000000
--- a/sparc64/arcfour-crypt.asm
+++ /dev/null
@@ -1,229 +0,0 @@
-C sparc64/arcfour-crypt.asm
-
-ifelse(`
-   Copyright (C) 2002, 2005 Niels Möller
-
-   This file is part of GNU Nettle.
-
-   GNU Nettle is free software: you can redistribute it and/or
-   modify it under the terms of either:
-
-     * the GNU Lesser General Public License as published by the Free
-       Software Foundation; either version 3 of the License, or (at your
-       option) any later version.
-
-   or
-
-     * the GNU General Public License as published by the Free
-       Software Foundation; either version 2 of the License, or (at your
-       option) any later version.
-
-   or both in parallel, as here.
-
-   GNU Nettle is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received copies of the GNU General Public License and
-   the GNU Lesser General Public License along with this program.  If
-   not, see http://www.gnu.org/licenses/.
-')
-
-C	Define to YES, to enable the complex code to special case SRC
-C	and DST with compatible alignment.
-	
-define(`WITH_ALIGN', `YES')
-
-C	Registers
-
-define(`CTX',	`%i0')
-define(`LENGTH',`%i1')
-define(`DST',	`%i2')
-define(`SRC',	`%i3')
-
-define(`I1',	`%i4')
-define(`I2',	`%i5')
-define(`J',	`%g1')
-define(`SI',	`%g2')
-define(`SJ',	`%g3')
-define(`TMP',	`%o0')
-define(`TMP2',	`%o1')
-define(`N',	`%o2')
-define(`DATA',	`%o3')
-
-C	Computes the next byte of the key stream. As input, i must
-C	already point to the index for the current access, the index
-C	for the next access is stored in ni. The resulting key byte is
-C	stored in res.
-C	ARCFOUR_BYTE(i, ni, res)
-define(`ARCFOUR_BYTE', `
-	ldub	[CTX + $1], SI
-	add	$1, 1, $2
-	add	J, SI, J
-	and	J, 0xff, J
-	ldub	[CTX + J], SJ
-	and	$2, 0xff, $2
-	stb	SI, [CTX + J]
-	add	SI, SJ, SI
-	and	SI, 0xff, SI
-	stb	SJ, [CTX + $1]
-	ldub	[CTX + SI], $3
-')dnl
-			
-define(`FRAME_SIZE', 192)
-
-	.file "arcfour-crypt.asm"
-
-	C arcfour_crypt(struct arcfour_ctx *ctx,
-	C               size_t length, uint8_t *dst,
-	C               const uint8_t *src)
-
-	.section	".text"
-	.align 16
-	.proc	020
-	
-PROLOGUE(nettle_arcfour_crypt)
-
-	save	%sp, -FRAME_SIZE, %sp
-	cmp	LENGTH, 0
-	be	.Lend
-	nop
-	
-	C	Load both I and J
-	lduh	[CTX + ARCFOUR_I], I1
-	and	I1, 0xff, J
-	srl	I1, 8, I1
-
-	C	We want an even address for DST
-	andcc	DST, 1, %g0
-	add	I1, 1 ,I1
-	beq	.Laligned2
-	and	I1, 0xff, I1
-
-	mov	I1, I2
-	ldub	[SRC], DATA
-	ARCFOUR_BYTE(I2, I1, TMP)
-	subcc	LENGTH, 1, LENGTH
-	add	SRC, 1, SRC
-	xor	DATA, TMP, DATA
-	stb	DATA, [DST]
-	beq	.Ldone
-	add	DST, 1, DST
-
-.Laligned2:
-
-	cmp	LENGTH, 2
-	blu	.Lfinal1
-	C	Harmless delay slot instruction	
-	andcc	DST, 2, %g0
-	beq	.Laligned4
-	nop
-
-	ldub	[SRC], DATA
-	ARCFOUR_BYTE(I1, I2, TMP)
-	ldub	[SRC + 1], TMP2
-	add	SRC, 2, SRC
-	xor	DATA, TMP, DATA
-	sll	DATA, 8, DATA	
-
-	ARCFOUR_BYTE(I2, I1, TMP)
-	xor	TMP2, TMP, TMP
-	subcc	LENGTH, 2, LENGTH
-	or	DATA, TMP, DATA
-
-	sth	DATA, [DST]
-	beq	.Ldone
-	add	DST, 2, DST
-	
-.Laligned4:
-	cmp	LENGTH, 4
-	blu	.Lfinal2
-	C	Harmless delay slot instruction
-	srl	LENGTH, 2, N
-	
-.Loop:
-	C	Main loop, with aligned writes
-	
-	C	FIXME: Could check if SRC is aligned, and
-	C	use 32-bit reads in that case.
-
-	ldub	[SRC], DATA
-	ARCFOUR_BYTE(I1, I2, TMP)
-	ldub	[SRC + 1], TMP2
-	xor	TMP, DATA, DATA
-	sll	DATA, 8, DATA
-
-	ARCFOUR_BYTE(I2, I1, TMP)
-	xor	TMP2, TMP, TMP
-	ldub	[SRC + 2], TMP2
-	or	TMP, DATA, DATA
-	sll	DATA, 8, DATA
-
-	ARCFOUR_BYTE(I1, I2, TMP)
-	xor	TMP2, TMP, TMP
-	ldub	[SRC + 3], TMP2
-	or	TMP, DATA, DATA
-	sll	DATA, 8, DATA
-
-	ARCFOUR_BYTE(I2, I1, TMP)
-	xor	TMP2, TMP, TMP
-	or	TMP, DATA, DATA
-	subcc	N, 1, N
-	add	SRC, 4, SRC
-	st	DATA, [DST]
-	bne	.Loop
-	add	DST, 4, DST
-	
-	andcc	LENGTH, 3, LENGTH
-	beq	.Ldone
-	nop
-
-.Lfinal2:
-	C	DST address must be 2-aligned
-	cmp	LENGTH, 2
-	blu	.Lfinal1
-	nop
-
-	ldub	[SRC], DATA
-	ARCFOUR_BYTE(I1, I2, TMP)
-	ldub	[SRC + 1], TMP2
-	add	SRC, 2, SRC
-	xor	DATA, TMP, DATA
-	sll	DATA, 8, DATA	
-
-	ARCFOUR_BYTE(I2, I1, TMP)
-	xor	TMP2, TMP, TMP
-	or	DATA, TMP, DATA
-
-	sth	DATA, [DST]
-	beq	.Ldone
-	add	DST, 2, DST
-
-.Lfinal1:
-	mov	I1, I2
-	ldub	[SRC], DATA
-	ARCFOUR_BYTE(I2, I1, TMP)
-	xor	DATA, TMP, DATA
-	stb	DATA, [DST]
-
-.Ldone:
-	C	Save back I and J
-	sll	I2, 8, I2
-	or	I2, J, I2
-	stuh	I2, [CTX + ARCFOUR_I]
-
-.Lend:
-	ret
-	restore
-
-EPILOGUE(nettle_arcfour_crypt)
-
-C	Stats for AES 128 on sellafield.lysator.liu.se (UE450, 296 MHz)
-
-C 1:	nettle-1.13 C-code
-C 2:	New assembler code (basically the same as for sparc32)
-
-C	MB/s	cycles/byte
-C 1:	3.6	77.7
-C 2:	21.8	13.0
diff --git a/x86/arcfour-crypt.asm b/x86/arcfour-crypt.asm
deleted file mode 100644
index e03df8e9..00000000
--- a/x86/arcfour-crypt.asm
+++ /dev/null
@@ -1,125 +0,0 @@
-C x86/arcfour-crypt.asm
-
-ifelse(`
-   Copyright (C) 2004, Niels Möller
-
-   This file is part of GNU Nettle.
-
-   GNU Nettle is free software: you can redistribute it and/or
-   modify it under the terms of either:
-
-     * the GNU Lesser General Public License as published by the Free
-       Software Foundation; either version 3 of the License, or (at your
-       option) any later version.
-
-   or
-
-     * the GNU General Public License as published by the Free
-       Software Foundation; either version 2 of the License, or (at your
-       option) any later version.
-
-   or both in parallel, as here.
-
-   GNU Nettle is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received copies of the GNU General Public License and
-   the GNU Lesser General Public License along with this program.  If
-   not, see http://www.gnu.org/licenses/.
-')
-
-	.file "arcfour-crypt.asm"
-
-	C arcfour_crypt(struct arcfour_ctx *ctx,
-	C               size_t length, uint8_t *dst,
-	C               const uint8_t *src)
-	.text
-	ALIGN(16)
-PROLOGUE(nettle_arcfour_crypt)
-	C save all registers that need to be saved
-	pushl	%ebx		C  12(%esp)
-	pushl	%ebp		C  8(%esp)
-	pushl	%esi		C  4(%esp)
-	pushl	%edi		C  0(%esp)
-
-C Input arguments:
-	C ctx = 20(%esp)
-	C length = 24(%esp)
-	C dst = 28(%esp)
-	C src = 32(%esp)
-C Register usage:
-	C %ebp = ctx
-	C %esi = src
-	C %edi = dst
-	C %edx = loop counter
-	C %eax = i
-	C %ebx = j
-	C %cl  = si
-	C %ch  = sj
-
-	movl	24(%esp), %edx		C  length
-	movl	20(%esp), %ebp		C  ctx
-	movl	28(%esp), %edi		C  dst
-	movl	32(%esp), %esi		C  src
-
-	lea	(%edx, %edi), %edi
-	lea	(%edx, %esi), %esi
-	negl	%edx
-	jnc	.Lend
-	
-	movzbl  ARCFOUR_I (%ebp), %eax	C  i
-	movzbl  ARCFOUR_J (%ebp), %ebx	C  j
-
-	incb	%al
-	sarl	$1, %edx
-	jc	.Lloop_odd
-	
-	ALIGN(16)
-.Lloop:
-	movb	(%ebp, %eax), %cl	C  si.
-	addb    %cl, %bl
-	movb    (%ebp, %ebx), %ch	C  sj
-	movb    %ch, (%ebp, %eax)	C  S[i] = sj
-	incl	%eax
-	movzbl	%al, %eax
-	movb	%cl, (%ebp, %ebx)	C  S[j] = si
-	addb    %ch, %cl
-	movzbl  %cl, %ecx		C  Clear, so it can be used
-					C  for indexing.
-	movb    (%ebp, %ecx), %cl
-	xorb    (%esi, %edx, 2), %cl
-	movb    %cl, (%edi, %edx, 2)
-
-	C FIXME: Could exchange cl and ch in the second half
-	C and try to interleave instructions better.
-.Lloop_odd:
-	movb	(%ebp, %eax), %cl	C  si.
-	addb    %cl, %bl
-	movb    (%ebp, %ebx), %ch	C  sj
-	movb    %ch, (%ebp, %eax)	C  S[i] = sj
-	incl	%eax
-	movzbl	%al, %eax
-	movb	%cl, (%ebp, %ebx)	C  S[j] = si
-	addb    %ch, %cl
-	movzbl  %cl, %ecx		C  Clear, so it can be used
-					C  for indexing.
-	movb    (%ebp, %ecx), %cl
-	xorb    1(%esi, %edx, 2), %cl
-	incl    %edx
-	movb    %cl, -1(%edi, %edx, 2)
-
-	jnz	.Lloop
-
-C .Lloop_done:
-	decb	%al
-	movb	%al, ARCFOUR_I (%ebp)		C  Store the new i and j.
-	movb	%bl, ARCFOUR_J (%ebp)
-.Lend:
-	popl	%edi
-	popl	%esi
-	popl	%ebp
-	popl	%ebx
-	ret
-EPILOGUE(nettle_arcfour_crypt)
-- 
GitLab