C nettle, low-level cryptographics library
C 
C Copyright (C) 2002, 2005 Niels Möller
C  
C The nettle library is free software; you can redistribute it and/or modify
C it under the terms of the GNU Lesser General Public License as published by
C the Free Software Foundation; either version 2.1 of the License, or (at your
C option) any later version.
C 
C The nettle library is distributed in the hope that it will be useful, but
C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
C or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
C License for more details.
C 
C You should have received a copy of the GNU Lesser General Public License
C along with the nettle library; see the file COPYING.LIB.  If not, write to
C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
C MA 02111-1301, USA.

C	Define to YES, to enable the complex code to special case SRC
C	and DST with compatible alignment.
C	NOTE(review): nothing in the visible part of this file tests
C	WITH_ALIGN; confirm whether the switch is still needed.
	
define(<WITH_ALIGN>, <YES>)

C	Registers

C	Function arguments, in the order of the C prototype
C	(ctx, length, dst, src).
define(<CTX>,	<%i0>)
define(<LENGTH>,<%i1>)
define(<DST>,	<%i2>)
define(<SRC>,	<%i3>)

C	I1 and I2 take turns holding the current and the next value
C	of the state index i. J holds the state index j.
define(<I1>,	<%i4>)
define(<I2>,	<%i5>)
define(<J>,	<%g1>)
C	SI and SJ hold the state bytes loaded from CTX + i and CTX + j.
define(<SI>,	<%g2>)
define(<SJ>,	<%g3>)
C	TMP receives the key stream byte; TMP2 holds an additional
C	source byte while an output word is being assembled.
define(<TMP>,	<%o0>)
define(<TMP2>,	<%o1>)
C	N counts the 4-byte words left to do in the main loop.
define(<N>,	<%o2>)
C	DATA holds a source byte and accumulates the output value
C	that is finally stored to DST.
define(<DATA>,	<%o3>)

C	Computes the next byte of the key stream. As input, i must
C	already point to the index for the current access, the index
C	for the next access is stored in ni. The resulting key byte is
C	stored in res.
C
C	Writing S for the byte array that starts at CTX, one expansion
C	does the following:
C	  SI  = S[i]
C	  ni  = (i + 1) mod 256
C	  J   = (J + SI) mod 256
C	  SJ  = S[J]
C	  S[J] = SI, S[i] = SJ		(the swap)
C	  res = S[(SI + SJ) mod 256]
C
C	i and ni must be different registers, because i is used as an
C	address again after ni has been written. SI and SJ are
C	clobbered and J is updated in place. Only non-cc forms of add
C	and and are used, so the condition codes set before an
C	expansion are still valid after it.
C	ARCFOUR_BYTE(i, ni, res)
define(<ARCFOUR_BYTE>, <
	ldub	[CTX + $1], SI
	add	$1, 1, $2
	add	J, SI, J
	and	J, 0xff, J
	ldub	[CTX + J], SJ
	and	$2, 0xff, $2
	stb	SI, [CTX + J]
	add	SI, SJ, SI
	and	SI, 0xff, SI
	stb	SJ, [CTX + $1]
	ldub	[CTX + SI], $3
>)dnl
			
C	Size in bytes of the stack frame that the save instruction
C	in the function prologue allocates.
define(<FRAME_SIZE>, 192)

C	Source file name recorded for the assembler.
	.file "arcfour-crypt.asm"

	C arcfour_crypt(struct arcfour_ctx *ctx,
	C               size_t length, uint8_t *dst,
	C               const uint8_t *src)

	.section	".text"
	.align 16
	.proc	020
	
	C	Encrypts or decrypts LENGTH bytes from SRC to DST by
	C	xoring them with the arcfour key stream, and updates the
	C	state stored at CTX.
	C
	C	SRC is always read one byte at a time. DST is first
	C	brought to 4-byte alignment using a byte store and a
	C	halfword store, the bulk is then written one 32-bit word
	C	per iteration, and up to three trailing bytes are written
	C	with a halfword store and a byte store. Bytes are packed
	C	into DATA most significant first, matching big-endian
	C	memory order.
	C
	C	Invariant: at the start of every stage I1 holds the index
	C	for the next key stream byte, and on reaching .Ldone I2
	C	holds the last index that was actually used.
	C
	C	LENGTH is a 64-bit size_t. Every branch that depends on
	C	LENGTH or N therefore tests %xcc, and N is computed with
	C	srlx. Plain branches test only the 32-bit condition codes
	C	and srl uses only the low 32 bits, which would mishandle
	C	lengths of 4 GiB or more. The branches on the alignment
	C	of DST test a result of 0, 1 or 2, for which the 32-bit
	C	condition codes are sufficient.

PROLOGUE(nettle_arcfour_crypt)

	save	%sp, -FRAME_SIZE, %sp
	C	Nothing to do, and no state to write back, for LENGTH 0
	cmp	LENGTH, 0
	be	%xcc, .Lend
	nop
	
	C	Load both I and J
	C	One halfword load fetches i in the high byte and j in the
	C	low byte.
	C	NOTE(review): assumes ARCFOUR_I is the offset of i with j
	C	in the byte that follows, and that CTX + ARCFOUR_I is
	C	2-byte aligned; confirm against the context struct.
	lduh	[CTX + ARCFOUR_I], I1
	and	I1, 0xff, J
	srl	I1, 8, I1

	C	We want an even address for DST
	C	The increment of i is spread over this instruction and
	C	the delay slot of the branch, so I1 is the next index on
	C	both paths.
	andcc	DST, 1, %g0
	add	I1, 1 ,I1
	beq	.Laligned2
	and	I1, 0xff, I1

	C	DST is odd: handle a single byte
	mov	I1, I2
	ldub	[SRC], DATA
	ARCFOUR_BYTE(I2, I1, TMP)
	subcc	LENGTH, 1, LENGTH
	add	SRC, 1, SRC
	xor	DATA, TMP, DATA
	stb	DATA, [DST]
	C	Tests the result of the subcc above
	beq	%xcc, .Ldone
	add	DST, 1, DST

.Laligned2:

	C	DST is 2-byte aligned and LENGTH is at least 1
	cmp	LENGTH, 2
	blu	%xcc, .Lfinal1
	C	Harmless delay slot instruction	
	andcc	DST, 2, %g0
	beq	.Laligned4
	nop

	C	DST is 2 mod 4: handle two bytes with one halfword store
	ldub	[SRC], DATA
	ARCFOUR_BYTE(I1, I2, TMP)
	ldub	[SRC + 1], TMP2
	add	SRC, 2, SRC
	xor	DATA, TMP, DATA
	sll	DATA, 8, DATA	

	ARCFOUR_BYTE(I2, I1, TMP)
	xor	TMP2, TMP, TMP
	subcc	LENGTH, 2, LENGTH
	or	DATA, TMP, DATA

	sth	DATA, [DST]
	C	Tests the result of the subcc above
	beq	%xcc, .Ldone
	add	DST, 2, DST
	
.Laligned4:
	C	DST is 4-byte aligned and LENGTH is at least 1
	cmp	LENGTH, 4
	blu	%xcc, .Lfinal2
	C	Harmless delay slot instruction
	C	N is the number of whole words, using all 64 bits of LENGTH
	srlx	LENGTH, 2, N
	
.Loop:
	C	Main loop, with aligned writes
	
	C	FIXME: Could check if SRC is aligned, and
	C	use 32-bit reads in that case.

	ldub	[SRC], DATA
	ARCFOUR_BYTE(I1, I2, TMP)
	ldub	[SRC + 1], TMP2
	xor	TMP, DATA, DATA
	sll	DATA, 8, DATA

	ARCFOUR_BYTE(I2, I1, TMP)
	xor	TMP2, TMP, TMP
	ldub	[SRC + 2], TMP2
	or	TMP, DATA, DATA
	sll	DATA, 8, DATA

	ARCFOUR_BYTE(I1, I2, TMP)
	xor	TMP2, TMP, TMP
	ldub	[SRC + 3], TMP2
	or	TMP, DATA, DATA
	sll	DATA, 8, DATA

	ARCFOUR_BYTE(I2, I1, TMP)
	xor	TMP2, TMP, TMP
	or	TMP, DATA, DATA
	subcc	N, 1, N
	add	SRC, 4, SRC
	st	DATA, [DST]
	C	Tests the result of the subcc on N above
	bne	%xcc, .Loop
	add	DST, 4, DST
	
	C	Between zero and three bytes are left
	andcc	LENGTH, 3, LENGTH
	beq	%xcc, .Ldone
	nop

.Lfinal2:
	C	DST address must be 2-aligned
	C	LENGTH is 1, 2 or 3 here
	cmp	LENGTH, 2
	blu	%xcc, .Lfinal1
	nop

	ldub	[SRC], DATA
	ARCFOUR_BYTE(I1, I2, TMP)
	ldub	[SRC + 1], TMP2
	add	SRC, 2, SRC
	xor	DATA, TMP, DATA
	sll	DATA, 8, DATA	

	ARCFOUR_BYTE(I2, I1, TMP)
	xor	TMP2, TMP, TMP
	or	DATA, TMP, DATA

	sth	DATA, [DST]
	C	Still tests the cmp of LENGTH with 2 above, since nothing
	C	in between sets the condition codes: equal means exactly
	C	two bytes were left.
	beq	%xcc, .Ldone
	add	DST, 2, DST

.Lfinal1:
	C	Exactly one byte is left
	mov	I1, I2
	ldub	[SRC], DATA
	ARCFOUR_BYTE(I2, I1, TMP)
	xor	DATA, TMP, DATA
	stb	DATA, [DST]

.Ldone:
	C	Save back I and J
	C	I2 is the last index used; it goes in the high byte and J
	C	in the low byte, mirroring the load at the top.
	sll	I2, 8, I2
	or	I2, J, I2
	stuh	I2, [CTX + ARCFOUR_I]

.Lend:
	ret
	restore

EPILOGUE(nettle_arcfour_crypt)

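C	Stats for AES 128 on sellafield.lysator.liu.se (UE450, 296 MHz)
C	NOTE(review): "AES 128" looks like a leftover from the AES code
C	this comment was copied from; the figures below are presumably
C	for arcfour. Confirm before relying on them.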

C 1:	nettle-1.13 C-code
C 2:	New assembler code (basically the same as for sparc32)

C	MB/s	cycles/byte
C 1:	3.6	77.7
C 2:	21.8	13.0