C aes-encrypt-internal.asm
C nettle, low-level cryptographics library
C 
C Copyright (C) 2013 Niels Möller
C  
C The nettle library is free software; you can redistribute it and/or modify
C it under the terms of the GNU Lesser General Public License as published by
C the Free Software Foundation; either version 2.1 of the License, or (at your
C option) any later version.
C 
C The nettle library is distributed in the hope that it will be useful, but
C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
C or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
C License for more details.
C 
C You should have received a copy of the GNU Lesser General Public License
C along with the nettle library; see the file COPYING.LIB.  If not, write to
C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
C MA 02111-1301, USA.

include_src(<arm/aes.m4>)

C	Benchmarked at 725, 815, 990 cycles/block on cortex A9,
C	for 128, 192 and 256 bit key sizes.

C	Possible improvements: More efficient load and store with
C	aligned accesses. Better scheduling.

C Register and stack frame names used by _nettle_aes_encrypt.

C Arguments as they arrive in registers.
define(<PARAM_ROUNDS>, <r0>)
define(<PARAM_KEYS>, <r1>)
define(<TABLE>, <r2>)
define(<PARAM_LENGTH>, <r3>)
C On stack: DST, SRC

C Working registers. These are among the eight registers that the
C function pushes on entry and restores on exit.
define(<W0>, <r4>)
define(<W1>, <r5>)
define(<W2>, <r6>)
define(<W3>, <r7>)
define(<T0>, <r8>)
define(<COUNT>, <r10>)
define(<KEY>, <r11>)

C More working registers. MASK, X0 and X1 reuse r0, r1 and r3, so the
C rounds, keys and length arguments are only available through the
C FRAME_ slots below once the function has pushed them.
define(<MASK>, <r0>)	C Overlaps inputs, except TABLE
define(<X0>, <r1>)
define(<X1>, <r3>)
define(<X2>, <r12>)
define(<X3>, <r14>)	C lr

C Stack frame, relative to sp after the entry push of
C r0, r1, r3 followed by eight more registers (11 words, 44 bytes).
C The three argument copies sit at the lowest addresses.
define(<FRAME_ROUNDS>,  <[sp]>)
define(<FRAME_KEYS>,  <[sp, #+4]>)
define(<FRAME_LENGTH>,  <[sp, #+8]>)
C 8 saved registers
C The stack-passed arguments follow directly above the pushed words.
define(<FRAME_DST>,  <[sp, #+44]>)
define(<FRAME_SRC>,  <[sp, #+48]>)


C AES_ENCRYPT_ROUND(x0,x1,x2,x3,w0,w1,w2,w3,key)
C MASK should hold the constant 0x3fc.
C
C One table-driven round. With tab0..tab3 denoting the four consecutive
C 1024-byte tables starting at TABLE, and k0..k3 the four words loaded
C from the key pointer, the macro computes
C
C   w0 = tab0[x0 byte 0] ^ tab1[x1 byte 1] ^ tab2[x2 byte 2] ^ tab3[x3 byte 3] ^ k0
C   w1 = tab0[x1 byte 0] ^ tab1[x2 byte 1] ^ tab2[x3 byte 2] ^ tab3[x0 byte 3] ^ k1
C   w2 = tab0[x2 byte 0] ^ tab1[x3 byte 1] ^ tab2[x0 byte 2] ^ tab3[x1 byte 3] ^ k2
C   w3 = tab0[x3 byte 0] ^ tab1[x0 byte 1] ^ tab2[x1 byte 2] ^ tab3[x2 byte 3] ^ k3
C
C Each index is formed as MASK and (x shifted or rotated), which yields
C 4 * byte, i.e. the byte offset of a 32-bit table entry.
C
C On exit: x0..x3 hold k0..k3, key has been advanced by 16 bytes
C (ldm with writeback), T0 is clobbered and TABLE is back at its
C original value. No instruction in the macro sets the condition flags.
C
C NOTE(review): ldm assigns memory words to registers in ascending
C register-number order, so x0..x3 must be passed in ascending register
C order for k0..k3 to pair up with w0..w3; both call sites in this
C file do so.
define(<AES_ENCRYPT_ROUND>, <

	C First table: byte 0 of each input word. The shift by 2 scales
	C the byte to a word offset and MASK drops the higher bytes.
	and	T0, MASK, $1, lsl #2
	ldr	$5, [TABLE, T0]
	and	T0, MASK, $2, lsl #2
	ldr	$6, [TABLE, T0]
	and	T0, MASK, $3, lsl #2
	ldr	$7, [TABLE, T0]
	and	T0, MASK, $4, lsl #2
	ldr	$8, [TABLE, T0]

	C Second table: byte 1 of each input word. Rotating right by 6
	C moves bits 8..15 to bits 2..9, where MASK selects them.
	and	T0, MASK, $2, ror #6
	add	TABLE, TABLE, #1024
	ldr	T0, [TABLE, T0]
	eor	$5, $5, T0
	and	T0, MASK, $3, ror #6
	ldr	T0, [TABLE, T0]
	eor	$6, $6, T0
	and	T0, MASK, $4, ror #6
	ldr	T0, [TABLE, T0]
	eor	$7, $7, T0
	and	T0, MASK, $1, ror #6
	ldr	T0, [TABLE, T0]
	eor	$8, $8, T0

	C Third table: byte 2 of each input word.
	and	T0, MASK, $3, ror #14
	add	TABLE, TABLE, #1024
	ldr	T0, [TABLE, T0]
	eor	$5, $5, T0
	and	T0, MASK, $4, ror #14
	ldr	T0, [TABLE, T0]
	eor	$6, $6, T0
	and	T0, MASK, $1, ror #14
	ldr	T0, [TABLE, T0]
	eor	$7, $7, T0
	and	T0, MASK, $2, ror #14
	ldr	T0, [TABLE, T0]
	eor	$8, $8, T0

	C Fourth table: byte 3 of each input word.
	and	T0, MASK, $4, ror #22
	add	TABLE, TABLE, #1024
	ldr	T0, [TABLE, T0]
	eor	$5, $5, T0
	and	T0, MASK, $1, ror #22
	ldr	T0, [TABLE, T0]
	eor	$6, $6, T0
	and	T0, MASK, $2, ror #22
	ldr	T0, [TABLE, T0]
	eor	$7, $7, T0
	and	T0, MASK, $3, ror #22
	ldr	T0, [TABLE, T0]

	C All reads of the input words are done, so their registers can
	C now receive the four subkey words. The last lookup result is
	C still pending in T0 and is folded in right after the load.
	ldm	$9!, {$1,$2,$3,$4}
	eor	$8, $8, T0
	C Undo the three 1024-byte steps taken above.
	sub	TABLE, TABLE, #3072
	eor	$5, $5, $1
	eor	$6, $6, $2
	eor	$7, $7, $3
	eor	$8, $8, $4
>)

	.file "aes-encrypt-internal.asm"

	C _aes_encrypt(unsigned rounds, const uint32_t *keys,
	C	       const struct aes_table *T,
	C	       size_t length, uint8_t *dst,
	C	       uint8_t *src)
	C
	C Processes the input one 16-byte block per pass of the block
	C loop, reading through src and writing through dst. Returns at
	C once when length is zero.
	C
	C rounds must be even and non-zero: the round loop is entered in
	C the middle, counts down in steps of two and only stops when the
	C count is exactly zero.
	C
	C The block loop continues while length - 16 is non-zero without
	C borrow, so length is presumably meant to be a multiple of 16;
	C a trailing partial count would still process a whole block.
	C
	C r4-r8, r10 and r11 are saved and restored. lr is used as X3, so
	C the return goes through the saved copy popped into pc.
	.text
	ALIGN(4)
PROLOGUE(_nettle_aes_encrypt)
	teq	PARAM_LENGTH, #0
	beq	.Lend			C Nothing pushed yet, plain return

	C Save rounds, keys and length (r0, r1, r3) at the bottom of the
	C frame so that their registers can be reused as MASK, X0 and X1,
	C followed by the eight registers restored on exit.
	push	{r0,r1,r3, r4,r5,r6,r7,r8,r10,r11,lr}
	mov	MASK, #0x3fc
	ALIGN(16)
.Lblock_loop:
	ldr	X0, FRAME_SRC		C Use X0 as SRC pointer
	ldm	sp, {COUNT, KEY}	C COUNT = rounds, KEY = keys

	C AES_LOAD is defined in arm/aes.m4. Presumably it reads one
	C input word through X0, combines it with a subkey word read
	C through KEY and advances both pointers -- confirm there.
	AES_LOAD(X0,KEY,W0)
	AES_LOAD(X0,KEY,W1)
	AES_LOAD(X0,KEY,W2)
	AES_LOAD(X0,KEY,W3)

	str	X0, FRAME_SRC		C X0 is about to be reused as data

	C Offset TABLE for the round loop; undone before the final round.
	add	TABLE, TABLE, #AES_TABLE0

	C Enter in the middle of the loop: the first pass runs a single
	C round, every later pass runs two. Together with the final round
	C below this gives COUNT rounds in total.
	b	.Lentry
	ALIGN(16)
.Lround_loop:
	C	Transform X -> W
	AES_ENCRYPT_ROUND(X0, X1, X2, X3, W0, W1, W2, W3, KEY)

.Lentry:
	C The flags set here are tested by the bne below. This works
	C because AES_ENCRYPT_ROUND has no flag-setting instructions.
	subs	COUNT, COUNT,#2
	C	Transform W -> X
	AES_ENCRYPT_ROUND(W0, W1, W2, W3, X0, X1, X2, X3, KEY)

	bne	.Lround_loop

	C COUNT is zero here and no longer needed as a counter.
	C 0x3fc shifted right by 2 is 0xff.
	lsr	COUNT, MASK, #2	C Put the needed mask in the unused COUNT register
	sub	TABLE, TABLE, #AES_TABLE0
	C	Final round
	C AES_FINAL_ROUND_V5 is defined in arm/aes.m4. The input words
	C are passed rotated by one position for each output word.
	AES_FINAL_ROUND_V5(X0, X1, X2, X3, KEY, W0, COUNT)
	AES_FINAL_ROUND_V5(X1, X2, X3, X0, KEY, W1, COUNT)
	AES_FINAL_ROUND_V5(X2, X3, X0, X1, KEY, W2, COUNT)
	AES_FINAL_ROUND_V5(X3, X0, X1, X2, KEY, W3, COUNT)

	C The X registers are free again; use them for DST and the
	C remaining length.
	ldr	X0, FRAME_DST
	ldr	X1, FRAME_LENGTH

	C AES_STORE is defined in arm/aes.m4. Presumably it writes one
	C word through X0 and advances it -- confirm there.
	AES_STORE(X0,W0)
	AES_STORE(X0,W1)
	AES_STORE(X0,W2)
	AES_STORE(X0,W3)

	C str does not change the flags, so bhi still sees the result of
	C the subs: loop again while the new length is non-zero and the
	C subtraction did not borrow.
	subs	X1, X1, #16
	str	X0, FRAME_DST
	str	X1, FRAME_LENGTH

	bhi	.Lblock_loop

	add	sp, sp, #12	C Drop saved r0, r1, r3
	pop	{r4,r5,r6,r7,r8,r10,r11,pc}

.Lend:
	bx	lr
EPILOGUE(_nettle_aes_encrypt)