Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Brian Smith
nettle
Commits
d6fadad8
Commit
d6fadad8
authored
May 22, 2013
by
Niels Möller
Browse files
arm: Adapted AES assembly to new interface.
parent
7192dce7
Changes
5
Hide whitespace changes
Inline
Side-by-side
ChangeLog
View file @
d6fadad8
2013-05-22 Niels Möller <nisse@lysator.liu.se>
* arm/v6/aes-encrypt-internal.asm: Adapted to new interface.
Unfortunately, 4% slowdown on Cortex-A9, for unknown reason.
* arm/v6/aes-decrypt-internal.asm: Likewise.
* arm/aes-encrypt-internal.asm: Adapted to new interface.
* arm/aes-decrypt-internal.asm: Likewise.
2013-05-21 Niels Möller <nisse@lysator.liu.se>
* sparc32/aes-encrypt-internal.asm: Adapted to new interface.
...
...
arm/aes-decrypt-internal.asm
View file @
d6fadad8
...
...
@@ -19,26 +19,32 @@ C MA 02111-1301, USA.
include_src
(
<
arm
/
aes.m4
>
)
C
define
(
<
CTX
>
,
<
r0
>
)
define
(
<
TABLE
>
,
<
r1
>
)
define
(
<
LENGTH
>
,
<
r2
>
)
define
(
<
DS
T
>
,
<
r3
>
)
define
(
<
SRC
>
,
<
r12
>
)
define
(
<
PARAM_ROUNDS
>
,
<
r0
>
)
define
(
<
PARAM_KEYS
>
,
<
r1
>
)
define
(
<
TABLE
>
,
<
r2
>
)
define
(
<
PARAM_LENGTH
>
,
<
r3
>
)
C
On
stack
:
DS
T
,
SRC
define
(
<
W0
>
,
<
r4
>
)
define
(
<
W1
>
,
<
r5
>
)
define
(
<
W2
>
,
<
r6
>
)
define
(
<
W3
>
,
<
r7
>
)
define
(
<
T0
>
,
<
r8
>
)
define
(
<
KEY
>
,
<
r10
>
)
define
(
<
ROUND
>
,
<
r11
>
)
define
(
<
COUNT
>
,
<
r10
>
)
define
(
<
KEY
>
,
<
r11
>
)
define
(
<
X0
>
,
<
r2
>
)
C
Overlaps
LENGTH
,
SRC
,
DS
T
define
(
<
MASK
>
,
<
r0
>
)
C
Overlaps
inputs
,
except
TABLE
define
(
<
X0
>
,
<
r1
>
)
define
(
<
X1
>
,
<
r3
>
)
define
(
<
X2
>
,
<
r12
>
)
define
(
<
X3
>
,
<
r14
>
)
C
lr
define
(
<
MASK
>
,
<
r0
>
)
C
Overlaps
CTX
input
define
(
<
CTX
>
,
<
[
sp
]
>
)
define
(
<
FRAME_ROUNDS
>
,
<
[
sp
]
>
)
define
(
<
FRAME_KEYS
>
,
<
[
sp
,
#
+
4
]
>
)
define
(
<
FRAME_LENGTH
>
,
<
[
sp
,
#
+
8
]
>
)
C
8
saved
registers
define
(
<
FRAME_DST
>
,
<
[
sp
,
#
+
44
]
>
)
define
(
<
FRAME_SRC
>
,
<
[
sp
,
#
+
48
]
>
)
define
(
<
AES_DECRYPT_ROUND
>
,
<
...
...
@@ -103,29 +109,30 @@ define(<AES_DECRYPT_ROUND>, <
.file
"
aes
-
decrypt
-
internal.asm
"
C _aes_decrypt(struct aes_context *ctx,
C _aes_decrypt(unsigned rounds, const uint32_t *keys,
C              const struct aes_table *T,
C              size_t length, uint8_t *dst,
C              uint8_t *src)
.text
ALIGN
(
4
)
PROLOGUE
(
_nettle_aes_decrypt
)
teq
LENGTH
,
#
0
teq
PARAM_
LENGTH
,
#
0
beq
.Lend
ldr
SRC
,
[
sp
]
push
{
r0
,
r4
,
r5
,
r6
,
r7
,
r8
,
r10
,
r11
,
lr
}
push
{
r0
,
r1
,
r3
,
r4
,
r5
,
r6
,
r7
,
r8
,
r10
,
r11
,
lr
}
mov
MASK
,
#
0x3fc
ALIGN
(
16
)
.Lblock_loop:
ldr
KEY
,
CTX
ldr
ROUND
,
[
KEY
,
#
+
AES_NROUNDS
]
AES_LOAD
(
SRC
,
KEY
,
W0
)
AES_LOAD
(
SRC
,
KEY
,
W1
)
AES_LOAD
(
SRC
,
KEY
,
W2
)
AES_LOAD
(
SRC
,
KEY
,
W3
)
push
{
LENGTH
,
DS
T
,
SRC
}
ldr
X0
,
FRAME_SRC
C
Use
X0
as
SRC
pointer
ldm
sp
,
{
COUNT
,
KEY
}
AES_LOAD
(
X0
,
KEY
,
W0
)
AES_LOAD
(
X0
,
KEY
,
W1
)
AES_LOAD
(
X0
,
KEY
,
W2
)
AES_LOAD
(
X0
,
KEY
,
W3
)
str
X0
,
FRAME_SRC
add
TABLE
,
TABLE
,
#
AES_TABLE0
b
.Lentry
...
...
@@ -135,31 +142,35 @@ PROLOGUE(_nettle_aes_decrypt)
AES_DECRYPT_ROUND
(
X0
,
X1
,
X2
,
X3
,
W0
,
W1
,
W2
,
W3
,
KEY
)
.Lentry:
subs
R
OUN
D
,
R
OUN
D
,
#
2
subs
C
OUN
T
,
C
OUN
T
,
#
2
C
Transform
W
->
X
AES_DECRYPT_ROUND
(
W0
,
W1
,
W2
,
W3
,
X0
,
X1
,
X2
,
X3
,
KEY
)
bne
.Lround_loop
lsr
R
OUN
D
,
MASK
,
#
2
C
Put
the
needed
mask
in
the
unused
R
OUN
D
register
lsr
C
OUN
T
,
MASK
,
#
2
C
Put
the
needed
mask
in
the
unused
C
OUN
T
register
sub
TABLE
,
TABLE
,
#
AES_TABLE0
C
Final
round
AES_FINAL_ROUND_V5
(
X0
,
X3
,
X2
,
X1
,
KEY
,
W0
,
R
OUN
D
)
AES_FINAL_ROUND_V5
(
X1
,
X0
,
X3
,
X2
,
KEY
,
W1
,
R
OUN
D
)
AES_FINAL_ROUND_V5
(
X2
,
X1
,
X0
,
X3
,
KEY
,
W2
,
R
OUN
D
)
AES_FINAL_ROUND_V5
(
X3
,
X2
,
X1
,
X0
,
KEY
,
W3
,
R
OUN
D
)
AES_FINAL_ROUND_V5
(
X0
,
X3
,
X2
,
X1
,
KEY
,
W0
,
C
OUN
T
)
AES_FINAL_ROUND_V5
(
X1
,
X0
,
X3
,
X2
,
KEY
,
W1
,
C
OUN
T
)
AES_FINAL_ROUND_V5
(
X2
,
X1
,
X0
,
X3
,
KEY
,
W2
,
C
OUN
T
)
AES_FINAL_ROUND_V5
(
X3
,
X2
,
X1
,
X0
,
KEY
,
W3
,
C
OUN
T
)
pop
{
LENGTH
,
DS
T
,
SRC
}
AES_STORE
(
DS
T
,
W0
)
AES_STORE
(
DS
T
,
W1
)
AES_STORE
(
DS
T
,
W2
)
AES_STORE
(
DS
T
,
W3
)
ldr
X0
,
FRAME_DST
ldr
X1
,
FRAME_LENGTH
AES_STORE
(
X0
,
W0
)
AES_STORE
(
X0
,
W1
)
AES_STORE
(
X0
,
W2
)
AES_STORE
(
X0
,
W3
)
subs
X1
,
X1
,
#
16
str
X0
,
FRAME_DST
str
X1
,
FRAME_LENGTH
subs
LENGTH
,
LENGTH
,
#
16
bhi
.Lblock_loop
add
sp
,
sp
,
#
4
C
Drop
saved
r0
add
sp
,
sp
,
#
12
C
Drop
saved
r0
,
r1
,
r3
pop
{
r4
,
r5
,
r6
,
r7
,
r8
,
r10
,
r11
,
pc
}
.Lend:
...
...
arm/aes-encrypt-internal.asm
View file @
d6fadad8
...
...
@@ -19,32 +19,38 @@ C MA 02111-1301, USA.
include_src
(
<
arm
/
aes.m4
>
)
C Benchmarked at 725, 930, 990 cycles/block on cortex A9,
C Benchmarked at 725, 815, 990 cycles/block on cortex A9,
C for 128, 192 and 256 bit key sizes.
C Possible improvements: More efficient load and store with
C aligned accesses. Better scheduling.
C
define
(
<
CTX
>
,
<
r0
>
)
define
(
<
TABLE
>
,
<
r1
>
)
define
(
<
LENGTH
>
,
<
r2
>
)
define
(
<
DS
T
>
,
<
r3
>
)
define
(
<
SRC
>
,
<
r12
>
)
define
(
<
PARAM_ROUNDS
>
,
<
r0
>
)
define
(
<
PARAM_KEYS
>
,
<
r1
>
)
define
(
<
TABLE
>
,
<
r2
>
)
define
(
<
PARAM_LENGTH
>
,
<
r3
>
)
C
On
stack
:
DS
T
,
SRC
define
(
<
W0
>
,
<
r4
>
)
define
(
<
W1
>
,
<
r5
>
)
define
(
<
W2
>
,
<
r6
>
)
define
(
<
W3
>
,
<
r7
>
)
define
(
<
T0
>
,
<
r8
>
)
define
(
<
KEY
>
,
<
r10
>
)
define
(
<
ROUND
>
,
<
r11
>
)
define
(
<
COUNT
>
,
<
r10
>
)
define
(
<
KEY
>
,
<
r11
>
)
define
(
<
X0
>
,
<
r2
>
)
C
Overlaps
LENGTH
,
SRC
,
DS
T
define
(
<
MASK
>
,
<
r0
>
)
C
Overlaps
inputs
,
except
TABLE
define
(
<
X0
>
,
<
r1
>
)
define
(
<
X1
>
,
<
r3
>
)
define
(
<
X2
>
,
<
r12
>
)
define
(
<
X3
>
,
<
r14
>
)
C
lr
define
(
<
MASK
>
,
<
r0
>
)
C
Overlaps
CTX
input
define
(
<
CTX
>
,
<
[
sp
]
>
)
define
(
<
FRAME_ROUNDS
>
,
<
[
sp
]
>
)
define
(
<
FRAME_KEYS
>
,
<
[
sp
,
#
+
4
]
>
)
define
(
<
FRAME_LENGTH
>
,
<
[
sp
,
#
+
8
]
>
)
C
8
saved
registers
define
(
<
FRAME_DST
>
,
<
[
sp
,
#
+
44
]
>
)
define
(
<
FRAME_SRC
>
,
<
[
sp
,
#
+
48
]
>
)
C
AES_ENCRYPT_ROUND
(
x0
,
x1
,
x2
,
x3
,
w0
,
w1
,
w2
,
w3
,
key
)
...
...
@@ -112,29 +118,30 @@ define(<AES_ENCRYPT_ROUND>, <
.file
"
aes
-
encrypt
-
internal.asm
"
C _aes_encrypt(struct aes_context *ctx,
C _aes_encrypt(unsigned rounds, const uint32_t *keys,
C              const struct aes_table *T,
C              size_t length, uint8_t *dst,
C              uint8_t *src)
.text
ALIGN
(
4
)
PROLOGUE
(
_nettle_aes_encrypt
)
teq
LENGTH
,
#
0
teq
PARAM_
LENGTH
,
#
0
beq
.Lend
ldr
SRC
,
[
sp
]
push
{
r0
,
r4
,
r5
,
r6
,
r7
,
r8
,
r10
,
r11
,
lr
}
push
{
r0
,
r1
,
r3
,
r4
,
r5
,
r6
,
r7
,
r8
,
r10
,
r11
,
lr
}
mov
MASK
,
#
0x3fc
ALIGN
(
16
)
.Lblock_loop:
ldr
KEY
,
CTX
ldr
ROUND
,
[
KEY
,
#
+
AES_NROUNDS
]
AES_LOAD
(
SRC
,
KEY
,
W0
)
AES_LOAD
(
SRC
,
KEY
,
W1
)
AES_LOAD
(
SRC
,
KEY
,
W2
)
AES_LOAD
(
SRC
,
KEY
,
W3
)
push
{
LENGTH
,
DS
T
,
SRC
}
ldr
X0
,
FRAME_SRC
C
Use
X0
as
SRC
pointer
ldm
sp
,
{
COUNT
,
KEY
}
AES_LOAD
(
X0
,
KEY
,
W0
)
AES_LOAD
(
X0
,
KEY
,
W1
)
AES_LOAD
(
X0
,
KEY
,
W2
)
AES_LOAD
(
X0
,
KEY
,
W3
)
str
X0
,
FRAME_SRC
add
TABLE
,
TABLE
,
#
AES_TABLE0
b
.Lentry
...
...
@@ -144,31 +151,35 @@ PROLOGUE(_nettle_aes_encrypt)
AES_ENCRYPT_ROUND
(
X0
,
X1
,
X2
,
X3
,
W0
,
W1
,
W2
,
W3
,
KEY
)
.Lentry:
subs
R
OUN
D
,
R
OUN
D
,
#
2
subs
C
OUN
T
,
C
OUN
T
,
#
2
C
Transform
W
->
X
AES_ENCRYPT_ROUND
(
W0
,
W1
,
W2
,
W3
,
X0
,
X1
,
X2
,
X3
,
KEY
)
bne
.Lround_loop
lsr
R
OUN
D
,
MASK
,
#
2
C
Put
the
needed
mask
in
the
unused
R
OUN
D
register
lsr
C
OUN
T
,
MASK
,
#
2
C
Put
the
needed
mask
in
the
unused
C
OUN
T
register
sub
TABLE
,
TABLE
,
#
AES_TABLE0
C
Final
round
AES_FINAL_ROUND_V5
(
X0
,
X1
,
X2
,
X3
,
KEY
,
W0
,
R
OUN
D
)
AES_FINAL_ROUND_V5
(
X1
,
X2
,
X3
,
X0
,
KEY
,
W1
,
R
OUN
D
)
AES_FINAL_ROUND_V5
(
X2
,
X3
,
X0
,
X1
,
KEY
,
W2
,
R
OUN
D
)
AES_FINAL_ROUND_V5
(
X3
,
X0
,
X1
,
X2
,
KEY
,
W3
,
R
OUN
D
)
AES_FINAL_ROUND_V5
(
X0
,
X1
,
X2
,
X3
,
KEY
,
W0
,
C
OUN
T
)
AES_FINAL_ROUND_V5
(
X1
,
X2
,
X3
,
X0
,
KEY
,
W1
,
C
OUN
T
)
AES_FINAL_ROUND_V5
(
X2
,
X3
,
X0
,
X1
,
KEY
,
W2
,
C
OUN
T
)
AES_FINAL_ROUND_V5
(
X3
,
X0
,
X1
,
X2
,
KEY
,
W3
,
C
OUN
T
)
pop
{
LENGTH
,
DS
T
,
SRC
}
AES_STORE
(
DS
T
,
W0
)
AES_STORE
(
DS
T
,
W1
)
AES_STORE
(
DS
T
,
W2
)
AES_STORE
(
DS
T
,
W3
)
ldr
X0
,
FRAME_DST
ldr
X1
,
FRAME_LENGTH
AES_STORE
(
X0
,
W0
)
AES_STORE
(
X0
,
W1
)
AES_STORE
(
X0
,
W2
)
AES_STORE
(
X0
,
W3
)
subs
X1
,
X1
,
#
16
str
X0
,
FRAME_DST
str
X1
,
FRAME_LENGTH
subs
LENGTH
,
LENGTH
,
#
16
bhi
.Lblock_loop
add
sp
,
sp
,
#
4
C
Drop
saved
r0
add
sp
,
sp
,
#
12
C
Drop
saved
r0
,
r1
,
r3
pop
{
r4
,
r5
,
r6
,
r7
,
r8
,
r10
,
r11
,
pc
}
.Lend:
...
...
arm/v6/aes-decrypt-internal.asm
View file @
d6fadad8
...
...
@@ -19,25 +19,33 @@ C MA 02111-1301, USA.
include_src
(
<
arm
/
aes.m4
>
)
define
(
<
CTX
>
,
<
r0
>
)
define
(
<
TABLE
>
,
<
r1
>
)
define
(
<
LENGTH
>
,
<
r2
>
)
define
(
<
DS
T
>
,
<
r3
>
)
define
(
<
SRC
>
,
<
r12
>
)
define
(
<
PARAM_ROUNDS
>
,
<
r0
>
)
define
(
<
PARAM_KEYS
>
,
<
r1
>
)
define
(
<
TABLE
>
,
<
r2
>
)
define
(
<
LENGTH
>
,
<
r3
>
)
C
On
stack
:
DS
T
,
SRC
define
(
<
W0
>
,
<
r4
>
)
define
(
<
W1
>
,
<
r5
>
)
define
(
<
W2
>
,
<
r6
>
)
define
(
<
W3
>
,
<
r7
>
)
define
(
<
T0
>
,
<
r8
>
)
define
(
<
KEY
>
,
<
r10
>
)
define
(
<
ROUND
>
,
<
r11
>
)
define
(
<
COUNT
>
,
<
r10
>
)
define
(
<
KEY
>
,
<
r11
>
)
define
(
<
X0
>
,
<
r
2
>
)
C
Overlaps
LENGTH
,
SRC
,
DS
T
define
(
<
X1
>
,
<
r
3
>
)
define
(
<
X0
>
,
<
r
0
>
)
C
Overlaps
PARAM_ROUNDS
and
PARAM_KEYS
define
(
<
X1
>
,
<
r
1
>
)
define
(
<
X2
>
,
<
r12
>
)
define
(
<
X3
>
,
<
r14
>
)
C
lr
define
(
<
FRAME_ROUNDS
>>
,
<
[
sp
]
>
)
define
(
<
FRAME_KEYS
>
,
<
[
sp
,
#
+
4
]
>
)
C
8
saved
registers
define
(
<
FRAME_DST
>
,
<
[
sp
,
#
+
40
]
>
)
define
(
<
FRAME_SRC
>
,
<
[
sp
,
#
+
44
]
>
)
define
(
<
SRC
>
,
<%
r12
>
)
C
Overlap
registers
used
in
inner
loop.
define
(
<
DS
T
>
,
<
COUNT
>
)
C
AES_DECRYPT_ROUND
(
x0
,
x1
,
x2
,
x3
,
w0
,
w1
,
w2
,
w3
,
key
)
define
(
<
AES_DECRYPT_ROUND
>
,
<
...
...
@@ -102,7 +110,7 @@ define(<AES_DECRYPT_ROUND>, <
.file
"
aes
-
decrypt
-
internal.asm
"
C _aes_decrypt(struct aes_context *ctx,
C _aes_decrypt(unsigned rounds, const uint32_t *keys,
C              const struct aes_table *T,
C              size_t length, uint8_t *dst,
C              uint8_t *src)
...
...
@@ -111,22 +119,23 @@ define(<AES_DECRYPT_ROUND>, <
PROLOGUE
(
_nettle_aes_decrypt
)
teq
LENGTH
,
#
0
beq
.Lend
ldr
SRC
,
[
sp
]
push
{
r4
,
r5
,
r6
,
r7
,
r8
,
r10
,
r11
,
lr
}
nop
C For some mysterious reason, taking out this nop
C slows this function down by 10(!)% on Cortex-A9.
ldr
SRC
,
[
sp
,
#
+
4
]
push
{
r0
,
r1
,
r4
,
r5
,
r6
,
r7
,
r8
,
r10
,
r11
,
lr
}
ALIGN
(
16
)
.Lblock_loop:
mov
KEY
,
CTX
ldm
sp
,
{
COUNT
,
KEY
}
add
TABLE
,
TABLE
,
#
AES_TABLE0
AES_LOAD
(
SRC
,
KEY
,
W0
)
AES_LOAD
(
SRC
,
KEY
,
W1
)
AES_LOAD
(
SRC
,
KEY
,
W2
)
AES_LOAD
(
SRC
,
KEY
,
W3
)
push
{
LENGTH
,
DS
T
,
SRC
}
ldr
ROUND
,
[
CTX
,
#
+
AES_NROUNDS
]
add
TABLE
,
TABLE
,
#
AES_TABLE0
str
SRC
,
FRAME_SRC
b
.Lentry
ALIGN
(
16
)
...
...
@@ -135,29 +144,34 @@ PROLOGUE(_nettle_aes_decrypt)
AES_DECRYPT_ROUND
(
X0
,
X1
,
X2
,
X3
,
W0
,
W1
,
W2
,
W3
,
KEY
)
.Lentry:
subs
R
OUN
D
,
R
OUN
D
,
#
2
subs
C
OUN
T
,
C
OUN
T
,
#
2
C
Transform
W
->
X
AES_DECRYPT_ROUND
(
W0
,
W1
,
W2
,
W3
,
X0
,
X1
,
X2
,
X3
,
KEY
)
bne
.Lround_loop
sub
TABLE
,
TABLE
,
#
AES_TABLE0
C
Final
round
ldr
DS
T
,
FRAME_DST
AES_FINAL_ROUND_V6
(
X0
,
X3
,
X2
,
X1
,
KEY
,
W0
)
AES_FINAL_ROUND_V6
(
X1
,
X0
,
X3
,
X2
,
KEY
,
W1
)
AES_FINAL_ROUND_V6
(
X2
,
X1
,
X0
,
X3
,
KEY
,
W2
)
AES_FINAL_ROUND_V6
(
X3
,
X2
,
X1
,
X0
,
KEY
,
W3
)
pop
{
LENGTH
,
DS
T
,
SRC
}
ldr
SRC
,
FRAME_
SRC
AES_STORE
(
DS
T
,
W0
)
AES_STORE
(
DS
T
,
W1
)
AES_STORE
(
DS
T
,
W2
)
AES_STORE
(
DS
T
,
W3
)
str
DS
T
,
FRAME_DST
subs
LENGTH
,
LENGTH
,
#
16
bhi
.Lblock_loop
add
sp
,
sp
,
#
8
C
Drop
saved
r0
,
r1
pop
{
r4
,
r5
,
r6
,
r7
,
r8
,
r10
,
r11
,
pc
}
.Lend:
...
...
arm/v6/aes-encrypt-internal.asm
View file @
d6fadad8
...
...
@@ -19,31 +19,39 @@ C MA 02111-1301, USA.
include_src
(
<
arm
/
aes.m4
>
)
C Benchmarked at 680, 818, 929 cycles/block on cortex A9,
C Benchmarked at 706, 870, 963 cycles/block on cortex A9,
C for 128, 192 and 256 bit key sizes.
C Possible improvements: More efficient load and store with
C aligned accesses. Better scheduling.
define
(
<
CTX
>
,
<
r0
>
)
define
(
<
TABLE
>
,
<
r1
>
)
define
(
<
LENGTH
>
,
<
r2
>
)
define
(
<
DS
T
>
,
<
r3
>
)
define
(
<
SRC
>
,
<
r12
>
)
define
(
<
PARAM_ROUNDS
>
,
<
r0
>
)
define
(
<
PARAM_KEYS
>
,
<
r1
>
)
define
(
<
TABLE
>
,
<
r2
>
)
define
(
<
LENGTH
>
,
<
r3
>
)
C
On
stack
:
DS
T
,
SRC
define
(
<
W0
>
,
<
r4
>
)
define
(
<
W1
>
,
<
r5
>
)
define
(
<
W2
>
,
<
r6
>
)
define
(
<
W3
>
,
<
r7
>
)
define
(
<
T0
>
,
<
r8
>
)
define
(
<
KEY
>
,
<
r10
>
)
define
(
<
ROUND
>
,
<
r11
>
)
define
(
<
COUNT
>
,
<
r10
>
)
define
(
<
KEY
>
,
<
r11
>
)
define
(
<
X0
>
,
<
r
2
>
)
C
Overlaps
LENGTH
,
SRC
,
DS
T
define
(
<
X1
>
,
<
r
3
>
)
define
(
<
X0
>
,
<
r
0
>
)
C
Overlaps
PARAM_ROUNDS
and
PARAM_KEYS
define
(
<
X1
>
,
<
r
1
>
)
define
(
<
X2
>
,
<
r12
>
)
define
(
<
X3
>
,
<
r14
>
)
C
lr
define
(
<
FRAME_ROUNDS
>>
,
<
[
sp
]
>
)
define
(
<
FRAME_KEYS
>
,
<
[
sp
,
#
+
4
]
>
)
C
8
saved
registers
define
(
<
FRAME_DST
>
,
<
[
sp
,
#
+
40
]
>
)
define
(
<
FRAME_SRC
>
,
<
[
sp
,
#
+
44
]
>
)
define
(
<
SRC
>
,
<%
r12
>
)
C
Overlap
registers
used
in
inner
loop.
define
(
<
DS
T
>
,
<
COUNT
>
)
C 53 instr.
C It's tempting to use eor with rotation, but that's slower.
...
...
@@ -110,7 +118,7 @@ define(<AES_ENCRYPT_ROUND>, <
.file
"
aes
-
encrypt
-
internal.asm
"
C _aes_encrypt(struct aes_context *ctx,
C _aes_encrypt(unsigned rounds, const uint32_t *keys,
C              const struct aes_table *T,
C              size_t length, uint8_t *dst,
C              uint8_t *src)
...
...
@@ -119,20 +127,23 @@ define(<AES_ENCRYPT_ROUND>, <
PROLOGUE
(
_nettle_aes_encrypt
)
teq
LENGTH
,
#
0
beq
.Lend
ldr
SRC
,
[
sp
]
push
{
r4
,
r5
,
r6
,
r7
,
r8
,
r10
,
r11
,
lr
}
ldr
SRC
,
[
sp
,
#
+
4
]
push
{
r0
,
r1
,
r4
,
r5
,
r6
,
r7
,
r8
,
r10
,
r11
,
lr
}
ALIGN
(
16
)
.Lblock_loop:
mov
KEY
,
CTX
ldm
sp
,
{
COUNT
,
KEY
}
add
TABLE
,
TABLE
,
#
AES_TABLE0
AES_LOAD
(
SRC
,
KEY
,
W0
)
AES_LOAD
(
SRC
,
KEY
,
W1
)
AES_LOAD
(
SRC
,
KEY
,
W2
)
AES_LOAD
(
SRC
,
KEY
,
W3
)
push
{
LENGTH
,
DS
T
,
SRC
}
ldr
ROUND
,
[
CTX
,
#
+
AES_NROUNDS
]
add
TABLE
,
TABLE
,
#
AES_TABLE0
str
SRC
,
FRAME_SRC
b
.Lentry
ALIGN
(
16
)
...
...
@@ -141,29 +152,34 @@ PROLOGUE(_nettle_aes_encrypt)
AES_ENCRYPT_ROUND
(
X0
,
X1
,
X2
,
X3
,
W0
,
W1
,
W2
,
W3
,
KEY
)
.Lentry:
subs
R
OUN
D
,
R
OUN
D
,
#
2
subs
C
OUN
T
,
C
OUN
T
,
#
2
C
Transform
W
->
X
AES_ENCRYPT_ROUND
(
W0
,
W1
,
W2
,
W3
,
X0
,
X1
,
X2
,
X3
,
KEY
)
bne
.Lround_loop
sub
TABLE
,
TABLE
,
#
AES_TABLE0
C
Final
round
ldr
DS
T
,
FRAME_DST
AES_FINAL_ROUND_V6
(
X0
,
X1
,
X2
,
X3
,
KEY
,
W0
)
AES_FINAL_ROUND_V6
(
X1
,
X2
,
X3
,
X0
,
KEY
,
W1
)
AES_FINAL_ROUND_V6
(
X2
,
X3
,
X0
,
X1
,
KEY
,
W2
)
AES_FINAL_ROUND_V6
(
X3
,
X0
,
X1
,
X2
,
KEY
,
W3
)
pop
{
LENGTH
,
DS
T
,
SRC
}
ldr
SRC
,
FRAME_
SRC
AES_STORE
(
DS
T
,
W0
)
AES_STORE
(
DS
T
,
W1
)
AES_STORE
(
DS
T
,
W2
)
AES_STORE
(
DS
T
,
W3
)
str
DS
T
,
FRAME_DST
subs
LENGTH
,
LENGTH
,
#
16
bhi
.Lblock_loop
add
sp
,
sp
,
#
8
C
Drop
saved
r0
,
r1
pop
{
r4
,
r5
,
r6
,
r7
,
r8
,
r10
,
r11
,
pc
}
.Lend:
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment