Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
Nettle
nettle
Commits
4ac1b5f0
Commit
4ac1b5f0
authored
Mar 08, 2013
by
Niels Möller
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ARM assembly for AES.
parent
32f3ba18
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
382 additions
and
0 deletions
+382
-0
ChangeLog
ChangeLog
+6
-0
armv7/aes-decrypt-internal.asm
armv7/aes-decrypt-internal.asm
+105
-0
armv7/aes-encrypt-internal.asm
armv7/aes-encrypt-internal.asm
+107
-0
armv7/aes.m4
armv7/aes.m4
+164
-0
No files found.
ChangeLog
View file @
4ac1b5f0
2013-03-08 Niels Möller <nisse@lysator.liu.se>
* armv7/aes-decrypt-internal.asm: New file, 15% speedup.
* armv7/aes-encrypt-internal.asm: New file, 25% speedup.
* armv7/aes.m4: New file.
2013-03-07 Niels Möller <nisse@lysator.liu.se>
2013-03-07 Niels Möller <nisse@lysator.liu.se>
* gmp-glue.c (mpz_limbs_cmp): Don't use PTR and SIZ macros.
* gmp-glue.c (mpz_limbs_cmp): Don't use PTR and SIZ macros.
...
...
armv7/aes-decrypt-internal.asm
0 → 100644
View file @
4ac1b5f0
C nettle, low-level cryptographics library
C
C Copyright (C) 2013 Niels Möller
C
C The nettle library is free software; you can redistribute it and/or modify
C it under the terms of the GNU Lesser General Public License as published by
C the Free Software Foundation; either version 2.1 of the License, or (at your
C option) any later version.
C
C The nettle library is distributed in the hope that it will be useful, but
C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
C or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
C License for more details.
C
C You should have received a copy of the GNU Lesser General Public License
C along with the nettle library; see the file COPYING.LIB.  If not, write to
C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
C MA 02111-1301, USA.
include_src(<armv7/aes.m4>)

C Benchmarked at 785, 914, 1051 cycles/block on cortex A9,
C for 128, 192 and 256 bit key sizes. Unclear why it is slower
C than _aes_encrypt.

C Register usage:

C Function arguments. The first four are register aliases; SRC is not
C passed in r12 but is loaded into it from the stack on entry.
define(<CTX>,		<r0>)
define(<TABLE>,		<r1>)
define(<LENGTH>,	<r2>)
define(<DST>,		<r3>)
define(<SRC>,		<r12>)

C One of the two sets of state words the rounds alternate between.
define(<W0>,		<r4>)
define(<W1>,		<r5>)
define(<W2>,		<r6>)
define(<W3>,		<r7>)

C Scratch register used by the macros in aes.m4.
define(<T0>,		<r8>)

C Subkey pointer and remaining-round counter.
define(<KEY>,		<r10>)
define(<ROUND>,		<r11>)

C The other set of state words. X0, X1 and X2 share registers with
C LENGTH, DST and SRC, so those three are spilled while rounds run.
define(<X0>,		<r2>)	C Overlaps LENGTH, SRC, DST
define(<X1>,		<r3>)
define(<X2>,		<r12>)
define(<X3>,		<r14>)	C lr
.file "aes-decrypt-internal.asm"

	C _aes_decrypt(struct aes_context *ctx,
	C	       const struct aes_table *T,
	C	       unsigned length, uint8_t *dst,
	C	       uint8_t *src)
	C
	C Processes length bytes from src to dst, 16 bytes per pass of
	C the block loop. Returns immediately when length is zero.
	.text
	.align 2
PROLOGUE(_nettle_aes_decrypt)
	C Nothing to do for an empty input.
	teq	LENGTH, #0
	beq	.Lend

	C The fifth argument lives on the stack; fetch it before the
	C push below moves sp.
	ldr	SRC, [sp]

	push	{r4-r8,r10,r11,lr}

.Lblock_loop:
	C Each block starts over from the first subkey, at the start of ctx.
	mov	KEY, CTX

	C Read one 16-byte block and xor in the first four subkey words.
	AES_LOAD(SRC, KEY, W0)
	AES_LOAD(SRC, KEY, W1)
	AES_LOAD(SRC, KEY, W2)
	AES_LOAD(SRC, KEY, W3)

	C X0, X1 and X2 alias these three registers, so park them on
	C the stack for the duration of the rounds.
	push	{LENGTH, DST, SRC}

	ldr	ROUND, [CTX, #+AES_NROUNDS]

	C Point TABLE at the word tables used by the round macro.
	add	TABLE, TABLE, #AES_TABLE0

	C The first pass enters in the middle of the loop, so it
	C performs a single round; later passes perform two.
	b	.Lentry

	.align 2
.Lround_loop:
	C Transform X -> W
	AES_DECRYPT_ROUND(X0, X1, X2, X3, W0, W1, W2, W3, KEY)

.Lentry:
	C The flags set here are consumed by the bne below; the round
	C macro in between contains no flag-setting instruction.
	subs	ROUND, ROUND, #2

	C Transform W -> X
	AES_DECRYPT_ROUND(W0, W1, W2, W3, X0, X1, X2, X3, KEY)

	bne	.Lround_loop

	C Undo the offset so the byte lookups of the final round index
	C from the start of the table structure.
	sub	TABLE, TABLE, #AES_TABLE0

	C Final round
	AES_FINAL_ROUND(X0, X3, X2, X1, KEY, W0)
	AES_FINAL_ROUND(X1, X0, X3, X2, KEY, W1)
	AES_FINAL_ROUND(X2, X1, X0, X3, KEY, W2)
	AES_FINAL_ROUND(X3, X2, X1, X0, KEY, W3)

	C Recover the block bookkeeping spilled above.
	pop	{LENGTH, DST, SRC}

	C Write the output block; DST advances by 16 bytes.
	AES_STORE(DST, W0)
	AES_STORE(DST, W1)
	AES_STORE(DST, W2)
	AES_STORE(DST, W3)

	C Continue while bytes remain (unsigned comparison).
	subs	LENGTH, LENGTH, #16
	bhi	.Lblock_loop

	C Restore the saved registers and return by popping the saved
	C lr directly into pc.
	pop	{r4-r8,r10,r11,pc}

.Lend:
	bx	lr
EPILOGUE(_nettle_aes_decrypt)
armv7/aes-encrypt-internal.asm
0 → 100644
View file @
4ac1b5f0
C nettle, low-level cryptographics library
C
C Copyright (C) 2013 Niels Möller
C
C The nettle library is free software; you can redistribute it and/or modify
C it under the terms of the GNU Lesser General Public License as published by
C the Free Software Foundation; either version 2.1 of the License, or (at your
C option) any later version.
C
C The nettle library is distributed in the hope that it will be useful, but
C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
C or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
C License for more details.
C
C You should have received a copy of the GNU Lesser General Public License
C along with the nettle library; see the file COPYING.LIB.  If not, write to
C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
C MA 02111-1301, USA.
include_src(<armv7/aes.m4>)

C Benchmarked at 693, 824, 950 cycles/block on cortex A9,
C for 128, 192 and 256 bit key sizes.

C Possible improvements: More efficient load and store with
C aligned accesses. Better scheduling.

C Register usage:

C Function arguments. The first four are register aliases; SRC is not
C passed in r12 but is loaded into it from the stack on entry.
define(<CTX>,		<r0>)
define(<TABLE>,		<r1>)
define(<LENGTH>,	<r2>)
define(<DST>,		<r3>)
define(<SRC>,		<r12>)

C One of the two sets of state words the rounds alternate between.
define(<W0>,		<r4>)
define(<W1>,		<r5>)
define(<W2>,		<r6>)
define(<W3>,		<r7>)

C Scratch register used by the macros in aes.m4.
define(<T0>,		<r8>)

C Subkey pointer and remaining-round counter.
define(<KEY>,		<r10>)
define(<ROUND>,		<r11>)

C The other set of state words. X0, X1 and X2 share registers with
C LENGTH, DST and SRC, so those three are spilled while rounds run.
define(<X0>,		<r2>)	C Overlaps LENGTH, SRC, DST
define(<X1>,		<r3>)
define(<X2>,		<r12>)
define(<X3>,		<r14>)	C lr
.file "aes-encrypt-internal.asm"

	C _aes_encrypt(struct aes_context *ctx,
	C	       const struct aes_table *T,
	C	       unsigned length, uint8_t *dst,
	C	       uint8_t *src)
	C
	C Processes length bytes from src to dst, 16 bytes per pass of
	C the block loop. Returns immediately when length is zero.
	.text
	.align 2
PROLOGUE(_nettle_aes_encrypt)
	C Nothing to do for an empty input.
	teq	LENGTH, #0
	beq	.Lend

	C The fifth argument lives on the stack; fetch it before the
	C push below moves sp.
	ldr	SRC, [sp]

	push	{r4-r8,r10,r11,lr}

.Lblock_loop:
	C Each block starts over from the first subkey, at the start of ctx.
	mov	KEY, CTX

	C Read one 16-byte block and xor in the first four subkey words.
	AES_LOAD(SRC, KEY, W0)
	AES_LOAD(SRC, KEY, W1)
	AES_LOAD(SRC, KEY, W2)
	AES_LOAD(SRC, KEY, W3)

	C X0, X1 and X2 alias these three registers, so park them on
	C the stack for the duration of the rounds.
	push	{LENGTH, DST, SRC}

	ldr	ROUND, [CTX, #+AES_NROUNDS]

	C Point TABLE at the word tables used by the round macro.
	add	TABLE, TABLE, #AES_TABLE0

	C The first pass enters in the middle of the loop, so it
	C performs a single round; later passes perform two.
	b	.Lentry

	.align 2
.Lround_loop:
	C Transform X -> W
	AES_ENCRYPT_ROUND(X0, X1, X2, X3, W0, W1, W2, W3, KEY)

.Lentry:
	C The flags set here are consumed by the bne below; the round
	C macro in between contains no flag-setting instruction.
	subs	ROUND, ROUND, #2

	C Transform W -> X
	AES_ENCRYPT_ROUND(W0, W1, W2, W3, X0, X1, X2, X3, KEY)

	bne	.Lround_loop

	C Undo the offset so the byte lookups of the final round index
	C from the start of the table structure.
	sub	TABLE, TABLE, #AES_TABLE0

	C Final round
	AES_FINAL_ROUND(X0, X1, X2, X3, KEY, W0)
	AES_FINAL_ROUND(X1, X2, X3, X0, KEY, W1)
	AES_FINAL_ROUND(X2, X3, X0, X1, KEY, W2)
	AES_FINAL_ROUND(X3, X0, X1, X2, KEY, W3)

	C Recover the block bookkeeping spilled above.
	pop	{LENGTH, DST, SRC}

	C Write the output block; DST advances by 16 bytes.
	AES_STORE(DST, W0)
	AES_STORE(DST, W1)
	AES_STORE(DST, W2)
	AES_STORE(DST, W3)

	C Continue while bytes remain (unsigned comparison).
	subs	LENGTH, LENGTH, #16
	bhi	.Lblock_loop

	C Restore the saved registers and return by popping the saved
	C lr directly into pc.
	pop	{r4-r8,r10,r11,pc}

.Lend:
	bx	lr
EPILOGUE(_nettle_aes_encrypt)
armv7/aes.m4
0 → 100644
View file @
4ac1b5f0
C Loads one word, and adds it to the subkey. Uses T0
C AES_LOAD(SRC, KEY, REG)
C
C Reads four bytes through SRC, one ldrb at a time, and assembles them
C in REG with the first byte in bits 0-7 and the fourth in bits 24-31.
C Byte loads are used, so no alignment of SRC is required. The word at
C KEY is then xored into REG. On exit SRC has advanced by 4 bytes and
C KEY by one word.
define(<AES_LOAD>, <
ldrb $3, [$1], #+1
ldrb T0, [$1], #+1
orr $3, T0, lsl #8
ldrb T0, [$1], #+1
orr $3, T0, lsl #16
ldrb T0, [$1], #+1
orr $3, T0, lsl #24
ldr T0, [$2], #+4
eor $3, T0
>)
C Stores one word. Destroys input.
C AES_STORE(DST, X)
C
C Writes X through DST one byte at a time, least significant byte
C first, post-incrementing DST after each store (4 bytes in total).
C X is rotated right by 8 bits between stores, so on exit it holds the
C original value rotated right by 24 bits.
define(<AES_STORE>, <
strb $2, [$1], #+1
ror $2, $2, #8
strb $2, [$1], #+1
ror $2, $2, #8
strb $2, [$1], #+1
ror $2, $2, #8
strb $2, [$1], #+1
>)
C 53 instr.
C It's tempting to use eor with rotation, but that's slower.
C AES_ENCRYPT_ROUND(x0,x1,x2,x3,w0,w1,w2,w3,key)
C
C Computes the four output words w0-w3 from the input words x0-x3.
C Output word i is the xor of four word-sized table lookups, taken from
C four consecutive 1024-byte tables starting at TABLE and indexed by
C   byte 0 of x(i), byte 1 of x(i+1), byte 2 of x(i+2), byte 3 of x(i+3)
C (indices taken mod 4), and finally xored with one subkey word.
C
C Side effects:
C   - The four subkey words are fetched with ldm into x0-x3, so the
C     inputs are overwritten, and key advances by 16 bytes.
C   - TABLE is stepped forward by 1024 three times and moved back by
C     3072 before the end, so it is unchanged on exit.
C   - T0 is used as scratch.
C None of the instructions below updates the condition flags; the
C callers place this macro between a subs and the bne that tests it.
define(<AES_ENCRYPT_ROUND>, <
C Byte 0 lookups from the first table initialise the outputs.
uxtb T0, $1
ldr $5, [TABLE, T0, lsl #2]
uxtb T0, $2
ldr $6, [TABLE, T0, lsl #2]
uxtb T0, $3
ldr $7, [TABLE, T0, lsl #2]
uxtb T0, $4
ldr $8, [TABLE, T0, lsl #2]
C Byte 1 lookups from the second table.
uxtb T0, $2, ror #8
add TABLE, TABLE, #1024
ldr T0, [TABLE, T0, lsl #2]
eor $5, $5, T0
uxtb T0, $3, ror #8
ldr T0, [TABLE, T0, lsl #2]
eor $6, $6, T0
uxtb T0, $4, ror #8
ldr T0, [TABLE, T0, lsl #2]
eor $7, $7, T0
uxtb T0, $1, ror #8
ldr T0, [TABLE, T0, lsl #2]
eor $8, $8, T0
C Byte 2 lookups from the third table.
uxtb T0, $3, ror #16
add TABLE, TABLE, #1024
ldr T0, [TABLE, T0, lsl #2]
eor $5, $5, T0
uxtb T0, $4, ror #16
ldr T0, [TABLE, T0, lsl #2]
eor $6, $6, T0
uxtb T0, $1, ror #16
ldr T0, [TABLE, T0, lsl #2]
eor $7, $7, T0
uxtb T0, $2, ror #16
ldr T0, [TABLE, T0, lsl #2]
eor $8, $8, T0
C Byte 3 lookups from the fourth table.
uxtb T0, $4, ror #24
add TABLE, TABLE, #1024
ldr T0, [TABLE, T0, lsl #2]
eor $5, $5, T0
uxtb T0, $1, ror #24
ldr T0, [TABLE, T0, lsl #2]
eor $6, $6, T0
uxtb T0, $2, ror #24
ldr T0, [TABLE, T0, lsl #2]
eor $7, $7, T0
uxtb T0, $3, ror #24
ldr T0, [TABLE, T0, lsl #2]
C The inputs are no longer needed; reuse their registers for the subkey.
ldm $9!, {$1,$2,$3,$4}
eor $8, $8, T0
C Restore TABLE to its value on entry.
sub TABLE, TABLE, #3072
eor $5, $5, $1
eor $6, $6, $2
eor $7, $7, $3
eor $8, $8, $4
>)
C AES_DECRYPT_ROUND(x0,x1,x2,x3,w0,w1,w2,w3,key)
C
C Same structure as AES_ENCRYPT_ROUND, with the input words selected
C in the opposite rotation. Output word i is the xor of four word-sized
C table lookups, taken from four consecutive 1024-byte tables starting
C at TABLE and indexed by
C   byte 0 of x(i), byte 1 of x(i-1), byte 2 of x(i-2), byte 3 of x(i-3)
C (indices taken mod 4), and finally xored with one subkey word.
C
C Side effects:
C   - The four subkey words are fetched with ldm into x0-x3, so the
C     inputs are overwritten, and key advances by 16 bytes.
C   - TABLE is stepped forward by 1024 three times and moved back by
C     3072 before the end, so it is unchanged on exit.
C   - T0 is used as scratch.
C None of the instructions below updates the condition flags; the
C caller places this macro between a subs and the bne that tests it.
define(<AES_DECRYPT_ROUND>, <
C Byte 0 lookups from the first table initialise the outputs.
uxtb T0, $1
ldr $5, [TABLE, T0, lsl #2]
uxtb T0, $2
ldr $6, [TABLE, T0, lsl #2]
uxtb T0, $3
ldr $7, [TABLE, T0, lsl #2]
uxtb T0, $4
ldr $8, [TABLE, T0, lsl #2]
C Byte 1 lookups from the second table.
uxtb T0, $4, ror #8
add TABLE, TABLE, #1024
ldr T0, [TABLE, T0, lsl #2]
eor $5, $5, T0
uxtb T0, $1, ror #8
ldr T0, [TABLE, T0, lsl #2]
eor $6, $6, T0
uxtb T0, $2, ror #8
ldr T0, [TABLE, T0, lsl #2]
eor $7, $7, T0
uxtb T0, $3, ror #8
ldr T0, [TABLE, T0, lsl #2]
eor $8, $8, T0
C Byte 2 lookups from the third table.
uxtb T0, $3, ror #16
add TABLE, TABLE, #1024
ldr T0, [TABLE, T0, lsl #2]
eor $5, $5, T0
uxtb T0, $4, ror #16
ldr T0, [TABLE, T0, lsl #2]
eor $6, $6, T0
uxtb T0, $1, ror #16
ldr T0, [TABLE, T0, lsl #2]
eor $7, $7, T0
uxtb T0, $2, ror #16
ldr T0, [TABLE, T0, lsl #2]
eor $8, $8, T0
C Byte 3 lookups from the fourth table.
uxtb T0, $2, ror #24
add TABLE, TABLE, #1024
ldr T0, [TABLE, T0, lsl #2]
eor $5, $5, T0
uxtb T0, $3, ror #24
ldr T0, [TABLE, T0, lsl #2]
eor $6, $6, T0
uxtb T0, $4, ror #24
ldr T0, [TABLE, T0, lsl #2]
eor $7, $7, T0
uxtb T0, $1, ror #24
ldr T0, [TABLE, T0, lsl #2]
C The inputs are no longer needed; reuse their registers for the subkey.
ldm $9!, {$1,$2,$3,$4}
eor $8, $8, T0
C Restore TABLE to its value on entry.
sub TABLE, TABLE, #3072
eor $5, $5, $1
eor $6, $6, $2
eor $7, $7, $3
eor $8, $8, $4
>)
C AES_FINAL_ROUND(a,b,c,d,key,res)
C
C Computes one output word of the final round into res. Four byte-sized
C lookups (ldrb, unscaled index) are made at TABLE, indexed by byte 0 of
C a, byte 1 of b, byte 2 of c and byte 3 of d, and placed in res at bit
C positions 0, 8, 16 and 24 respectively. One word is then loaded from
C key, post-incrementing key by 4, and xored into res. Uses T0.
C
C The callers subtract AES_TABLE0 from TABLE before using this macro,
C so the byte table is presumably the S-box at the start of
C struct aes_table. TODO: confirm against the struct definition.
define(<AES_FINAL_ROUND>, <
uxtb T0, $1
ldrb $6, [TABLE, T0]
uxtb T0, $2, ror #8
ldrb T0, [TABLE, T0]
eor $6, $6, T0, lsl #8
uxtb T0, $3, ror #16
ldrb T0, [TABLE, T0]
eor $6, $6, T0, lsl #16
uxtb T0, $4, ror #24
ldrb T0, [TABLE, T0]
eor $6, $6, T0, lsl #24
ldr T0, [$5], #+4
eor $6, T0
>)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment