Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Nettle
nettle
Commits
c5e15342
Commit
c5e15342
authored
Mar 26, 2013
by
Niels Möller
Browse files
ARM assembly for salsa20.
parent
769c3198
Changes
2
Hide whitespace changes
Inline
Side-by-side
ChangeLog
View file @
c5e15342
2013-03-26 Niels Möller <nisse@lysator.liu.se>
* armv7/salsa20-core-internal.asm: New file. 45% speedup.
2013-03-25 Niels Möller <nisse@lysator.liu.se>
From Martin Storsjö:
...
...
armv7/salsa20-core-internal.asm
0 → 100644
View file @
c5e15342
C
nettle
,
low
-
level
cryptographics
library
C
C
Copyright
(
C
)
2013
Niels
M
ö
ller
C
C
The
nettle
library
is
free
software
; you can redistribute it and/or modify
C
it
under
the
terms
of
the
GNU
Lesser
General
Public
License
as
published
by
C
the
Free
Software
Foundation
; either version 2.1 of the License, or (at your
C
option
)
any
later
version.
C
C
The
nettle
library
is
di
stributed
in
the
hope
that
it
will
be
useful
,
but
C
WITHOUT
ANY
WARRANTY
; without even the implied warranty of MERCHANTABILITY
C
or
FITNESS
FOR
A
PARTICULAR
PURPOSE.
See
the
GNU
Lesser
General
Public
C
License
for
more
details.
C
C
You
should
have
received
a
copy
of
the
GNU
Lesser
General
Public
License
C
al
ong
with
the
nettle
library
; see the file COPYING.LIB. If not, write to
C
the
Free
Software
Foundation
,
Inc.
,
51
Franklin
Street
,
Fifth
Floor
,
Boston
,
C
MA
02111
-
1301
,
USA.
.file
"
salsa20
-
core
-
internal.asm
"
.fpu
neon
define
(
<
DS
T
>
,
<
r0
>
)
define
(
<
SRC
>
,
<
r1
>
)
define
(
<
ROUNDS
>
,
<
r2
>
)
define
(
<
X0
>
,
<
q0
>
)
define
(
<
X1
>
,
<
q1
>
)
define
(
<
X2
>
,
<
q2
>
)
define
(
<
X3
>
,
<
q3
>
)
define
(
<
T0
>
,
<
q8
>
)
define
(
<
T1
>
,
<
q9
>
)
define
(
<
M0101
>
,
<
q10
>
)
define
(
<
M0110
>
,
<
q11
>
)
define
(
<
M0011
>
,
<
q12
>
)
define
(
<
S1
>
,
<
q13
>
)
define
(
<
S2
>
,
<
q14
>
)
define
(
<
S3
>
,
<
q15
>
)
define
(
<
QROUND
>
,
<
vadd.i32
T0
,
$
1
,
$
4
vshl.i32
T1
,
T0
,
#
7
vshr.u32
T0
,
T0
,
#
25
veor
$
2
,
$
2
,
T0
veor
$
2
,
$
2
,
T1
vadd.i32
T0
,
$
1
,
$
2
vshl.i32
T1
,
T0
,
#
9
vshr.u32
T0
,
T0
,
#
23
veor
$
3
,
$
3
,
T0
veor
$
3
,
$
3
,
T1
vadd.i32
T0
,
$
2
,
$
3
vshl.i32
T1
,
T0
,
#
13
vshr.u32
T0
,
T0
,
#
19
veor
$
4
,
$
4
,
T0
veor
$
4
,
$
4
,
T1
vadd.i32
T0
,
$
3
,
$
4
vshl.i32
T1
,
T0
,
#
18
vshr.u32
T0
,
T0
,
#
14
veor
$
1
,
$
1
,
T0
veor
$
1
,
$
1
,
T1
>)
.text
.align
4
.Lmasks:
.int
0
,
-
1
,
0
,
-
1
.int
0
,
-
1
,
-
1
,
0
.int
0
,
0
,
-
1
,
-
1
C
_salsa20_core
(
uint32_t
*
ds
t
,
const
uint32_t
*
src
,
unsigned
rounds
)
PROLOGUE
(
_nettle_salsa20_core
)
vldm
SRC
,
{
X0
,
X1
,
X2
,
X3
}
C
Input
rows
:
C
0
1
2
3
X0
C
4
5
6
7
X1
C
8
9
10
11
X2
C
12
13
14
15
X3
C
Permuted
to
:
C
0
5
10
15
C
4
9
14
3
C
8
13
2
7
C
12
1
6
11
C
FIXME
:
Construct
in
some
other
way?
adr
r12
,
.Lmasks
vldm
r12
,
{
M0101
,
M0110
,
M0011
}
vmov
S1
,
X1
vmov
S2
,
X2
vmov
S3
,
X3
C
Swaps
in
columns
1
,
3
:
C
0
5
2
7
X0
^
C
4
1
6
3
T0
v
C
8
13
10
15
T1
^
C
12
9
14
11
X3
v
vmov
T0
,
X1
vmov
T1
,
X2
vbit
T0
,
X0
,
M0101
vbit
X0
,
X1
,
M0101
vbit
T1
,
X3
,
M0101
vbit
X3
,
X2
,
M0101
C
Swaps
in
column
1
,
2
:
C
0
5
2
7
X0
C
4
9
14
3
X1
^
C
8
13
10
15
T1
|
C
12
1
6
11
X3
v
vmov
X1
,
T0
vbit
X1
,
X3
,
M0110
vbit
X3
,
T0
,
M0110
C
Swaps
in
columm
2
,
3
:
C
0
5
10
15
X0
^
C
4
9
14
3
X1
|
C
8
13
2
7
X2
v
C
12
1
6
11
X3
vmov
X2
,
T1
vbit
X2
,
X0
,
M0011
vbit
X0
,
T1
,
M0011
.Loop:
QROUND
(
X0
,
X1
,
X2
,
X3
)
C
Rotate
rows
,
to
get
C
0
5
10
15
C
3
4
9
14
>>>
1
C
2
7
8
13
>>>
2
C
1
6
11
12
>>>
3
vext.32
X1
,
X1
,
X1
,
#
3
vext.32
X2
,
X2
,
X2
,
#
2
vext.32
X3
,
X3
,
X3
,
#
1
QROUND
(
X0
,
X3
,
X2
,
X1
)
subs
ROUNDS
,
ROUNDS
,
#
2
C
Inverse
rotation
vext.32
X1
,
X1
,
X1
,
#
1
vext.32
X2
,
X2
,
X2
,
#
2
vext.32
X3
,
X3
,
X3
,
#
3
bhi
.Loop
C
Inverse
swaps
vmov
T1
,
X2
vbit
T1
,
X0
,
M0011
vbit
X0
,
X2
,
M0011
vmov
T0
,
X1
vbit
T0
,
X3
,
M0110
vbit
X3
,
X1
,
M0110
vmov
X1
,
T0
vmov
X2
,
T1
vbit
X1
,
X0
,
M0101
vbit
X0
,
T0
,
M0101
vbit
X2
,
X3
,
M0101
vbit
X3
,
T1
,
M0101
vld1.64
{
T0
}
,
[
SRC
]
vadd.u32
X0
,
X0
,
T0
vadd.u32
X1
,
X1
,
S1
vadd.u32
X2
,
X2
,
S2
vadd.u32
X3
,
X3
,
S3
vstm
DS
T
,
{
X0
,
X1
,
X2
,
X3
}
bx
lr
EPILOGUE
(
_nettle_salsa20_core
)
divert
(
-
1
)
define
salsastate
p
/
x
$
q0.u32
p
/
x
$
q1.u32
p
/
x
$
q2.u32
p
/
x
$
q3.u32
end
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment