Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
Nettle
nettle
Commits
06fe7d83
Commit
06fe7d83
authored
Apr 18, 2012
by
Niels Möller
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
x86_64 implementation of salsa20.
parent
95c8eb72
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
275 additions
and
1 deletion
+275
-1
ChangeLog
ChangeLog
+4
-0
configure.ac
configure.ac
+1
-1
x86_64/salsa20-crypt.asm
x86_64/salsa20-crypt.asm
+270
-0
No files found.
ChangeLog
View file @
06fe7d83
2012-04-18 Niels Möller <nisse@lysator.liu.se>
* x86_64/salsa20-crypt.asm: New file.
2012-04-17 Niels Möller <nisse@lysator.liu.se>
* testsuite/salsa20-test.c (test_salsa20_stream): Check that
...
...
configure.ac
View file @
06fe7d83
...
...
@@ -233,7 +233,7 @@ if test "x$enable_assembler" = xyes ; then
found=no
for tmp_f in aes-encrypt-internal.asm aes-decrypt-internal.asm \
arcfour-crypt.asm camellia-crypt-internal.asm \
md5-compress.asm memxor.asm \
md5-compress.asm memxor.asm
salsa20-crypt.asm
\
serpent-encrypt.asm serpent-decrypt.asm \
sha1-compress.asm machine.m4; do
# echo "Looking for $srcdir/$asm_path/$tmp_f"
...
...
x86_64/salsa20-crypt.asm
0 → 100644
View file @
06fe7d83
C
nettle
,
low
-
level
cryptographics
library
C
C
Copyright
(
C
)
2012
Niels
M
ö
ller
C
C
The
nettle
library
is
free
software
; you can redistribute it and/or modify
C
it
under
the
terms
of
the
GNU
Lesser
General
Public
License
as
published
by
C
the
Free
Software
Foundation
; either version 2.1 of the License, or (at your
C
option
)
any
later
version.
C
C
The
nettle
library
is
di
stributed
in
the
hope
that
it
will
be
useful
,
but
C
WITHOUT
ANY
WARRANTY
; without even the implied warranty of MERCHANTABILITY
C
or
FITNESS
FOR
A
PARTICULAR
PURPOSE.
See
the
GNU
Lesser
General
Public
C
License
for
more
details.
C
C
You
should
have
received
a
copy
of
the
GNU
Lesser
General
Public
License
C
al
ong
with
the
nettle
library
; see the file COPYING.LIB. If not, write to
C
the
Free
Software
Foundation
,
Inc.
,
59
Temple
Place
-
Suite
330
,
Boston
,
C
MA
02111
-
1307
,
USA.
define
(
<
CTX
>
,
<%
rdi
>
)
define
(
<
LENGTH
>
,
<%
rsi
>
)
define
(
<
DS
T
>
,
<%
rdx
>
)
define
(
<
SRC
>
,
<%
rcx
>
)
define
(
<
T64
>
,
<%
r8
>
)
define
(
<
POS
>
,
<%
r9
>
)
define
(
<
X0
>
,
<%
xmm0
>
)
define
(
<
X1
>
,
<%
xmm1
>
)
define
(
<
X2
>
,
<%
xmm2
>
)
define
(
<
X3
>
,
<%
xmm3
>
)
define
(
<
T0
>
,
<%
xmm4
>
)
define
(
<
T1
>
,
<%
xmm5
>
)
define
(
<
M0101
>
,
<%
xmm6
>
)
define
(
<
M0110
>
,
<%
xmm7
>
)
define
(
<
M0011
>
,
<%
xmm8
>
)
define
(
<
COUNT
>
,
<%
rax
>
)
C
Possible
improvements
:
C
C
Do
two
bl
ocks
(
or
more
)
at
a
time
in
parallel
,
to
avoid
limitations
C
due
to
data
dependencies.
C
C
Avoid
redoing
the
permutation
of
the
input
for
each
bl
ock
(
al
l
but
C
the
two
counter
word
s
are
constant
)
.
Could
al
so
keep
the
input
in
C
registers.
C
QROUND
(
x0
,
x1
,
x2
,
x3
)
define
(
<
QROUND
>
,
<
movaps
$
4
,
T0
C
0
paddd
$
1
,
T0
C
1
movaps
T0
,
T1
C
2
pslld
<
$
>
7
,
T0
C
2
psrld
<
$
>
25
,
T1
C
3
pxor
T0
,
$
2
C
3
pxor
T1
,
$
2
C
4
movaps
$
1
,
T0
C
0
paddd
$
2
,
T0
C
5
movaps
T0
,
T1
C
6
pslld
<
$
>
9
,
T0
C
6
psrld
<
$
>
23
,
T1
C
7
pxor
T0
,
$
3
C
7
pxor
T1
,
$
3
C
8
movaps
$
2
,
T0
C
0
paddd
$
3
,
T0
C
9
movaps
T0
,
T1
C
10
pslld
<
$
>
13
,
T0
C
10
psrld
<
$
>
19
,
T1
C
11
pxor
T0
,
$
4
C
11
pxor
T1
,
$
4
C
12
movaps
$
3
,
T0
C
0
paddd
$
4
,
T0
C
13
movaps
T0
,
T1
C
14
pslld
<
$
>
18
,
T0
C
14
psrld
<
$
>
14
,
T1
C
15
pxor
T0
,
$
1
C
15
pxor
T1
,
$
1
C
16
>)
C
SWAP
(
x0
,
x1
,
mask
)
C
Swaps
bits
in
x0
and
x1
,
with
bits
selected
by
the
mask
define
(
<
SWAP
>
,
<
movaps
$
1
,
T0
pxor
$
2
,
$
1
pand
$
3
,
$
1
pxor
$
1
,
$
2
pxor
T0
,
$
1
>)
.file
"salsa20.asm"
C
salsa20_crypt
(
struct
salsa20_ctx
*
ctx
,
unsigned
length
,
C
uint8_t
*
ds
t
,
const
uint8_t
*
src
)
.text
ALIGN
(
4
)
PROLOGUE
(
nettle_salsa20_crypt
)
W64_ENTRY
(
4
,
9
)
test
LENGTH
,
LENGTH
jz
.Lend
C
Load
mask
registers
mov
$
-
1
,
XREG
(
COUNT
)
movd
XREG
(
COUNT
),
M0101
pshufd
$
0x09
,
M0101
,
M0011
C
01
01
00
00
pshufd
$
0x41
,
M0101
,
M0110
C
01
00
00
01
pshufd
$
0x22
,
M0101
,
M0101
C
01
00
01
00
.Lblock_loop:
movups
(
CTX
),
X0
movups
16
(
CTX
),
X1
movups
32
(
CTX
),
X2
movups
48
(
CTX
),
X3
C
On
input
,
each
xmm
register
is
one
row.
We
start
with
C
C
0
1
2
3
C
4
5
6
7
C
8
9
10
11
C
12
13
14
15
C
C
Di
agrams
are
in
little
-
endian
order
,
with
least
si
gnificant
word
to
C
the
left.
We
rotate
the
columns
,
to
get
instead
C
C
0
5
10
15
C
4
9
14
3
C
8
13
2
7
C
12
1
6
11
C
C
The
original
rows
are
now
di
agonals.
SWAP
(
X0
,
X1
,
M0101
)
SWAP
(
X2
,
X3
,
M0101
)
SWAP
(
X1
,
X3
,
M0110
)
SWAP
(
X0
,
X2
,
M0011
)
movl
$
10
,
XREG
(
COUNT
)
ALIGN
(
4
)
.Loop:
QROUND
(
X0
,
X1
,
X2
,
X3
)
C
For
the
row
operations
,
we
first
rotate
the
rows
,
to
get
C
C
0
5
10
15
C
3
4
9
14
C
2
7
8
13
C
1
6
11
12
C
C
Now
the
original
rows
are
turned
into
into
columns.
(
This
C
SI
MD
hack
described
in
djb
'
s
papers
)
.
pshufd
$
0x93
,
X1
,
X1
C
11
00
01
10
(
least
si
gn.
left
)
pshufd
$
0x4e
,
X2
,
X2
C
10
11
00
01
pshufd
$
0x39
,
X3
,
X3
C
01
10
11
00
QROUND
(
X0
,
X3
,
X2
,
X1
)
C
Inverse
rotation
of
the
rows
pshufd
$
0x39
,
X1
,
X1
C
01
10
11
00
pshufd
$
0x4e
,
X2
,
X2
C
10
11
00
01
pshufd
$
0x93
,
X3
,
X3
C
11
00
01
10
decl
XREG
(
COUNT
)
jnz
.Loop
SWAP
(
X0
,
X2
,
M0011
)
SWAP
(
X1
,
X3
,
M0110
)
SWAP
(
X0
,
X1
,
M0101
)
SWAP
(
X2
,
X3
,
M0101
)
movups
(
CTX
),
T0
movups
16
(
CTX
),
T1
paddd
T0
,
X0
paddd
T1
,
X1
movups
32
(
CTX
),
T0
movups
48
(
CTX
),
T1
paddd
T0
,
X2
paddd
T1
,
X3
C
Increment
bl
ock
counter
incq
32
(
CTX
)
cmp
$
64
,
LENGTH
jc
.Lfinal_xor
movups
48
(
SRC
),
T1
pxor
T1
,
X3
movups
X3
,
48
(
DS
T
)
.Lxor3:
movups
32
(
SRC
),
T0
pxor
T0
,
X2
movups
X2
,
32
(
DS
T
)
.Lxor2:
movups
16
(
SRC
),
T1
pxor
T1
,
X1
movups
X1
,
16
(
DS
T
)
.Lxor1:
movups
(
SRC
),
T0
pxor
T0
,
X0
movups
X0
,
(
DS
T
)
lea
64
(
SRC
),
SRC
lea
64
(
DS
T
),
DS
T
sub
$
64
,
LENGTH
ja
.Lblock_loop
.Lend:
W64_EXIT
(
4
,
9
)
ret
.Lfinal_xor:
cmp
$
32
,
LENGTH
jz
.Lxor2
jc
.Llt32
cmp
$
48
,
LENGTH
jz
.Lxor3
jc
.Llt48
movaps
X3
,
T0
call
.Lpartial
jmp
.Lxor3
.Llt48:
movaps
X2
,
T0
call
.Lpartial
jmp
.Lxor2
.Llt32:
cmp
$
16
,
LENGTH
jz
.Lxor1
jc
.Llt16
movaps
X1
,
T0
call
.Lpartial
jmp
.Lxor1
.Llt16:
movaps
X0
,
T0
call
.Lpartial
jmp
.Lend
.Lpartial:
mov
LENGTH
,
POS
and
$
-
16
,
POS
test
$
8
,
LENGTH
jz
.Llt8
movq
T0
,
T64
xor
(
SRC
,
POS
),
T64
mov
T64
,
(
DS
T
,
POS
)
lea
8
(
POS
),
POS
pshufd
$
0xee
,
T0
,
T0
C
10
11
10
11
.Llt8:
movq
T0
,
T64
test
$
4
,
LENGTH
jz
.Llt4
mov
XREG
(
T64
),
XREG
(
COUNT
)
xor
(
SRC
,
POS
),
XREG
(
COUNT
)
mov
XREG
(
COUNT
),
(
DS
T
,
POS
)
lea
4
(
POS
),
POS
shr
$
32
,
T64
.Llt4:
test
$
2
,
LENGTH
jz
.Llt2
mov
WREG
(
T64
),
WREG
(
COUNT
)
xor
(
SRC
,
POS
),
WREG
(
COUNT
)
mov
WREG
(
COUNT
),
(
DS
T
,
POS
)
lea
2
(
POS
),
POS
shr
$
16
,
XREG
(
T64
)
.Llt2:
test
$
1
,
LENGTH
jz
.Lpartial_done
xor
(
SRC
,
POS
),
LREG
(
T64
)
mov
LREG
(
T64
),
(
DS
T
,
POS
)
.Lpartial_done:
ret
EPILOGUE
(
nettle_salsa20_crypt
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment