Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
N
nettle
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Marcus Hoffmann
nettle
Commits
06fe7d83
Commit
06fe7d83
authored
Apr 18, 2012
by
Niels Möller
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
x86_64 implementation of salsa20.
parent
95c8eb72
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
275 additions
and
1 deletion
+275
-1
ChangeLog
ChangeLog
+4
-0
configure.ac
configure.ac
+1
-1
x86_64/salsa20-crypt.asm
x86_64/salsa20-crypt.asm
+270
-0
No files found.
ChangeLog
View file @
06fe7d83
2012-04-18 Niels Möller <nisse@lysator.liu.se>
* x86_64/salsa20-crypt.asm: New file.
2012-04-17 Niels Möller <nisse@lysator.liu.se>
* testsuite/salsa20-test.c (test_salsa20_stream): Check that
...
...
configure.ac
View file @
06fe7d83
...
...
@@ -233,7 +233,7 @@ if test "x$enable_assembler" = xyes ; then
found=no
for tmp_f in aes-encrypt-internal.asm aes-decrypt-internal.asm \
arcfour-crypt.asm camellia-crypt-internal.asm \
md5-compress.asm memxor.asm \
md5-compress.asm memxor.asm
salsa20-crypt.asm
\
serpent-encrypt.asm serpent-decrypt.asm \
sha1-compress.asm machine.m4; do
# echo "Looking for $srcdir/$asm_path/$tmp_f"
...
...
x86_64/salsa20-crypt.asm
0 → 100644
View file @
06fe7d83
C nettle, low-level cryptographics library
C
C Copyright (C) 2012 Niels Möller
C
C The nettle library is free software; you can redistribute it and/or modify
C it under the terms of the GNU Lesser General Public License as published by
C the Free Software Foundation; either version 2.1 of the License, or (at your
C option) any later version.
C
C The nettle library is distributed in the hope that it will be useful, but
C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
C or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
C License for more details.
C
C You should have received a copy of the GNU Lesser General Public License
C along with the nettle library; see the file COPYING.LIB.  If not, write to
C the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
C MA 02111-1307, USA.
C Symbolic register names used throughout this file.

C Function arguments, in the order of the C prototype
C (ctx, length, dst, src).
define(<CTX>, <%rdi>)
define(<LENGTH>, <%rsi>)
define(<DST>, <%rdx>)
define(<SRC>, <%rcx>)

C Integer scratch registers used by the partial-block code (.Lpartial):
C T64 holds key stream bytes, POS the byte offset into SRC and DST.
define(<T64>, <%r8>)
define(<POS>, <%r9>)

C X0-X3 hold the 4x4 matrix of 32-bit state words, four words per register.
define(<X0>, <%xmm0>)
define(<X1>, <%xmm1>)
define(<X2>, <%xmm2>)
define(<X3>, <%xmm3>)

C SSE scratch registers, overwritten by the QROUND and SWAP macros.
define(<T0>, <%xmm4>)
define(<T1>, <%xmm5>)

C Word-select masks passed to SWAP; loaded once at function entry.
define(<M0101>, <%xmm6>)
define(<M0110>, <%xmm7>)
define(<M0011>, <%xmm8>)

C Round counter for the main loop. Its 32-bit and 16-bit parts are also
C used as scratch when loading the masks and in the partial-block code.
define(<COUNT>, <%rax>)
C Possible improvements:
C
C Do two blocks (or more) at a time in parallel, to avoid limitations
C due to data dependencies.
C
C Avoid redoing the permutation of the input for each block (all but
C the two counter words are constant). Could also keep the input in
C registers.
C QSTEP(a, b, dst, left, right)
C Private helper for QROUND. Computes, on each of the four 32-bit lanes,
C   dst ^= rotl32(a + b, left)
C where right must equal 32 - left. The rotation is built from a left
C shift and a right shift whose results are xored into dst one after
C the other. Clobbers T0 and T1.
define(<QSTEP>, <
	movaps	$1, T0
	paddd	$2, T0
	movaps	T0, T1
	pslld	<$>$4, T0
	psrld	<$>$5, T1
	pxor	T0, $3
	pxor	T1, $3
>)

C QROUND(x0, x1, x2, x3)
C One Salsa20 quarter round, applied to four lanes in parallel:
C   x1 ^= rotl32(x3 + x0, 7)
C   x2 ^= rotl32(x0 + x1, 9)
C   x3 ^= rotl32(x1 + x2, 13)
C   x0 ^= rotl32(x2 + x3, 18)
C Clobbers T0 and T1.
define(<QROUND>, <
	QSTEP($4, $1, $2, 7, 25)
	QSTEP($1, $2, $3, 9, 23)
	QSTEP($2, $3, $4, 13, 19)
	QSTEP($3, $4, $1, 18, 14)
>)
C SWAP(x0, x1, mask)
C Swaps bits in x0 and x1, with bits selected by the mask.
C Bits where the mask is zero are left unchanged in both registers.
C Clobbers T0.
define(<SWAP>, <
	movaps	$1, T0		C T0 = original x0
	pxor	$2, $1		C x0 = x0 xor x1
	pand	$3, $1		C x0 = differing bits, restricted to the mask
	pxor	$1, $2		C x1: masked bits now come from the original x0
	pxor	T0, $1		C x0: masked bits now come from the original x1
>)
C Name recorded for the assembler; matches this file, salsa20-crypt.asm.
	.file "salsa20-crypt.asm"

	C salsa20_crypt(struct salsa20_ctx *ctx, unsigned length,
	C		uint8_t *dst, const uint8_t *src)
	C
	C XORs length bytes of key stream with src and stores the result
	C at dst. One 64-byte block is generated per iteration from the 16
	C state words at ctx, and the 64-bit block counter at offset 32 in
	C ctx is incremented once per generated block.
	C
	C Registers written: COUNT, T64, POS, X0-X3, T0, T1 and the three
	C mask registers. W64_ENTRY and W64_EXIT are defined elsewhere;
	C presumably they adapt the Windows calling convention and save the
	C xmm registers that must be preserved there.
	.text
	ALIGN(4)
PROLOGUE(nettle_salsa20_crypt)
	W64_ENTRY(4, 9)

	C length is a 32-bit unsigned argument. The upper half of its
	C 64-bit register is not guaranteed to be zero on entry, and the
	C code below uses LENGTH as a full 64-bit quantity (compares,
	C subtraction and, via POS, addressing), so zero-extend it first.
	mov	XREG(LENGTH), XREG(LENGTH)

	test	LENGTH, LENGTH
	jz	.Lend

	C Load mask registers. Start with all ones in the lowest word of
	C M0101 and zeros elsewhere, then copy that word into the positions
	C named by each mask (least significant word to the left).
	mov	$-1, XREG(COUNT)
	movd	XREG(COUNT), M0101
	pshufd	$0x09, M0101, M0011	C words 2 and 3 set
	pshufd	$0x41, M0101, M0110	C words 1 and 2 set
	pshufd	$0x22, M0101, M0101	C words 1 and 3 set

.Lblock_loop:
	C Fetch the 16 input words, one row per register.
	movups	(CTX), X0
	movups	16(CTX), X1
	movups	32(CTX), X2
	movups	48(CTX), X3

	C On input, each xmm register is one row. We start with
	C
	C	 0  1  2  3
	C	 4  5  6  7
	C	 8  9 10 11
	C	12 13 14 15
	C
	C Diagrams are in little-endian order, with least significant word to
	C the left. We rotate the columns, to get instead
	C
	C	 0  5 10 15
	C	 4  9 14  3
	C	 8 13  2  7
	C	12  1  6 11
	C
	C The original rows are now diagonals.
	SWAP(X0, X1, M0101)
	SWAP(X2, X3, M0101)
	SWAP(X1, X3, M0110)
	SWAP(X0, X2, M0011)

	C Ten iterations, each doing two QROUNDs.
	movl	$10, XREG(COUNT)
	ALIGN(4)
.Loop:
	QROUND(X0, X1, X2, X3)

	C For the row operations, we first rotate the rows, to get
	C
	C	0 5 10 15
	C	3 4  9 14
	C	2 7  8 13
	C	1 6 11 12
	C
	C Now the original rows are turned into columns. (This is the
	C SIMD hack described in djb's papers).
	pshufd	$0x93, X1, X1	C 11 00 01 10 (least sign. left)
	pshufd	$0x4e, X2, X2	C 10 11 00 01
	pshufd	$0x39, X3, X3	C 01 10 11 00

	QROUND(X0, X3, X2, X1)

	C Inverse rotation of the rows
	pshufd	$0x39, X1, X1	C 01 10 11 00
	pshufd	$0x4e, X2, X2	C 10 11 00 01
	pshufd	$0x93, X3, X3	C 11 00 01 10

	decl	XREG(COUNT)
	jnz	.Loop

	C Undo the initial column rotation: the same SWAPs in reverse
	C order, so each register holds one row again.
	SWAP(X0, X2, M0011)
	SWAP(X1, X3, M0110)
	SWAP(X0, X1, M0101)
	SWAP(X2, X3, M0101)

	C Add the original input words to form the key stream block.
	movups	(CTX), T0
	movups	16(CTX), T1
	paddd	T0, X0
	paddd	T1, X1
	movups	32(CTX), T0
	movups	48(CTX), T1
	paddd	T0, X2
	paddd	T1, X3

	C Increment block counter
	incq	32(CTX)

	C Fewer than 64 bytes left: handle the final partial block.
	cmp	$64, LENGTH
	jc	.Lfinal_xor

	C Full block: xor 16 bytes at a time, highest part first. The
	C labels below are also entered from .Lfinal_xor, to process only
	C the complete 16-byte parts of a final short block.
	movups	48(SRC), T1
	pxor	T1, X3
	movups	X3, 48(DST)
.Lxor3:
	movups	32(SRC), T0
	pxor	T0, X2
	movups	X2, 32(DST)
.Lxor2:
	movups	16(SRC), T1
	pxor	T1, X1
	movups	X1, 16(DST)
.Lxor1:
	movups	(SRC), T0
	pxor	T0, X0
	movups	X0, (DST)

	lea	64(SRC), SRC
	lea	64(DST), DST
	sub	$64, LENGTH
	C Loop only while bytes remain. After a short final block the
	C subtraction borrows, and we fall through to the exit.
	ja	.Lblock_loop
.Lend:
	W64_EXIT(4, 9)
	ret

.Lfinal_xor:
	C Here 0 < LENGTH < 64. Select the key stream register covering the
	C trailing partial 16-byte part, if any, let .Lpartial process
	C those bytes, then jump into the xor chain above for the complete
	C 16-byte parts that precede them.
	cmp	$32, LENGTH
	jz	.Lxor2
	jc	.Llt32
	cmp	$48, LENGTH
	jz	.Lxor3
	jc	.Llt48
	C 48 < LENGTH < 64
	movaps	X3, T0
	call	.Lpartial
	jmp	.Lxor3
.Llt48:
	C 32 < LENGTH < 48
	movaps	X2, T0
	call	.Lpartial
	jmp	.Lxor2
.Llt32:
	cmp	$16, LENGTH
	jz	.Lxor1
	jc	.Llt16
	C 16 < LENGTH < 32
	movaps	X1, T0
	call	.Lpartial
	jmp	.Lxor1
.Llt16:
	C 0 < LENGTH < 16: there is no complete 16-byte part.
	movaps	X0, T0
	call	.Lpartial
	jmp	.Lend

.Lpartial:
	C Local subroutine. Processes the trailing LENGTH mod 16 bytes,
	C using key stream from T0. POS starts at LENGTH rounded down to
	C a multiple of 16 and advances past each piece handled. The
	C pieces are 8, 4, 2 and 1 bytes, selected by the low bits of
	C LENGTH. Clobbers T0, T64, POS and COUNT.
	mov	LENGTH, POS
	and	$-16, POS
	test	$8, LENGTH
	jz	.Llt8
	movq	T0, T64
	xor	(SRC, POS), T64
	mov	T64, (DST, POS)
	lea	8(POS), POS
	C Move the upper 8 key stream bytes down to the low half of T0.
	pshufd	$0xee, T0, T0		C 10 11 10 11
.Llt8:
	C T64 holds the next, at most 8, unused key stream bytes.
	movq	T0, T64
	test	$4, LENGTH
	jz	.Llt4
	mov	XREG(T64), XREG(COUNT)
	xor	(SRC, POS), XREG(COUNT)
	mov	XREG(COUNT), (DST, POS)
	lea	4(POS), POS
	shr	$32, T64
.Llt4:
	test	$2, LENGTH
	jz	.Llt2
	mov	WREG(T64), WREG(COUNT)
	xor	(SRC, POS), WREG(COUNT)
	mov	WREG(COUNT), (DST, POS)
	lea	2(POS), POS
	shr	$16, XREG(T64)
.Llt2:
	test	$1, LENGTH
	jz	.Lpartial_done
	xor	(SRC, POS), LREG(T64)
	mov	LREG(T64), (DST, POS)
.Lpartial_done:
	ret

EPILOGUE(nettle_salsa20_crypt)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment