Commit 2cd7a854 authored Nov 21, 2013 by Niels Möller
x86_64 assembly for poly1305.
parent fb19e5ef
Showing 4 changed files with 250 additions and 1 deletion
ChangeLog                       +7    -0
asm.m4                          +13   -0
configure.ac                    +1    -1
x86_64/poly1305-internal.asm    +229  -0
ChangeLog
2013-11-21  Niels Möller  <nisse@lysator.liu.se>

	* x86_64/poly1305-internal.asm: New file. Almost a factor of two
	speedup.
	* configure.ac (asm_replace_list): Added poly1305-internal.asm.
	* asm.m4: Define struct offsets for 64-bit poly1305_ctx.
	* poly1305.h (POLY1305_DIGEST): Pass the encrypted nonce as an
	additional argument to poly1305_digest.
	(struct poly1305_ctx): Introduce unions, to support either 26-bit
	...
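The nonce change described for poly1305_digest is visible at the very end of nettle_poly1305_digest in the new assembly file below, where the encrypted nonce s, now a function argument rather than context state, is folded in with "add (%rcx), H0; adc 8(%rcx), H1". The following is a hedged C sketch of that final step only; the function name and parameter layout are illustrative assumptions, not nettle code, and a little-endian host is assumed, as in the assembly.

#include <stdint.h>
#include <string.h>

/* Sketch: after h has been reduced to 128 bits (h[0] low, h[1] high),
   add the encrypted nonce s modulo 2^128 and emit the first `length`
   bytes as the tag.  Illustrative only. */
static void
digest_add_nonce_sketch(const uint64_t h[2], const uint8_t s[16],
                        size_t length, uint8_t *digest)
{
  uint64_t s0, s1, t0, t1;
  uint8_t out[16];

  memcpy(&s0, s, 8);
  memcpy(&s1, s + 8, 8);

  t0 = h[0] + s0;               /* add (%rcx), H0 */
  t1 = h[1] + s1 + (t0 < s0);   /* adc 8(%rcx), H1 */

  memcpy(out, &t0, 8);
  memcpy(out + 8, &t1, 8);
  memcpy(digest, out, length < 16 ? length : 16);  /* like the byte-wise store loop */
}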
asm.m4
@@ -76,4 +76,17 @@ STRUCTURE(AES)
STRUCT(TABLE2, AES_TABLE_SIZE)
STRUCT(TABLE3, AES_TABLE_SIZE)
C For 64-bit implementation
STRUCTURE(P1305)
STRUCT(R0, 8)
STRUCT(R1, 8)
STRUCT(S1, 8)
STRUCT(PAD, 12)
STRUCT(H2, 4)
STRUCT(H0, 8)
STRUCT(H1, 8)
STRUCT(NONCE, 16)
STRUCT(BLOCK, 16)
STRUCT(INDEX, 4)
divert
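The STRUCTURE/STRUCT entries above generate the P1305_* byte offsets used by the new assembly file, counting the fields back to back from offset 0. For orientation, here is a hedged C sketch of the 64-bit layout those offsets imply. The real struct poly1305_ctx lives in poly1305.h, which this diff does not show; the field names come from the STRUCT() entries, while the types and comments are assumptions.

#include <stdint.h>

struct p1305_layout_sketch {
  uint64_t r0;          /* offset  0: low 8 key bytes, clamped (see set_key)  */
  uint64_t r1;          /* offset  8: high 8 key bytes, clamped               */
  uint64_t s1;          /* offset 16: (r1 >> 2) * 5, precomputed by set_key   */
  uint8_t  pad[12];     /* offset 24: name from asm.m4; purpose not shown     */
  uint32_t h2;          /* offset 36: top (few bits) limb of the accumulator  */
  uint64_t h0;          /* offset 40: low limb of the accumulator             */
  uint64_t h1;          /* offset 48: middle limb of the accumulator          */
  uint8_t  nonce[16];   /* offset 56 */
  uint8_t  block[16];   /* offset 72: buffered partial block                  */
  uint32_t index;       /* offset 88: bytes currently buffered in block       */
};

On the usual x86_64 ABI this struct happens to reproduce the listed offsets without extra padding; it is only meant to make the P1305_H0, P1305_BLOCK, and P1305_INDEX references in the assembly easier to follow.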
configure.ac
@@ -259,7 +259,7 @@ fi
 # to a new object file).
 asm_replace_list="aes-encrypt-internal.asm aes-decrypt-internal.asm \
   arcfour-crypt.asm camellia-crypt-internal.asm \
-  md5-compress.asm memxor.asm \
+  md5-compress.asm memxor.asm poly1305-internal.asm \
   salsa20-crypt.asm salsa20-core-internal.asm \
   serpent-encrypt.asm serpent-decrypt.asm \
   sha1-compress.asm sha256-compress.asm sha512-compress.asm \
x86_64/poly1305-internal.asm  0 → 100644
C nettle, low-level cryptographics library
C
C Copyright (C) 2013 Niels Möller
C
C The nettle library is free software; you can redistribute it and/or modify
C it under the terms of the GNU Lesser General Public License as published by
C the Free Software Foundation; either version 2.1 of the License, or (at your
C option) any later version.
C
C The nettle library is distributed in the hope that it will be useful, but
C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
C or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
C License for more details.
C
C You should have received a copy of the GNU Lesser General Public License
C along with the nettle library; see the file COPYING.LIB.  If not, write to
C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
C MA 02111-1301, USA.
	.file "poly1305-internal.asm"

C Registers mainly used by poly1305_block
define(<CTX>, <%rdi>)
define(<T0>, <%rcx>)
define(<T1>, <%rsi>)
define(<T2>, <%r8>)
define(<H0>, <%r9>)
define(<H1>, <%r10>)
define(<H2>, <%r11>)

	C poly1305_set_key(struct poly1305_ctx *ctx, const uint8_t key[16])
	.text
	C Registers:
	C  %rdi: ctx
	C  %rsi: key
	C  %r8: mask
	ALIGN(16)
PROLOGUE(nettle_poly1305_set_key)
	W64_ENTRY(2, 0)
	mov	$0x0ffffffc0fffffff, %r8
	mov	(%rsi), %rax
	and	%r8, %rax
	and	$-4, %r8
	mov	%rax, (CTX)
	mov	8(%rsi), %rax
	and	%r8, %rax
	mov	%rax, P1305_R1 (CTX)
	shr	$2, %rax
	imul	$5, %rax
	mov	%rax, P1305_S1 (CTX)
	xor	XREG(%rax), XREG(%rax)
	mov	%rax, P1305_H0 (CTX)
	mov	%rax, P1305_H1 (CTX)
	mov	XREG(%rax), P1305_H2 (CTX)
	mov	XREG(%rax), P1305_INDEX (CTX)
	W64_EXIT(2, 0)
	ret
EPILOGUE(nettle_poly1305_set_key)
C 64-bit multiplication mod 2^130 - 5
C
C (x_0 + B x_1 + B^2 x_2) * (r_0 + B r_1) =
C        1       B      B^2     B^3
C    x_0 r_0
C            x_0 r_1
C            x_1 r_0
C                   x_1 r_1
C                   x_2 r_0
C                           x_2 r_1
C Then r_1 B^2 = r_1/4 (2^130) = 5/4 r_1.
C and r_1 B^3 = 5/4 B r_1
C So we get
C
C   x_0 r_0 + x_1 (5/4 r_1) + B (x_0 r_1 + x_1 r_0 + x_2 5/4 r_1 + B x_2 r_0)
C poly1305_block (struct poly1305_ctx *ctx, const uint8_t m[16])
PROLOGUE(nettle_poly1305_block)
	mov	(%rsi), T0
	mov	8(%rsi), T1
	mov	$1, T2
	C FIXME: Support windows ABI
	C Registers:
	C   Inputs:  CTX, T0, T1, T2,
	C   Outputs: H0, H1, H2, stored into the context.
C_NAME(poly1305_block):
	add	P1305_H0 (CTX), T0
	adc	P1305_H1 (CTX), T1
	adc	P1305_H2 (CTX), XREG(T2)
	mov	P1305_R0 (CTX), %rax
	mul	T0
	mov	%rax, H0
	mov	%rdx, H1
	mov	P1305_S1 (CTX), %rax	C 5/4 r1
	mov	%rax, H2
	mul	T1
	imul	T2, H2
	imul	P1305_R0 (CTX), T2
	add	%rax, H0
	adc	%rdx, H1
	mov	P1305_R0 (CTX), %rax
	mul	T1
	add	%rax, H2
	adc	%rdx, T2
	mov	P1305_R1 (CTX), %rax
	mul	T0
	add	%rax, H2
	adc	%rdx, T2
	mov	T2, %rax
	shr	$2, %rax
	imul	$5, %rax
	and	$3, XREG(T2)
	add	%rax, H0
	adc	H2, H1
	adc	$0, XREG(T2)
	mov	H0, P1305_H0 (CTX)
	mov	H1, P1305_H1 (CTX)
	mov	XREG(T2), P1305_H2 (CTX)
	ret
EPILOGUE(nettle_poly1305_block)
C poly1305_digest (struct poly1305_ctx *ctx,
C		   size_t length, uint8_t *digest,
C		   const uint8_t *s)
	C Registers:
	C  %rdi: ctx
	C  %rsi: length
	C  %rdx: digest
	C  %rcx: s
PROLOGUE(nettle_poly1305_digest)
	W64_ENTRY(4, 0)
	mov	P1305_INDEX (CTX), XREG(%rax)
	push	%rsi
	push	%rdx
	push	%rcx
	test	XREG(%rax), XREG(%rax)
	jz	.Lfinal

	C Pad with a 1 byte.
	C FIXME: Or in, without storing in memory.
	inc	XREG(%rax)	C Also clears high half
	movb	$1, P1305_BLOCK-1 (CTX, %rax)

	mov	XREG(%rax), XREG(%rcx)
	mov	$1, T1
	and	$7, XREG(%rcx)
	shl	$3, XREG(%rcx)
	shl	LREG(%rcx), T1
	dec	T1
	mov	P1305_BLOCK (CTX), T0
	xor	T2, T2
	cmp	$8, XREG(%rax)
	jc	.Lfinal_lt8
	C If %rax == 16, we get T1 == 0,
	C tweak so we get T1 = -1 instead.
	cmp	$16, XREG(%rax)
	adc	$-1, T1
	and	P1305_BLOCK+8 (CTX), T1
	jmp	.Lfinal_block
.Lfinal_lt8:
	and	T1, T0
	xor	T1, T1
.Lfinal_block:
	call	poly1305_block

.Lfinal:
	mov	P1305_H0 (CTX), H0
	mov	P1305_H1 (CTX), H1
	mov	P1305_H2 (CTX), XREG(H2)
	mov	XREG(H2), XREG(%rax)
	shr	$2, XREG(%rax)
	and	$3, H2
	imul	$5, XREG(%rax)
	add	%rax, H0
	adc	$0, H1
	adc	$0, XREG(H2)

	C Add 5, use result if >= 2^130
	mov	$5, T0
	xor	T1, T1
	add	H0, T0
	adc	H1, T1
	adc	$0, XREG(H2)
	cmp	$4, XREG(H2)
	cmovnc	T0, H0
	cmovnc	T1, H1

	pop	%rcx
	pop	%rdx
	pop	%rsi
	add	(%rcx), H0
	adc	8(%rcx), H1

	C Store, taking length into account
	cmp	$8, %rsi
	jc	.Ldigest_lt8
	mov	H0, (%rdx)
	jz	.Ldigest_done
	cmp	$16, %rsi
	jc	.Ldigest_lt16
	mov	H1, 8(%rdx)
	jmp	.Ldigest_done
.Ldigest_lt16:
	mov	H1, H0
	add	$8, %rdx
	sub	$8, %rsi
.Ldigest_lt8:
	movb	LREG(H0), (%rdx)
	shr	$8, H0
	inc	%rdx
	dec	%rsi
	jnz	.Ldigest_lt8
.Ldigest_done:
	xor	XREG(%rax), XREG(%rax)
	mov	%rax, P1305_H0 (CTX)
	mov	%rax, P1305_H1 (CTX)
	mov	XREG(%rax), P1305_H2 (CTX)
	mov	XREG(%rax), P1305_INDEX (CTX)
	W64_EXIT(4, 0)
	ret
EPILOGUE(nettle_poly1305_digest)
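For readers who prefer C, here is a hedged sketch of the block step that the multiplication comment before poly1305_block describes: the accumulator is kept as h0 + 2^64*h1 + 2^128*h2 with h2 only a few bits wide, s1 is the precomputed 5/4*r1 stored by set_key (exact, since clamping clears the low two bits of r1), and the bits at weight 2^130 are folded back using 2^130 = 5 (mod 2^130 - 5). This is not nettle code; it assumes a little-endian host and compiler support for unsigned __int128, and it covers only the full-block case (padding bit fixed at 1, as in nettle_poly1305_block). The "Add 5, use result if >= 2^130" step in poly1305_digest above is the usual final full reduction of the same representation.

#include <stdint.h>
#include <string.h>

/* h[0..2]: accumulator limbs (h[2] small).  r[0..1]: clamped key.
   s1 = 5 * (r[1] >> 2).  m: one 16-byte message block. */
static void
poly1305_block_sketch(uint64_t h[3], const uint64_t r[2], uint64_t s1,
                      const uint8_t m[16])
{
  uint64_t t0, t1;
  memcpy(&t0, m, 8);
  memcpy(&t1, m + 8, 8);

  /* x = h + m + 2^128 (the padding bit at position 128) */
  unsigned __int128 acc = (unsigned __int128) h[0] + t0;
  uint64_t x0 = (uint64_t) acc;
  acc = (acc >> 64) + h[1] + t1;
  uint64_t x1 = (uint64_t) acc;
  uint64_t x2 = (uint64_t) (acc >> 64) + h[2] + 1;

  /* weight-1 bucket:    x0 r0 + x1 (5/4 r1) */
  unsigned __int128 lo = (unsigned __int128) x0 * r[0]
                       + (unsigned __int128) x1 * s1;
  /* weight-2^64 bucket: x0 r1 + x1 r0 + x2 (5/4 r1) + 2^64 x2 r0 */
  unsigned __int128 hi = (unsigned __int128) x0 * r[1]
                       + (unsigned __int128) x1 * r[0]
                       + (unsigned __int128) x2 * s1
                       + ((unsigned __int128) (x2 * r[0]) << 64);

  uint64_t d0 = (uint64_t) lo, d1 = (uint64_t) (lo >> 64);
  uint64_t d2 = (uint64_t) hi, d3 = (uint64_t) (hi >> 64);

  /* product = d0 + 2^64 (d1 + d2) + 2^128 d3; fold bits >= 2^130 as *5 */
  uint64_t fold = 5 * (d3 >> 2);
  d3 &= 3;

  acc = (unsigned __int128) d0 + fold;
  h[0] = (uint64_t) acc;
  acc = (acc >> 64) + d1 + d2;
  h[1] = (uint64_t) acc;
  h[2] = d3 + (uint64_t) (acc >> 64);
}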