Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
N
nettle
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Labels
Merge Requests
5
Merge Requests
5
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Commits
Open sidebar
Nettle
nettle
Commits
93037338
Commit
93037338
authored
Apr 16, 2013
by
Niels Möller
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
x86_64 assembly for umac_nh_n.
parent
0f10b7b4
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
263 additions
and
0 deletions
+263
-0
ChangeLog
ChangeLog
+2
-0
x86_64/umac-nh-n.asm
x86_64/umac-nh-n.asm
+261
-0
No files found.
ChangeLog
View file @
93037338
2013-04-16 Niels Möller <nisse@lysator.liu.se>
* x86_64/umac-nh-n.asm: New file, 3.5 time speedup.
* umac32.c (umac32_digest): Fix nonce caching.
* umac64.c (umac64_digest): Likewise.
...
...
x86_64/umac-nh-n.asm
0 → 100644
View file @
93037338
C
nettle
,
low
-
level
cryptographics
library
C
C
Copyright
(
C
)
2013
Niels
M
ö
ller
C
C
The
nettle
library
is
free
software
; you can redistribute it and/or modify
C
it
under
the
terms
of
the
GNU
Lesser
General
Public
License
as
published
by
C
the
Free
Software
Foundation
; either version 2.1 of the License, or (at your
C
option
)
any
later
version.
C
C
The
nettle
library
is
di
stributed
in
the
hope
that
it
will
be
useful
,
but
C
WITHOUT
ANY
WARRANTY
; without even the implied warranty of MERCHANTABILITY
C
or
FITNESS
FOR
A
PARTICULAR
PURPOSE.
See
the
GNU
Lesser
General
Public
C
License
for
more
details.
C
C
You
should
have
received
a
copy
of
the
GNU
Lesser
General
Public
License
C
al
ong
with
the
nettle
library
; see the file COPYING.LIB. If not, write to
C
the
Free
Software
Foundation
,
Inc.
,
51
Franklin
Street
,
Fifth
Floor
,
Boston
,
C
MA
02111
-
1301
,
USA.
C Register assignments. The first five names follow the parameters of
C umac_nh_n: destination for the 64-bit sums, iteration count, key
C words, message length in bytes, and message pointer.
define(<OUT>, <%rdi>)
define(<ITERS>, <%rsi>)
define(<KEY>, <%rdx>)
define(<LENGTH>, <%rcx>)
define(<MSG>, <%r8>)

C XM0-XM3: message words, replicated per 64-bit half by pshufd.
define(<XM0>, <%xmm0>)
define(<XM1>, <%xmm1>)
define(<XM2>, <%xmm2>)
define(<XM3>, <%xmm3>)
C XK0-XK3: key words loaded from KEY.
define(<XK0>, <%xmm4>)
define(<XK1>, <%xmm5>)
define(<XK2>, <%xmm6>)
define(<XK3>, <%xmm7>)
C XT0-XT3: temporaries (the four-iteration path also keeps permuted
C keys in XT2 and XT3).
define(<XT0>, <%xmm8>)
define(<XT1>, <%xmm9>)
define(<XT2>, <%xmm10>)
define(<XT3>, <%xmm11>)
C XY0, XY1: 64-bit accumulators, two sums per register.
define(<XY0>, <%xmm12>)
define(<XY1>, <%xmm13>)

C Copy [0,1,2,3] to [1,1,3,3]
C Moves the odd 32-bit lanes into the even positions. It is always
C followed by a pmuludq, which multiplies the even lanes only, so
C together they form the products of the odd lanes.
define(<HI2LO>, <pshufd	<$>0xf5, >)
C
FIXME
:
Would
be
nice
if
we
could
force
the
key
array
to
be
16
-
byte
C
al
igned.
.file "umac-nh-n.asm"

	C umac_nh_n(uint64_t *out, unsigned n, const uint32_t *key,
	C	    unsigned length, const uint8_t *msg)
	C
	C Computes n NH sums over the message in one pass and stores
	C them as 64-bit values at out. The message is consumed in
	C 32-byte blocks; each loop below runs at least once and repeats
	C while the remaining length is above zero (presumably length is
	C a positive multiple of 32 - confirm against callers).
	C
	C Dispatch on n:
	C   n below 3  -> .Lnh2, stores two sums   (16 bytes at out)
	C   n equal 3  -> .Lnh3, stores three sums (24 bytes at out)
	C   otherwise  -> .Lnh4, stores four sums  (32 bytes at out)
	C
	C All loads and stores use movups, so no alignment of key, msg
	C or out is required.

	.text
	ALIGN(4)
PROLOGUE(_nettle_umac_nh_n)
	W64_ENTRY(5, 14)
	C Clear the accumulator shared by all three paths.
	pxor	XY0, XY0
	C n is a 32-bit unsigned argument, and the calling convention
	C leaves the upper half of its 64-bit register unspecified.
	C Compare only the low 32 bits, the same way length is handled
	C with XREG(LENGTH) in the loops.
	cmpl	$3, XREG(ITERS)
	jc	.Lnh2
	je	.Lnh3
.Lnh4:
	C Four NH iterations per 32-byte message block.
	C XY0 accumulates iteration 0 (low 64 bits) and 1 (high 64 bits),
	C XY1 accumulates iterations 2 and 3 in the same layout.
	C Numbers in brackets are 32-bit word indices, relative to the
	C current block, of the key or message words held in a register.
	movups	(KEY), XK0
	movups	16(KEY), XT2
	movups	32(KEY), XK2
	lea	48(KEY), KEY
	C Leave XK2 untouched, and put permuted keys in XK0, XK1, XT2, XT3
	movaps	XK0, XK1
	punpcklqdq XT2, XK0	C [0,1,4,5]
	punpckhqdq XT2, XK1	C [2,3,6,7]
	movaps	XT2, XT3
	punpcklqdq XK2, XT2	C [4,5, 8, 9]
	punpckhqdq XK2, XT3	C [6,7,10,11]

	C XY0 was cleared at function entry; start XY1 at zero too.
	movaps	XY0, XY1

.Loop4:
	movups	(MSG), XT0
	movups	16(MSG), XT1

	C Replicate each pair of message words into both 64-bit halves.
	pshufd	$0xee, XT1, XM3	C [6,7,6,7]
	pshufd	$0x44, XT1, XM2	C [4,5,4,5]
	pshufd	$0xee, XT0, XM1	C [2,3,2,3]
	pshufd	$0x44, XT0, XM0	C [0,1,0,1]

	C Iterations 0 and 1. The sums are formed in the key registers,
	C which are reloaded below, so XM0-XM3 stay intact for the
	C second half.
	paddd	XM0, XK0
	paddd	XM1, XK1
	paddd	XM2, XT2
	paddd	XM3, XT3

	HI2LO	XK0, XT0
	HI2LO	XT2, XT1
	pmuludq	XK0, XT2
	pmuludq	XT0, XT1
	paddq	XT2, XY0
	paddq	XT1, XY0

	HI2LO	XK1, XT0
	HI2LO	XT3, XT1
	pmuludq	XK1, XT3
	pmuludq	XT0, XT1
	paddq	XT3, XY0
	paddq	XT1, XY0

	C Iterations 2 and 3. Load the next 8 key words and permute.
	C Here the sums go into the message registers, so the permuted
	C keys survive and serve as the iteration 0 and 1 keys of the
	C next block.
	movaps	XK2, XK0
	movaps	XK2, XK1
	movups	(KEY), XT2
	movups	16(KEY), XK2
	punpcklqdq XT2, XK0	C [ 8, 9,12,13]
	punpckhqdq XT2, XK1	C [10,11,14,15]
	movaps	XT2, XT3

	punpcklqdq XK2, XT2	C [12,13,16,17]
	punpckhqdq XK2, XT3	C [14,15,18,19]

	paddd	XK0, XM0
	paddd	XK1, XM1
	paddd	XT2, XM2
	paddd	XT3, XM3

	HI2LO	XM0, XT0
	HI2LO	XM2, XT1
	pmuludq	XM0, XM2
	pmuludq	XT0, XT1
	paddq	XM2, XY1
	paddq	XT1, XY1

	HI2LO	XM1, XT0
	HI2LO	XM3, XT1
	pmuludq	XM1, XM3
	pmuludq	XT0, XT1
	paddq	XM3, XY1
	paddq	XT1, XY1

	C The flags set by subl are still valid at ja, since lea does
	C not modify them.
	subl	$32, XREG(LENGTH)
	lea	32(MSG), MSG
	lea	32(KEY), KEY
	ja	.Loop4

	C Store the four 64-bit sums.
	movups	XY0, (OUT)
	movups	XY1, 16(OUT)

	W64_EXIT(5, 14)
	ret
.Lnh3:
	C Three NH iterations per 32-byte message block.
	C XY0 accumulates iteration 0 (low 64 bits) and 1 (high 64 bits).
	C XY1 holds two partial sums for iteration 2, added together
	C after the loop.
	movups	(KEY), XK0
	movups	16(KEY), XK1
	C XY0 was cleared at function entry; start XY1 at zero too.
	movaps	XY0, XY1

.Loop3:
	lea	32(KEY), KEY
	movups	(MSG), XT0
	movups	16(MSG), XT1
	C Key words 8-15 relative to the current block.
	movups	(KEY), XK2
	movups	16(KEY), XK3
	C Replicate each pair of message words into both 64-bit halves.
	pshufd	$0xee, XT1, XM3	C [6,7,6,7]
	pshufd	$0x44, XT1, XM2	C [4,5,4,5]
	pshufd	$0xee, XT0, XM1	C [2,3,2,3]
	pshufd	$0x44, XT0, XM0	C [0,1,0,1]

	C Iteration 2
	C Works on the unpermuted message in XT0, XT1: four products
	C per block, split over the two halves of XY1.
	paddd	XK2, XT0
	paddd	XK3, XT1
	HI2LO	XT0, XT2
	HI2LO	XT1, XT3
	pmuludq	XT0, XT1
	pmuludq	XT2, XT3
	paddq	XT1, XY1
	paddq	XT3, XY1

	C Iteration 0,1
	movaps	XK0, XT0
	punpcklqdq XK1, XK0	C [0,1,4,5]
	punpckhqdq XK1, XT0	C [2,3,6,7]
	paddd	XK0, XM0
	paddd	XT0, XM1
	C From here on XK0 holds key words 8-11, which is already the
	C value it needs at the start of the next block.
	movaps	XK2, XK0
	movaps	XK1, XT0
	punpcklqdq XK2, XK1	C [4,5,8,9]
	punpckhqdq XK2, XT0	C [6,7,10,11]
	paddd	XK1, XM2
	paddd	XT0, XM3

	HI2LO	XM0, XT0
	HI2LO	XM2, XT1
	pmuludq	XM0, XM2
	pmuludq	XT0, XT1
	paddq	XM2, XY0
	paddq	XT1, XY0

	HI2LO	XM1, XT0
	HI2LO	XM3, XT1
	pmuludq	XM1, XM3
	pmuludq	XT0, XT1
	paddq	XM3, XY0
	paddq	XT1, XY0

	C The flags set by subl are still valid at ja, since lea and
	C movaps do not modify them.
	subl	$32, XREG(LENGTH)
	lea	32(MSG), MSG
	C XK1 was overwritten by the permutation above; reload it with
	C key words 12-15 for the next block.
	movaps	XK3, XK1
	ja	.Loop3

	C Fold the high half of XY1 into its low half.
	pshufd	$0xe, XY1, XT0
	paddq	XT0, XY1
	C Store three 64-bit sums: 16 bytes from XY0, 8 bytes from XY1.
	movups	XY0, (OUT)
	movlpd	XY1, 16(OUT)

	W64_EXIT(5, 14)
	ret
.Lnh2:
	C Two NH iterations per 32-byte message block, accumulated in
	C XY0: iteration 0 in the low 64 bits, iteration 1 in the high
	C 64 bits. XY0 was cleared at function entry.
	C Explode message as [0, 1, 0, 1] [2, 3, 2, 3] [4, 5, 4, 5] [6, 7, 6, 7]
	C Interleave keys as [0, 1, 4, 5] [2, 3, 6, 7] [4, 5, 8, 9] [6, 7, 10, 11]
	movups	(KEY), XK0
	lea	16(KEY), KEY

.Loop2:
	movups	(MSG), XM0
	movups	16(MSG), XM1
	C XM1 and XM0 are overwritten last, after their other uses.
	pshufd	$0xee, XM1, XM3	C [6,7,6,7]
	pshufd	$0x44, XM1, XM2	C [4,5,4,5]
	pshufd	$0xee, XM0, XM1	C [2,3,2,3]
	pshufd	$0x44, XM0, XM0	C [0,1,0,1]

	C Key words 4-11 relative to the current block.
	movups	(KEY), XK1
	movups	16(KEY), XK2
	movaps	XK0, XT0
	punpcklqdq XK1, XK0	C [0,1,4,5]
	punpckhqdq XK1, XT0	C [2,3,6,7]
	paddd	XK0, XM0
	paddd	XT0, XM1
	C Key words 8-11 become words 0-3 of the next block.
	movaps	XK2, XK0
	movaps	XK1, XT0
	punpcklqdq XK2, XK1	C [4,5,8,9]
	punpckhqdq XK2, XT0	C [6,7,10,11]
	paddd	XK1, XM2
	paddd	XT0, XM3

	C Products of the even lanes (pmuludq on XM registers) and of
	C the odd lanes (HI2LO copies in XT0, XT1).
	HI2LO	XM0, XT0
	HI2LO	XM2, XT1
	pmuludq	XM0, XM2
	pmuludq	XT0, XT1
	paddq	XM2, XY0
	paddq	XT1, XY0

	HI2LO	XM1, XT0
	HI2LO	XM3, XT1
	pmuludq	XM1, XM3
	pmuludq	XT0, XT1
	paddq	XM3, XY0
	paddq	XT1, XY0

	C The flags set by subl are still valid at ja, since lea does
	C not modify them.
	subl	$32, XREG(LENGTH)
	lea	32(MSG), MSG
	lea	32(KEY), KEY
	ja	.Loop2

	C Store the two 64-bit sums.
	movups	XY0, (OUT)
.Lend:
	W64_EXIT(5, 14)
	ret
EPILOGUE(_nettle_umac_nh_n)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment