Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
Wim Lewis
nettle
Commits
93037338
Commit
93037338
authored
Apr 16, 2013
by
Niels Möller
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
x86_64 assembly for umac_nh_n.
parent
0f10b7b4
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
263 additions
and
0 deletions
+263
-0
ChangeLog
ChangeLog
+2
-0
x86_64/umac-nh-n.asm
x86_64/umac-nh-n.asm
+261
-0
No files found.
ChangeLog
View file @
93037338
2013-04-16 Niels Möller <nisse@lysator.liu.se>
* x86_64/umac-nh-n.asm: New file, 3.5 time speedup.
* umac32.c (umac32_digest): Fix nonce caching.
* umac64.c (umac64_digest): Likewise.
...
...
x86_64/umac-nh-n.asm
0 → 100644
View file @
93037338
C
nettle
,
low
-
level
cryptographics
library
C
C
Copyright
(
C
)
2013
Niels
M
ö
ller
C
C
The
nettle
library
is
free
software
; you can redistribute it and/or modify
C
it
under
the
terms
of
the
GNU
Lesser
General
Public
License
as
published
by
C
the
Free
Software
Foundation
; either version 2.1 of the License, or (at your
C
option
)
any
later
version.
C
C
The
nettle
library
is
di
stributed
in
the
hope
that
it
will
be
useful
,
but
C
WITHOUT
ANY
WARRANTY
; without even the implied warranty of MERCHANTABILITY
C
or
FITNESS
FOR
A
PARTICULAR
PURPOSE.
See
the
GNU
Lesser
General
Public
C
License
for
more
details.
C
C
You
should
have
received
a
copy
of
the
GNU
Lesser
General
Public
License
C
al
ong
with
the
nettle
library
; see the file COPYING.LIB. If not, write to
C
the
Free
Software
Foundation
,
Inc.
,
51
Franklin
Street
,
Fifth
Floor
,
Boston
,
C
MA
02111
-
1301
,
USA.
define
(
<
OUT
>
,
<%
rdi
>
)
define
(
<
ITERS
>
,
<%
rsi
>
)
define
(
<
KEY
>
,
<%
rdx
>
)
define
(
<
LENGTH
>
,
<%
rcx
>
)
define
(
<
MSG
>
,
<%
r8
>
)
define
(
<
XM0
>
,
<%
xmm0
>
)
define
(
<
XM1
>
,
<%
xmm1
>
)
define
(
<
XM2
>
,
<%
xmm2
>
)
define
(
<
XM3
>
,
<%
xmm3
>
)
define
(
<
XK0
>
,
<%
xmm4
>
)
define
(
<
XK1
>
,
<%
xmm5
>
)
define
(
<
XK2
>
,
<%
xmm6
>
)
define
(
<
XK3
>
,
<%
xmm7
>
)
define
(
<
XT0
>
,
<%
xmm8
>
)
define
(
<
XT1
>
,
<%
xmm9
>
)
define
(
<
XT2
>
,
<%
xmm10
>
)
define
(
<
XT3
>
,
<%
xmm11
>
)
define
(
<
XY0
>
,
<%
xmm12
>
)
define
(
<
XY1
>
,
<%
xmm13
>
)
C
Copy
[
0
,
1
,
2
,
3
]
to
[
1
,
1
,
3
,
3
]
define
(
<
HI2LO
>
,
<
pshufd
<
$
>
0xf5
,
>
)
C
FIXME
:
Would
be
nice
if
we
could
force
the
key
array
to
be
16
-
byte
C
al
igned.
.file
"
umac
-
nh
-
n.asm
"
C
umac_nh_n
(
uint64_t
*
out
,
unsigned
n
,
const
uint32_t
*
key
,
C
unsigned
length
,
const
uint8_t
*
msg
)
.text
ALIGN
(
4
)
PROLOGUE
(
_nettle_umac_nh_n
)
W64_ENTRY
(
5
,
14
)
pxor
XY0
,
XY0
cmp
$
3
,
ITERS
jc
.Lnh2
je
.Lnh3
.Lnh4:
movups
(
KEY
),
XK0
movups
16
(
KEY
),
XT2
movups
32
(
KEY
),
XK2
lea
48
(
KEY
),
KEY
C
Leave
XK2
untuched
,
and
put
permuted
keys
in
XK0
,
XK1
,
XT2
,
XT3
movaps
XK0
,
XT0
movaps
XK0
,
XK1
punpcklqdq
XT2
,
XK0
C
[
0
,
1
,
4
,
5
]
punpckhqdq
XT2
,
XK1
C
[
2
,
3
,
6
,
7
]
movaps
XT2
,
XT3
punpcklqdq
XK2
,
XT2
C
[
4
,
5
,
8
,
9
]
punpckhqdq
XK2
,
XT3
C
[
6
,
7
,
10
,
11
]
movaps
XY0
,
XY1
.Loop4:
movups
(
MSG
),
XT0
movups
16
(
MSG
),
XT1
pshufd
$
0xee
,
XT1
,
XM3
C
[
6
,
7
,
6
,
7
]
pshufd
$
0x44
,
XT1
,
XM2
C
[
4
,
5
,
4
,
5
]
pshufd
$
0xee
,
XT0
,
XM1
C
[
2
,
3
,
2
,
3
]
pshufd
$
0x44
,
XT0
,
XM0
C
[
0
,
1
,
0
,
1
]
paddd
XM0
,
XK0
paddd
XM1
,
XK1
paddd
XM2
,
XT2
paddd
XM3
,
XT3
HI2LO
XK0
,
XT0
HI2LO
XT2
,
XT1
pmuludq
XK0
,
XT2
pmuludq
XT0
,
XT1
paddq
XT2
,
XY0
paddq
XT1
,
XY0
HI2LO
XK1
,
XT0
HI2LO
XT3
,
XT1
pmuludq
XK1
,
XT3
pmuludq
XT0
,
XT1
paddq
XT3
,
XY0
paddq
XT1
,
XY0
movaps
XK2
,
XK0
movaps
XK2
,
XK1
movups
(
KEY
),
XT2
movups
16
(
KEY
),
XK2
punpcklqdq
XT2
,
XK0
C
[
8
,
9
,
12
,
13
]
punpckhqdq
XT2
,
XK1
C
[
10
,
11
,
14
,
15
]
movaps
XT2
,
XT3
punpcklqdq
XK2
,
XT2
C
[
12
,
13
,
16
,
17
]
punpckhqdq
XK2
,
XT3
C
[
14
,
15
,
18
,
19
]
paddd
XK0
,
XM0
paddd
XK1
,
XM1
paddd
XT2
,
XM2
paddd
XT3
,
XM3
HI2LO
XM0
,
XT0
HI2LO
XM2
,
XT1
pmuludq
XM0
,
XM2
pmuludq
XT0
,
XT1
paddq
XM2
,
XY1
paddq
XT1
,
XY1
HI2LO
XM1
,
XT0
HI2LO
XM3
,
XT1
pmuludq
XM1
,
XM3
pmuludq
XT0
,
XT1
paddq
XM3
,
XY1
paddq
XT1
,
XY1
subl
$
32
,
XREG
(
LENGTH
)
lea
32
(
MSG
),
MSG
lea
32
(
KEY
),
KEY
ja
.Loop4
movups
XY0
,
(
OUT
)
movups
XY1
,
16
(
OUT
)
W64_EXIT
(
5
,
14
)
ret
.Lnh3:
movups
(
KEY
),
XK0
movups
16
(
KEY
),
XK1
movaps
XY0
,
XY1
.Loop3:
lea
32
(
KEY
),
KEY
movups
(
MSG
),
XT0
movups
16
(
MSG
),
XT1
movups
(
KEY
),
XK2
movups
16
(
KEY
),
XK3
pshufd
$
0xee
,
XT1
,
XM3
C
[
6
,
7
,
6
,
7
]
pshufd
$
0x44
,
XT1
,
XM2
C
[
4
,
5
,
4
,
5
]
pshufd
$
0xee
,
XT0
,
XM1
C
[
2
,
3
,
2
,
3
]
pshufd
$
0x44
,
XT0
,
XM0
C
[
0
,
1
,
0
,
1
]
C
Iteration
2
paddd
XK2
,
XT0
paddd
XK3
,
XT1
HI2LO
XT0
,
XT2
HI2LO
XT1
,
XT3
pmuludq
XT0
,
XT1
pmuludq
XT2
,
XT3
paddq
XT1
,
XY1
paddq
XT3
,
XY1
C
Iteration
0
,
1
movaps
XK0
,
XT0
punpcklqdq
XK1
,
XK0
C
[
0
,
1
,
4
,
5
]
punpckhqdq
XK1
,
XT0
C
[
2
,
3
,
6
,
7
]
paddd
XK0
,
XM0
paddd
XT0
,
XM1
movaps
XK2
,
XK0
movaps
XK1
,
XT0
punpcklqdq
XK2
,
XK1
C
[
4
,
5
,
8
,
9
]
punpckhqdq
XK2
,
XT0
C
[
6
,
7
,
10
,
11
]
paddd
XK1
,
XM2
paddd
XT0
,
XM3
HI2LO
XM0
,
XT0
HI2LO
XM2
,
XT1
pmuludq
XM0
,
XM2
pmuludq
XT0
,
XT1
paddq
XM2
,
XY0
paddq
XT1
,
XY0
HI2LO
XM1
,
XT0
HI2LO
XM3
,
XT1
pmuludq
XM1
,
XM3
pmuludq
XT0
,
XT1
paddq
XM3
,
XY0
paddq
XT1
,
XY0
subl
$
32
,
XREG
(
LENGTH
)
lea
32
(
MSG
),
MSG
movaps
XK2
,
XK0
movaps
XK3
,
XK1
ja
.Loop3
pshufd
$
0xe
,
XY1
,
XT0
paddq
XT0
,
XY1
movups
XY0
,
(
OUT
)
movlpd
XY1
,
16
(
OUT
)
W64_EXIT
(
5
,
14
)
ret
.Lnh2:
C
Explode
message
as
[
0
,
1
,
0
,
1
]
[
2
,
3
,
2
,
3
]
[
4
,
5
,
4
,
5
]
[
6
,
7
,
6
,
7
]
C
Interleave
keys
as
[
0
,
1
,
4
,
5
]
[
2
,
3
,
6
,
7
]
[
4
,
5
,
8
,
9
]
[
7
,
8
,
10
,
11
]
movups
(
KEY
),
XK0
lea
16
(
KEY
),
KEY
.Loop2:
movups
(
MSG
),
XM0
movups
16
(
MSG
),
XM1
pshufd
$
0xee
,
XM1
,
XM3
C
[
6
,
7
,
6
,
7
]
pshufd
$
0x44
,
XM1
,
XM2
C
[
4
,
5
,
4
,
5
]
pshufd
$
0xee
,
XM0
,
XM1
C
[
2
,
3
,
2
,
3
]
pshufd
$
0x44
,
XM0
,
XM0
C
[
0
,
1
,
0
,
1
]
movups
(
KEY
),
XK1
movups
16
(
KEY
),
XK2
movaps
XK0
,
XT0
punpcklqdq
XK1
,
XK0
C
[
0
,
1
,
4
,
5
]
punpckhqdq
XK1
,
XT0
C
[
2
,
3
,
6
,
7
]
paddd
XK0
,
XM0
paddd
XT0
,
XM1
movaps
XK2
,
XK0
movaps
XK1
,
XT0
punpcklqdq
XK2
,
XK1
C
[
4
,
5
,
8
,
9
]
punpckhqdq
XK2
,
XT0
C
[
6
,
7
,
10
,
11
]
paddd
XK1
,
XM2
paddd
XT0
,
XM3
HI2LO
XM0
,
XT0
HI2LO
XM2
,
XT1
pmuludq
XM0
,
XM2
pmuludq
XT0
,
XT1
paddq
XM2
,
XY0
paddq
XT1
,
XY0
HI2LO
XM1
,
XT0
HI2LO
XM3
,
XT1
pmuludq
XM1
,
XM3
pmuludq
XT0
,
XT1
paddq
XM3
,
XY0
paddq
XT1
,
XY0
subl
$
32
,
XREG
(
LENGTH
)
lea
32
(
MSG
),
MSG
lea
32
(
KEY
),
KEY
ja
.Loop2
movups
XY0
,
(
OUT
)
.Lend:
W64_EXIT
(
5
,
14
)
ret
EPILOGUE
(
_nettle_umac_nh_n
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment