Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
N
nettle
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Labels
Merge Requests
5
Merge Requests
5
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Commits
Open sidebar
Nettle
nettle
Commits
4611f79c
Commit
4611f79c
authored
Sep 22, 2013
by
Niels Möller
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
New x86_64 assembly for gcm hashing.
parent
a99c33b4
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
166 additions
and
58 deletions
+166
-58
ChangeLog
ChangeLog
+10
-0
configure.ac
configure.ac
+2
-2
gcm.c
gcm.c
+8
-5
x86_64/gcm-hash8.asm
x86_64/gcm-hash8.asm
+146
-51
No files found.
ChangeLog
View file @
4611f79c
2013-09-22 Niels Möller <nisse@lysator.liu.se>
* x86_64/gcm-hash8.asm: New file.
* x86_64/gcm-gf-mul-8.asm: Deleted.
* configure.ac (asm_nettle_optional_list): Look for gcm-hash8.asm,
not gcm-gf-mul-8.asm.
* gcm.c [HAVE_NATIVE_gcm_hash8]: Make use of (optional) assembly
implementation.
2013-09-21 Niels Möller <nisse@lysator.liu.se>
* Makefile.in (des.po): Add same dependencies as for des.o.
...
...
configure.ac
View file @
4611f79c
...
...
@@ -266,7 +266,7 @@ asm_replace_list="aes-encrypt-internal.asm aes-decrypt-internal.asm \
sha3-permute.asm umac-nh.asm umac-nh-n.asm machine.m4"
# Assembler files which generate additional object files if they are used.
asm_nettle_optional_list="gcm-
gf-mul-
8.asm"
asm_nettle_optional_list="gcm-
hash
8.asm"
asm_hogweed_optional_list=""
if test "x$enable_public_key" = "xyes" ; then
asm_hogweed_optional_list="ecc-192-modp.asm ecc-224-modp.asm \
...
...
@@ -347,7 +347,7 @@ AH_VERBATIM([HAVE_NATIVE],
#undef HAVE_NATIVE_ecc_384_redc
#undef HAVE_NATIVE_ecc_521_modp
#undef HAVE_NATIVE_ecc_521_redc
#undef HAVE_NATIVE_gcm_
gf_mul_
8])
#undef HAVE_NATIVE_gcm_
hash
8])
# Besides getting correct dependencies, the explicit rules also tell
# make that the .s files "ought to exist", so they are preferred over
...
...
gcm.c
View file @
4611f79c
...
...
@@ -214,12 +214,13 @@ gcm_gf_mul (union gcm_block *x, const union gcm_block *table)
memcpy
(
x
->
b
,
Z
.
b
,
sizeof
(
Z
));
}
# elif GCM_TABLE_BITS == 8
# if HAVE_NATIVE_gcm_
gf_mul_
8
# if HAVE_NATIVE_gcm_
hash
8
#define gcm_
gf_mul _nettle_gcm_gf_mul_
8
#define gcm_
hash _nettle_gcm_hash
8
void
gcm_gf_mul
(
union
gcm_block
*
x
,
const
union
gcm_block
*
y
);
# else
/* !HAVE_NATIVE_gcm_gf_mul_8 */
_nettle_gcm_hash8
(
const
struct
gcm_key
*
key
,
union
gcm_block
*
x
,
size_t
length
,
const
uint8_t
*
data
);
# else
/* !HAVE_NATIVE_gcm_hash8 */
static
const
uint16_t
shift_table
[
0x100
]
=
{
W
(
00
,
00
),
W
(
01
,
c2
),
W
(
03
,
84
),
W
(
02
,
46
),
W
(
07
,
08
),
W
(
06
,
ca
),
W
(
04
,
8
c
),
W
(
05
,
4
e
),
...
...
@@ -310,7 +311,7 @@ gcm_gf_mul (union gcm_block *x, const union gcm_block *table)
gcm_gf_shift_8
(
&
Z
);
gcm_gf_add
(
x
,
&
Z
,
&
table
[
x
->
b
[
0
]]);
}
# endif
/* ! HAVE_NATIVE_gcm_
gf_mul_
8 */
# endif
/* ! HAVE_NATIVE_gcm_
hash
8 */
# else
/* GCM_TABLE_BITS != 8 */
# error Unsupported table size.
# endif
/* GCM_TABLE_BITS != 8 */
...
...
@@ -353,6 +354,7 @@ gcm_set_key(struct gcm_key *key,
#endif
}
#ifndef gcm_hash
static
void
gcm_hash
(
const
struct
gcm_key
*
key
,
union
gcm_block
*
x
,
size_t
length
,
const
uint8_t
*
data
)
...
...
@@ -369,6 +371,7 @@ gcm_hash(const struct gcm_key *key, union gcm_block *x,
gcm_gf_mul
(
x
,
key
->
h
);
}
}
#endif
/* !gcm_hash */
static
void
gcm_hash_sizes
(
const
struct
gcm_key
*
key
,
union
gcm_block
*
x
,
...
...
x86_64/gcm-
gf-mul-
8.asm
→
x86_64/gcm-
hash
8.asm
View file @
4611f79c
C
nettle
,
low
-
level
cryptographics
library
C
C
C
Copyright
(
C
)
2013
,
Niels
M
ö
ller
C
C
C
The
nettle
library
is
free
software
; you can redistribute it and/or modify
C
it
under
the
terms
of
the
GNU
Lesser
General
Public
License
as
published
by
C
the
Free
Software
Foundation
; either version 2.1 of the License, or (at your
C
option
)
any
later
version.
C
C
C
The
nettle
library
is
di
stributed
in
the
hope
that
it
will
be
useful
,
but
C
WITHOUT
ANY
WARRANTY
; without even the implied warranty of MERCHANTABILITY
C
or
FITNESS
FOR
A
PARTICULAR
PURPOSE.
See
the
GNU
Lesser
General
Public
C
License
for
more
details.
C
C
C
You
should
have
received
a
copy
of
the
GNU
Lesser
General
Public
License
C
al
ong
with
the
nettle
library
; see the file COPYING.LIB. If not, write to
C
the
Free
Software
Foundation
,
Inc.
,
51
Franklin
Street
,
Fifth
Floor
,
Boston
,
...
...
@@ -19,47 +19,51 @@ C MA 02111-1301, USA.
C
Register
usage
:
define
(
<
XP
>
,
<%
rdi
>
)
define
(
<
TABLE
>
,
<%
rsi
>
)
define
(
<
XW
>
,
<%
rax
>
)
define
(
<
CNT
>
,
<%
ecx
>
)
define
(
<
Z0
>
,
<%
rdx
>
)
define
(
<
Z1
>
,
<%
r8
>
)
define
(
<
T0
>
,
<%
r9
>
)
define
(
<
T1
>
,
<%
r10
>
)
define
(
<
T2
>
,
<%
r11
>
)
define
(
<
SHIFT_TABLE
>
,
<%
rbx
>
)
C
The
C
code
is
12.5
c
/
byte
,
slower
than
sha1
(
10.6
),
while
this
code
runs
C
at
10.2
,
slightly
faster.
Benchmarked
on
a
low
-
end
AMD
E
-
350
.
.file
"gcm-gf-mul-8.asm"
C
void
_gcm_gf_mul_8
(
union
gcm_block
*
x
,
const
union
gcm_block
*
table
)
define
(
<
KEY
>
,
<%
rdi
>
)
define
(
<
XP
>
,
<%
rsi
>
)
define
(
<
LENGTH
>
,
<%
rdx
>
)
define
(
<
SRC
>
,
<%
rcx
>
)
define
(
<
X0
>
,
<%
rax
>
)
define
(
<
X1
>
,
<%
rbx
>
)
define
(
<
CNT
>
,
<%
ebp
>
)
define
(
<
T0
>
,
<%
r8
>
)
define
(
<
T1
>
,
<%
r9
>
)
define
(
<
T2
>
,
<%
r10
>
)
define
(
<
Z0
>
,
<%
r11
>
)
define
(
<
Z1
>
,
<%
r12
>
)
define
(
<
SHIFT_TABLE
>
,
<%
r13
>
)
.file
"gcm-hash8.asm"
C
void
gcm_hash
(
const
struct
gcm_key
*
key
,
union
gcm_block
*
x
,
C
si
ze_t
length
,
const
uint8_t
*
data
)
.text
ALIGN
(
16
)
PROLOGUE
(
_nettle_gcm_
gf_mul_
8
)
W64_ENTRY
(
2
,
0
)
PROLOGUE
(
_nettle_gcm_
hash
8
)
W64_ENTRY
(
4
,
0
)
push
%
rbx
mov
8
(
XP
),
XW
rol
$
8
,
XW
movzbl
LREG
(
XW
),
XREG
(
T0
)
shl
$
4
,
T0
mov
(
TABLE
,
T0
),
Z0
mov
8
(
TABLE
,
T0
),
Z1
push
%
rbp
push
%
r12
push
%
r13
sub
$
16
,
LENGTH
lea
.Lshift_table
(
%
rip
),
SHIFT_TABLE
movl
$
7
,
CNT
call
.Lmul_word
mov
(
XP
),
XW
movl
$
8
,
CNT
call
.Lmul_word
mov
Z0
,
(
XP
)
mov
Z1
,
8
(
XP
)
W64_EXIT
(
2
,
0
)
pop
%
rbx
ret
mov
(
XP
),
X0
mov
8
(
XP
),
X1
jc
.Lfinal
ALIGN
(
16
)
.Lblock_loop:
xor
(
SRC
),
X0
xor
8
(
SRC
),
X1
.Lblock_mul:
rol
$
8
,
X1
movzbl
LREG
(
X1
),
XREG
(
T1
)
shl
$
4
,
T1
mov
(
KEY
,
T1
),
Z0
mov
8
(
KEY
,
T1
),
Z1
.Lmul_word:
C
shift
Z1
,
Z0
,
transforming
C
+-----------------------+-----------------------+
C
|
15
14
13
12
11
10
09
08
|
07
06
05
04
03
02
01
00
|
...
...
@@ -70,25 +74,118 @@ PROLOGUE(_nettle_gcm_gf_mul_8)
C
+-----------------------+-----------------+-----+
C
xor
|
T
[
15
]
|
C
+-----+
mov
$
7
,
CNT
ALIGN
(
16
)
.Loop_X1:
mov
Z1
,
T1
shr
$
56
,
T1
shl
$
8
,
Z1
mov
Z0
,
T0
shl
$
8
,
Z1
C
Use
shld?
shl
$
8
,
Z0
shr
$
56
,
T1
shr
$
56
,
T0
movzwl
(
SHIFT_TABLE
,
T1
,
2
),
XREG
(
T1
)
rol
$
8
,
XW
xor
T1
,
Z0
rol
$
8
,
X1
movzbl
LREG
(
X1
),
XREG
(
T2
)
shl
$
4
,
T2
xor
(
KEY
,
T2
),
Z0
add
T0
,
Z1
xor
8
(
KEY
,
T2
),
Z1
decl
CNT
jne
.Loop_X1
mov
$
7
,
CNT
ALIGN
(
16
)
.Loop_X0:
mov
Z1
,
T1
shr
$
56
,
T1
shl
$
8
,
Z1
mov
Z0
,
T0
shl
$
8
,
Z0
shr
$
56
,
T0
movzwl
(
SHIFT_TABLE
,
T1
,
2
),
XREG
(
T1
)
xor
T1
,
Z0
movzbl
LREG
(
XW
),
XREG
(
T2
)
rol
$
8
,
X0
movzbl
LREG
(
X0
),
XREG
(
T2
)
shl
$
4
,
T2
xor
(
TABLE
,
T2
),
Z0
xor
8
(
TABLE
,
T2
),
Z1
xor
(
KEY
,
T2
),
Z0
add
T0
,
Z1
xor
8
(
KEY
,
T2
),
Z1
decl
CNT
jne
.Lmul_word
jne
.Loop_X0
mov
Z1
,
T1
shr
$
56
,
T1
shl
$
8
,
Z1
mov
Z0
,
T0
shl
$
8
,
Z0
shr
$
56
,
T0
movzwl
(
SHIFT_TABLE
,
T1
,
2
),
XREG
(
T1
)
xor
T1
,
Z0
rol
$
8
,
X0
movzbl
LREG
(
X0
),
XREG
(
T2
)
shl
$
4
,
T2
mov
(
KEY
,
T2
),
X0
xor
Z0
,
X0
add
T0
,
Z1
mov
8
(
KEY
,
T2
),
X1
xor
Z1
,
X1
add
$
16
,
SRC
sub
$
16
,
LENGTH
jnc
.Lblock_loop
.Lfinal:
add
$
16
,
LENGTH
jnz
.Lpartial
mov
X0
,
(
XP
)
mov
X1
,
8
(
XP
)
pop
%
r13
pop
%
r12
pop
%
rbp
pop
%
rbx
W64_EXIT
(
2
,
0
)
ret
.Lpartial:
C
Read
and
xor
partial
bl
ock
,
then
jump
back
into
the
loop
C
with
LENGTH
==
0
.
cmp
$
8
,
LENGTH
jc
.Llt8
C
8
<
=
LENGTH
<
16
xor
(
SRC
),
X0
add
$
8
,
SRC
sub
$
8
,
LENGTH
jz
.Lblock_mul
call
.Lread_bytes
xor
T0
,
X1
jmp
.Lblock_mul
.Llt8:
C
0
<
LENGTH
<
8
call
.Lread_bytes
xor
T0
,
X0
jmp
.Lblock_mul
C
Read
0
<
LENGTH
<
8
byte
s
at
SRC
,
result
in
T0
.Lread_bytes:
xor
T0
,
T0
sub
$
1
,
SRC
ALIGN
(
16
)
.Lread_loop:
shl
$
8
,
T0
orb
(
SRC
,
LENGTH
),
LREG
(
T0
)
.Lread_next:
sub
$
1
,
LENGTH
jnz
.Lread_loop
ret
EPILOGUE
(
_nettle_gcm_gf_mul_8
)
EPILOGUE
(
_nettle_gcm_hash8
)
define
(
<
W
>
,
<
0
x$2$1
>
)
.section
.rodata
...
...
@@ -126,5 +223,3 @@ define(<W>, <0x$2$1>)
.hword
W
(
a7
,
d0
),
W
(
a6
,
12
),
W
(
a4
,
54
),
W
(
a5
,
96
),
W
(
a0
,
d8
),
W
(
a1
,
1
a
),
W
(
a3
,
5
c
),
W
(
a2
,
9
e
)
.hword
W
(
b5
,
e0
),
W
(
b4
,
22
),
W
(
b6
,
64
),
W
(
b7
,
a6
),
W
(
b2
,
e8
),
W
(
b3
,
2
a
),
W
(
b1
,
6
c
),
W
(
b0
,
ae
)
.hword
W
(
bb
,
f0
),
W
(
ba
,
32
),
W
(
b8
,
74
),
W
(
b9
,
b6
),
W
(
bc
,
f8
),
W
(
bd
,
3
a
),
W
(
bf
,
7
c
),
W
(
be
,
be
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment