Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Dmitry Baryshkov
nettle
Commits
db9b8594
Commit
db9b8594
authored
Jan 03, 2018
by
Niels Möller
Browse files
Unroll x86_64 aesni loops.
parent
0a679090
Changes
3
Hide whitespace changes
Inline
Side-by-side
ChangeLog
View file @
db9b8594
2018-01-03 Niels Möller <nisse@lysator.liu.se>
* x86_64/aesni/aes-encrypt-internal.asm: Read subkeys into xmm
registers before the block loop, and completely unroll the round
loop.
* x86_64/aesni/aes-decrypt-internal.asm: Likewise.
2017-11-19 Niels Möller <nisse@lysator.liu.se>
* Released nettle-3.4.
...
...
x86_64/aesni/aes-decrypt-internal.asm
View file @
db9b8594
...
...
@@ -2,7 +2,7 @@ C x86_64/aesni/aes-decrypt-internal.asm
ifelse
(
<
Copyright (C) 2015 Niels Möller
Copyright (C) 2015, 2018 Niels Möller
This
file
is
part
of
GNU
Nettle.
...
...
@@ -39,15 +39,22 @@ define(<LENGTH>,<%rcx>)
define
(
<
DS
T
>
,
<%
r8
>
)
define
(
<
SRC
>
,
<%
r9
>
)
C
Round
counter
define
(
<
CNT
>
,
<%
rdx
>
)
C
Subkey
pointer
define
(
<
KEY
>
,
<%
rax
>
)
dnl
aesdec
%
xmm1
,
%
xmm0
define
(
<
AESDEC
>
,
<
.byte
0x66
,
0x0f
,
0x38
,
0xde
,
0xc1
>
)
dnl
aesdeclast
%
xmm1
,
%
xmm0
define
(
<
AESDECLAST
>
,
<
.byte
0x66
,
0x0f
,
0x38
,
0xdf
,
0xc1
>
)
define
(
<
KEY0
>
,
<%
xmm0
>
)
define
(
<
KEY1
>
,
<%
xmm1
>
)
define
(
<
KEY2
>
,
<%
xmm2
>
)
define
(
<
KEY3
>
,
<%
xmm3
>
)
define
(
<
KEY4
>
,
<%
xmm4
>
)
define
(
<
KEY5
>
,
<%
xmm5
>
)
define
(
<
KEY6
>
,
<%
xmm6
>
)
define
(
<
KEY7
>
,
<%
xmm7
>
)
define
(
<
KEY8
>
,
<%
xmm8
>
)
define
(
<
KEY9
>
,
<%
xmm9
>
)
define
(
<
KEY10
>
,
<%
xmm10
>
)
define
(
<
KEY11
>
,
<%
xmm11
>
)
define
(
<
KEY12
>
,
<%
xmm12
>
)
define
(
<
KEY13
>
,
<%
xmm13
>
)
define
(
<
KEYLAST
>
,
<%
xmm14
>
)
define
(
<
BL
OCK
>
,
<%
xmm15
>
)
.file
"
aes
-
decrypt
-
internal.asm
"
...
...
@@ -58,43 +65,70 @@ define(<AESDECLAST>, <.byte 0x66, 0x0f, 0x38, 0xdf, 0xc1>)
.text
ALIGN
(
16
)
PROLOGUE
(
_nettle_aes_decrypt
)
W64_ENTRY
(
6
,
2
)
W64_ENTRY
(
6
,
16
)
shr
$
4
,
LENGTH
test
LENGTH
,
LENGTH
jz
.Lend
decl
XREG
(
ROUNDS
)
movups
(
KEYS
),
KEY0
movups
16
(
KEYS
),
KEY1
movups
32
(
KEYS
),
KEY2
movups
48
(
KEYS
),
KEY3
movups
64
(
KEYS
),
KEY4
movups
80
(
KEYS
),
KEY5
movups
96
(
KEYS
),
KEY6
movups
112
(
KEYS
),
KEY7
movups
128
(
KEYS
),
KEY8
movups
144
(
KEYS
),
KEY9
lea
160
(
KEYS
),
KEYS
sub
$
10
,
XREG
(
ROUNDS
)
C Also clears high half
je
.Lkey_last
movups
(
KEYS
),
KEY10
movups
16
(
KEYS
),
KEY11
lea
(
KEYS
,
ROUNDS
,
8
),
KEYS
lea
(
KEYS
,
ROUNDS
,
8
),
KEYS
cmpl
$
2
,
XREG
(
ROUNDS
)
je
.Lkey_last
movups
-
32
(
KEYS
),
KEY12
movups
-
16
(
KEYS
),
KEY13
.Lkey_last:
movups
(
KEYS
),
KEYLAST
.Lblock_loop:
mov
ROUNDS
,
CNT
mov
KEYS
,
KEY
movups
(
SRC
),
%
xmm0
C FIXME: Better alignment of subkeys, so we can use movaps.
movups
(
KEY
),
%
xmm1
pxor
%
xmm1
,
%
xmm0
C FIXME: Could use some unrolling. Also all subkeys fit in
C registers, so they could be loaded once (on W64 we would
C need to save and restore some xmm registers, though).
.Lround_loop:
add
$
16
,
KEY
movups
(
KEY
),
%
xmm1
AESDEC
C
%
xmm1
,
%
xmm0
decl
XREG
(
CNT
)
jnz
.Lround_loop
movups
16
(
KEY
),
%
xmm1
AESDECLAST
C
%
xmm1
,
%
xmm0
movups
%
xmm0
,
(
DS
T
)
movups
(
SRC
),
BL
OCK
pxor
KEY0
,
BL
OCK
aesdec
KEY1
,
BL
OCK
aesdec
KEY2
,
BL
OCK
aesdec
KEY3
,
BL
OCK
aesdec
KEY4
,
BL
OCK
aesdec
KEY5
,
BL
OCK
aesdec
KEY6
,
BL
OCK
aesdec
KEY7
,
BL
OCK
aesdec
KEY8
,
BL
OCK
aesdec
KEY9
,
BL
OCK
testl
XREG
(
ROUNDS
),
XREG
(
ROUNDS
)
je
.Lblock_end
aesdec
KEY10
,
BL
OCK
aesdec
KEY11
,
BL
OCK
cmpl
$
2
,
XREG
(
ROUNDS
)
je
.Lblock_end
aesdec
KEY12
,
BL
OCK
aesdec
KEY13
,
BL
OCK
.Lblock_end:
aesdeclast
KEYLAST
,
BL
OCK
movups
BL
OCK
,
(
DS
T
)
add
$
16
,
SRC
add
$
16
,
DS
T
dec
LENGTH
jnz
.Lblock_loop
.Lend:
W64_EXIT
(
6
,
2
)
W64_EXIT
(
6
,
16
)
ret
EPILOGUE
(
_nettle_aes_decrypt
)
x86_64/aesni/aes-encrypt-internal.asm
View file @
db9b8594
...
...
@@ -2,7 +2,7 @@ C x86_64/aesni/aes-encrypt-internal.asm
ifelse
(
<
Copyright (C) 2015 Niels Möller
Copyright (C) 2015, 2018 Niels Möller
This
file
is
part
of
GNU
Nettle.
...
...
@@ -39,16 +39,23 @@ define(<LENGTH>,<%rcx>)
define
(
<
DS
T
>
,
<%
r8
>
)
define
(
<
SRC
>
,
<%
r9
>
)
C
Round
counter
define
(
<
CNT
>
,
<%
rdx
>
)
C
Subkey
pointer
define
(
<
KEY
>
,
<%
rax
>
)
define
(
<
KEY0
>
,
<%
xmm0
>
)
define
(
<
KEY1
>
,
<%
xmm1
>
)
define
(
<
KEY2
>
,
<%
xmm2
>
)
define
(
<
KEY3
>
,
<%
xmm3
>
)
define
(
<
KEY4
>
,
<%
xmm4
>
)
define
(
<
KEY5
>
,
<%
xmm5
>
)
define
(
<
KEY6
>
,
<%
xmm6
>
)
define
(
<
KEY7
>
,
<%
xmm7
>
)
define
(
<
KEY8
>
,
<%
xmm8
>
)
define
(
<
KEY9
>
,
<%
xmm9
>
)
define
(
<
KEY10
>
,
<%
xmm10
>
)
define
(
<
KEY11
>
,
<%
xmm11
>
)
define
(
<
KEY12
>
,
<%
xmm12
>
)
define
(
<
KEY13
>
,
<%
xmm13
>
)
define
(
<
KEYLAST
>
,
<%
xmm14
>
)
define
(
<
BL
OCK
>
,
<%
xmm15
>
)
dnl
aesenc
%
xmm1
,
%
xmm0
define
(
<
AESENC
>
,
<
.byte
0x66
,
0x0f
,
0x38
,
0xdc
,
0xc1
>
)
dnl
aesenclast
%
xmm1
,
%
xmm0
define
(
<
AESENCLAST
>
,
<
.byte
0x66
,
0x0f
,
0x38
,
0xdd
,
0xc1
>
)
.file
"
aes
-
encrypt
-
internal.asm
"
C
_aes_encrypt
(
unsigned
rounds
,
const
uint32_t
*
keys
,
...
...
@@ -58,43 +65,70 @@ define(<AESENCLAST>, <.byte 0x66, 0x0f, 0x38, 0xdd, 0xc1>)
.text
ALIGN
(
16
)
PROLOGUE
(
_nettle_aes_encrypt
)
W64_ENTRY
(
6
,
2
)
W64_ENTRY
(
6
,
16
)
shr
$
4
,
LENGTH
test
LENGTH
,
LENGTH
jz
.Lend
decl
XREG
(
ROUNDS
)
movups
(
KEYS
),
KEY0
movups
16
(
KEYS
),
KEY1
movups
32
(
KEYS
),
KEY2
movups
48
(
KEYS
),
KEY3
movups
64
(
KEYS
),
KEY4
movups
80
(
KEYS
),
KEY5
movups
96
(
KEYS
),
KEY6
movups
112
(
KEYS
),
KEY7
movups
128
(
KEYS
),
KEY8
movups
144
(
KEYS
),
KEY9
lea
160
(
KEYS
),
KEYS
sub
$
10
,
XREG
(
ROUNDS
)
C Also clears high half
je
.Lkey_last
movups
(
KEYS
),
KEY10
movups
16
(
KEYS
),
KEY11
lea
(
KEYS
,
ROUNDS
,
8
),
KEYS
lea
(
KEYS
,
ROUNDS
,
8
),
KEYS
cmpl
$
2
,
XREG
(
ROUNDS
)
je
.Lkey_last
movups
-
32
(
KEYS
),
KEY12
movups
-
16
(
KEYS
),
KEY13
.Lkey_last:
movups
(
KEYS
),
KEYLAST
.Lblock_loop:
mov
ROUNDS
,
CNT
mov
KEYS
,
KEY
movups
(
SRC
),
%
xmm0
C FIXME: Better alignment of subkeys, so we can use movaps.
movups
(
KEY
),
%
xmm1
pxor
%
xmm1
,
%
xmm0
C FIXME: Could use some unrolling. Also all subkeys fit in
C registers, so they could be loaded once (on W64 we would
C need to save and restore some xmm registers, though).
.Lround_loop:
add
$
16
,
KEY
movups
(
KEY
),
%
xmm1
AESENC
C
%
xmm1
,
%
xmm0
decl
XREG
(
CNT
)
jnz
.Lround_loop
movups
16
(
KEY
),
%
xmm1
AESENCLAST
C
%
xmm1
,
%
xmm0
movups
%
xmm0
,
(
DS
T
)
movups
(
SRC
),
BL
OCK
pxor
KEY0
,
BL
OCK
aesenc
KEY1
,
BL
OCK
aesenc
KEY2
,
BL
OCK
aesenc
KEY3
,
BL
OCK
aesenc
KEY4
,
BL
OCK
aesenc
KEY5
,
BL
OCK
aesenc
KEY6
,
BL
OCK
aesenc
KEY7
,
BL
OCK
aesenc
KEY8
,
BL
OCK
aesenc
KEY9
,
BL
OCK
testl
XREG
(
ROUNDS
),
XREG
(
ROUNDS
)
je
.Lblock_end
aesenc
KEY10
,
BL
OCK
aesenc
KEY11
,
BL
OCK
cmpl
$
2
,
XREG
(
ROUNDS
)
je
.Lblock_end
aesenc
KEY12
,
BL
OCK
aesenc
KEY13
,
BL
OCK
.Lblock_end:
aesenclast
KEYLAST
,
BL
OCK
movups
BL
OCK
,
(
DS
T
)
add
$
16
,
SRC
add
$
16
,
DS
T
dec
LENGTH
jnz
.Lblock_loop
.Lend:
W64_EXIT
(
6
,
2
)
W64_EXIT
(
6
,
16
)
ret
EPILOGUE
(
_nettle_aes_encrypt
)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment