Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
N
nettle
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Container registry
Model registry
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Wim Lewis
nettle
Commits
db9b8594
Commit
db9b8594
authored
7 years ago
by
Niels Möller
Browse files
Options
Downloads
Patches
Plain Diff
Unroll x86_64 aesni loops.
parent
0a679090
Loading
Loading
No related merge requests found
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
ChangeLog
+7
-0
7 additions, 0 deletions
ChangeLog
x86_64/aesni/aes-decrypt-internal.asm
+70
-36
70 additions, 36 deletions
x86_64/aesni/aes-decrypt-internal.asm
x86_64/aesni/aes-encrypt-internal.asm
+70
-36
70 additions, 36 deletions
x86_64/aesni/aes-encrypt-internal.asm
with
147 additions
and
72 deletions
ChangeLog
+
7
−
0
View file @
db9b8594
2018-01-03 Niels Möller <nisse@lysator.liu.se>
* x86_64/aesni/aes-encrypt-internal.asm: Read subkeys into xmm
registers before the block loop, and completely unroll the round
loop.
* x86_64/aesni/aes-decrypt-internal.asm: Likewise.
2017-11-19 Niels Möller <nisse@lysator.liu.se>
2017-11-19 Niels Möller <nisse@lysator.liu.se>
* Released nettle-3.4.
* Released nettle-3.4.
...
...
This diff is collapsed.
Click to expand it.
x86_64/aesni/aes-decrypt-internal.asm
+
70
−
36
View file @
db9b8594
...
@@ -2,7 +2,7 @@ C x86_64/aesni/aes-decrypt-internal.asm
...
@@ -2,7 +2,7 @@ C x86_64/aesni/aes-decrypt-internal.asm
ifelse
(
<
ifelse
(
<
Copyright
(
C
)
2015
Niels
M
ö
ller
Copyright
(
C
)
2015
,
2018
Niels
M
ö
ller
This
file
is
part
of
GNU
Nettle.
This
file
is
part
of
GNU
Nettle.
...
@@ -39,15 +39,22 @@ define(<LENGTH>,<%rcx>)
...
@@ -39,15 +39,22 @@ define(<LENGTH>,<%rcx>)
define
(
<
DS
T
>
,
<%
r8
>
)
define
(
<
DS
T
>
,
<%
r8
>
)
define
(
<
SRC
>
,
<%
r9
>
)
define
(
<
SRC
>
,
<%
r9
>
)
C
Round
counter
define
(
<
KEY0
>
,
<%
xmm0
>
)
define
(
<
CNT
>
,
<%
rdx
>
)
define
(
<
KEY1
>
,
<%
xmm1
>
)
C
Subkey
pointer
define
(
<
KEY2
>
,
<%
xmm2
>
)
define
(
<
KEY
>
,
<%
rax
>
)
define
(
<
KEY3
>
,
<%
xmm3
>
)
define
(
<
KEY4
>
,
<%
xmm4
>
)
dnl
aesdec
%
xmm1
,
%
xmm0
define
(
<
KEY5
>
,
<%
xmm5
>
)
define
(
<
AESDEC
>
,
<
.byte
0x66
,
0x0f
,
0x38
,
0xde
,
0xc1
>
)
define
(
<
KEY6
>
,
<%
xmm6
>
)
dnl
aesdeclast
%
xmm1
,
%
xmm0
define
(
<
KEY7
>
,
<%
xmm7
>
)
define
(
<
AESDECLAST
>
,
<
.byte
0x66
,
0x0f
,
0x38
,
0xdf
,
0xc1
>
)
define
(
<
KEY8
>
,
<%
xmm8
>
)
define
(
<
KEY9
>
,
<%
xmm9
>
)
define
(
<
KEY10
>
,
<%
xmm10
>
)
define
(
<
KEY11
>
,
<%
xmm11
>
)
define
(
<
KEY12
>
,
<%
xmm12
>
)
define
(
<
KEY13
>
,
<%
xmm13
>
)
define
(
<
KEYLAST
>
,
<%
xmm14
>
)
define
(
<
BL
OCK
>
,
<%
xmm15
>
)
.file
"
aes
-
decrypt
-
internal.asm
"
.file
"
aes
-
decrypt
-
internal.asm
"
...
@@ -58,43 +65,70 @@ define(<AESDECLAST>, <.byte 0x66, 0x0f, 0x38, 0xdf, 0xc1>)
...
@@ -58,43 +65,70 @@ define(<AESDECLAST>, <.byte 0x66, 0x0f, 0x38, 0xdf, 0xc1>)
.text
.text
ALIGN
(
16
)
ALIGN
(
16
)
PROLOGUE
(
_nettle_aes_decrypt
)
PROLOGUE
(
_nettle_aes_decrypt
)
W64_ENTRY
(
6
,
2
)
W64_ENTRY
(
6
,
16
)
shr
$
4
,
LENGTH
shr
$
4
,
LENGTH
test
LENGTH
,
LENGTH
test
LENGTH
,
LENGTH
jz
.Lend
jz
.Lend
decl
XREG
(
ROUNDS
)
movups
(
KEYS
),
KEY0
movups
16
(
KEYS
),
KEY1
movups
32
(
KEYS
),
KEY2
movups
48
(
KEYS
),
KEY3
movups
64
(
KEYS
),
KEY4
movups
80
(
KEYS
),
KEY5
movups
96
(
KEYS
),
KEY6
movups
112
(
KEYS
),
KEY7
movups
128
(
KEYS
),
KEY8
movups
144
(
KEYS
),
KEY9
lea
160
(
KEYS
),
KEYS
sub
$
10
,
XREG
(
ROUNDS
)
C
Al
so
cl
ears
high
half
je
.Lkey_last
movups
(
KEYS
),
KEY10
movups
16
(
KEYS
),
KEY11
lea
(
KEYS
,
ROUNDS
,
8
),
KEYS
lea
(
KEYS
,
ROUNDS
,
8
),
KEYS
cmpl
$
2
,
XREG
(
ROUNDS
)
je
.Lkey_last
movups
-
32
(
KEYS
),
KEY12
movups
-
16
(
KEYS
),
KEY13
.Lkey_last:
movups
(
KEYS
),
KEYLAST
.Lblock_loop:
.Lblock_loop:
mov
ROUNDS
,
CNT
movups
(
SRC
),
BL
OCK
mov
KEYS
,
KEY
pxor
KEY0
,
BL
OCK
movups
(
SRC
),
%
xmm0
aesdec
KEY1
,
BL
OCK
C
FIXME
:
Better
al
ignment
of
subkeys
,
so
we
can
use
movaps.
aesdec
KEY2
,
BL
OCK
movups
(
KEY
),
%
xmm1
aesdec
KEY3
,
BL
OCK
pxor
%
xmm1
,
%
xmm0
aesdec
KEY4
,
BL
OCK
aesdec
KEY5
,
BL
OCK
C
FIXME
:
Could
use
some
unrolling.
Al
so
al
l
subkeys
fit
in
aesdec
KEY6
,
BL
OCK
C
registers
,
so
they
could
be
loaded
once
(
on
W64
we
would
aesdec
KEY7
,
BL
OCK
C
need
to
save
and
restore
some
xmm
registers
,
though
)
.
aesdec
KEY8
,
BL
OCK
aesdec
KEY9
,
BL
OCK
.Lround_loop:
testl
XREG
(
ROUNDS
),
XREG
(
ROUNDS
)
add
$
16
,
KEY
je
.Lblock_end
aesdec
KEY10
,
BL
OCK
movups
(
KEY
),
%
xmm1
aesdec
KEY11
,
BL
OCK
AESDEC
C
%
xmm1
,
%
xmm0
cmpl
$
2
,
XREG
(
ROUNDS
)
decl
XREG
(
CNT
)
je
.Lblock_end
jnz
.Lround_loop
aesdec
KEY12
,
BL
OCK
movups
16
(
KEY
),
%
xmm1
aesdec
KEY13
,
BL
OCK
AESDECLAST
C
%
xmm1
,
%
xmm0
.Lblock_end:
movups
%
xmm0
,
(
DS
T
)
aesdeclast
KEYLAST
,
BL
OCK
movups
BL
OCK
,
(
DS
T
)
add
$
16
,
SRC
add
$
16
,
SRC
add
$
16
,
DS
T
add
$
16
,
DS
T
dec
LENGTH
dec
LENGTH
jnz
.Lblock_loop
jnz
.Lblock_loop
.Lend:
.Lend:
W64_EXIT
(
6
,
2
)
W64_EXIT
(
6
,
16
)
ret
ret
EPILOGUE
(
_nettle_aes_decrypt
)
EPILOGUE
(
_nettle_aes_decrypt
)
This diff is collapsed.
Click to expand it.
x86_64/aesni/aes-encrypt-internal.asm
+
70
−
36
View file @
db9b8594
...
@@ -2,7 +2,7 @@ C x86_64/aesni/aes-encrypt-internal.asm
...
@@ -2,7 +2,7 @@ C x86_64/aesni/aes-encrypt-internal.asm
ifelse
(
<
ifelse
(
<
Copyright
(
C
)
2015
Niels
M
ö
ller
Copyright
(
C
)
2015
,
2018
Niels
M
ö
ller
This
file
is
part
of
GNU
Nettle.
This
file
is
part
of
GNU
Nettle.
...
@@ -39,15 +39,22 @@ define(<LENGTH>,<%rcx>)
...
@@ -39,15 +39,22 @@ define(<LENGTH>,<%rcx>)
define
(
<
DS
T
>
,
<%
r8
>
)
define
(
<
DS
T
>
,
<%
r8
>
)
define
(
<
SRC
>
,
<%
r9
>
)
define
(
<
SRC
>
,
<%
r9
>
)
C
Round
counter
define
(
<
KEY0
>
,
<%
xmm0
>
)
define
(
<
CNT
>
,
<%
rdx
>
)
define
(
<
KEY1
>
,
<%
xmm1
>
)
C
Subkey
pointer
define
(
<
KEY2
>
,
<%
xmm2
>
)
define
(
<
KEY
>
,
<%
rax
>
)
define
(
<
KEY3
>
,
<%
xmm3
>
)
define
(
<
KEY4
>
,
<%
xmm4
>
)
dnl
aesenc
%
xmm1
,
%
xmm0
define
(
<
KEY5
>
,
<%
xmm5
>
)
define
(
<
AESENC
>
,
<
.byte
0x66
,
0x0f
,
0x38
,
0xdc
,
0xc1
>
)
define
(
<
KEY6
>
,
<%
xmm6
>
)
dnl
aesenclast
%
xmm1
,
%
xmm0
define
(
<
KEY7
>
,
<%
xmm7
>
)
define
(
<
AESENCLAST
>
,
<
.byte
0x66
,
0x0f
,
0x38
,
0xdd
,
0xc1
>
)
define
(
<
KEY8
>
,
<%
xmm8
>
)
define
(
<
KEY9
>
,
<%
xmm9
>
)
define
(
<
KEY10
>
,
<%
xmm10
>
)
define
(
<
KEY11
>
,
<%
xmm11
>
)
define
(
<
KEY12
>
,
<%
xmm12
>
)
define
(
<
KEY13
>
,
<%
xmm13
>
)
define
(
<
KEYLAST
>
,
<%
xmm14
>
)
define
(
<
BL
OCK
>
,
<%
xmm15
>
)
.file
"
aes
-
encrypt
-
internal.asm
"
.file
"
aes
-
encrypt
-
internal.asm
"
...
@@ -58,43 +65,70 @@ define(<AESENCLAST>, <.byte 0x66, 0x0f, 0x38, 0xdd, 0xc1>)
...
@@ -58,43 +65,70 @@ define(<AESENCLAST>, <.byte 0x66, 0x0f, 0x38, 0xdd, 0xc1>)
.text
.text
ALIGN
(
16
)
ALIGN
(
16
)
PROLOGUE
(
_nettle_aes_encrypt
)
PROLOGUE
(
_nettle_aes_encrypt
)
W64_ENTRY
(
6
,
2
)
W64_ENTRY
(
6
,
16
)
shr
$
4
,
LENGTH
shr
$
4
,
LENGTH
test
LENGTH
,
LENGTH
test
LENGTH
,
LENGTH
jz
.Lend
jz
.Lend
decl
XREG
(
ROUNDS
)
movups
(
KEYS
),
KEY0
movups
16
(
KEYS
),
KEY1
movups
32
(
KEYS
),
KEY2
movups
48
(
KEYS
),
KEY3
movups
64
(
KEYS
),
KEY4
movups
80
(
KEYS
),
KEY5
movups
96
(
KEYS
),
KEY6
movups
112
(
KEYS
),
KEY7
movups
128
(
KEYS
),
KEY8
movups
144
(
KEYS
),
KEY9
lea
160
(
KEYS
),
KEYS
sub
$
10
,
XREG
(
ROUNDS
)
C
Al
so
cl
ears
high
half
je
.Lkey_last
movups
(
KEYS
),
KEY10
movups
16
(
KEYS
),
KEY11
lea
(
KEYS
,
ROUNDS
,
8
),
KEYS
lea
(
KEYS
,
ROUNDS
,
8
),
KEYS
cmpl
$
2
,
XREG
(
ROUNDS
)
je
.Lkey_last
movups
-
32
(
KEYS
),
KEY12
movups
-
16
(
KEYS
),
KEY13
.Lkey_last:
movups
(
KEYS
),
KEYLAST
.Lblock_loop:
.Lblock_loop:
mov
ROUNDS
,
CNT
movups
(
SRC
),
BL
OCK
mov
KEYS
,
KEY
pxor
KEY0
,
BL
OCK
movups
(
SRC
),
%
xmm0
aesenc
KEY1
,
BL
OCK
C
FIXME
:
Better
al
ignment
of
subkeys
,
so
we
can
use
movaps.
aesenc
KEY2
,
BL
OCK
movups
(
KEY
),
%
xmm1
aesenc
KEY3
,
BL
OCK
pxor
%
xmm1
,
%
xmm0
aesenc
KEY4
,
BL
OCK
aesenc
KEY5
,
BL
OCK
C
FIXME
:
Could
use
some
unrolling.
Al
so
al
l
subkeys
fit
in
aesenc
KEY6
,
BL
OCK
C
registers
,
so
they
could
be
loaded
once
(
on
W64
we
would
aesenc
KEY7
,
BL
OCK
C
need
to
save
and
restore
some
xmm
registers
,
though
)
.
aesenc
KEY8
,
BL
OCK
aesenc
KEY9
,
BL
OCK
.Lround_loop:
testl
XREG
(
ROUNDS
),
XREG
(
ROUNDS
)
add
$
16
,
KEY
je
.Lblock_end
aesenc
KEY10
,
BL
OCK
movups
(
KEY
),
%
xmm1
aesenc
KEY11
,
BL
OCK
AESENC
C
%
xmm1
,
%
xmm0
cmpl
$
2
,
XREG
(
ROUNDS
)
decl
XREG
(
CNT
)
je
.Lblock_end
jnz
.Lround_loop
aesenc
KEY12
,
BL
OCK
movups
16
(
KEY
),
%
xmm1
aesenc
KEY13
,
BL
OCK
AESENCLAST
C
%
xmm1
,
%
xmm0
.Lblock_end:
movups
%
xmm0
,
(
DS
T
)
aesenclast
KEYLAST
,
BL
OCK
movups
BL
OCK
,
(
DS
T
)
add
$
16
,
SRC
add
$
16
,
SRC
add
$
16
,
DS
T
add
$
16
,
DS
T
dec
LENGTH
dec
LENGTH
jnz
.Lblock_loop
jnz
.Lblock_loop
.Lend:
.Lend:
W64_EXIT
(
6
,
2
)
W64_EXIT
(
6
,
16
)
ret
ret
EPILOGUE
(
_nettle_aes_encrypt
)
EPILOGUE
(
_nettle_aes_encrypt
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment