Dmitry Baryshkov / nettle

Commit 93694133, authored Nov 24, 2014 by Niels Möller

    Reorganized x86_64 memxor.
parent ac2cf647

Changes: 4 files
ChangeLog
2014-11-24  Niels Möller  <nisse@lysator.liu.se>

	* x86_64/memxor3.asm (memxor3): New file, code moved from old
	memxor.asm.
	* x86_64/memxor.asm (memxor): Rewritten, no longer jumps into
	memxor3.
	* configure.ac (asm_replace_list): Added memxor.asm and
	memxor3.asm.
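For reference while reading the diffs below: memxor(dst, src, n) XORs src into dst in place, while memxor3(dst, a, b, n) writes a XOR b to dst. A minimal portable C sketch of the two (reference behavior only, with hypothetical helper names, not the optimized assembly in this commit):

#include <stddef.h>

/* memxor: dst[i] ^= src[i], iterating downward like the assembly. */
static void *
memxor_ref(void *dst, const void *src, size_t n)
{
  unsigned char *d = dst;
  const unsigned char *s = src;
  while (n > 0)
    {
      n--;
      d[n] ^= s[n];
    }
  return dst;
}

/* memxor3: dst[i] = a[i] ^ b[i]; needs two source loads per store. */
static void *
memxor3_ref(void *dst, const void *a, const void *b, size_t n)
{
  unsigned char *d = dst;
  const unsigned char *ap = a, *bp = b;
  while (n > 0)
    {
      n--;
      d[n] = ap[n] ^ bp[n];
    }
  return dst;
}

The split lets the two-operand memxor() use a tighter loop (one load plus an XOR into memory per element) instead of going through memxor3's three-pointer code path.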
2014-10-23  Niels Möller  <nisse@lysator.liu.se>

	* configure.ac (IF_ASM): New substituted variable.
...
configure.ac
...

@@ -272,7 +272,8 @@ fi
 # to a new object file).
 asm_replace_list="aes-encrypt-internal.asm aes-decrypt-internal.asm \
 		  arcfour-crypt.asm camellia-crypt-internal.asm \
-		  md5-compress.asm poly1305-internal.asm \
+		  md5-compress.asm memxor.asm memxor3.asm \
+		  poly1305-internal.asm \
 		  chacha-core-internal.asm \
 		  salsa20-crypt.asm salsa20-core-internal.asm \
 		  serpent-encrypt.asm serpent-decrypt.asm \
...
x86_64/memxor.asm
 C x86_64/memxor.asm

 ifelse(<
-   Copyright (C) 2010, Niels Möller
+   Copyright (C) 2010, 2014, Niels Möller

    This file is part of GNU Nettle.
...
...
@@ -32,9 +32,8 @@ ifelse(<
 	C Register usage:
 define(<DST>, <%rax>)	C Originally in %rdi
-define(<AP>, <%rsi>)
-define(<BP>, <%rdx>)
-define(<N>, <%r10>)
+define(<SRC>, <%rsi>)
+define(<N>, <%rdx>)
 define(<TMP>, <%r8>)
 define(<TMP2>, <%r9>)
 define(<CNT>, <%rdi>)
...
...
@@ -53,20 +52,7 @@ define(<USE_SSE2>, <no>)
 PROLOGUE(nettle_memxor)
 	W64_ENTRY(3, 0)
-	mov	%rdx, %r10
-	mov	%rdi, %rdx
-	jmp	.Lmemxor3_entry
-EPILOGUE(nettle_memxor)
-
-	C memxor3(void *dst, const void *a, const void *b, size_t n)
-	C 	        %rdi            %rsi            %rdx    %rcx
-	ALIGN(16)
-
-PROLOGUE(nettle_memxor3)
-	W64_ENTRY(4, 0)
-	C %cl needed for shift count, so move away N
-	mov	%rcx, N
-.Lmemxor3_entry:
 	test	N, N
 	C Get number of unaligned bytes at the end
 	C %rdi is used as CNT, %rax as DST and as return value
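The removed lines above are the old entry path: memxor() moved its count into %r10 (N) and its destination into %rdx (BP), then jumped into memxor3's entry, effectively computing memxor3(dst, src, dst, n). A self-contained C sketch of that old equivalence (hypothetical helper name, byte-at-a-time for clarity):

#include <stddef.h>

/* Old memxor behavior: the 3-operand form with b aliased to dst. */
static void *
memxor_old(void *dst, const void *src, size_t n)
{
  unsigned char *d = dst;
  const unsigned char *s = src;
  while (n > 0)
    {
      n--;
      d[n] = s[n] ^ d[n];   /* memxor3(dst, src, dst, n), one byte at a time */
    }
  return dst;
}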
...
...
@@ -87,9 +73,8 @@ PROLOGUE(nettle_memxor3)
 .Lalign_loop:
 	sub	$1, N
-	movb	(AP, N), LREG(TMP)
-	xorb	(BP, N), LREG(TMP)
-	movb	LREG(TMP), (DST, N)
+	movb	(SRC, N), LREG(TMP)
+	xorb	LREG(TMP), (DST, N)
 	sub	$1, CNT
 	jnz	.Lalign_loop
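This loop peels the unaligned tail bytes one at a time (CNT is computed earlier, in code outside this hunk, as (dst + n) & 7) so the main loop can use word-aligned stores; with a single source the new body is one load plus an XOR into memory instead of two loads and a store. A C sketch of the peel (hypothetical helper, assuming cnt was computed as above):

#include <stddef.h>

/* Strip `cnt` trailing bytes so dst + n becomes 8-byte aligned;
   returns the byte count remaining for the word loop. */
static size_t
peel_tail(unsigned char *dst, const unsigned char *src,
          size_t n, size_t cnt)
{
  while (cnt > 0)
    {
      cnt--;
      n--;
      dst[n] ^= src[n];
    }
  return n;
}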
...
...
@@ -98,83 +83,7 @@ ifelse(USE_SSE2, yes, <
 	cmp	$16, N
 	jnc	.Lsse2_case
 >)
-	C Check for the case that AP and BP have the same alignment,
-	C but different from DST.
-	mov	AP, TMP
-	sub	BP, TMP
-	test	$7, TMP
-	jnz	.Lno_shift_case
-	mov	AP, %rcx
-	sub	DST, %rcx
-	and	$7, %rcx
-	jz	.Lno_shift_case
-	sub	%rcx, AP
-	sub	%rcx, BP
-	shl	$3, %rcx
-
-	C Unrolling, with aligned values alternating in S0 and S1
-	test	$8, N
-	jnz	.Lshift_odd
-	mov	(AP, N), S1
-	xor	(BP, N), S1
-	jmp	.Lshift_next
-
-.Lshift_odd:
-	mov	-8(AP, N), S1
-	mov	(AP, N), S0
-	xor	-8(BP, N), S1
-	xor	(BP, N), S0
-	mov	S1, TMP
-	shr	%cl, TMP
-	neg	%cl
-	shl	%cl, S0
-	neg	%cl
-	or	S0, TMP
-	mov	TMP, -8(DST, N)
-	sub	$8, N
-	jz	.Ldone
-	jmp	.Lshift_next
-
-	ALIGN(16)
-
-.Lshift_loop:
-	mov	8(AP, N), S0
-	xor	8(BP, N), S0
-	mov	S0, TMP
-	shr	%cl, TMP
-	neg	%cl
-	shl	%cl, S1
-	neg	%cl
-	or	S1, TMP
-	mov	TMP, 8(DST, N)
-
-	mov	(AP, N), S1
-	xor	(BP, N), S1
-	mov	S1, TMP
-	shr	%cl, TMP
-	neg	%cl
-	shl	%cl, S0
-	neg	%cl
-	or	S0, TMP
-	mov	TMP, (DST, N)
-.Lshift_next:
-	sub	$16, N
-	C FIXME: Handle the case N == 16 specially,
-	C like in the non-shifted case?
-C	ja	.Lshift_loop
-C	jz	.Ldone
-	jnc	.Lshift_loop
-
-	add	$15, N
-	jnc	.Ldone
-
-	shr	$3, %rcx
-	add	%rcx, AP
-	add	%rcx, BP
-	jmp	.Lfinal_loop
-
-.Lno_shift_case:
 	C Next destination word is -8(DST, N)
 	C Setup for unrolling
 	test	$8, N
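The block removed above is the "shift case": when a and b share the same sub-word alignment but differ from dst, the source pointers are rounded down to aligned addresses and each output word is spliced from two neighboring aligned words. The old memxor entry set BP = DST, so the condition "AP and BP aligned alike, but differently from DST" could never hold and memxor never reached this path; it can live in memxor3.asm alone. The shr %cl / neg %cl / shl %cl / neg %cl / or sequence computes (lo >> cl) | (hi << (64 - cl)), exploiting that x86-64 shift counts are taken mod 64, so negating %cl yields the complementary count. A C sketch of that splice (hypothetical helper, little-endian, 0 < shift < 64):

#include <stdint.h>

/* Combine two adjacent aligned source words into one aligned
   destination word offset by `shift` bits. */
static uint64_t
splice(uint64_t lo, uint64_t hi, unsigned shift)
{
  return (lo >> shift) | (hi << (64 - shift));
}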
...
...
@@ -183,21 +92,18 @@ C	jz	.Ldone
 	sub	$8, N
 	jz	.Lone_word
-	mov	(AP, N), TMP
-	xor	(BP, N), TMP
-	mov	TMP, (DST, N)
+	mov	(SRC, N), TMP
+	xor	TMP, (DST, N)

 	jmp	.Lword_next

 	ALIGN(16)

 .Lword_loop:
-	mov	8(AP, N), TMP
-	mov	(AP, N), TMP2
-	xor	8(BP, N), TMP
-	xor	(BP, N), TMP2
-	mov	TMP, 8(DST, N)
-	mov	TMP2, (DST, N)
+	mov	8(SRC, N), TMP
+	mov	(SRC, N), TMP2
+	xor	TMP, 8(DST, N)
+	xor	TMP2, (DST, N)

 .Lword_next:
 	sub	$16, N
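The word loop is unrolled twice, handling 16 bytes per iteration with N counting down. A C sketch of the rewritten single-source version (hypothetical helper, assuming 8-byte aligned buffers and a word count rather than a byte count):

#include <stddef.h>
#include <stdint.h>

/* Two 64-bit words per iteration, indexing downward like the assembly. */
static void
xor_words(uint64_t *d, const uint64_t *s, size_t nwords)
{
  while (nwords >= 2)
    {
      d[nwords - 1] ^= s[nwords - 1];
      d[nwords - 2] ^= s[nwords - 2];
      nwords -= 2;
    }
  if (nwords)
    d[0] ^= s[0];
}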
...
...
@@ -205,33 +111,28 @@ C	jz	.Ldone
 	jnz	.Lfinal

 	C Final operation is word aligned
-	mov	8(AP, N), TMP
-	xor	8(BP, N), TMP
-	mov	TMP, 8(DST, N)
+	mov	8(SRC, N), TMP
+	xor	TMP, 8(DST, N)

 .Lone_word:
-	mov	(AP, N), TMP
-	xor	(BP, N), TMP
-	mov	TMP, (DST, N)
+	mov	(SRC, N), TMP
+	xor	TMP, (DST, N)

-	C ENTRY might have been 3 args, too, but it doesn't matter for the exit
-	W64_EXIT(4, 0)
+	W64_EXIT(3, 0)
 	ret

 .Lfinal:
 	add	$15, N

 .Lfinal_loop:
-	movb	(AP, N), LREG(TMP)
-	xorb	(BP, N), LREG(TMP)
-	movb	LREG(TMP), (DST, N)
+	movb	(SRC, N), LREG(TMP)
+	xorb	LREG(TMP), (DST, N)
 .Lfinal_next:
 	sub	$1, N
 	jnc	.Lfinal_loop

 .Ldone:
-	C ENTRY might have been 3 args, too, but it doesn't matter for the exit
-	W64_EXIT(4, 0)
+	W64_EXIT(3, 0)
 	ret

 ifelse(USE_SSE2, yes, <
...
...
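A note on the loop mechanics above: sub $16, N doubles as the loop test. Once N goes negative, add $15, N converts it into the index of the last leftover byte, so the final loop can count down with sub $1, N / jnc until the index wraps below zero. The same tail logic in C, with a signed index (hypothetical helper, assuming n < 16 on entry):

#include <stddef.h>

/* Byte-wise tail: mirrors `add $15, N` followed by the
   sub $1, N / jnc .Lfinal_loop countdown. */
static void
xor_final(unsigned char *d, const unsigned char *s, size_t n)
{
  ptrdiff_t i = (ptrdiff_t) n - 1;   /* index of last remaining byte */
  while (i >= 0)
    {
      d[i] ^= s[i];
      i--;
    }
}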
@@ -241,15 +142,14 @@ ifelse(USE_SSE2, yes, <
 	test	$8, TMP
 	jz	.Lsse2_next
 	sub	$8, N
-	mov	(AP, N), TMP
-	xor	(BP, N), TMP
-	mov	TMP, (DST, N)
+	mov	(SRC, N), TMP
+	xor	TMP, (DST, N)
 	jmp	.Lsse2_next

 	ALIGN(16)
 .Lsse2_loop:
-	movdqu	(AP, N), %xmm0
-	movdqu	(BP, N), %xmm1
+	movdqu	(SRC, N), %xmm0
+	movdqa	(DST, N), %xmm1
 	pxor	%xmm0, %xmm1
 	movdqa	%xmm1, (DST, N)
 .Lsse2_next:
...
...
@@ -261,14 +161,13 @@ ifelse(USE_SSE2, yes, <
 	jnz	.Lfinal

 	C Final operation is aligned
-	movdqu	(AP), %xmm0
-	movdqu	(BP), %xmm1
+	movdqu	(SRC), %xmm0
+	movdqa	(DST), %xmm1
 	pxor	%xmm0, %xmm1
 	movdqa	%xmm1, (DST)
-	C ENTRY might have been 3 args, too, but it doesn't matter for the exit
-	W64_EXIT(4, 0)
+	W64_EXIT(3, 0)
 	ret
 >)

-EPILOGUE(nettle_memxor3)
+EPILOGUE(nettle_memxor)
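The USE_SSE2 path in the two hunks above (compiled out by default, since USE_SSE2 is defined to no) XORs 16 bytes per iteration: an unaligned load from the source, then an aligned load from and aligned store to the destination. An intrinsics sketch of the same idea (my illustration, not Nettle code; assumes dst is 16-byte aligned and n is a multiple of 16):

#include <emmintrin.h>	/* SSE2 */
#include <stddef.h>

static void
xor_blocks_sse2(unsigned char *dst, const unsigned char *src, size_t n)
{
  size_t i;
  for (i = 0; i < n; i += 16)
    {
      __m128i a = _mm_loadu_si128((const __m128i *)(src + i));	/* movdqu */
      __m128i b = _mm_load_si128((const __m128i *)(dst + i));	/* movdqa */
      _mm_store_si128((__m128i *)(dst + i), _mm_xor_si128(a, b));
    }
}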
x86_64/memxor3.asm  (new file, mode 100644)
C x86_64/memxor3.asm

ifelse(<
   Copyright (C) 2010, 2014 Niels Möller

   This file is part of GNU Nettle.

   GNU Nettle is free software: you can redistribute it and/or
   modify it under the terms of either:

     * the GNU Lesser General Public License as published by the Free
       Software Foundation; either version 3 of the License, or (at your
       option) any later version.

   or

     * the GNU General Public License as published by the Free
       Software Foundation; either version 2 of the License, or (at your
       option) any later version.

   or both in parallel, as here.

   GNU Nettle is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received copies of the GNU General Public License and
   the GNU Lesser General Public License along with this program.  If
   not, see http://www.gnu.org/licenses/.
>)

	C Register usage:
define(<DST>, <%rax>)	C Originally in %rdi
define(<AP>, <%rsi>)
define(<BP>, <%rdx>)
define(<N>, <%r10>)
define(<TMP>, <%r8>)
define(<TMP2>, <%r9>)
define(<CNT>, <%rdi>)
define(<S0>, <%r11>)
define(<S1>, <%rdi>)	C Overlaps with CNT

define(<USE_SSE2>, <no>)

	.file "memxor3.asm"

	.text

	C memxor3(void *dst, const void *a, const void *b, size_t n)
	C 	        %rdi            %rsi            %rdx    %rcx
	ALIGN(16)

PROLOGUE(nettle_memxor3)
	W64_ENTRY(4, 0)
	C %cl needed for shift count, so move away N
	mov	%rcx, N
.Lmemxor3_entry:
	test	N, N
	C Get number of unaligned bytes at the end
	C %rdi is used as CNT, %rax as DST and as return value
	mov	%rdi, %rax
	jz	.Ldone
	add	N, CNT
	and	$7, CNT
	jz	.Laligned

	cmp	$8, N
	jc	.Lfinal_next

	C FIXME: Instead of this loop, could try cmov with memory
	C destination, as a sequence of one 8-bit, one 16-bit and one
	C 32-bit operations. (Except that cmov can't do 8-bit ops, so
	C that step has to use a conditional).
.Lalign_loop:
	sub	$1, N
	movb	(AP, N), LREG(TMP)
	xorb	(BP, N), LREG(TMP)
	movb	LREG(TMP), (DST, N)
	sub	$1, CNT
	jnz	.Lalign_loop

.Laligned:
ifelse(USE_SSE2, yes, <
	cmp	$16, N
	jnc	.Lsse2_case
>)
	C Check for the case that AP and BP have the same alignment,
	C but different from DST.
	mov	AP, TMP
	sub	BP, TMP
	test	$7, TMP
	jnz	.Lno_shift_case
	mov	AP, %rcx
	sub	DST, %rcx
	and	$7, %rcx
	jz	.Lno_shift_case
	sub	%rcx, AP
	sub	%rcx, BP
	shl	$3, %rcx

	C Unrolling, with aligned values alternating in S0 and S1
	test	$8, N
	jnz	.Lshift_odd
	mov	(AP, N), S1
	xor	(BP, N), S1
	jmp	.Lshift_next

.Lshift_odd:
	mov	-8(AP, N), S1
	mov	(AP, N), S0
	xor	-8(BP, N), S1
	xor	(BP, N), S0
	mov	S1, TMP
	shr	%cl, TMP
	neg	%cl
	shl	%cl, S0
	neg	%cl
	or	S0, TMP
	mov	TMP, -8(DST, N)
	sub	$8, N
	jz	.Ldone
	jmp	.Lshift_next

	ALIGN(16)

.Lshift_loop:
	mov	8(AP, N), S0
	xor	8(BP, N), S0
	mov	S0, TMP
	shr	%cl, TMP
	neg	%cl
	shl	%cl, S1
	neg	%cl
	or	S1, TMP
	mov	TMP, 8(DST, N)

	mov	(AP, N), S1
	xor	(BP, N), S1
	mov	S1, TMP
	shr	%cl, TMP
	neg	%cl
	shl	%cl, S0
	neg	%cl
	or	S0, TMP
	mov	TMP, (DST, N)
.Lshift_next:
	sub	$16, N
	C FIXME: Handle the case N == 16 specially,
	C like in the non-shifted case?
C	ja	.Lshift_loop
C	jz	.Ldone
	jnc	.Lshift_loop

	add	$15, N
	jnc	.Ldone

	shr	$3, %rcx
	add	%rcx, AP
	add	%rcx, BP
	jmp	.Lfinal_loop

.Lno_shift_case:
	C Next destination word is -8(DST, N)
	C Setup for unrolling
	test	$8, N
	jz	.Lword_next

	sub	$8, N
	jz	.Lone_word

	mov	(AP, N), TMP
	xor	(BP, N), TMP
	mov	TMP, (DST, N)

	jmp	.Lword_next

	ALIGN(16)

.Lword_loop:
	mov	8(AP, N), TMP
	mov	(AP, N), TMP2
	xor	8(BP, N), TMP
	xor	(BP, N), TMP2
	mov	TMP, 8(DST, N)
	mov	TMP2, (DST, N)

.Lword_next:
	sub	$16, N
	ja	.Lword_loop	C Not zero and no carry
	jnz	.Lfinal

	C Final operation is word aligned
	mov	8(AP, N), TMP
	xor	8(BP, N), TMP
	mov	TMP, 8(DST, N)

.Lone_word:
	mov	(AP, N), TMP
	xor	(BP, N), TMP
	mov	TMP, (DST, N)

	C ENTRY might have been 3 args, too, but it doesn't matter for the exit
	W64_EXIT(4, 0)
	ret

.Lfinal:
	add	$15, N

.Lfinal_loop:
	movb	(AP, N), LREG(TMP)
	xorb	(BP, N), LREG(TMP)
	movb	LREG(TMP), (DST, N)
.Lfinal_next:
	sub	$1, N
	jnc	.Lfinal_loop

.Ldone:
	C ENTRY might have been 3 args, too, but it doesn't matter for the exit
	W64_EXIT(4, 0)
	ret

ifelse(USE_SSE2, yes, <
.Lsse2_case:
	lea	(DST, N), TMP
	test	$8, TMP
	jz	.Lsse2_next
	sub	$8, N
	mov	(AP, N), TMP
	xor	(BP, N), TMP
	mov	TMP, (DST, N)
	jmp	.Lsse2_next

	ALIGN(16)
.Lsse2_loop:
	movdqu	(AP, N), %xmm0
	movdqu	(BP, N), %xmm1
	pxor	%xmm0, %xmm1
	movdqa	%xmm1, (DST, N)
.Lsse2_next:
	sub	$16, N
	ja	.Lsse2_loop

	C FIXME: See if we can do a full word first, before the
	C byte-wise final loop.
	jnz	.Lfinal

	C Final operation is aligned
	movdqu	(AP), %xmm0
	movdqu	(BP), %xmm1
	pxor	%xmm0, %xmm1
	movdqa	%xmm1, (DST)
	C ENTRY might have been 3 args, too, but it doesn't matter for the exit
	W64_EXIT(4, 0)
	ret
>)

EPILOGUE(nettle_memxor3)
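A quick way to exercise both entry points across the byte-tail, one-word, and unrolled paths is to sweep all small sizes. A minimal harness against the installed library (a sketch, assuming Nettle is installed; compile with cc test.c -lnettle; memxor and memxor3 are declared in <nettle/memxor.h>):

#include <nettle/memxor.h>
#include <assert.h>
#include <string.h>

int
main(void)
{
  unsigned char a[25], b[25], dst[25], ref[25];
  size_t n, i;

  for (i = 0; i < sizeof(a); i++)
    {
      a[i] = (unsigned char) i;
      b[i] = (unsigned char) (0xa5 ^ i);
    }
  for (n = 0; n <= sizeof(a); n++)	/* odd sizes hit the tail loops */
    {
      for (i = 0; i < n; i++)
	ref[i] = a[i] ^ b[i];
      memxor3(dst, a, b, n);
      assert(memcmp(dst, ref, n) == 0);
      memcpy(dst, a, n);
      memxor(dst, b, n);
      assert(memcmp(dst, ref, n) == 0);
    }
  return 0;
}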