Commit 20525ae7, authored Jan 11, 2015 by Niels Möller

    Merge branch 'memxor-reorg' into master.

Parents: 09311caa 89a6fe72
Changes: 11 files
ChangeLog
2015-01-11  Niels Möller  <nisse@lysator.liu.se>

	Merged memxor-reorg changes, starting at 2014-10-23.

2015-01-10  Niels Möller  <nisse@lysator.liu.se>

	* arm/memxor.asm (memxor3): Moved to new file.
	* arm/memxor3.asm: New file.

2014-11-24  Niels Möller  <nisse@lysator.liu.se>

	* x86_64/memxor3.asm (memxor3): New file, code moved from old
	memxor.asm.
	* x86_64/memxor.asm (memxor): Rewritten, no longer jumps into
	memxor3.
	* configure.ac (asm_replace_list): Added memxor.asm and
	memxor3.asm.

2014-10-23  Niels Möller  <nisse@lysator.liu.se>

	* configure.ac (IF_ASM): New substituted variable.
	* testsuite/Makefile.in (VALGRIND): Allow partial loads only when
	build includes assembly files.
	* memxor-internal.h (READ_PARTIAL): New macro.
	* memxor.c (memxor_different_alignment): Avoid out-of-bounds
	reads, corresponding to valgrind's --partial-loads-ok. Use
	READ_PARTIAL.
	* memxor3.c: Analogous changes for unaligned operations.

	* configure.ac (asm_replace_list): Deleted memxor.asm, now
	incompatible with the memxor/memxor3 split.

	* memxor3.c: New file, split off from memxor.c.
	* memxor-internal.h: New file, declarations shared by memxor.c and
	memxor3.c.
	* memxor.c: memxor3 functions moved out from this file.
	* Makefile.in (nettle_SOURCES): Added memxor3.c.
	(DISTFILES): Added memxor-internal.h.

	* memxor.c (memxor_common_alignment, memxor_different_alignment)
	(memxor): Change loop order, iterate from the end.
	(memxor3_common_alignment): Unroll twice.
	(word_t): On x86_64, unconditionally define as uint64_t, to get 64
	bits also on M$ Windows. Replaced all uses of SIZEOF_LONG.

2014-12-12  Niels Möller  <nisse@lysator.liu.se>

	* cbc.h (CBC_ENCRYPT, CBC_DECRYPT): Make type-checking hack
...
...
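For context, the reorganization above separates the two operations that the old memxor.c mixed together: the in-place XOR (memxor) and the three-operand XOR (memxor3). The following is a minimal byte-wise C sketch of the two entry points; the _sketch names are hypothetical, and the real memxor.c and memxor3.c operate on word_t-sized chunks and, per the 2014-10-23 entry, iterate from the end of the buffers:

    #include <stddef.h>
    #include <stdint.h>

    /* In-place variant, kept in memxor.c: dst ^= src. */
    void *
    memxor_sketch(void *dst, const void *src, size_t n)
    {
      uint8_t *d = dst;
      const uint8_t *s = src;
      while (n-- > 0)           /* iterate from the end, like the real code */
        d[n] ^= s[n];
      return dst;
    }

    /* Three-operand variant, split off into memxor3.c: dst = a ^ b. */
    void *
    memxor3_sketch(void *dst, const void *a, const void *b, size_t n)
    {
      uint8_t *d = dst;
      const uint8_t *s_a = a;
      const uint8_t *s_b = b;
      while (n-- > 0)
        d[n] = s_a[n] ^ s_b[n];
      return dst;
    }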
Makefile.in
...
...
@@ -109,7 +109,7 @@ nettle_SOURCES = aes-decrypt-internal.c aes-decrypt.c \
 		 knuth-lfib.c \
 		 md2.c md2-meta.c md4.c md4-meta.c \
 		 md5.c md5-compress.c md5-compat.c md5-meta.c \
-		 memxor.c \
+		 memxor.c memxor3.c \
 		 nettle-meta-aeads.c nettle-meta-armors.c \
 		 nettle-meta-ciphers.c nettle-meta-hashes.c \
 		 pbkdf2.c pbkdf2-hmac-sha1.c pbkdf2-hmac-sha256.c \
...
...
@@ -216,7 +216,7 @@ DISTFILES = $(SOURCES) $(HEADERS) getopt.h getopt_int.h \
 	$(des_headers) descore.README \
 	aes-internal.h camellia-internal.h serpent-internal.h \
 	cast128_sboxes.h desinfo.h desCode.h \
-	nettle-internal.h nettle-write.h \
+	memxor-internal.h nettle-internal.h nettle-write.h \
 	gmp-glue.h ecc-internal.h \
 	mini-gmp.h mini-gmp.c asm.m4 \
 	nettle.texinfo nettle.info nettle.html nettle.pdf sha-example.c
...
...
arm/memxor.asm
...
...
@@ -227,274 +227,3 @@ PROLOGUE(nettle_memxor)
 	b	.Lmemxor_bytes
 EPILOGUE(nettle_memxor)
-
-define(<DST>, <r0>)
-define(<AP>, <r1>)
-define(<BP>, <r2>)
-define(<N>, <r3>)
-undefine(<CNT>)
-undefine(<TNC>)
-
-C Temporaries r4-r7
-define(<ACNT>, <r8>)
-define(<ATNC>, <r10>)
-define(<BCNT>, <r11>)
-define(<BTNC>, <r12>)
-
-	C memxor3(void *dst, const void *a, const void *b, size_t n)
-	.align 2
-PROLOGUE(nettle_memxor3)
-	cmp	N, #0
-	beq	.Lmemxor3_ret
-
-	push	{r4,r5,r6,r7,r8,r10,r11}
-	cmp	N, #7
-
-	add	AP, N
-	add	BP, N
-	add	DST, N
-
-	bcs	.Lmemxor3_large
-
-	C Simple byte loop
-.Lmemxor3_bytes:
-	ldrb	r4, [AP, #-1]!
-	ldrb	r5, [BP, #-1]!
-	eor	r4, r5
-	strb	r4, [DST, #-1]!
-	subs	N, #1
-	bne	.Lmemxor3_bytes
-
-.Lmemxor3_done:
-	pop	{r4,r5,r6,r7,r8,r10,r11}
-.Lmemxor3_ret:
-	bx	lr
-
-.Lmemxor3_align_loop:
-	ldrb	r4, [AP, #-1]!
-	ldrb	r5, [BP, #-1]!
-	eor	r5, r4
-	strb	r5, [DST, #-1]!
-	sub	N, #1
-
-.Lmemxor3_large:
-	tst	DST, #3
-	bne	.Lmemxor3_align_loop
-
-	C We have at least 4 bytes left to do here.
-	sub	N, #4
-	ands	ACNT, AP, #3
-	lsl	ACNT, #3
-	beq	.Lmemxor3_a_aligned
-
-	ands	BCNT, BP, #3
-	lsl	BCNT, #3
-	bne	.Lmemxor3_uu
-
-	C Swap
-	mov	r4, AP
-	mov	AP, BP
-	mov	BP, r4
-
-.Lmemxor3_au:
-	C NOTE: We have the relevant shift count in ACNT, not BCNT
-
-	C AP is aligned, BP is not
-	C           v original SRC
-	C +-------+------+
-	C |SRC-4  |SRC   |
-	C +---+---+------+
-	C     |DST-4  |
-	C     +-------+
-	C
-	C With little-endian, we need to do
-	C DST[i-1] ^= (SRC[i-1] >> CNT) ^ (SRC[i] << TNC)
-	rsb	ATNC, ACNT, #32
-	bic	BP, #3
-
-	ldr	r4, [BP]
-
-	tst	N, #4
-	itet	eq
-	moveq	r5, r4
-	subne	N, #4
-	beq	.Lmemxor3_au_odd
-
-.Lmemxor3_au_loop:
-	ldr	r5, [BP, #-4]!
-	ldr	r6, [AP, #-4]!
-	eor	r6, r6, r4, lsl ATNC
-	eor	r6, r6, r5, lsr ACNT
-	str	r6, [DST, #-4]!
-.Lmemxor3_au_odd:
-	ldr	r4, [BP, #-4]!
-	ldr	r6, [AP, #-4]!
-	eor	r6, r6, r5, lsl ATNC
-	eor	r6, r6, r4, lsr ACNT
-	str	r6, [DST, #-4]!
-	subs	N, #8
-	bcs	.Lmemxor3_au_loop
-	adds	N, #8
-	beq	.Lmemxor3_done
-
-	C Leftover bytes in r4, low end
-	ldr	r5, [AP, #-4]
-	eor	r4, r5, r4, lsl ATNC
-
-.Lmemxor3_au_leftover:
-	C Store a byte at a time
-	ror	r4, #24
-	strb	r4, [DST, #-1]!
-	subs	N, #1
-	beq	.Lmemxor3_done
-	subs	ACNT, #8
-	sub	AP, #1
-	bne	.Lmemxor3_au_leftover
-	b	.Lmemxor3_bytes
-
-.Lmemxor3_a_aligned:
-	ands	ACNT, BP, #3
-	lsl	ACNT, #3
-	bne	.Lmemxor3_au
-
-	C a, b and dst all have the same alignment.
-	subs	N, #8
-	bcc	.Lmemxor3_aligned_word_end
-
-	C This loop runs at 8 cycles per iteration. It has been
-	C observed running at only 7 cycles, for this speed, the loop
-	C started at offset 0x2ac in the object file.
-
-	C FIXME: consider software pipelining, similarly to the memxor
-	C loop.
-
-.Lmemxor3_aligned_word_loop:
-	ldmdb	AP!, {r4,r5,r6}
-	ldmdb	BP!, {r7,r8,r10}
-	subs	N, #12
-	eor	r4, r7
-	eor	r5, r8
-	eor	r6, r10
-	stmdb	DST!, {r4,r5,r6}
-	bcs	.Lmemxor3_aligned_word_loop
-
-.Lmemxor3_aligned_word_end:
-	C We have 0-11 bytes left to do, and N holds number of bytes - 12.
-	adds	N, #4
-	bcc	.Lmemxor3_aligned_lt_8
-	C Do 8 bytes more, leftover is in N
-	ldmdb	AP!, {r4,r5}
-	ldmdb	BP!, {r6,r7}
-	eor	r4, r6
-	eor	r5, r7
-	stmdb	DST!, {r4,r5}
-	beq	.Lmemxor3_done
-	b	.Lmemxor3_bytes
-
-.Lmemxor3_aligned_lt_8:
-	adds	N, #4
-	bcc	.Lmemxor3_aligned_lt_4
-
-	ldr	r4, [AP, #-4]!
-	ldr	r5, [BP, #-4]!
-	eor	r4, r5
-	str	r4, [DST, #-4]!
-	beq	.Lmemxor3_done
-	b	.Lmemxor3_bytes
-
-.Lmemxor3_aligned_lt_4:
-	adds	N, #4
-	beq	.Lmemxor3_done
-	b	.Lmemxor3_bytes
-
-.Lmemxor3_uu:
-	cmp	ACNT, BCNT
-	bic	AP, #3
-	bic	BP, #3
-	rsb	ATNC, ACNT, #32
-
-	bne	.Lmemxor3_uud
-
-	C AP and BP are unaligned in the same way
-	ldr	r4, [AP]
-	ldr	r6, [BP]
-	eor	r4, r6
-
-	tst	N, #4
-	itet	eq
-	moveq	r5, r4
-	subne	N, #4
-	beq	.Lmemxor3_uu_odd
-
-.Lmemxor3_uu_loop:
-	ldr	r5, [AP, #-4]!
-	ldr	r6, [BP, #-4]!
-	eor	r5, r6
-	lsl	r4, ATNC
-	eor	r4, r4, r5, lsr ACNT
-	str	r4, [DST, #-4]!
-.Lmemxor3_uu_odd:
-	ldr	r4, [AP, #-4]!
-	ldr	r6, [BP, #-4]!
-	eor	r4, r6
-	lsl	r5, ATNC
-	eor	r5, r5, r4, lsr ACNT
-	str	r5, [DST, #-4]!
-	subs	N, #8
-	bcs	.Lmemxor3_uu_loop
-	adds	N, #8
-	beq	.Lmemxor3_done
-
-	C Leftover bytes in r4, low end
-	ror	r4, ACNT
-.Lmemxor3_uu_leftover:
-	ror	r4, #24
-	strb	r4, [DST, #-1]!
-	subs	N, #1
-	beq	.Lmemxor3_done
-	subs	ACNT, #8
-	bne	.Lmemxor3_uu_leftover
-	b	.Lmemxor3_bytes
-
-.Lmemxor3_uud:
-	C Both AP and BP unaligned, and in different ways
-	rsb	BTNC, BCNT, #32
-
-	ldr	r4, [AP]
-	ldr	r6, [BP]
-
-	tst	N, #4
-	ittet	eq
-	moveq	r5, r4
-	moveq	r7, r6
-	subne	N, #4
-	beq	.Lmemxor3_uud_odd
-
-.Lmemxor3_uud_loop:
-	ldr	r5, [AP, #-4]!
-	ldr	r7, [BP, #-4]!
-	lsl	r4, ATNC
-	eor	r4, r4, r6, lsl BTNC
-	eor	r4, r4, r5, lsr ACNT
-	eor	r4, r4, r7, lsr BCNT
-	str	r4, [DST, #-4]!
-.Lmemxor3_uud_odd:
-	ldr	r4, [AP, #-4]!
-	ldr	r6, [BP, #-4]!
-	lsl	r5, ATNC
-	eor	r5, r5, r7, lsl BTNC
-	eor	r5, r5, r4, lsr ACNT
-	eor	r5, r5, r6, lsr BCNT
-	str	r5, [DST, #-4]!
-	subs	N, #8
-	bcs	.Lmemxor3_uud_loop
-	adds	N, #8
-	beq	.Lmemxor3_done
-
-	C FIXME: More clever left-over handling? For now, just adjust
-	C pointers.
-	add	AP, AP, ACNT, lsr #3
-	add	BP, BP, BCNT, lsr #3
-	b	.Lmemxor3_bytes
-
-EPILOGUE(nettle_memxor3)
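For readers less fluent in ARM assembly: the .Lmemxor3_au path above uses the classic two-aligned-loads trick for the unaligned operand. BP is rounded down to a word boundary with bic, and each unaligned source word is then rebuilt from the previous and the next aligned word via the lsl ATNC / lsr ACNT shift pairs, so the loop needs only one load per source word. A rough little-endian C sketch of the combining step (function and parameter names are illustrative, not from the source):

    #include <stdint.h>

    /* cnt is the source's misalignment in bits (8, 16 or 24; never 0 on
       this path), tnc = 32 - cnt.  lo and hi are the aligned words just
       below and above the unaligned word being reconstructed. */
    static inline uint32_t
    unaligned_word(uint32_t lo, uint32_t hi, unsigned cnt, unsigned tnc)
    {
      /* Little-endian: the low bytes of the unaligned word come from the
         high end of lo, the high bytes from the low end of hi. */
      return (lo >> cnt) | (hi << tnc);
    }

Each loop iteration XORs this combined word into the aligned word loaded from AP and stores the result at DST, keeping the most recent aligned load in a register (r4 or r5) for the next iteration.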
arm/memxor3.asm 0 → 100644
C arm/memxor3.asm

ifelse(<
   Copyright (C) 2013, 2015 Niels Möller

   This file is part of GNU Nettle.

   GNU Nettle is free software: you can redistribute it and/or
   modify it under the terms of either:

     * the GNU Lesser General Public License as published by
       the Free Software Foundation; either version 3 of the License,
       or (at your option) any later version.

   or

     * the GNU General Public License as published by the Free
       Software Foundation; either version 2 of the License, or (at
       your option) any later version.

   or both in parallel, as here.

   GNU Nettle is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received copies of the GNU General Public License and
   the GNU Lesser General Public License along with this program.  If
   not, see http://www.gnu.org/licenses/.
>)

C Possible speedups:
C
C The ldm instruction can load two registers per cycle,
C if the address is two-word aligned. Or three registers in two
C cycles, regardless of alignment.

C Register usage:

define(<DST>, <r0>)
define(<AP>, <r1>)
define(<BP>, <r2>)
define(<N>, <r3>)

C Temporaries r4-r7
define(<ACNT>, <r8>)
define(<ATNC>, <r10>)
define(<BCNT>, <r11>)
define(<BTNC>, <r12>)

	.syntax unified

	.file "memxor3.asm"

	.text
	.arm

	C memxor3(void *dst, const void *a, const void *b, size_t n)
	.align 2
PROLOGUE(nettle_memxor3)
	cmp	N, #0
	beq	.Lmemxor3_ret

	push	{r4,r5,r6,r7,r8,r10,r11}
	cmp	N, #7

	add	AP, N
	add	BP, N
	add	DST, N

	bcs	.Lmemxor3_large

	C Simple byte loop
.Lmemxor3_bytes:
	ldrb	r4, [AP, #-1]!
	ldrb	r5, [BP, #-1]!
	eor	r4, r5
	strb	r4, [DST, #-1]!
	subs	N, #1
	bne	.Lmemxor3_bytes

.Lmemxor3_done:
	pop	{r4,r5,r6,r7,r8,r10,r11}
.Lmemxor3_ret:
	bx	lr

.Lmemxor3_align_loop:
	ldrb	r4, [AP, #-1]!
	ldrb	r5, [BP, #-1]!
	eor	r5, r4
	strb	r5, [DST, #-1]!
	sub	N, #1

.Lmemxor3_large:
	tst	DST, #3
	bne	.Lmemxor3_align_loop

	C We have at least 4 bytes left to do here.
	sub	N, #4
	ands	ACNT, AP, #3
	lsl	ACNT, #3
	beq	.Lmemxor3_a_aligned

	ands	BCNT, BP, #3
	lsl	BCNT, #3
	bne	.Lmemxor3_uu

	C Swap
	mov	r4, AP
	mov	AP, BP
	mov	BP, r4

.Lmemxor3_au:
	C NOTE: We have the relevant shift count in ACNT, not BCNT

	C AP is aligned, BP is not
	C           v original SRC
	C +-------+------+
	C |SRC-4  |SRC   |
	C +---+---+------+
	C     |DST-4  |
	C     +-------+
	C
	C With little-endian, we need to do
	C DST[i-1] ^= (SRC[i-1] >> CNT) ^ (SRC[i] << TNC)
	rsb	ATNC, ACNT, #32
	bic	BP, #3

	ldr	r4, [BP]

	tst	N, #4
	itet	eq
	moveq	r5, r4
	subne	N, #4
	beq	.Lmemxor3_au_odd

.Lmemxor3_au_loop:
	ldr	r5, [BP, #-4]!
	ldr	r6, [AP, #-4]!
	eor	r6, r6, r4, lsl ATNC
	eor	r6, r6, r5, lsr ACNT
	str	r6, [DST, #-4]!
.Lmemxor3_au_odd:
	ldr	r4, [BP, #-4]!
	ldr	r6, [AP, #-4]!
	eor	r6, r6, r5, lsl ATNC
	eor	r6, r6, r4, lsr ACNT
	str	r6, [DST, #-4]!
	subs	N, #8
	bcs	.Lmemxor3_au_loop
	adds	N, #8
	beq	.Lmemxor3_done

	C Leftover bytes in r4, low end
	ldr	r5, [AP, #-4]
	eor	r4, r5, r4, lsl ATNC

.Lmemxor3_au_leftover:
	C Store a byte at a time
	ror	r4, #24
	strb	r4, [DST, #-1]!
	subs	N, #1
	beq	.Lmemxor3_done
	subs	ACNT, #8
	sub	AP, #1
	bne	.Lmemxor3_au_leftover
	b	.Lmemxor3_bytes

.Lmemxor3_a_aligned:
	ands	ACNT, BP, #3
	lsl	ACNT, #3
	bne	.Lmemxor3_au

	C a, b and dst all have the same alignment.
	subs	N, #8
	bcc	.Lmemxor3_aligned_word_end

	C This loop runs at 8 cycles per iteration. It has been
	C observed running at only 7 cycles, for this speed, the loop
	C started at offset 0x2ac in the object file.

	C FIXME: consider software pipelining, similarly to the memxor
	C loop.

.Lmemxor3_aligned_word_loop:
	ldmdb	AP!, {r4,r5,r6}
	ldmdb	BP!, {r7,r8,r10}
	subs	N, #12
	eor	r4, r7
	eor	r5, r8
	eor	r6, r10
	stmdb	DST!, {r4,r5,r6}
	bcs	.Lmemxor3_aligned_word_loop

.Lmemxor3_aligned_word_end:
	C We have 0-11 bytes left to do, and N holds number of bytes - 12.
	adds	N, #4
	bcc	.Lmemxor3_aligned_lt_8
	C Do 8 bytes more, leftover is in N
	ldmdb	AP!, {r4,r5}
	ldmdb	BP!, {r6,r7}
	eor	r4, r6
	eor	r5, r7
	stmdb	DST!, {r4,r5}
	beq	.Lmemxor3_done
	b	.Lmemxor3_bytes

.Lmemxor3_aligned_lt_8:
	adds	N, #4
	bcc	.Lmemxor3_aligned_lt_4

	ldr	r4, [AP, #-4]!
	ldr	r5, [BP, #-4]!
	eor	r4, r5
	str	r4, [DST, #-4]!
	beq	.Lmemxor3_done
	b	.Lmemxor3_bytes

.Lmemxor3_aligned_lt_4:
	adds	N, #4
	beq	.Lmemxor3_done
	b	.Lmemxor3_bytes

.Lmemxor3_uu:
	cmp	ACNT, BCNT
	bic	AP, #3
	bic	BP, #3
	rsb	ATNC, ACNT, #32

	bne	.Lmemxor3_uud

	C AP and BP are unaligned in the same way
	ldr	r4, [AP]
	ldr	r6, [BP]
	eor	r4, r6

	tst	N, #4
	itet	eq
	moveq	r5, r4
	subne	N, #4
	beq	.Lmemxor3_uu_odd

.Lmemxor3_uu_loop:
	ldr	r5, [AP, #-4]!
	ldr	r6, [BP, #-4]!
	eor	r5, r6
	lsl	r4, ATNC
	eor	r4, r4, r5, lsr ACNT
	str	r4, [DST, #-4]!
.Lmemxor3_uu_odd:
	ldr	r4, [AP, #-4]!
	ldr	r6, [BP, #-4]!
	eor	r4, r6
	lsl	r5, ATNC
	eor	r5, r5, r4, lsr ACNT
	str	r5, [DST, #-4]!
	subs	N, #8
	bcs	.Lmemxor3_uu_loop
	adds	N, #8
	beq	.Lmemxor3_done

	C Leftover bytes in r4, low end
	ror	r4, ACNT
.Lmemxor3_uu_leftover:
	ror	r4, #24
	strb	r4, [DST, #-1]!
	subs	N, #1
	beq	.Lmemxor3_done
	subs	ACNT, #8
	bne	.Lmemxor3_uu_leftover
	b	.Lmemxor3_bytes

.Lmemxor3_uud:
	C Both AP and BP unaligned, and in different ways
...