Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Wim Lewis
nettle
Commits
89a6fe72
Commit
89a6fe72
authored
Jan 10, 2015
by
Niels Möller
Browse files
arm: Moved memxor3 to new file, arm/memxor3.asm.
parent
93694133
Changes
3
Hide whitespace changes
Inline
Side-by-side
ChangeLog
View file @
89a6fe72
2015-01-10 Niels Möller <nisse@lysator.liu.se>
* arm/memxor.asm (memxor3): Moved to new file.
* arm/memxor3.asm: New file.
2014-11-24 Niels Möller <nisse@lysator.liu.se>
* x86_64/memxor3.asm (memxor3): New file, code moved from old
...
...
arm/memxor.asm
View file @
89a6fe72
...
...
@@ -227,274 +227,3 @@ PROLOGUE(nettle_memxor)
b
.Lmemxor_bytes
EPILOGUE
(
nettle_memxor
)
C Register usage for memxor3.  DST/AP/BP/N alias the four argument
C registers r0-r3 (m4 macros, expanded at build time).
define(<DST>, <r0>)
define(<AP>, <r1>)
define(<BP>, <r2>)
define(<N>, <r3>)
C CNT/TNC belong to the preceding memxor code in this file; drop them.
undefine(<CNT>)
undefine(<TNC>)

C Temporaries	r4-r7
C ACNT/BCNT: misalignment of AP/BP, in bits (0, 8, 16 or 24).
C ATNC/BTNC: the complementary shift counts, 32 - ACNT / 32 - BCNT.
define(<ACNT>, <r8>)
define(<ATNC>, <r10>)
define(<BCNT>, <r11>)
define(<BTNC>, <r12>)

C memxor3(void *dst, const void *a, const void *b, size_t n)
C Sets dst[i] = a[i] ^ b[i] for 0 <= i < n.  All loops walk from the
C high addresses downwards, with pre-decrement addressing throughout
C (presumably to support overlap of dst with the low end of a source;
C confirm against Nettle's documented memxor3 contract).
	.align 2
PROLOGUE(nettle_memxor3)
	C Early exit for n == 0, before any registers are saved.
	cmp	N, #0
	beq	.Lmemxor3_ret

	push	{r4,r5,r6,r7,r8,r10,r11}
	C Compare before the pointer adjustments; the carry from this
	C cmp (set iff n >= 8) selects the word path at .Lmemxor3_large.
	cmp	N, #7

	C Advance all three pointers just past the end of their buffers.
	add	AP, N
	add	BP, N
	add	DST, N

	bcs	.Lmemxor3_large

	C Simple byte loop
.Lmemxor3_bytes:
	ldrb	r4, [AP, #-1]!
	ldrb	r5, [BP, #-1]!
	eor	r4, r5
	strb	r4, [DST, #-1]!
	subs	N, #1
	bne	.Lmemxor3_bytes

.Lmemxor3_done:
	pop	{r4,r5,r6,r7,r8,r10,r11}
.Lmemxor3_ret:
	bx	lr

	C One byte per iteration until DST is word-aligned.
.Lmemxor3_align_loop:
	ldrb	r4, [AP, #-1]!
	ldrb	r5, [BP, #-1]!
	eor	r5, r4
	strb	r5, [DST, #-1]!
	sub	N, #1

.Lmemxor3_large:
	tst	DST, #3
	bne	.Lmemxor3_align_loop

	C We have at least 4 bytes left to do here.
	C Bias N by -4; the word loops below account for it.
	sub	N, #4
	C ACNT = misalignment of AP, converted from bytes to bits.
	ands	ACNT, AP, #3
	lsl	ACNT, #3
	beq	.Lmemxor3_a_aligned

	C BCNT = misalignment of BP, in bits.
	ands	BCNT, BP, #3
	lsl	BCNT, #3
	bne	.Lmemxor3_uu

	C Here BP is aligned but AP is not.  Swap the pointers so the
	C aligned operand is always AP on the .Lmemxor3_au path.
	C Swap
	mov	r4, AP
	mov	AP, BP
	mov	BP, r4

.Lmemxor3_au:
	C NOTE: We have the relevant shift count in ACNT, not BCNT
	C AP is aligned, BP is not
	C           v original SRC
	C +-------+------+
	C |SRC-4  |SRC   |
	C +---+---+------+
	C     |DST-4  |
	C     +-------+
	C
	C With little-endian, we need to do
	C DST[i-1] ^= (SRC[i-1] >> CNT) ^ (SRC[i] << TNC)
	rsb	ATNC, ACNT, #32
	bic	BP, #3

	ldr	r4, [BP]

	C If N & 4, enter the loop at the odd half-iteration.
	C (IT block present for unified/Thumb-2 assembly.)
	tst	N, #4
	itet	eq
	moveq	r5, r4
	subne	N, #4
	beq	.Lmemxor3_au_odd

	C Two words per iteration; r4 and r5 alternately carry the
	C previously loaded (unaligned) word from BP.
.Lmemxor3_au_loop:
	ldr	r5, [BP, #-4]!
	ldr	r6, [AP, #-4]!
	eor	r6, r6, r4, lsl ATNC
	eor	r6, r6, r5, lsr ACNT
	str	r6, [DST, #-4]!
.Lmemxor3_au_odd:
	ldr	r4, [BP, #-4]!
	ldr	r6, [AP, #-4]!
	eor	r6, r6, r5, lsl ATNC
	eor	r6, r6, r4, lsr ACNT
	str	r6, [DST, #-4]!
	subs	N, #8
	bcs	.Lmemxor3_au_loop
	adds	N, #8
	beq	.Lmemxor3_done

	C Leftover bytes in r4, low end
	ldr	r5, [AP, #-4]
	eor	r4, r5, r4, lsl ATNC

.Lmemxor3_au_leftover:
	C Store a byte at a time
	C ror #24 rotates the next output byte into the low byte.
	ror	r4, #24
	strb	r4, [DST, #-1]!
	subs	N, #1
	beq	.Lmemxor3_done
	subs	ACNT, #8
	sub	AP, #1
	bne	.Lmemxor3_au_leftover
	b	.Lmemxor3_bytes

.Lmemxor3_a_aligned:
	C AP aligned; check BP.  (ACNT is reused for BP's offset here,
	C as required by the NOTE at .Lmemxor3_au.)
	ands	ACNT, BP, #3
	lsl	ACNT, #3
	bne	.Lmemxor3_au ;

	C a, b and dst all have the same alignment.
	subs	N, #8
	bcc	.Lmemxor3_aligned_word_end

	C This loop runs at 8 cycles per iteration. It has been
	C observed running at only 7 cycles, for this speed, the loop
	C started at offset 0x2ac in the object file.

	C FIXME: consider software pipelining, similarly to the memxor
	C loop.

	C Three words per iteration.
.Lmemxor3_aligned_word_loop:
	ldmdb	AP!, {r4,r5,r6}
	ldmdb	BP!, {r7,r8,r10}
	subs	N, #12
	eor	r4, r7
	eor	r5, r8
	eor	r6, r10
	stmdb	DST!, {r4,r5,r6}
	bcs	.Lmemxor3_aligned_word_loop

.Lmemxor3_aligned_word_end:
	C We have 0-11 bytes left to do, and N holds number of bytes -12.
	adds	N, #4
	bcc	.Lmemxor3_aligned_lt_8
	C Do 8 bytes more, leftover is in N
	ldmdb	AP!, {r4, r5}
	ldmdb	BP!, {r6, r7}
	eor	r4, r6
	eor	r5, r7
	stmdb	DST!, {r4, r5}
	beq	.Lmemxor3_done
	b	.Lmemxor3_bytes

.Lmemxor3_aligned_lt_8:
	adds	N, #4
	bcc	.Lmemxor3_aligned_lt_4

	ldr	r4, [AP, #-4]!
	ldr	r5, [BP, #-4]!
	eor	r4, r5
	str	r4, [DST, #-4]!
	beq	.Lmemxor3_done
	b	.Lmemxor3_bytes

.Lmemxor3_aligned_lt_4:
	adds	N, #4
	beq	.Lmemxor3_done
	b	.Lmemxor3_bytes

	C Both sources unaligned ("uu").
.Lmemxor3_uu:

	cmp	ACNT, BCNT
	bic	AP, #3
	bic	BP, #3
	rsb	ATNC, ACNT, #32

	bne	.Lmemxor3_uud

	C AP and BP are unaligned in the same way

	ldr	r4, [AP]
	ldr	r6, [BP]
	eor	r4, r6

	tst	N, #4
	itet	eq
	moveq	r5, r4
	subne	N, #4
	beq	.Lmemxor3_uu_odd

	C Two words per iteration; r4/r5 alternately carry the previous
	C xor-ed word pair, combined with the next via the shared shift.
.Lmemxor3_uu_loop:
	ldr	r5, [AP, #-4]!
	ldr	r6, [BP, #-4]!
	eor	r5, r6
	lsl	r4, ATNC
	eor	r4, r4, r5, lsr ACNT
	str	r4, [DST, #-4]!
.Lmemxor3_uu_odd:
	ldr	r4, [AP, #-4]!
	ldr	r6, [BP, #-4]!
	eor	r4, r6
	lsl	r5, ATNC
	eor	r5, r5, r4, lsr ACNT
	str	r5, [DST, #-4]!
	subs	N, #8
	bcs	.Lmemxor3_uu_loop
	adds	N, #8
	beq	.Lmemxor3_done

	C Leftover bytes in r4, low end
	ror	r4, ACNT
.Lmemxor3_uu_leftover:
	ror	r4, #24
	strb	r4, [DST, #-1]!
	subs	N, #1
	beq	.Lmemxor3_done
	subs	ACNT, #8
	bne	.Lmemxor3_uu_leftover
	b	.Lmemxor3_bytes

.Lmemxor3_uud:
	C Both AP and BP unaligned, and in different ways
	rsb	BTNC, BCNT, #32

	ldr	r4, [AP]
	ldr	r6, [BP]

	tst	N, #4
	ittet	eq
	moveq	r5, r4
	moveq	r7, r6
	subne	N, #4
	beq	.Lmemxor3_uud_odd

	C r4/r5 carry the previous AP word and r6/r7 the previous BP
	C word; each output word combines two shifted words from each
	C source, using both shift-count pairs.
.Lmemxor3_uud_loop:
	ldr	r5, [AP, #-4]!
	ldr	r7, [BP, #-4]!
	lsl	r4, ATNC
	eor	r4, r4, r6, lsl BTNC
	eor	r4, r4, r5, lsr ACNT
	eor	r4, r4, r7, lsr BCNT
	str	r4, [DST, #-4]!
.Lmemxor3_uud_odd:
	ldr	r4, [AP, #-4]!
	ldr	r6, [BP, #-4]!
	lsl	r5, ATNC
	eor	r5, r5, r7, lsl BTNC
	eor	r5, r5, r4, lsr ACNT
	eor	r5, r5, r6, lsr BCNT
	str	r5, [DST, #-4]!
	subs	N, #8
	bcs	.Lmemxor3_uud_loop
	adds	N, #8
	beq	.Lmemxor3_done

	C FIXME: More clever left-over handling? For now, just adjust
	C pointers.
	add	AP, AP, ACNT, lsr #3
	add	BP, BP, BCNT, lsr #3
	b	.Lmemxor3_bytes
EPILOGUE(nettle_memxor3)
arm/memxor3.asm
0 → 100644
View file @
89a6fe72
C arm/memxor3.asm

ifelse(<
   Copyright (C) 2013, 2015 Niels Möller

   This file is part of GNU Nettle.

   GNU Nettle is free software: you can redistribute it and/or
   modify it under the terms of either:

     * the GNU Lesser General Public License as published by
       the Free Software Foundation; either version 3 of the License, or (at your
       option) any later version.

   or

     * the GNU General Public License as published by
       the Free Software Foundation; either version 2 of the License, or (at your
       option) any later version.

   or both in parallel, as here.

   GNU Nettle is distributed in the hope that it will be
   useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received copies of the GNU General Public License and
   the GNU Lesser General Public License along with this program.  If
   not, see http://www.gnu.org/licenses/.
>)

C Possible speedups:
C
C The ldm instruction can do load two registers per cycle,
C if the address is two-word aligned. Or three registers in two
C cycles, regardless of alignment.

C Register usage:

C DST/AP/BP/N alias the four argument registers r0-r3.
define(<DST>, <r0>)
define(<AP>, <r1>)
define(<BP>, <r2>)
define(<N>, <r3>)

C Temporaries	r4-r7
C ACNT/BCNT: misalignment of AP/BP, in bits (0, 8, 16 or 24).
C ATNC/BTNC: the complementary shift counts, 32 - ACNT / 32 - BCNT.
define(<ACNT>, <r8>)
define(<ATNC>, <r10>)
define(<BCNT>, <r11>)
define(<BTNC>, <r12>)

	.syntax unified

	.file "memxor3.asm"

	.text
	.arm

	C memxor3(void *dst, const void *a, const void *b, size_t n)
	C Sets dst[i] = a[i] ^ b[i] for 0 <= i < n.  All loops walk
	C from the high addresses downwards, with pre-decrement
	C addressing throughout (presumably to support overlap of dst
	C with the low end of a source; confirm against Nettle's
	C documented memxor3 contract).
	.align 2
PROLOGUE(nettle_memxor3)
	C Early exit for n == 0, before any registers are saved.
	cmp	N, #0
	beq	.Lmemxor3_ret

	push	{r4,r5,r6,r7,r8,r10,r11}
	C Compare before the pointer adjustments; the carry from this
	C cmp (set iff n >= 8) selects the word path at .Lmemxor3_large.
	cmp	N, #7

	C Advance all three pointers just past the end of their buffers.
	add	AP, N
	add	BP, N
	add	DST, N

	bcs	.Lmemxor3_large

	C Simple byte loop
.Lmemxor3_bytes:
	ldrb	r4, [AP, #-1]!
	ldrb	r5, [BP, #-1]!
	eor	r4, r5
	strb	r4, [DST, #-1]!
	subs	N, #1
	bne	.Lmemxor3_bytes

.Lmemxor3_done:
	pop	{r4,r5,r6,r7,r8,r10,r11}
.Lmemxor3_ret:
	bx	lr

	C One byte per iteration until DST is word-aligned.
.Lmemxor3_align_loop:
	ldrb	r4, [AP, #-1]!
	ldrb	r5, [BP, #-1]!
	eor	r5, r4
	strb	r5, [DST, #-1]!
	sub	N, #1

.Lmemxor3_large:
	tst	DST, #3
	bne	.Lmemxor3_align_loop

	C We have at least 4 bytes left to do here.
	C Bias N by -4; the word loops below account for it.
	sub	N, #4
	C ACNT = misalignment of AP, converted from bytes to bits.
	ands	ACNT, AP, #3
	lsl	ACNT, #3
	beq	.Lmemxor3_a_aligned

	C BCNT = misalignment of BP, in bits.
	ands	BCNT, BP, #3
	lsl	BCNT, #3
	bne	.Lmemxor3_uu

	C Here BP is aligned but AP is not.  Swap the pointers so the
	C aligned operand is always AP on the .Lmemxor3_au path.
	C Swap
	mov	r4, AP
	mov	AP, BP
	mov	BP, r4

.Lmemxor3_au:
	C NOTE: We have the relevant shift count in ACNT, not BCNT
	C AP is aligned, BP is not
	C           v original SRC
	C +-------+------+
	C |SRC-4  |SRC   |
	C +---+---+------+
	C     |DST-4  |
	C     +-------+
	C
	C With little-endian, we need to do
	C DST[i-1] ^= (SRC[i-1] >> CNT) ^ (SRC[i] << TNC)
	rsb	ATNC, ACNT, #32
	bic	BP, #3

	ldr	r4, [BP]

	C If N & 4, enter the loop at the odd half-iteration.
	C (IT block present for unified/Thumb-2 assembly.)
	tst	N, #4
	itet	eq
	moveq	r5, r4
	subne	N, #4
	beq	.Lmemxor3_au_odd

	C Two words per iteration; r4 and r5 alternately carry the
	C previously loaded (unaligned) word from BP.
.Lmemxor3_au_loop:
	ldr	r5, [BP, #-4]!
	ldr	r6, [AP, #-4]!
	eor	r6, r6, r4, lsl ATNC
	eor	r6, r6, r5, lsr ACNT
	str	r6, [DST, #-4]!
.Lmemxor3_au_odd:
	ldr	r4, [BP, #-4]!
	ldr	r6, [AP, #-4]!
	eor	r6, r6, r5, lsl ATNC
	eor	r6, r6, r4, lsr ACNT
	str	r6, [DST, #-4]!
	subs	N, #8
	bcs	.Lmemxor3_au_loop
	adds	N, #8
	beq	.Lmemxor3_done

	C Leftover bytes in r4, low end
	ldr	r5, [AP, #-4]
	eor	r4, r5, r4, lsl ATNC

.Lmemxor3_au_leftover:
	C Store a byte at a time
	C ror #24 rotates the next output byte into the low byte.
	ror	r4, #24
	strb	r4, [DST, #-1]!
	subs	N, #1
	beq	.Lmemxor3_done
	subs	ACNT, #8
	sub	AP, #1
	bne	.Lmemxor3_au_leftover
	b	.Lmemxor3_bytes

.Lmemxor3_a_aligned:
	C AP aligned; check BP.  (ACNT is reused for BP's offset here,
	C as required by the NOTE at .Lmemxor3_au.)
	ands	ACNT, BP, #3
	lsl	ACNT, #3
	bne	.Lmemxor3_au ;

	C a, b and dst all have the same alignment.
	subs	N, #8
	bcc	.Lmemxor3_aligned_word_end

	C This loop runs at 8 cycles per iteration. It has been
	C observed running at only 7 cycles, for this speed, the loop
	C started at offset 0x2ac in the object file.

	C FIXME: consider software pipelining, similarly to the memxor
	C loop.

	C Three words per iteration.
.Lmemxor3_aligned_word_loop:
	ldmdb	AP!, {r4,r5,r6}
	ldmdb	BP!, {r7,r8,r10}
	subs	N, #12
	eor	r4, r7
	eor	r5, r8
	eor	r6, r10
	stmdb	DST!, {r4,r5,r6}
	bcs	.Lmemxor3_aligned_word_loop

.Lmemxor3_aligned_word_end:
	C We have 0-11 bytes left to do, and N holds number of bytes -12.
	adds	N, #4
	bcc	.Lmemxor3_aligned_lt_8
	C Do 8 bytes more, leftover is in N
	ldmdb	AP!, {r4, r5}
	ldmdb	BP!, {r6, r7}
	eor	r4, r6
	eor	r5, r7
	stmdb	DST!, {r4, r5}
	beq	.Lmemxor3_done
	b	.Lmemxor3_bytes

.Lmemxor3_aligned_lt_8:
	adds	N, #4
	bcc	.Lmemxor3_aligned_lt_4

	ldr	r4, [AP, #-4]!
	ldr	r5, [BP, #-4]!
	eor	r4, r5
	str	r4, [DST, #-4]!
	beq	.Lmemxor3_done
	b	.Lmemxor3_bytes

.Lmemxor3_aligned_lt_4:
	adds	N, #4
	beq	.Lmemxor3_done
	b	.Lmemxor3_bytes

	C Both sources unaligned ("uu").
.Lmemxor3_uu:

	cmp	ACNT, BCNT
	bic	AP, #3
	bic	BP, #3
	rsb	ATNC, ACNT, #32

	bne	.Lmemxor3_uud

	C AP and BP are unaligned in the same way

	ldr	r4, [AP]
	ldr	r6, [BP]
	eor	r4, r6

	tst	N, #4
	itet	eq
	moveq	r5, r4
	subne	N, #4
	beq	.Lmemxor3_uu_odd

	C Two words per iteration; r4/r5 alternately carry the previous
	C xor-ed word pair, combined with the next via the shared shift.
.Lmemxor3_uu_loop:
	ldr	r5, [AP, #-4]!
	ldr	r6, [BP, #-4]!
	eor	r5, r6
	lsl	r4, ATNC
	eor	r4, r4, r5, lsr ACNT
	str	r4, [DST, #-4]!
.Lmemxor3_uu_odd:
	ldr	r4, [AP, #-4]!
	ldr	r6, [BP, #-4]!
	eor	r4, r6
	lsl	r5, ATNC
	eor	r5, r5, r4, lsr ACNT
	str	r5, [DST, #-4]!
	subs	N, #8
	bcs	.Lmemxor3_uu_loop
	adds	N, #8
	beq	.Lmemxor3_done

	C Leftover bytes in r4, low end
	ror	r4, ACNT
.Lmemxor3_uu_leftover:
	ror	r4, #24
	strb	r4, [DST, #-1]!
	subs	N, #1
	beq	.Lmemxor3_done
	subs	ACNT, #8
	bne	.Lmemxor3_uu_leftover
	b	.Lmemxor3_bytes

.Lmemxor3_uud:
	C Both AP and BP unaligned, and in different ways
	rsb	BTNC, BCNT, #32

	ldr	r4, [AP]
	ldr	r6, [BP]

	tst	N, #4
	ittet	eq
	moveq	r5, r4
	moveq	r7, r6
	subne	N, #4
	beq	.Lmemxor3_uud_odd

	C r4/r5 carry the previous AP word and r6/r7 the previous BP
	C word; each output word combines two shifted words from each
	C source, using both shift-count pairs.
.Lmemxor3_uud_loop:
	ldr	r5, [AP, #-4]!
	ldr	r7, [BP, #-4]!
	lsl	r4, ATNC
	eor	r4, r4, r6, lsl BTNC
	eor	r4, r4, r5, lsr ACNT
	eor	r4, r4, r7, lsr BCNT
	str	r4, [DST, #-4]!
.Lmemxor3_uud_odd:
	ldr	r4, [AP, #-4]!
	ldr	r6, [BP, #-4]!
	lsl	r5, ATNC
	eor	r5, r5, r7, lsl BTNC
	eor	r5, r5, r4, lsr ACNT
	eor	r5, r5, r6, lsr BCNT
	str	r5, [DST, #-4]!
	subs	N, #8
	bcs	.Lmemxor3_uud_loop
	adds	N, #8
	beq	.Lmemxor3_done

	C FIXME: More clever left-over handling? For now, just adjust
	C pointers.
	add	AP, AP, ACNT, lsr #3
	add	BP, BP, BCNT, lsr #3
	b	.Lmemxor3_bytes
EPILOGUE(nettle_memxor3)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment