ARM: Optimized aligned case of memxor.

2013-02-06 Niels Möller <>
* armv7/memxor.asm (memxor): Optimized aligned case, now runs at
0.75 cycles/byte.
* armv7/README: New file.
* armv7/machine.m4: New (empty) file.
* armv7/memxor.asm: Initial assembly implementation.
...@@ -131,14 +131,36 @@ PROLOGUE(memxor) ...@@ -131,14 +131,36 @@ PROLOGUE(memxor)
b .Lmemxor_bytes b .Lmemxor_bytes
.Lmemxor_same: .Lmemxor_same:
tst N, #4
it ne
subne N, #4
bne .Lmemxor_same_loop
ldr r3, [SRC], #+4 ldr r3, [SRC], #+4
ldr r4, [DST] ldr r4, [DST]
eor r3, r4 eor r3, r4
str r3, [DST], #+4 str r3, [DST], #+4
subs N, #4 subs N, #8
bcs .Lmemxor_same bcc .Lmemxor_same_end
adds N, #4
C 6 cycles per iteration, 0.75 cycles/byte
ldr r4, [SRC, #+4]
ldr r3, [SRC], #+8
ldr r6, [DST, #+4]
ldr r5, [DST]
eor r4, r6
eor r3, r5
subs N, #8
str r4, [DST, #+4]
str r3, [DST], #+8
bcs .Lmemxor_same_loop
adds N, #8
beq .Lmemxor_done beq .Lmemxor_done
b .Lmemxor_bytes b .Lmemxor_bytes
EPILOGUE(memxor) EPILOGUE(memxor)
