Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • nettle/nettle
  • briansmith/nettle
  • ajlawrence/nettle
  • mhoffmann/nettle
  • devnexen/nettle
  • wiml/nettle
  • lumag/nettle
  • michaelweiser/nettle
  • aberaud/nettle
  • mamonet/nettle
  • npocs/nettle
  • babelouest/nettle
  • ueno/nettle
  • rth/nettle
14 results
Show changes
Showing
with 1604 additions and 0 deletions
C Reads four bytes from SRC with byte loads (no alignment requirement
C on SRC), assembles them into REG with the first byte in the low-order
C position, and XORs in one subkey word loaded from KEY.
C Both pointers are post-incremented: SRC by 4 in total, KEY by INCR.
C Clobbers T0.
C AES_LOAD_INCR(SRC, KEY, REG, INCR)
define(`AES_LOAD_INCR', `
ldrb $3, [$1], #1
ldrb T0, [$1], #1
orr $3, $3, T0, lsl #8
ldrb T0, [$1], #1
orr $3, $3, T0, lsl #16
ldrb T0, [$1], #1
orr $3, $3, T0, lsl #24
ldr T0, [$2], #$4
eor $3, $3, T0
')
C Loads one word, and adds it to the subkey. Uses T0
C Shorthand for AES_LOAD_INCR with a fixed KEY increment of +4, so KEY
C is post-incremented by one word. SRC is advanced by 4 bytes.
C AES_LOAD(SRC, KEY, REG)
define(`AES_LOAD', `AES_LOAD_INCR($1, $2, $3, +4)')
C Writes the word X to DST with four byte stores, low-order byte first,
C post-incrementing DST by 4 in total. X is rotated right by 8 bits
C three times along the way, so its original value is not preserved.
C AES_STORE(DST, X)
define(`AES_STORE', `
strb $2, [$1], #1
mov $2, $2, ror #8
strb $2, [$1], #1
mov $2, $2, ror #8
strb $2, [$1], #1
mov $2, $2, ror #8
strb $2, [$1], #1
')
C AES_FINAL_ROUND_V6(a,b,c,d,key,res)
C Final round using the uxtb instruction for byte extraction.
C Looks up byte 0 of a, byte 1 of b, byte 2 of c and byte 3 of d in the
C byte table addressed by TABLE, packs the four results into res at
C byte positions 0, 1, 2 and 3 respectively, then XORs in one word
C loaded from key, post-incrementing key by 4.
C Clobbers T0. TABLE and T0 are not defined in this file section; they
C are expected from the file that uses the macro.
define(`AES_FINAL_ROUND_V6', `
uxtb T0, $1
ldrb $6, [TABLE, T0]
uxtb T0, $2, ror #8
ldrb T0, [TABLE, T0]
eor $6, $6, T0, lsl #8
uxtb T0, $3, ror #16
ldrb T0, [TABLE, T0]
eor $6, $6, T0, lsl #16
ldrb T0, [TABLE, $4, lsr #24]
eor $6, $6, T0, lsl #24
ldr T0, [$5], #+4
eor $6, $6, T0
')
C AES_FINAL_ROUND_V5(a,b,c,d,key,res,mask)
C Avoids the uxtb instruction, introduced in ARMv6.
C The mask argument should hold the constant 0xff
C Same computation as AES_FINAL_ROUND_V6: bytes 0, 1 and 2 of a, b and c
C are isolated by ANDing the (rotated) word with mask, byte 3 of d by a
C logical shift right of 24. Each byte indexes the byte table at TABLE;
C the four results are packed into res and one word loaded from key is
C XORed in, with key post-incremented by 4.
C Clobbers T0. TABLE and T0 are not defined in this file section; they
C are expected from the file that uses the macro.
define(`AES_FINAL_ROUND_V5', `
and T0, $7, $1
ldrb $6, [TABLE, T0]
and T0, $7, $2, ror #8
ldrb T0, [TABLE, T0]
eor $6, $6, T0, lsl #8
and T0, $7, $3, ror #16
ldrb T0, [TABLE, T0]
eor $6, $6, T0, lsl #16
ldrb T0, [TABLE, $4, lsr #24]
eor $6, $6, T0, lsl #24
ldr T0, [$5], #+4
eor $6, T0
')
C arm/ecc-secp192r1-modp.asm
ifelse(`
Copyright (C) 2013 Niels Möller
This file is part of GNU Nettle.
GNU Nettle is free software: you can redistribute it and/or
modify it under the terms of either:
* the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your
option) any later version.
or
* the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your
option) any later version.
or both in parallel, as here.
GNU Nettle is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
')
.file "ecc-secp192r1-modp.asm"
.arm
C Register roles. r0 arrives holding the modulo argument, which this
C code never reads, so it is reused as the high-limb pointer HP and
C later as the carry word C2.
define(`HP', `r0') C Overlaps unused modulo argument
define(`RP', `r1')
define(`XP', `r2')
define(`T0', `r3')
define(`T1', `r4')
define(`T2', `r5')
define(`T3', `r6')
define(`T4', `r7')
define(`T5', `r8')
define(`T6', `r10')
define(`T7', `r11')
define(`H0', `T0') C Overlaps T0 and T1
define(`H1', `T1')
define(`C2', `HP')
define(`C4', `r12')
C ecc_secp192r1_modp (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp)
C
C Reads the twelve 32-bit limbs at XP (r2) and stores six limbs at
C RP (r1). The modulo argument in r0 is ignored. Returns with bx lr.
C NOTE(review): the prototype above lists xp because the code takes its
C input pointer from r2; confirm against the C declaration.
C With B = 2^32, the folding below relies on B^6 = B^2 + 1 (mod p),
C as reflected in the carry comments.
.text
.align 2
PROLOGUE(_nettle_ecc_secp192r1_modp)
C Save the callee-saved registers used as T1-T7
push {r4,r5,r6,r7,r8,r10,r11}
C Reduce two words at a time
add HP, XP, #48
add XP, XP, #8
C H1:H0 = limbs 11:10, HP is left pointing at limb 10
ldmdb HP!, {H0,H1}
C T2-T7 = limbs 2-7
ldm XP, {T2,T3,T4,T5,T6,T7}
mov C4, #0
C Add limbs 11:10 at positions 4 and 6
adds T4, T4, H0
adcs T5, T5, H1
adcs T6, T6, H0
adcs T7, T7, H1
C Need to add carry to T2 and T4, do T4 later.
adc C4, C4, #0
C H1:H0 = limbs 9:8. The carry flag from the adcs on T7 is still live
C (adc, ldmdb and mov do not set flags) and enters T2 through the adcs
C below. Limbs 9:8 are added at positions 2 and 4.
ldmdb HP!, {H0,H1}
mov C2, #0
adcs T2, T2, H0
adcs T3, T3, H1
adcs T4, T4, H0
adcs T5, T5, H1
C Need to add carry to T0 and T2, do T2 later
adc C2, C2, #0
C T1:T0 = limbs 1:0, overwriting H0/H1. XP is back at limb 0.
ldmdb XP!, {T0, T1}
C Add the accumulated limbs 7:6 (T7:T6) at positions 0 and 2
adcs T0, T0, T6
adcs T1, T1, T7
adcs T2, T2, T6
adcs T3, T3, T7
adc C4, C4, #0
C Apply the deferred carries: C2 into T2, C4 into T4
adds T2, T2, C2
adcs T3, T3, #0
adcs T4, T4, C4
adcs T5, T5, #0
C C2 = carry out of T5. The flag itself stays set for the adcs on T0.
mov C2, #0
adc C2, C2, #0
C Add in final carry
adcs T0, T0, #0
adcs T1, T1, #0
adcs T2, T2, C2
adcs T3, T3, #0
adcs T4, T4, #0
adc T5, T5, #0
C Store the six result limbs
stm RP, {T0,T1,T2,T3,T4,T5}
pop {r4,r5,r6,r7,r8,r10,r11}
bx lr
EPILOGUE(_nettle_ecc_secp192r1_modp)
C arm/ecc-secp224r1-modp.asm
ifelse(`
Copyright (C) 2013 Niels Möller
This file is part of GNU Nettle.
GNU Nettle is free software: you can redistribute it and/or
modify it under the terms of either:
* the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your
option) any later version.
or
* the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your
option) any later version.
or both in parallel, as here.
GNU Nettle is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
')
.file "ecc-secp224r1-modp.asm"
.arm
C Register roles. T0 shares r1 with RP, so RP is saved on the stack
C before T0 is first loaded. L2 is lr, which is therefore saved too.
define(`RP', `r1') C Overlaps T0
define(`XP', `r2')
define(`H', `r0') C Overlaps unused modulo argument
define(`T0', `r1')
define(`T1', `r3')
define(`T2', `r4')
define(`T3', `r5')
define(`T4', `r6')
define(`T5', `r7')
define(`T6', `r8')
define(`N3', `r10')
define(`L0', `r11')
define(`L1', `r12')
define(`L2', `lr')
C ecc_secp224r1_modp (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp)
C
C Reads the fourteen 32-bit limbs at XP (r2) and stores seven limbs at
C the address passed in RP (r1). The modulo argument in r0 is ignored.
C NOTE(review): the prototype above lists xp because the code takes its
C input pointer from r2; confirm against the C declaration.
.text
.align 2
PROLOGUE(_nettle_ecc_secp224r1_modp)
C Pushes RP last
C r1 is the lowest-numbered register in the list, so it is stored at
C the lowest address and is the first word popped by pop {XP} below.
push {r1,r4,r5,r6,r7,r8,r10,r11,lr}
C T0-T6 = limbs 7-13 (this overwrites RP)
add L2, XP, #28
ldm L2, {T0,T1,T2,T3,T4,T5,T6}
mov H, #0
C Add limbs 11-13 to limbs 7-9, carry out collected in H
adds T0, T0, T4
adcs T1, T1, T5
adcs T2, T2, T6
adc H, H, #0
C This switch from adcs to sbcs takes carry into account with
C correct sign, but it always subtracts 1 too much. We arrange
C to also add B^7 + 1 below, so the effect is adding p. This
C addition of p also ensures that the result never is
C negative.
sbcs N3, T3, T0
sbcs T4, T4, T1
sbcs T5, T5, T2
sbcs T6, T6, H
mov H, #1 C This is the B^7
sbc H, #0
subs T6, T6, T3
sbc H, #0
C Now subtract from low half
C L0-L2 = limbs 0-2, XP advances to limb 3
ldm XP!, {L0,L1,L2}
C Clear carry, with the sbcs, this is the 1.
adds XP, #0
sbcs T0, L0, T0
sbcs T1, L1, T1
sbcs T2, L2, T2
C T3 = limb 3, L0-L2 = limbs 4-6
ldm XP!, {T3,L0,L1,L2}
sbcs T3, T3, N3
sbcs T4, L0, T4
sbcs T5, L1, T5
sbcs T6, L2, T6
rsc H, H, #0
C Now -2 <= H <= 0 is the borrow, so subtract (B^3 - 1) |H|
C Use (B^3 - 1) H = <H, H, H> if -1 <=H <= 0, and
C (B^3 - 1) H = <1,B-1, B-1, B-2> if H = -2
subs T0, T0, H
asr L1, H, #1
sbcs T1, T1, L1
eor H, H, L1
sbcs T2, T2, L1
sbcs T3, T3, H
sbcs T4, T4, #0
sbcs T5, T5, #0
sbcs T6, T6, #0
C H = 0 - (borrow out of T6), i.e. 0 or -1
sbcs H, H, H
pop {XP} C Original RP
C Final borrow, subtract (B^3 - 1) |H|
subs T0, T0, H
sbcs T1, T1, H
sbcs T2, T2, H
sbcs T3, T3, #0
sbcs T4, T4, #0
sbcs T5, T5, #0
sbcs T6, T6, #0
C Store the seven result limbs through the recovered RP
stm XP, {T0,T1,T2,T3,T4,T5,T6}
C Restore callee-saved registers and return by popping saved lr into pc
pop {r4,r5,r6,r7,r8,r10,r11,pc}
EPILOGUE(_nettle_ecc_secp224r1_modp)
C arm/ecc-secp256r1-redc.asm
ifelse(`
Copyright (C) 2013 Niels Möller
This file is part of GNU Nettle.
GNU Nettle is free software: you can redistribute it and/or
modify it under the terms of either:
* the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your
option) any later version.
or
* the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your
option) any later version.
or both in parallel, as here.
GNU Nettle is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
')
.file "ecc-secp256r1-redc.asm"
.arm
C Register roles. T1 shares r1 with RP, so RP is saved on the stack
C before the first load. F3 is lr, which is therefore saved too.
define(`RP', `r1') C Overlaps T1 below
define(`XP', `r2')
define(`T0', `r0') C Overlaps unused modulo argument
define(`T1', `r1')
define(`T2', `r3')
define(`T3', `r4')
define(`T4', `r5')
define(`T5', `r6')
define(`T6', `r7')
define(`T7', `r8')
define(`F0', `r10')
define(`F1', `r11')
define(`F2', `r12')
define(`F3', `lr')
C ecc_secp256r1_redc (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp)
C
C Reads sixteen 32-bit limbs sequentially from XP (r2) and stores eight
C limbs at the address passed in RP (r1). The modulo argument in r0 is
C ignored.
C NOTE(review): the prototype above lists xp because the code takes its
C input pointer from r2; confirm against the C declaration.
.text
.align 2
PROLOGUE(_nettle_ecc_secp256r1_redc)
C Pushes RP last
C r1 is the lowest-numbered register in the list, so it is stored at
C the lowest address and is the first word popped by pop {XP} below.
push {r1, r4,r5,r6,r7,r8,r10,r11,lr}
C T0-T7 = limbs 0-7 (this overwrites RP), XP advances to limb 8
ldm XP!, {T0,T1,T2,T3,T4,T5,T6,T7}
C Set <F3,F2,F1,F0> to the high 4 limbs of (B^2-B+1)<T2,T1,T0>
C T2 T1
C T2 T1 T0
C - T2 T1 T0
C -------------
C F3 F2 F1 F0
adds F1, T0, T2
adcs F2, T1, #0
adc F3, T2, #0
subs F0, T1, T0
sbcs F1, F1, T1 C Could also be rsc ?
sbcs F2, F2, T2
sbc F3, F3, #0
C Add:
C T10 T9 T8 T7 T6 T5 T4 T3
C + F3 F2 F1 F0 T0 T2 T1 T0
C --------------------------
C T7 T6 T5 T4 T3 T2 T1 T0
adds T3, T3, T0
adcs T1, T4, T1
adcs T2, T5, T2
adcs T6, T6, T0
mov T0, T3 C FIXME: Be more clever?
mov T3, T6
adcs T4, T7, F0
C T5-T7 = limbs 8-10. mov and ldm leave the carry flag untouched.
ldm XP!, {T5,T6,T7}
adcs T5, T5, F1
adcs T6, T6, F2
adcs T7, T7, F3
C New F3, F2, F1, F0, also adding in carry
adcs F1, T0, T2
adcs F2, T1, #0
adc F3, T2, #0
subs F0, T1, T0
sbcs F1, F1, T1 C Could also be rsc ?
sbcs F2, F2, T2
sbc F3, F3, #0
C Start adding
adds T3, T3, T0
adcs T1, T4, T1
adcs T2, T5, T2
adcs T6, T6, T0
mov T0, T3 C FIXME: Be more clever?
mov T3, T6
adcs T4, T7, F0
C T5-T7 = limbs 11-13
ldm XP!, {T5,T6,T7}
adcs T5, T5, F1
adcs T6, T6, F2
adcs T7, T7, F3
C Final iteration, eliminate only T0, T1
C Set <F2, F1, F0> to the high 3 limbs of (B^2-B+1)<T1,T0>
C T1 T0 T1
C - T1 T0
C -------------
C F2 F1 F0
C First add in carry
adcs F1, T0, #0
adcs F2, T1, #0
subs F0, T1, T0
sbcs F1, F1, T1
sbc F2, F2, #0
C Add:
C T9 T8 T7 T6 T5 T4 T3 T2
C + F2 F1 F0 T0 0 T1 T0 0
C --------------------------
C F2 F1 T7 T6 T5 T4 T3 T2
adds T3, T3, T0
adcs T4, T4, T1
adcs T5, T5, #0
adcs T6, T6, T0
adcs T7, T7, F0
C T0, T1 = limbs 14-15
ldm XP!, {T0, T1}
mov F3, #0
adcs F1, F1, T0
adcs F2, F2, T1
C Sum is < B^8 + p, so it's enough to fold carry once,
C If carry, add in
C B^7 - B^6 - B^3 + 1 = <0, B-2, B-1, B-1, B-1, 0, 0, 1>
C Mask from carry flag, leaving carry intact
C F3 becomes 0 when there is no carry and all ones when there is.
adc F3, F3, #0
rsb F3, F3, #0
pop {XP} C Original RP
C The low limb receives the +1 through the carry flag itself
adcs T0, T2, #0
adcs T1, T3, #0
adcs T2, T4, #0
adcs T3, T5, F3
adcs T4, T6, F3
adcs T5, T7, F3
C Clear bit 0 of the mask to get the B-2 limb
and F3, F3, #-2
adcs T6, F1, F3
adcs T7, F2, #0
C Store the eight result limbs through the recovered RP
stm XP, {T0,T1,T2,T3,T4,T5,T6,T7}
C Restore callee-saved registers and return by popping saved lr into pc
pop {r4,r5,r6,r7,r8,r10,r11,pc}
EPILOGUE(_nettle_ecc_secp256r1_redc)
C arm/ecc-secp384r1-modp.asm
ifelse(`
Copyright (C) 2013 Niels Möller
This file is part of GNU Nettle.
GNU Nettle is free software: you can redistribute it and/or
modify it under the terms of either:
* the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your
option) any later version.
or
* the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your
option) any later version.
or both in parallel, as here.
GNU Nettle is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
')
.file "ecc-secp384r1-modp.asm"
.arm
C Register roles. T0 reuses r0, so the incoming modulo argument is
C overwritten without being read. H is lr, which is saved in the
C prologue and restored into pc on return.
define(`RP', `r1')
define(`XP', `r2')
define(`T0', `r0')
define(`T1', `r3')
define(`T2', `r4')
define(`T3', `r5')
define(`F0', `r6')
define(`F1', `r7')
define(`F2', `r8')
define(`F3', `r10')
define(`F4', `r11')
define(`N', `r12')
define(`H', `lr')
C ecc_secp384r1_modp (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp)
C
C Reads the twenty-four 32-bit limbs at XP (r2) and stores twelve limbs
C at RP (r1). Intermediate results for limbs 0-11 are written back into
C the XP buffer, so the input area is modified.
C NOTE(review): the prototype above lists xp because the code takes its
C input pointer from r2; confirm against the C declaration.
.text
.align 2
PROLOGUE(_nettle_ecc_secp384r1_modp)
push {r4,r5,r6,r7,r8,r10,r11,lr}
add XP, XP, #80
ldm XP, {T0, T1, T2, T3} C 20-23
C First get top 4 limbs, which need folding twice, as
C
C T3 T2 T1 T0
C T3 T2 T1
C -T3
C ----------------
C F4 F3 F2 F1 F0
C
C Start with
C
C T3 T1 T0
C T1
C -T3
C -----------
C F2 F1 F0 Always fits
adds F0, T0, T1
adcs F1, T1, #0
adcs F2, T3, #0
subs F0, F0, T3
sbcs F1, F1, #0
sbcs F2, F2, #0
C T3 T2 T2 0
C F2 F1 F0
C ----------------
C F4 F3 F2 F1 F0
mov F4, #0
adds F1, F1, T2
adcs F2, F2, T2
adcs F3, T3, #0
adcs F4, F4, #0
C Add in to high part
sub XP, XP, #32
ldm XP, {T0, T1, T2, T3} C 12-15
mov H, #0
adds F0, T0, F0
adcs F1, T1, F1
adcs F2, T2, F2
adcs F3, T3, F3
adcs F4, F4, #0 C Do F4 later
C Add to low part, keeping carry (positive or negative) in H
sub XP, XP, #48
ldm XP, {T0, T1, T2, T3} C 0-3
mov H, #0
adds T0, T0, F0
adcs T1, T1, F1
adcs T2, T2, F2
adcs T3, T3, F3
adc H, H, #0
subs T1, T1, F0
sbcs T2, T2, F1
sbcs T3, T3, F2
sbc H, H, #0
adds T3, T3, F0
adc H, H, #0
stm XP!, {T0,T1,T2,T3} C 0-3
C The loop body runs twice: XP addresses limbs 4-7 on the first pass
C and limbs 8-11 on the second, so the [XP, #+48..60] loads below read
C limbs 16-19 and then 20-23.
mov N, #2
.Loop:
ldm XP, {T0,T1,T2,T3} C 4-7
C First, propagate carry
adds T0, T0, H
asr H, #31 C Sign extend
adcs T1, T1, H
adcs T2, T2, H
adcs T3, T3, H
adc H, H, #0
C +B^4 term
adds T0, T0, F0
adcs T1, T1, F1
adcs T2, T2, F2
adcs T3, T3, F3
adc H, H, #0
C +B^3 terms
ldr F0, [XP, #+48] C 16
adds T0, T0, F1
adcs T1, T1, F2
adcs T2, T2, F3
adcs T3, T3, F0
adc H, H, #0
C -B
ldr F1, [XP, #+52] C 17-18
ldr F2, [XP, #+56]
subs T0, T0, F3
sbcs T1, T1, F0
sbcs T2, T2, F1
sbcs T3, T3, F2
sbcs H, H, #0
C +1
ldr F3, [XP, #+60] C 19
adds T0, T0, F0
adcs T1, T1, F1
adcs T2, T2, F2
adcs T3, T3, F3
adc H, H, #0
C The loop condition comes from this subs; stm does not alter flags.
subs N, N, #1
stm XP!, {T0,T1,T2,T3}
bne .Loop
C Fold high limbs, we need to add in
C
C F4 F4 0 -F4 F4 H H 0 -H H
C
C We always have F4 >= 0, but we can have H < 0.
C Sign extension gets tricky when F4 = 0 and H < 0.
sub XP, XP, #48
ldm XP, {T0,T1,T2,T3} C 0-3
C H H 0 -H H
C ----------------
C S H F3 F2 F1 F0
C
C Define S = H >> 31 (asr), we then have
C
C F0 = H
C F1 = S - H
C F2 = - [H > 0]
C F3 = H - [H > 0]
C H = H + S
C
C And we get underflow in S - H iff H > 0
C H = 0 H > 0 H = -1
mov F0, H C 0 H -1
asr H, #31
subs F1, H, F0 C 0,C=1 -H,C=0 0,C=1
sbc F2, F2, F2 C 0 -1 0
sbc F3, F0, #0 C 0 H-1 -1
adds T0, T0, F0
adcs T1, T1, F1
adcs T2, T2, F2
adcs T3, T3, F3
adc H, H, F0 C 0+cy H+cy -2+cy
stm XP!, {T0,T1,T2,T3} C 0-3
ldm XP, {T0,T1,T2,T3} C 4-7
C F4 0 -F4
C ---------
C F3 F2 F1
rsbs F1, F4, #0
sbc F2, F2, F2
sbc F3, F4, #0
C Sign extend H
adds F0, F4, H
asr H, H, #31
adcs F1, F1, H
adcs F2, F2, H
adcs F3, F3, H
adcs F4, F4, H
adc H, H, #0
adds T0, T0, F0
adcs T1, T1, F1
adcs T2, T2, F2
adcs T3, T3, F3
stm XP!, {T0,T1,T2,T3} C 4-7
ldm XP, {T0,T1,T2,T3} C 8-11
adcs T0, T0, F4
adcs T1, T1, H
adcs T2, T2, H
adcs T3, T3, H
adc H, H, #0
stm XP, {T0,T1,T2,T3} C 8-11
C Final (unlikely) carry
C XP is rewound to limb 0. From here on the limbs are read from XP and
C the final values are written to RP.
sub XP, XP, #32
ldm XP!, {T0,T1,T2,T3} C 0-3
C Fold H into F0-F4
mov F0, H
asr H, #31
subs F1, H, F0
sbc F2, F2, F2
sbc F3, F0, #0
add F4, F0, H
adds T0, T0, F0
adcs T1, T1, F1
adcs T2, T2, F2
adcs T3, T3, F3
stm RP!, {T0,T1,T2,T3} C 0-3
ldm XP!, {T0,T1,T2,T3} C 4-7
adcs T0, T0, F4
adcs T1, T1, H
adcs T2, T2, H
adcs T3, T3, H
stm RP!, {T0,T1,T2,T3} C 4-7
ldm XP, {T0,T1,T2,T3} C 8-11
adcs T0, T0, H
adcs T1, T1, H
adcs T2, T2, H
adcs T3, T3, H
stm RP, {T0,T1,T2,T3} C 8-11
C Restore callee-saved registers and return by popping saved lr into pc
pop {r4,r5,r6,r7,r8,r10,r11,pc}
EPILOGUE(_nettle_ecc_secp384r1_modp)
C arm/ecc-secp521r1-modp.asm
ifelse(`
Copyright (C) 2013 Niels Möller
This file is part of GNU Nettle.
GNU Nettle is free software: you can redistribute it and/or
modify it under the terms of either:
* the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your
option) any later version.
or
* the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your
option) any later version.
or both in parallel, as here.
GNU Nettle is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
')
.file "ecc-secp521r1-modp.asm"
.arm
C Register roles. HP reuses r0, so the incoming modulo argument is
C overwritten without being read. N is lr, which is saved in the
C prologue and restored into pc on return.
define(`HP', `r0')
define(`RP', `r1')
define(`XP', `r2')
define(`T0', `r3')
define(`T1', `r4')
define(`T2', `r5')
define(`F0', `r6')
define(`F1', `r7')
define(`F2', `r8')
define(`F3', `r10')
define(`H', `r12')
define(`N', `lr')
C ecc_secp521r1_modp (const struct ecc_modulo *m, mp_limb_t *rp, mp_limb_t *xp)
C
C Reads the thirty-four 32-bit limbs at XP (r2) and stores seventeen
C limbs at RP (r1). Partial sums for limbs 0-15 are written back into
C the XP buffer, so the input area is modified.
C NOTE(review): the prototype above lists xp because the code takes its
C input pointer from r2; confirm against the C declaration.
.text
C Literal 511 = 2^9 - 1, loaded PC-relative below and used as the mask
C for the low 9 bits of the top limb.
.Lc511:
.int 511
.align 2
PROLOGUE(_nettle_ecc_secp521r1_modp)
push {r4,r5,r6,r7,r8,r10,lr}
C Use that B^17 = 2^23 (mod p)
ldr F3, [XP, #+68] C 17
add HP, XP, #72 C 18
ldr T0, [XP] C 0
C The carry out of this addition feeds the first adcs in the loop
adds T0, T0, F3, lsl #23
str T0, [XP], #+4
mov N, #5
C 5 iterations, reading limbs 18-20, 21-23, 24-26, 27-29, 30-32
C and adding to limbs 1-3, 4-6, 7-9, 10-12, 13-15
C Each high limb is split into its upper 23 bits (lsr 9), which join
C the low 9 bits (lsl 23) of the next high limb.
.Loop:
ldm XP, {T0,T1,T2} C 1+3*k -- 3+3*k
lsr F0, F3, #9
ldm HP!, {F1,F2,F3} C 18+3*k -- 20+3*k
orr F0, F0, F1, lsl #23
lsr F1, F1, #9
orr F1, F1, F2, lsl #23
lsr F2, F2, #9
orr F2, F2, F3, lsl #23
adcs T0, T0, F0
adcs T1, T1, F1
adcs T2, T2, F2
C The counter is updated with sub and tested with teq rather than
C subs, so the carry flag of the adcs chain survives into the next
C iteration (teq with an unrotated immediate leaves C unchanged).
sub N, N, #1
stm XP!,{T0,T1,T2}
teq N, #0
bne .Loop
C Load limb 16 and rewind XP to limb 0
ldr F0, [XP], #-64 C 16
ldr F1, [HP] C 33
ldr T0, .Lc511
C Handling of high limbs
C F0 = rp[16] + carry in + F3 >> 9
adcs F0, F0, F3, lsr #9
C Copy low 9 bits to H, then shift right including carry
and H, F0, T0
mov F0, F0, rrx
lsr F0, F0, #8
C Add in F1 = rp[33], with weight 2^1056 = 2^14
adds F0, F0, F1, lsl #14
lsr F1, F1, #18
adc F1, F1, #0
C Add F1:F0 to limbs 1:0, then ripple the carry through limbs 2-15
C and into H, which becomes result limb 16.
ldm XP!, {T0, T1} C 0-1
adds T0, T0, F0
adcs T1, T1, F1
stm RP!, {T0, T1}
ldm XP!, {T0,T1,T2,F0,F1,F2,F3} C 2-8
adcs T0, T0, #0
adcs T1, T1, #0
adcs T2, T2, #0
adcs F0, F0, #0
adcs F1, F1, #0
adcs F2, F2, #0
adcs F3, F3, #0
stm RP!, {T0,T1,T2,F0,F1,F2,F3} C 2-8
ldm XP, {T0,T1,T2,F0,F1,F2,F3} C 9-15
adcs T0, T0, #0
adcs T1, T1, #0
adcs T2, T2, #0
adcs F0, F0, #0
adcs F1, F1, #0
adcs F2, F2, #0
adcs F3, F3, #0
adcs H, H, #0
stm RP, {T0,T1,T2,F0,F1,F2,F3,H} C 9-16
C Restore callee-saved registers and return by popping saved lr into pc
pop {r4,r5,r6,r7,r8,r10,pc}
EPILOGUE(_nettle_ecc_secp521r1_modp)
C arm/fat/aes-decrypt-internal-2.asm
ifelse(`
Copyright (C) 2015 Niels Möller
This file is part of GNU Nettle.
GNU Nettle is free software: you can redistribute it and/or
modify it under the terms of either:
* the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your
option) any later version.
or
* the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your
option) any later version.
or both in parallel, as here.
GNU Nettle is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
')
C Define fat_transform to append _armv6 to its argument, then include
C the implementation from arm/v6.
C NOTE(review): fat_transform is presumably applied to the symbol names
C in the included file; that macro plumbing is not visible here.
define(`fat_transform', `$1_armv6')
include_src(`arm/v6/aes-decrypt-internal.asm')
C arm/fat/aes-decrypt-internal.asm
ifelse(`
Copyright (C) 2015 Niels Möller
This file is part of GNU Nettle.
GNU Nettle is free software: you can redistribute it and/or
modify it under the terms of either:
* the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your
option) any later version.
or
* the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your
option) any later version.
or both in parallel, as here.
GNU Nettle is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
')
C Define fat_transform to append _arm to its argument, then include
C the generic ARM implementation.
C NOTE(review): fat_transform is presumably applied to the symbol names
C in the included file; that macro plumbing is not visible here.
define(`fat_transform', `$1_arm')
include_src(`arm/aes-decrypt-internal.asm')
C arm/fat/aes-encrypt-internal-2.asm
ifelse(`
Copyright (C) 2015 Niels Möller
This file is part of GNU Nettle.
GNU Nettle is free software: you can redistribute it and/or
modify it under the terms of either:
* the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your
option) any later version.
or
* the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your
option) any later version.
or both in parallel, as here.
GNU Nettle is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
')
C Define fat_transform to append _armv6 to its argument, then include
C the implementation from arm/v6.
C NOTE(review): fat_transform is presumably applied to the symbol names
C in the included file; that macro plumbing is not visible here.
define(`fat_transform', `$1_armv6')
include_src(`arm/v6/aes-encrypt-internal.asm')
C arm/fat/aes-encrypt-internal.asm
ifelse(`
Copyright (C) 2015 Niels Möller
This file is part of GNU Nettle.
GNU Nettle is free software: you can redistribute it and/or
modify it under the terms of either:
* the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your
option) any later version.
or
* the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your
option) any later version.
or both in parallel, as here.
GNU Nettle is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
')
C Define fat_transform to append _arm to its argument, then include
C the generic ARM implementation.
C NOTE(review): fat_transform is presumably applied to the symbol names
C in the included file; that macro plumbing is not visible here.
define(`fat_transform', `$1_arm')
include_src(`arm/aes-encrypt-internal.asm')
C arm/fat/chacha-3core.asm
ifelse(`
Copyright (C) 2020 Niels Möller
This file is part of GNU Nettle.
GNU Nettle is free software: you can redistribute it and/or
modify it under the terms of either:
* the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your
option) any later version.
or
* the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your
option) any later version.
or both in parallel, as here.
GNU Nettle is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
')
C This wrapper defines no fat_transform; it only includes the NEON
C implementation. The dnl line below is discarded by m4 and, per its
C own text, exists for configure to pick up the symbol name.
dnl PROLOGUE(_nettle_fat_chacha_3core) picked up by configure
include_src(`arm/neon/chacha-3core.asm')
C arm/fat/salsa20-2core.asm
ifelse(`
Copyright (C) 2020 Niels Möller
This file is part of GNU Nettle.
GNU Nettle is free software: you can redistribute it and/or
modify it under the terms of either:
* the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your
option) any later version.
or
* the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your
option) any later version.
or both in parallel, as here.
GNU Nettle is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
')
C This wrapper defines no fat_transform; it only includes the NEON
C implementation. The dnl line below is discarded by m4 and, per its
C own text, exists for configure to pick up the symbol name.
dnl PROLOGUE(_nettle_fat_salsa20_2core) picked up by configure
include_src(`arm/neon/salsa20-2core.asm')
C arm/fat/sha1-compress-2.asm
ifelse(`
Copyright (C) 2015 Niels Möller
This file is part of GNU Nettle.
GNU Nettle is free software: you can redistribute it and/or
modify it under the terms of either:
* the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your
option) any later version.
or
* the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your
option) any later version.
or both in parallel, as here.
GNU Nettle is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
')
C The dnl line below is discarded by m4 and, per its own text, exists
C for configure to pick up the symbol name.
dnl PROLOGUE(nettle_sha1_compress) picked up by configure
C Define fat_transform to add a leading underscore and an _armv6
C suffix to its argument, then include the implementation from arm/v6.
C NOTE(review): fat_transform is presumably applied to the symbol names
C in the included file; that macro plumbing is not visible here.
define(`fat_transform', `_$1_armv6')
include_src(`arm/v6/sha1-compress.asm')
C arm/fat/sha256-compress-n-2.asm
ifelse(`
Copyright (C) 2015 Niels Möller
This file is part of GNU Nettle.
GNU Nettle is free software: you can redistribute it and/or
modify it under the terms of either:
* the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your
option) any later version.
or
* the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your
option) any later version.
or both in parallel, as here.
GNU Nettle is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
')
C The dnl line below is discarded by m4 and, per its own text, exists
C for configure to pick up the symbol name.
dnl PROLOGUE(_nettle_sha256_compress_n) picked up by configure
C Define fat_transform to append _armv6 to its argument, then include
C the implementation from arm/v6.
C NOTE(review): fat_transform is presumably applied to the symbol names
C in the included file; that macro plumbing is not visible here.
define(`fat_transform', `$1_armv6')
include_src(`arm/v6/sha256-compress-n.asm')
C arm/fat/sha3-permute-2.asm
ifelse(`
Copyright (C) 2015 Niels Möller
This file is part of GNU Nettle.
GNU Nettle is free software: you can redistribute it and/or
modify it under the terms of either:
* the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your
option) any later version.
or
* the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your
option) any later version.
or both in parallel, as here.
GNU Nettle is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
')
C The dnl line below is discarded by m4 and, per its own text, exists
C for configure to pick up the symbol name.
dnl PROLOGUE(_nettle_sha3_permute) picked up by configure
C Define fat_transform to add a leading underscore and a _neon suffix
C to its argument, then include the NEON implementation.
C NOTE(review): fat_transform is presumably applied to the symbol names
C in the included file; that macro plumbing is not visible here.
define(`fat_transform', `_$1_neon')
include_src(`arm/neon/sha3-permute.asm')
C arm/fat/sha512-compress-2.asm
ifelse(`
Copyright (C) 2015 Niels Möller
This file is part of GNU Nettle.
GNU Nettle is free software: you can redistribute it and/or
modify it under the terms of either:
* the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your
option) any later version.
or
* the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your
option) any later version.
or both in parallel, as here.
GNU Nettle is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
')
C The dnl line below is discarded by m4 and, per its own text, exists
C for configure to pick up the symbol name.
dnl PROLOGUE(_nettle_sha512_compress) picked up by configure
C Define fat_transform to append _neon to its argument, then include
C the NEON sha512 compression implementation.
C NOTE(review): fat_transform is presumably applied to the symbol names
C in the included file; that macro plumbing is not visible here.
define(`fat_transform', `$1_neon')
include_src(`arm/neon/sha512-compress.asm')
C arm/fat/umac-nh-2.asm
ifelse(`
Copyright (C) 2015 Niels Möller
This file is part of GNU Nettle.
GNU Nettle is free software: you can redistribute it and/or
modify it under the terms of either:
* the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your
option) any later version.
or
* the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your
option) any later version.
or both in parallel, as here.
GNU Nettle is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
')
C The dnl line below is discarded by m4 and, per its own text, exists
C for configure to pick up the symbol name.
dnl PROLOGUE(_nettle_umac_nh) picked up by configure
C Define fat_transform to append _neon to its argument, then include
C the NEON implementation.
C NOTE(review): fat_transform is presumably applied to the symbol names
C in the included file; that macro plumbing is not visible here.
define(`fat_transform', `$1_neon')
include_src(`arm/neon/umac-nh.asm')
C arm/fat/umac-nh-n-2.asm
ifelse(`
Copyright (C) 2015 Niels Möller
This file is part of GNU Nettle.
GNU Nettle is free software: you can redistribute it and/or
modify it under the terms of either:
* the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your
option) any later version.
or
* the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your
option) any later version.
or both in parallel, as here.
GNU Nettle is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
')
C The dnl line below is discarded by m4 and, per its own text, exists
C for configure to pick up the symbol name.
dnl PROLOGUE(_nettle_umac_nh_n) picked up by configure
C Define fat_transform to append _neon to its argument, then include
C the NEON implementation.
C NOTE(review): fat_transform is presumably applied to the symbol names
C in the included file; that macro plumbing is not visible here.
define(`fat_transform', `$1_neon')
include_src(`arm/neon/umac-nh-n.asm')
dnl QREG(dN): for an even-numbered d register name (d0, d2, ..., d30)
dnl expands to the q register name paired with it in the table below
dnl (d0 -> q0, d2 -> q1, ..., d30 -> q15). Any other argument,
dnl including odd-numbered d registers, expands to the text NO REGISTER.
define(`QREG', `ifelse(
$1, d0, q0,
$1, d2, q1,
$1, d4, q2,
$1, d6, q3,
$1, d8, q4,
$1, d10, q5,
$1, d12, q6,
$1, d14, q7,
$1, d16, q8,
$1, d18, q9,
$1, d20, q10,
$1, d22, q11,
$1, d24, q12,
$1, d26, q13,
$1, d28, q14,
$1, d30, q15,
`NO REGISTER')')dnl
dnl D0REG(qN): for a q register name (q0 ... q15) expands to the
dnl even-numbered d register name d(2N), per the table below.
dnl Any other argument expands to the text NO REGISTER.
define(`D0REG', `ifelse(
$1, q0, d0,
$1, q1, d2,
$1, q2, d4,
$1, q3, d6,
$1, q4, d8,
$1, q5, d10,
$1, q6, d12,
$1, q7, d14,
$1, q8, d16,
$1, q9, d18,
$1, q10, d20,
$1, q11, d22,
$1, q12, d24,
$1, q13, d26,
$1, q14, d28,
$1, q15, d30,
`NO REGISTER')')dnl
dnl D1REG(qN): for a q register name (q0 ... q15) expands to the
dnl odd-numbered d register name d(2N+1), per the table below.
dnl Any other argument expands to the text NO REGISTER.
define(`D1REG', `ifelse(
$1, q0, d1,
$1, q1, d3,
$1, q2, d5,
$1, q3, d7,
$1, q4, d9,
$1, q5, d11,
$1, q6, d13,
$1, q7, d15,
$1, q8, d17,
$1, q9, d19,
$1, q10, d21,
$1, q11, d23,
$1, q12, d25,
$1, q13, d27,
$1, q14, d29,
$1, q15, d31,
`NO REGISTER')')dnl
C arm/memxor.asm
ifelse(`
Copyright (C) 2013 Niels Möller
This file is part of GNU Nettle.
GNU Nettle is free software: you can redistribute it and/or
modify it under the terms of either:
* the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your
option) any later version.
or
* the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your
option) any later version.
or both in parallel, as here.
GNU Nettle is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
')
C Possible speedups:
C
C The ldm instruction can load two registers per cycle,
C if the address is two-word aligned. Or three registers in two
C cycles, regardless of alignment.
C Register usage:
C DST, SRC and N name the registers holding the three arguments of
C memxor(dst, src, n). All three are modified as the function progresses:
C DST and SRC are advanced past the bytes already handled and N counts down.
define(`DST', `r0')
define(`SRC', `r1')
define(`N', `r2')
C CNT and TNC are used under these names only in the different-alignment
C path, where CNT = 8 * (SRC & 3) and TNC = 32 - CNT are shift counts in
C bits. The other paths use r6 and r12 directly as ordinary scratch
C registers.
define(`CNT', `r6')
define(`TNC', `r12')
C little-endian and big-endian need to shift in different directions for
C alignment correction
C S0ADJ is the shift applied with count CNT, S1ADJ the one applied with
C count TNC. The choice is made once, when these defines are processed.
define(`S0ADJ', IF_LE(`lsr', `lsl'))
define(`S1ADJ', IF_LE(`lsl', `lsr'))
C Assembler setup: unified syntax, code placed in the text section and
C assembled as ARM (not Thumb) instructions.
.syntax unified
.file "memxor.asm"
.text
.arm
C memxor(void *dst, const void *src, size_t n)
C
C XORs the n bytes at src into the n bytes at dst, in place.
C
C In: DST (r0) = dst, SRC (r1) = src, N (r2) = n. n may be zero.
C Scratch: r3 and r12 are used freely. r4-r6 are pushed and popped around
C the different-alignment word loop, and r4-r8, r10, r11 and r14 around
C the same-alignment block loop.
C NOTE(review): DST (r0) is advanced as bytes are stored and is never
C restored, so r0 does not hold the original dst on return. Confirm that no
C caller relies on a return value.
C
C Strategy: fewer than 7 bytes are handled entirely by the byte loop.
C Otherwise DST is first brought to a 4-byte boundary one byte at a time.
C Then, depending on whether SRC is word aligned at that point, either the
C 12-bytes-per-iteration ldm/stm loop or the shift-and-combine word loop is
C used. Any tail is finished by the byte loop.
.align 4
PROLOGUE(nettle_memxor)
cmp N, #0
beq .Lmemxor_done
C Unsigned comparison: n >= 7 takes the word-oriented path.
cmp N, #7
bcs .Lmemxor_large
C Simple byte loop
C Entered with N >= 1. Also used by the other paths for their tail bytes.
.Lmemxor_bytes:
ldrb r3, [SRC], #+1
ldrb r12, [DST]
eor r3, r12
strb r3, [DST], #+1
subs N, #1
bne .Lmemxor_bytes
.Lmemxor_done:
bx lr
C XOR one byte at a time until DST is 4-byte aligned. Reached only from
C the tst/bne below, so it runs at most three times. N is decremented
C without being tested: at least 7 bytes were available on entry to
C .Lmemxor_large, so N cannot reach zero here.
.Lmemxor_align_loop:
ldrb r3, [SRC], #+1
ldrb r12, [DST]
eor r3, r12
strb r3, [DST], #+1
sub N, #1
.Lmemxor_large:
tst DST, #3
bne .Lmemxor_align_loop
C We have at least 4 bytes left to do here.
C From here on N is biased: N = bytes remaining - 4.
sub N, #4
C r3 = byte offset of SRC within its word. Zero means that SRC and DST
C are both word aligned.
ands r3, SRC, #3
beq .Lmemxor_same
C Different alignment case.
C     v original SRC
C +-------+------+
C |SRC    |SRC+4 |
C +---+---+------+
C     |DST    |
C     +-------+
C
C With little-endian, we need to do
C DST[i] ^= (SRC[i] >> CNT) ^ (SRC[i+1] << TNC)
C With big-endian, we need to do
C DST[i] ^= (SRC[i] << CNT) ^ (SRC[i+1] >> TNC)
push {r4,r5,r6}
C CNT = 8 * (SRC & 3); SRC is rounded down to a word boundary;
C TNC = 32 - CNT.
lsl CNT, r3, #3
bic SRC, #3
rsb TNC, CNT, #32
C r4 = first source word, of which only TNC bits belong to the input.
ldr r4, [SRC], #+4
C The loop below handles two words per iteration. If bit 2 of N is clear,
C enter in the middle at .Lmemxor_odd so that a single word is done first,
C with the word just loaded copied to r5 where that code expects it.
C Otherwise take 4 more off N and start with a full two-word iteration.
tst N, #4
itet eq
moveq r5, r4
subne N, #4
beq .Lmemxor_odd
C At .Lmemxor_word_loop the most recently loaded source word is in r4, at
C .Lmemxor_odd it is in r5. The two registers swap roles between the two
C halves of the loop, so no register move is needed.
.Lmemxor_word_loop:
ldr r5, [SRC], #+4
ldr r3, [DST]
eor r3, r3, r4, S0ADJ CNT
eor r3, r3, r5, S1ADJ TNC
str r3, [DST], #+4
.Lmemxor_odd:
ldr r4, [SRC], #+4
ldr r3, [DST]
eor r3, r3, r5, S0ADJ CNT
eor r3, r3, r4, S1ADJ TNC
str r3, [DST], #+4
C Here N = bytes still to do after the word just stored. Loop while at
C least 8 remain. The adds undoes the last, borrowing, subtraction, leaving
C N = number of tail bytes (0 to 3).
subs N, #8
bcs .Lmemxor_word_loop
adds N, #8
beq .Lmemxor_odd_done
C We have TNC/8 left-over bytes in r4, high end on LE and low end on
C BE, excess bits to be discarded by alignment adjustment at the other end
S0ADJ r4, CNT
C now byte-aligned at low end on LE and high end on BE
C XOR against a whole word loaded from DST although only 1 to 3 bytes
C remain. Only individual bytes are written back below.
ldr r3, [DST]
eor r3, r4
C TNC is r12, which is not among the popped registers, so it stays valid
C for the loop below.
pop {r4,r5,r6}
C Store bytes, one by one.
C At most TNC/8 bytes can be taken from r3. If N reaches zero first we are
C done. If r3 runs out first (TNC counted down to zero), the rest is done by
C the byte loop, reading from SRC, which already points past the word that
C was held in r4.
.Lmemxor_leftover:
C bring uppermost byte down for saving while preserving lower ones
IF_BE(` ror r3, #24')
strb r3, [DST], #+1
subs N, #1
beq .Lmemxor_done
subs TNC, #8
C bring down next byte, no need to preserve
C (the shift has no s suffix, so the flags from subs TNC are still intact
C for the bne)
IF_LE(` lsr r3, #8')
bne .Lmemxor_leftover
b .Lmemxor_bytes
.Lmemxor_odd_done:
pop {r4,r5,r6}
bx lr
C Same alignment case: SRC and DST are both word aligned.
.Lmemxor_same:
push {r4,r5,r6,r7,r8,r10,r11,r14} C lr is the link register
C N becomes bytes remaining - 12. With fewer than 12 bytes, skip the
C block loop entirely.
subs N, #8
bcc .Lmemxor_same_end
C Loop start-up: load the first 12 source bytes and the first 12
C destination bytes and XOR them into r10-r12. Each following subs/bcc
C pair checks whether another full 12-byte block exists before committing
C to the next stage.
ldmia SRC!, {r3, r4, r5}
C Keep address for loads in r14
mov r14, DST
ldmia r14!, {r6, r7, r8}
subs N, #12
eor r10, r3, r6
eor r11, r4, r7
eor r12, r5, r8
bcc .Lmemxor_same_final_store
subs N, #12
ldmia r14!, {r6, r7, r8}
bcc .Lmemxor_same_wind_down
C 6 cycles per iteration, 0.50 cycles/byte. For this speed,
C loop starts at offset 0x11c in the object file.
.Lmemxor_same_loop:
C r10-r12 contains values to be stored at DST
C r6-r8 contains values read from r14, in advance
ldmia SRC!, {r3, r4, r5}
subs N, #12
stmia DST!, {r10, r11, r12}
eor r10, r3, r6
eor r11, r4, r7
eor r12, r5, r8
ldmia r14!, {r6, r7, r8}
bcs .Lmemxor_same_loop
.Lmemxor_same_wind_down:
C Wind down code
C r6-r8 already hold the last destination block that was loaded ahead.
C Combine it with the matching source block and flush both pending results.
ldmia SRC!, {r3, r4, r5}
stmia DST!, {r10, r11, r12}
eor r10, r3, r6
eor r11, r4, r7
eor r12, r5, r8
.Lmemxor_same_final_store:
stmia DST!, {r10, r11, r12}
.Lmemxor_same_end:
C We have 0-11 bytes left to do, and N holds number of bytes -12.
C After the adds, N = bytes left - 8; carry clear means fewer than 8.
adds N, #4
bcc .Lmemxor_same_lt_8
C Do 8 bytes more, leftover is in N
ldmia SRC!, {r3, r4}
ldmia DST, {r6, r7}
eor r3, r6
eor r4, r7
stmia DST!, {r3, r4}
pop {r4,r5,r6,r7,r8,r10,r11,r14}
C None of the instructions since the adds has an s suffix, so the Z flag
C still tells whether the leftover count N is zero.
beq .Lmemxor_done
b .Lmemxor_bytes
.Lmemxor_same_lt_8:
pop {r4,r5,r6,r7,r8,r10,r11,r14}
C N = bytes left - 4 after the adds; carry clear means fewer than 4.
adds N, #4
bcc .Lmemxor_same_lt_4
C One more whole word. The flags from the adds above are still intact for
C the beq.
ldr r3, [SRC], #+4
ldr r12, [DST]
eor r3, r12
str r3, [DST], #+4
beq .Lmemxor_done
b .Lmemxor_bytes
.Lmemxor_same_lt_4:
C Remove the remaining bias: N = bytes left (0 to 3).
adds N, #4
beq .Lmemxor_done
b .Lmemxor_bytes
EPILOGUE(nettle_memxor)