Commit 344c6d28 authored by Niels Möller

ARM assembly for ecc_256_redc.

parent 42e40f78
2013-03-01 Niels Möller <nisse@lysator.liu.se>
* ecc-256.c: Check HAVE_NATIVE_ecc_256_redc, and use native
version if available.
* armv7/ecc-256-redc.asm: New file, 4 times speedup over C version.
* testsuite/ecc-redc-test.c: Increased test count.
* ecc-224.c: Check HAVE_NATIVE_ecc_224_modp, and use native
version if available.
* armv7/ecc-224-modp.asm: New file, 4.5 times speedup over C
version.
......
C nettle, low-level cryptographics library
C
C Copyright (C) 2013, Niels Möller
C
C The nettle library is free software; you can redistribute it and/or modify
C it under the terms of the GNU Lesser General Public License as published by
C the Free Software Foundation; either version 2.1 of the License, or (at your
C option) any later version.
C
C The nettle library is distributed in the hope that it will be useful, but
C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
C License for more details.
C
C You should have received a copy of the GNU Lesser General Public License
C along with the nettle library; see the file COPYING.LIB. If not, write to
C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
C MA 02111-1301, USA.
.file "ecc-256-redc.asm"
.arm
C Register allocation (m4 aliases for ARM core registers).
C
C RP      pointer to the limb array; arrives in r1 as the second argument.
C T0..T7  eight-limb working window.  T0 reuses r0, the incoming ecc
C         pointer, which this file never reads.
C F0..F3  limbs of the multiple that is folded into the window.
C
C r9 is not assigned to any alias.  F3 is lr, so the return address must
C be saved before F3 is written; the function's push/pop list covers
C r4-r8, r10, r11 and lr.  r0-r3 and r12 are used without being saved.
define(<RP>, <r1>)
define(<T0>, <r0>) C Overlaps unused ecc argument
define(<T1>, <r2>)
define(<T2>, <r3>)
define(<T3>, <r4>)
define(<T4>, <r5>)
define(<T5>, <r6>)
define(<T6>, <r7>)
define(<T7>, <r8>)
define(<F0>, <r10>)
define(<F1>, <r11>)
define(<F2>, <r12>)
define(<F3>, <lr>)
C ecc_256_redc (const struct ecc_curve *ecc, mp_limb_t *rp)
.text
.align 2
C void nettle_ecc_256_redc (const struct ecc_curve *ecc, mp_limb_t *rp)
C
C In:   r0 = ecc (never read; the register is reused as T0)
C       r1 = rp, pointer to 16 limbs of 32 bits each
C Out:  rp[0..7] overwritten with the 8-limb result; rp[8..15] are
C       read but not written.
C
C B below denotes the limb base (one limb per 32-bit register).
C The routine runs three folding steps.  The first two each eliminate
C the three lowest limbs of the window, the last one eliminates two,
C for 3 + 3 + 2 = 8 limbs in total.  A final carry out of the top limb
C is folded back in once, using a mask derived from the carry flag.
C
C The whole body is one long carry chain: instructions without the
C "s" suffix (mov, ldm, adc, sbc, rsb, and) are placed between
C flag-setting ones precisely because they leave the carry flag alone.
C Do not reorder instructions without re-checking the flag flow.
PROLOGUE(nettle_ecc_256_redc)
C Save the registers used as T3..T7, F0, F1 and F3 (lr).
push {r4,r5,r6,r7,r8,r10,r11,lr}
C Load limbs 0..7; RP advances by 32 bytes.
ldm RP!, {T0,T1,T2,T3,T4,T5,T6,T7}
C Set <F3,F2,F1,F0> to the high 4 limbs of (B^2-B+1)<T2,T1,T0>
C         T2 T1
C   T2 T1 T0
C   -  T2 T1 T0
C -------------
C   F3 F2 F1 F0
C Additions first: F1 = T0 + T2, with the carry rippling into F2, F3.
C F0 would start as T1, which is folded into the subs below.
adds F1, T0, T2
adcs F2, T1, #0
adc F3, T2, #0
C Then subtract <T2,T1,T0>, aligned at F2,F1,F0.
subs F0, T1, T0
sbcs F1, F1, T1 C Could also be rsc ?
sbcs F2, F2, T2
sbc F3, F3, #0
C Add:
C   T10 T9 T8 T7 T6 T5 T4 T3
C +  F3 F2 F1 F0 T0 T2 T1 T0
C --------------------------
C    T7 T6 T5 T4 T3 T2 T1 T0
C The sums are written three registers down, so the window is
C renamed to start at T0 again for the next step.
adds T3, T3, T0
adcs T1, T4, T1
adcs T2, T5, T2
adcs T6, T6, T0
C The two sums left in T3 and T6 are moved to their final places,
C T0 and T3.  mov does not touch the carry flag.
mov T0, T3 C FIXME: Be more clever?
mov T3, T6
adcs T4, T7, F0
C Load limbs 8..10 (RP advances by 12 bytes); ldm leaves the carry
C intact, so the chain continues.
ldm RP!, {T5,T6,T7}
adcs T5, T5, F1
adcs T6, T6, F2
adcs T7, T7, F3
C New F3, F2, F1, F0, also adding in carry
C Same computation as the first step, except that the first
C instruction is adcs instead of adds, so the carry out of the
C previous addition enters at F1.
adcs F1, T0, T2
adcs F2, T1, #0
adc F3, T2, #0
subs F0, T1, T0
sbcs F1, F1, T1 C Could also be rsc ?
sbcs F2, F2, T2
sbc F3, F3, #0
C Start adding
C Same addition and renaming pattern as in the first step.
adds T3, T3, T0
adcs T1, T4, T1
adcs T2, T5, T2
adcs T6, T6, T0
mov T0, T3 C FIXME: Be more clever?
mov T3, T6
adcs T4, T7, F0
C Load limbs 11..13 (RP advances by 12 bytes).
ldm RP!, {T5,T6,T7}
adcs T5, T5, F1
adcs T6, T6, F2
adcs T7, T7, F3
C Final iteration, eliminate only T0, T1
C Set <F2, F1, F0> to the high 3 limbs of (B^2-B+1)<T1,T0>
C      T1 T0 T1
C      -  T1 T0
C -------------
C      F2 F1 F0
C First add in carry
C The carry out of the previous addition enters at F1.
adcs F1, T0, #0
adcs F2, T1, #0
subs F0, T1, T0
sbcs F1, F1, T1
sbc F2, F2, #0
C Add:
C    T9 T8 T7 T6 T5 T4 T3 T2
C +  F2 F1 F0 T0  0 T1 T0  0
C --------------------------
C    F2 F1 T7 T6 T5 T4 T3 T2
C T2 gets a zero addend, so the chain starts at T3.
adds T3, T3, T0
adcs T4, T4, T1
adcs T5, T5, #0
adcs T6, T6, T0
adcs T7, T7, F0
C Load the last two limbs, 14 and 15, into the now free T0, T1.
C RP ends up 64 bytes past its initial value.
ldm RP!, {T0, T1}
C Clear F3 for the mask computation below; mov keeps the carry.
mov F3, #0
adcs F1, F1, T0
adcs F2, F2, T1
C Sum is < B^8 + p, so it's enough to fold carry once,
C If carry, add in
C B^7 - B^6 - B^3 + 1 = <0, B-2, B-1, B-1, B-1, 0, 0, 1>
C Mask from carry flag, leaving carry intact
C F3 = carry, then F3 = -carry: all zeros or all ones.  Neither
C instruction sets flags.
adc F3, F3, #0
rsb F3, F3, #0
C The +1 of the constant's lowest limb is supplied by the carry flag
C itself; the next two limbs add zero.  Results are written to
C T0..T7, which is the order in which they are stored.
adcs T0, T2, #0
adcs T1, T3, #0
adcs T2, T4, #0
C Limbs 3..5 add the mask, i.e. B-1 when there was a carry.
adcs T3, T5, F3
adcs T4, T6, F3
adcs T5, T7, F3
C Clear the low bit of the mask, giving B-2 or 0 for limb 6.
C and without the s suffix leaves the carry intact.
and F3, F3, #-2
adcs T6, F1, F3
adcs T7, F2, #0
C Rewind RP by the 64 bytes consumed by the four ldm instructions
C and store the 8 result limbs at the start of the array.
sub RP, RP, #64
stm RP, {T0,T1,T2,T3,T4,T5,T6,T7}
C Restore the saved registers and return by popping the saved lr
C value directly into pc.
pop {r4,r5,r6,r7,r8,r10,r11,pc}
EPILOGUE(nettle_ecc_256_redc)
......@@ -32,10 +32,18 @@
#include "ecc-internal.h"
#define USE_REDC (ECC_REDC_SIZE != 0)
#define USE_REDC (HAVE_NATIVE_ecc_256_redc || ECC_REDC_SIZE != 0)
#include "ecc-256.h"
/* Select what the name ecc_256_redc refers to in this file:
   nettle_ecc_256_redc when HAVE_NATIVE_ecc_256_redc is set,
   ecc_generic_redc otherwise. */
#if HAVE_NATIVE_ecc_256_redc
# define ecc_256_redc nettle_ecc_256_redc
/* Because of the #define above, this prototype declares
   nettle_ecc_256_redc. */
void
ecc_256_redc (const struct ecc_curve *ecc, mp_limb_t *rp);
#else /* !HAVE_NATIVE_ecc_256_redc */
# define ecc_256_redc ecc_generic_redc
#endif
#if ECC_BMODP_SIZE < ECC_LIMB_SIZE
#define ecc_256_modp ecc_generic_modp
#define ecc_256_modq ecc_generic_modq
......@@ -213,8 +221,8 @@ const struct ecc_curve nettle_secp_256r1 =
ecc_g,
ecc_redc_g,
ecc_256_modp,
ecc_generic_redc,
USE_REDC ? ecc_generic_redc : ecc_generic_modp,
ecc_256_redc,
USE_REDC ? ecc_256_redc : ecc_256_modp,
ecc_256_modq,
ecc_Bmodp,
ecc_Bmodp_shifted,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment