ARM assembly for ecc_224_modp.

2013-03-01 Niels Möller <>
* ecc-224.c: Check HAVE_NATIVE_ecc_224_modp, and use native
version if available.
* armv7/ecc-224-modp.asm: New file, 4.5 times speedup over the C version.
* (asm_optional_list): Added ecc-224-modp.asm.
(OPT_ASM_SOURCES): Fixed assignment.
C nettle, low-level cryptographics library
C Copyright (C) 2013, Niels Möller
C The nettle library is free software; you can redistribute it and/or modify
C it under the terms of the GNU Lesser General Public License as published by
C the Free Software Foundation; either version 2.1 of the License, or (at your
C option) any later version.
C The nettle library is distributed in the hope that it will be useful, but
C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
C License for more details.
C You should have received a copy of the GNU Lesser General Public License
C along with the nettle library; see the file COPYING.LIB. If not, write to
C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
C MA 02111-1301, USA.
.file "ecc-224-modp.asm"
C Register allocation. Each name below is an m4 macro expanding to an
C ARM core register. r9 is not used by this file.
C
C RP is the limb pointer (the rp parameter, second in the signature).
C The function body advances it with post-incrementing loads, so its
C value changes during the routine.
define(<RP>, <r1>)
C H accumulates the carry out of the additions and the borrow out of
C the subtractions, i.e. the signed top word of the intermediate value.
define(<H>, <r0>) C Overlaps unused ecc argument
C T0..T6 are the seven working limbs. They first hold the high half of
C the input (limbs 7..13) and end up holding the seven result limbs.
define(<T0>, <r2>)
define(<T1>, <r3>)
define(<T2>, <r4>)
define(<T3>, <r5>)
define(<T4>, <r6>)
define(<T5>, <r7>)
define(<T6>, <r8>)
C N3 temporarily holds the limb-3 term, because T3 itself is still
C needed later as an operand.
define(<N3>, <r10>)
C L0..L2 are scratch registers for low-half limbs loaded from memory.
C L2 is lr, which the function body saves on the stack on entry; it is
C also used once as a temporary address register.
define(<L0>, <r11>)
define(<L1>, <r12>)
define(<L2>, <lr>)
C ecc_224_modp (const struct ecc_curve *ecc, mp_limb_t *rp)
C
C Reads 14 32-bit limbs at rp (low half rp[0..6], high half rp[7..13]),
C folds the high half into the low half, and stores a 7-limb result
C back to rp[0..6]. The ecc argument (r0) is never read; its register is
C reused as H. Presumably p = B^7 - B^3 + 1 with B = 2^32, which is
C consistent with the B^7 + 1 and B^3 - 1 terms in the comments below;
C confirm against the curve definition.
C
C The whole routine is a single carry/borrow chain. Instructions without
C the "s" suffix (mov, adc, sbc, ldm, asr, eor) leave the flags alone and
C are interleaved on purpose, so instruction order must not be changed.
C Recall that ARM sbcs computes Rn - Op2 - 1 + C, i.e. C = 1 means "no
C borrow".
C
C NOTE(review): no entry label or PROLOGUE/EPILOGUE is visible in this
C chunk, while the C side expects the symbol nettle_ecc_224_modp;
C confirm the symbol is defined in the real file.
.align 2
C Save the callee-saved registers used below, plus lr (used as L2).
push {r4,r5,r6,r7,r8,r10,r11,lr}
C L2 = address of limb 7; load the high half (limbs 7..13) into T0..T6.
add L2, RP, #28
ldm L2, {T0,T1,T2,T3,T4,T5,T6}
mov H, #0
C T2:T1:T0 += T6:T5:T4 (high limbs 0..2 plus high limbs 4..6).
adds T0, T0, T4
adcs T1, T1, T5
adcs T2, T2, T6
C H = carry out of the addition. adc has no "s", so the carry flag from
C the previous adcs is still live for the sbcs that follows.
adc H, H, #0
C This switch from adcs to sbcs takes carry into account with
C correct sign, but it always subtracts 1 too much. We arrange
C to also add B^7 + 1 below, so the effect is adding p. This
C addition of p also ensures that the result never is
C negative.
C N3 gets the limb-3 term; T3 is kept intact for the subs further down.
sbcs N3, T3, T0
sbcs T4, T4, T1
sbcs T5, T5, T2
sbcs T6, T6, H
C mov does not touch the flags; the sbc consumes the borrow of the chain.
mov H, #1 C This is the B^7
sbc H, #0
C Subtract the original high limb 3 from the top limb, borrow into H.
subs T6, T6, T3
sbc H, #0
C Now subtract from low half
C Load limbs 0..2; RP is advanced by 12 bytes. ldm leaves flags alone.
ldm RP!, {L0,L1,L2}
C Clear carry, with the sbcs, this is the 1.
C (Adding zero can never produce a carry, so C = 0 afterwards and RP is
C unchanged.)
adds RP, #0
sbcs T0, L0, T0
sbcs T1, L1, T1
sbcs T2, L2, T2
C Load limbs 3..6; RP is advanced by 16 more bytes and now points at
C limb 7 (original rp + 28). The borrow chain continues across the load.
ldm RP!, {T3,L0,L1,L2}
sbcs T3, T3, N3
sbcs T4, L0, T4
sbcs T5, L1, T5
sbcs T6, L2, T6
C Reverse subtract with carry: H = 0 - H - borrow.
rsc H, H, #0
C Now -2 <= H <= 0 is the borrow, so subtract (B^3 - 1) |H|
C Use (B^3 - 1) H = <H, H, H> if -1 <=H <= 0, and
C (B^3 - 1) H = <1,B-1, B-1, B-2> if H = -2
C Limb 0 uses H itself (0, B-1 or B-2 as an unsigned word).
subs T0, T0, H
C L1 = H >> 1 (arithmetic): 0 for H = 0, all ones for H = -1 or -2.
C asr and eor carry no "s" suffix, so the borrow chain is not disturbed.
asr L1, H, #1
sbcs T1, T1, L1
C H ^= L1: becomes 1 only when H was -2, otherwise 0. This is the limb-3
C word of the value being subtracted.
eor H, H, L1
sbcs T2, T2, L1
sbcs T3, T3, H
C Propagate the borrow through the remaining limbs.
sbcs T4, T4, #0
sbcs T5, T5, #0
sbcs T6, T6, #0
C H = H - H - borrow, i.e. 0 if no borrow remains and -1 otherwise.
sbcs H, H, H
C Final borrow, subtract (B^3 - 1) |H|
subs T0, T0, H
sbcs T1, T1, H
sbcs T2, T2, H
sbcs T3, T3, #0
sbcs T4, T4, #0
sbcs T5, T5, #0
sbcs T6, T6, #0
C RP points at limb 7, so decrement-before stores the seven result
C limbs to limbs 0..6 of the caller's buffer.
stmdb RP, {T0,T1,T2,T3,T4,T5,T6}
C Restore saved registers; loading the saved lr into pc returns.
pop {r4,r5,r6,r7,r8,r10,r11,pc}
...@@ -30,7 +30,17 @@ ...@@ -30,7 +30,17 @@
#include "ecc-internal.h" #include "ecc-internal.h"
#if HAVE_NATIVE_ecc_224_modp
#define USE_REDC 0
#define ecc_224_modp nettle_ecc_224_modp
ecc_224_modp (const struct ecc_curve *ecc, mp_limb_t *rp);
#define USE_REDC (ECC_REDC_SIZE != 0) #define USE_REDC (ECC_REDC_SIZE != 0)
#define ecc_224_modp ecc_generic_modp
#include "ecc-224.h" #include "ecc-224.h"
...@@ -49,9 +59,9 @@ const struct ecc_curve nettle_secp_224r1 = ...@@ -49,9 +59,9 @@ const struct ecc_curve nettle_secp_224r1 =
ecc_q, ecc_q,
ecc_g, ecc_g,
ecc_redc_g, ecc_redc_g,
ecc_generic_modp, ecc_224_modp,
ecc_generic_redc, ecc_generic_redc,
USE_REDC ? ecc_generic_redc : ecc_generic_modp, USE_REDC ? ecc_generic_redc : ecc_224_modp,
ecc_generic_modq, ecc_generic_modq,
ecc_Bmodp, ecc_Bmodp,
ecc_Bmodp_shifted, ecc_Bmodp_shifted,
