From a6451ce50a68803e8dbd365ebbc755819859faa7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se>
Date: Fri, 1 Mar 2013 10:17:27 +0100
Subject: [PATCH] ARM assembly for ecc_224_modp.

---
 ChangeLog              |   6 +++
 armv7/ecc-224-modp.asm | 111 +++++++++++++++++++++++++++++++++++++++++
 ecc-224.c              |  14 +++++-
 3 files changed, 129 insertions(+), 2 deletions(-)
 create mode 100644 armv7/ecc-224-modp.asm

diff --git a/ChangeLog b/ChangeLog
index 65c866a1..a3382b58 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,11 @@
 2013-03-01  Niels Möller  <nisse@lysator.liu.se>
 
+	* ecc-224.c: Check HAVE_NATIVE_ecc_224_modp, and use native
+	version if available.
+
+	* armv7/ecc-224-modp.asm: New file, 4.5 times speedup over C
+	version.
+
 	* configure.ac (asm_optional_list): Added ecc-224-modp.asm.
 	(OPT_ASM_SOURCES): Fixed assignment.
 
diff --git a/armv7/ecc-224-modp.asm b/armv7/ecc-224-modp.asm
new file mode 100644
index 00000000..ef7a703a
--- /dev/null
+++ b/armv7/ecc-224-modp.asm
@@ -0,0 +1,111 @@
+C nettle, low-level cryptographics library
+C
+C Copyright (C) 2013, Niels Möller
+C
+C The nettle library is free software; you can redistribute it and/or modify
+C it under the terms of the GNU Lesser General Public License as published by
+C the Free Software Foundation; either version 2.1 of the License, or (at your
+C option) any later version.
+C
+C The nettle library is distributed in the hope that it will be useful, but
+C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+C or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+C License for more details.
+C
+C You should have received a copy of the GNU Lesser General Public License
+C along with the nettle library; see the file COPYING.LIB.  If not, write to
+C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+C MA 02111-1301, USA.
+
+	.file "ecc-224-modp.asm"
+	.arm
+
+define(<RP>, <r1>) C Second argument, rp; advanced by the ldm RP! loads
+define(<H>, <r0>) C Overlaps unused ecc argument
+
+define(<T0>, <r2>) C T0-T6 first receive the seven words at RP + 28,
+define(<T1>, <r3>) C and end up holding the seven result words
+define(<T2>, <r4>)
+define(<T3>, <r5>)
+define(<T4>, <r6>)
+define(<T5>, <r7>)
+define(<T6>, <r8>)
+define(<N3>, <r10>) C Receives T3 - T0, since T3 itself is needed once more
+define(<L0>, <r11>) C L0-L2 receive words loaded from the low half
+define(<L1>, <r12>) C Also reused as a mask in the borrow fix-up
+define(<L2>, <lr>) C lr is pushed on entry; also used as a temporary address
+
+	C ecc_224_modp (const struct ecc_curve *ecc, mp_limb_t *rp)
+	.text
+	.align 2
+
+PROLOGUE(nettle_ecc_224_modp)
+	push	{r4,r5,r6,r7,r8,r10,r11,lr}	C Restored by the final pop (lr is popped into pc)
+	C Folds words 7..13 at RP into words 0..6, B = 2^32 (presumably mod p = B^7 - B^3 + 1)
+	add	L2, RP, #28		C L2 = address of word 7 (temporary use of lr)
+	ldm	L2, {T0,T1,T2,T3,T4,T5,T6}	C High half: T0..T6 = words 7..13
+	mov	H, #0
+
+	adds	T0, T0, T4		C <H,T2,T1,T0> = <T2,T1,T0> + <T6,T5,T4>
+	adcs	T1, T1, T5
+	adcs	T2, T2, T6
+	adc	H, H, #0		C No s suffix: the carry flag stays live for the sbcs below
+
+	C This switch from adcs to sbcs takes carry into account with
+	C correct sign, but it always subtracts 1 too much. We arrange
+	C to also add B^7 + 1 below, so the effect is adding p. This
+	C addition of p also ensures that the result never is
+	C negative.
+
+	sbcs	N3, T3, T0		C <T6,T5,T4,N3> = <T6,T5,T4,T3> - <H,T2,T1,T0> - borrow-in
+	sbcs	T4, T4, T1
+	sbcs	T5, T5, T2
+	sbcs	T6, T6, H
+	mov	H, #1		C This is the B^7
+	sbc	H, #0			C H -= borrow out of the chain above
+	subs	T6, T6, T3		C T3 still holds word 10 here
+	sbc	H, #0			C H -= borrow out of that subtraction
+
+	C Now subtract from low half
+	ldm	RP!, {L0,L1,L2}		C L0..L2 = words 0..2; RP advances by 12 bytes
+
+	C Clear carry, with the sbcs, this is the 1.
+	adds	RP, #0			C RP + 0 cannot carry, so only the flags change
+
+	sbcs	T0, L0, T0
+	sbcs	T1, L1, T1
+	sbcs	T2, L2, T2
+	ldm	RP!, {T3,L0,L1,L2}	C T3,L0,L1,L2 = words 3..6; RP is now entry value + 28
+	sbcs	T3, T3, N3
+	sbcs	T4, L0, T4
+	sbcs	T5, L1, T5
+	sbcs	T6, L2, T6
+	rsc	H, H, #0		C H = 0 - H - borrow
+
+	C Now -2 <= H <= 0 is the borrow, so subtract (B^3 - 1) |H|.
+	C Use (B^3 - 1) |H| = <H, H, H> (as unsigned limbs) if -1 <= H <= 0, and
+	C     (B^3 - 1) |H| = <1, B-1, B-1, B-2> if H = -2
+	subs	T0, T0, H
+	asr	L1, H, #1		C L1 = 0 if H = 0, else -1; no s suffix, flags kept
+	sbcs	T1, T1, L1
+	eor	H, H, L1		C H = 1 if H was -2, else 0; flags kept
+	sbcs	T2, T2, L1
+	sbcs	T3, T3, H
+	sbcs	T4, T4, #0
+	sbcs	T5, T5, #0
+	sbcs	T6, T6, #0
+	sbcs	H, H, H			C H = 0 - borrow, i.e. 0 or -1
+
+	C Final borrow, subtract (B^3 - 1) |H|
+	subs	T0, T0, H
+	sbcs	T1, T1, H
+	sbcs	T2, T2, H
+	sbcs	T3, T3, #0
+	sbcs	T4, T4, #0
+	sbcs	T5, T5, #0
+	sbcs	T6, T6, #0
+
+	stmdb	RP, {T0,T1,T2,T3,T4,T5,T6}	C Stores to the 28 bytes below RP, i.e. words 0..6
+
+	pop	{r4,r5,r6,r7,r8,r10,r11,pc}	C Restore saved registers; loading pc returns
+EPILOGUE(nettle_ecc_224_modp)
diff --git a/ecc-224.c b/ecc-224.c
index 07e30893..cd3d05d6 100644
--- a/ecc-224.c
+++ b/ecc-224.c
@@ -30,7 +30,17 @@
 
 #include "ecc-internal.h"
 
+#if HAVE_NATIVE_ecc_224_modp
+
+#define USE_REDC 0 /* With a native modp, the reduce slot below never picks REDC */
+#define ecc_224_modp nettle_ecc_224_modp /* Name used by the assembly PROLOGUE */
+void
+ecc_224_modp (const struct ecc_curve *ecc, mp_limb_t *rp); /* Expands to a prototype for nettle_ecc_224_modp */
+
+#else /* !HAVE_NATIVE_ecc_224_modp */
 #define USE_REDC (ECC_REDC_SIZE != 0)
+#define ecc_224_modp ecc_generic_modp /* No native version: use the generic reduction */
+#endif /* HAVE_NATIVE_ecc_224_modp */
 
 #include "ecc-224.h"
 
@@ -49,9 +59,9 @@ const struct ecc_curve nettle_secp_224r1 =
   ecc_q,
   ecc_g,
   ecc_redc_g,
-  ecc_generic_modp,
+  ecc_224_modp,
   ecc_generic_redc,
-  USE_REDC ? ecc_generic_redc : ecc_generic_modp,
+  USE_REDC ? ecc_generic_redc : ecc_224_modp,
   ecc_generic_modq,
   ecc_Bmodp,
   ecc_Bmodp_shifted,
-- 
GitLab