From b641a4b8cc5b69a7ab293442bc0f3a02a4b88eb7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se>
Date: Wed, 1 Jan 2020 20:22:21 +0100
Subject: [PATCH] Reduce scratch need for curve448 inverse and sqrt

---
 ChangeLog |  7 +++++++
 ecc-448.c | 48 +++++++++++++++++++++++++++++-------------------
 2 files changed, 36 insertions(+), 19 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index c9895615..b7624be5 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,12 @@
 2020-01-01  Niels Möller  <nisse@lysator.liu.se>
 
+	* ecc-448.c (ecc_mod_pow_2kp1): New function.
+	(ecc_mod_pow_446m224m1): Reduce scratch usage from 6*n to 5*n, at
+	the cost of one copy operation. Also use ecc_mod_pow_2kp1 where
+	applicable.
+	(ECC_448_INV_ITCH): Reduce to 5*ECC_LIMB_SIZE.
+	(ECC_448_SQRT_ITCH): Reduce to 9*ECC_LIMB_SIZE.
+
 	* testsuite/eddsa-compress-test.c: Test also with curve448.
 
 2019-12-30  Niels Möller  <nisse@lysator.liu.se>
diff --git a/ecc-448.c b/ecc-448.c
index 6a957bb4..b32ad463 100644
--- a/ecc-448.c
+++ b/ecc-448.c
@@ -124,37 +124,48 @@ ecc_mod_pow_2k (const struct ecc_modulo *m,
     }
 }
 
-/* Computes a^{(p-3)/4} = a^{2^446-2^222-1} mod m. Needs 6 * n scratch
+static void
+ecc_mod_pow_2kp1 (const struct ecc_modulo *m,
+		  mp_limb_t *rp, const mp_limb_t *xp,
+		  unsigned k, mp_limb_t *tp)
+{
+  ecc_mod_pow_2k (m, tp, xp, k, rp);	/* tp = x^{2^k}; rp is passed in the scratch slot */
+  ecc_mod_mul (m, rp, tp, xp);		/* rp = x^{2^k} * x = x^{2^k+1} */
+}
+
+/* Computes a^{(p-3)/4} = a^{2^446-2^222-1} mod m. Needs 5 * n scratch
    space. */
 static void
 ecc_mod_pow_446m224m1 (const struct ecc_modulo *p,
 		       mp_limb_t *rp, const mp_limb_t *ap,
 		       mp_limb_t *scratch)
 {
+/* Note overlap: operations writing to t0 clobber t1. */
 #define t0 scratch
-#define t1 (scratch + 2*ECC_LIMB_SIZE)
-#define t2 (scratch + 4*ECC_LIMB_SIZE)
+#define t1 (scratch + 1*ECC_LIMB_SIZE)
+#define t2 (scratch + 3*ECC_LIMB_SIZE)
 
   ecc_mod_sqr (p, rp, ap);	        /* a^2 */
   ecc_mod_mul (p, t0, ap, rp);		/* a^3 */
   ecc_mod_sqr (p, rp, t0);		/* a^6 */
   ecc_mod_mul (p, t0, ap, rp);		/* a^{2^3-1} */
-  ecc_mod_pow_2k (p, rp, t0, 3, t2);	/* a^{2^6-2^3} */
-  ecc_mod_mul (p, t1, t0, rp);		/* a^{2^6-1} */
+
+  ecc_mod_pow_2kp1 (p, t1, t0, 3, rp);	/* a^{2^6-1} */
   ecc_mod_pow_2k (p, rp, t1, 3, t2);	/* a^{2^9-2^3} */
-  ecc_mod_mul (p, t1, t0, rp);		/* a^{2^9-1} */
-  ecc_mod_pow_2k (p, t0, t1, 9, t2);	/* a^{2^18-2^9} */
-  ecc_mod_mul (p, rp, t1, t0);		/* a^{2^18-1} */
-  ecc_mod_sqr (p, t1, rp);		/* a^{2^19-2} */
-  ecc_mod_mul (p, t0, ap, t1);		/* a^{2^19-1} */
-  ecc_mod_pow_2k (p, t1, t0, 18, t2);	/* a^{2^37-2^18} */
-  ecc_mod_mul (p, t0, rp, t1);		/* a^{2^37-1} */
-  ecc_mod_pow_2k (p, t1, t0, 37, t2);	/* a^{2^74-2^37} */
-  ecc_mod_mul (p, rp, t0, t1);		/* a^{2^74-1} */
+  ecc_mod_mul (p, t2, t0, rp);		/* a^{2^9-1} */
+  ecc_mod_pow_2kp1 (p, t0, t2, 9, rp);	/* a^{2^18-1} */
+
+  ecc_mod_sqr (p, t1, t0);		/* a^{2^19-2} */
+  ecc_mod_mul (p, rp, ap, t1);		/* a^{2^19-1} */
+  ecc_mod_pow_2k (p, t1, rp, 18, t2);	/* a^{2^37-2^18} */
+  ecc_mod_mul (p, rp, t0, t1);		/* a^{2^37-1} */
+  mpn_copyi (t0, rp, p->size);
+
+  ecc_mod_pow_2kp1 (p, rp, t0, 37, t2);	/* a^{2^74-1} */
   ecc_mod_pow_2k (p, t1, rp, 37, t2);	/* a^{2^111-2^37} */
   ecc_mod_mul (p, rp, t0, t1);		/* a^{2^111-1} */
-  ecc_mod_pow_2k (p, t1, rp, 111, t2);	/* a^{2^222-2^111} */
-  ecc_mod_mul (p, t0, rp, t1);		/* a^{2^222-1} */
+  ecc_mod_pow_2kp1 (p, t0, rp, 111, t2);/* a^{2^222-1} */
+
   ecc_mod_sqr (p, t1, t0);		/* a^{2^223-2} */
   ecc_mod_mul (p, rp, ap, t1);		/* a^{2^223-1} */
   ecc_mod_pow_2k (p, t1, rp, 223, t2);	/* a^{2^446-2^223} */
@@ -164,8 +175,7 @@ ecc_mod_pow_446m224m1 (const struct ecc_modulo *p,
 #undef t2
 }
 
-/* Needs 6*ECC_LIMB_SIZE scratch space. */
-#define ECC_448_INV_ITCH (6*ECC_LIMB_SIZE)
+#define ECC_448_INV_ITCH (5*ECC_LIMB_SIZE) /* 5*n, as documented for ecc_mod_pow_446m224m1 */
 
 static void ecc_448_inv (const struct ecc_modulo *p,
 			 mp_limb_t *rp, const mp_limb_t *ap,
@@ -207,7 +217,7 @@ ecc_448_zero_p (const struct ecc_modulo *p, mp_limb_t *xp)
 */
 
 /* Needs 4*n space + scratch for ecc_mod_pow_446m224m1. */
-#define ECC_448_SQRT_ITCH (10*ECC_LIMB_SIZE)
+#define ECC_448_SQRT_ITCH (9*ECC_LIMB_SIZE)
 
 static int
 ecc_448_sqrt(const struct ecc_modulo *p, mp_limb_t *rp,
-- 
GitLab