From c71d2c9d20eeebb985e3872e4550137209e3ce4d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se>
Date: Thu, 10 Dec 2015 18:57:34 +0100
Subject: [PATCH] Fixed miscomputation bugs in secp-256r1 modulo functions.

---
 ChangeLog |  6 ++++++
 ecc-256.c | 22 ++++++++++++++++++----
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 7202afc5..a8a888bc 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2015-12-10  Niels Möller  <nisse@lysator.liu.se>
+
+	* ecc-256.c (ecc_256_modp): Fixed carry propagation bug. Problem
+	reported by Hanno Böck.
+	(ecc_256_modq): Fixed another carry propagation bug.
+
 2015-11-23  Niels Möller  <nisse@lysator.liu.se>
 
 	* nettle.texinfo: Document rsa_encrypt, rsa_decrypt and
diff --git a/ecc-256.c b/ecc-256.c
index 4fd186e3..e757985c 100644
--- a/ecc-256.c
+++ b/ecc-256.c
@@ -113,8 +113,19 @@ ecc_256_modp (const struct ecc_modulo *p, mp_limb_t *rp)
 
       assert (q2 < 2);
 
-      /* We multiply by two low limbs of p, 2^96 - 1, so we could use
-	 shifts rather than mul. */
+      /*
+	 n-1 n-2 n-3 n-4
+        +---+---+---+---+
+        | u1| u0| u low |
+        +---+---+---+---+
+          - | q1(2^96-1)|
+            +-------+---+
+            |q2(2^.)|
+            +-------+
+
+	 We multiply by the two low limbs of p, 2^96 - 1, so we could use
+	 shifts rather than mul.
+      */
       t = mpn_submul_1 (rp + n - 4, p->m, 2, q1);
       t += cnd_sub_n (q2, rp + n - 3, p->m, 1);
       t += (-q2) & 0xffffffff;
@@ -124,7 +135,10 @@ ecc_256_modp (const struct ecc_modulo *p, mp_limb_t *rp)
       u0 -= t;
       t = (u1 < cy);
       u1 -= cy;
-      u1 += cnd_add_n (t, rp + n - 4, p->m, 3);
+
+      cy = cnd_add_n (t, rp + n - 4, p->m, 2);
+      u0 += cy;
+      u1 += (u0 < cy);
       u1 -= (-t) & 0xffffffff;
     }
   rp[2] = u0;
@@ -211,7 +225,7 @@ ecc_256_modq (const struct ecc_modulo *q, mp_limb_t *rp)
 
       /* Conditional add of p */
       u1 += t;
-      u2 += (t<<32) + (u0 < t);
+      u2 += (t<<32) + (u1 < t);
 
       t = cnd_add_n (t, rp + n - 4, q->m, 2);
       u1 += t;
-- 
GitLab