From a3328c58174827dbec966748a074db439d8e151a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se> Date: Tue, 23 Sep 2014 13:14:02 +0200 Subject: [PATCH] ecc_mod_inv interface and allocation changes. --- ChangeLog | 14 ++++++++++++++ curve25519-eh-to-x.c | 4 ++-- curve25519-mul.c | 2 +- ecc-ecdsa-sign.c | 5 ++--- ecc-ecdsa-verify.c | 32 ++++++++++++++++---------------- ecc-eh-to-a.c | 5 ++--- ecc-internal.h | 4 ++-- ecc-j-to-a.c | 7 ++----- ecc-mod-inv.c | 33 +++++++++++---------------------- examples/ecc-benchmark.c | 3 +-- testsuite/ecc-modinv-test.c | 7 +++---- 11 files changed, 56 insertions(+), 60 deletions(-) diff --git a/ChangeLog b/ChangeLog index 2d084646..0c97135f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,17 @@ +2014-09-23 Niels Möller <nisse@lysator.liu.se> + + * ecc-mod-inv.c (ecc_mod_inv): Interface change, make ap input + const, and require 2n limbs at rp. Preparing for powm-based + alternative implementations. Drop #if:ed out code and dp + temporary. Updated all callers, more complicated cases described + below. + * ecc-internal.h (typedef ecc_mod_inv_func): Added const to input + argument. + (ECC_MOD_INV_ITCH): Renamed, was ECC_MODINV_ITCH, and reduced to + 2*n. + * ecc-ecdsa-verify.c (ecc_ecdsa_verify): Overhauled allocation, + putting mod_inv scratch at the end. + 2014-09-22 Niels Möller <nisse@lysator.liu.se> * ecc-random.c (ecc_mod_random): Renamed, and take a const struct diff --git a/curve25519-eh-to-x.c b/curve25519-eh-to-x.c index 47f07129..8ba221f5 100644 --- a/curve25519-eh-to-x.c +++ b/curve25519-eh-to-x.c @@ -65,8 +65,8 @@ curve25519_eh_to_x (mp_limb_t *xp, const mp_limb_t *p, x = 0, and we should be fine, since ecc_modp_inv returns 0 in this case. */ ecc_modp_sub (ecc, t0, wp, vp); - /* Needs 3*size scratch, for a total of 5*size */ - ecc->p.invert (&ecc->p, t1, t0, t2); + /* Needs a total of 5*size storage. */ + ecc->p.invert (&ecc->p, t1, t0, t2 + ecc->p.size); ecc_modp_add (ecc, t0, wp, vp); ecc_modp_mul (ecc, t2, t0, t1); diff --git a/curve25519-mul.c b/curve25519-mul.c index f4e4fa7b..0e280244 100644 --- a/curve25519-mul.c +++ b/curve25519-mul.c @@ -132,7 +132,7 @@ curve25519_mul (uint8_t *q, const uint8_t *n, const uint8_t *p) ecc_modp_addmul_1 (ecc, AA, E, 121665); ecc_modp_mul (ecc, z2, E, AA); } - ecc->p.invert (&ecc->p, x3, z2, z3); + ecc->p.invert (&ecc->p, x3, z2, z3 + ecc->p.size); ecc_modp_mul (ecc, z3, x2, x3); cy = mpn_sub_n (x2, z3, ecc->p.m, ecc->p.size); cnd_copy (cy, x2, z3, ecc->p.size); diff --git a/ecc-ecdsa-sign.c b/ecc-ecdsa-sign.c index de17ac4c..57e05a41 100644 --- a/ecc-ecdsa-sign.c +++ b/ecc-ecdsa-sign.c @@ -82,9 +82,8 @@ ecc_ecdsa_sign (const struct ecc_curve *ecc, /* x coordinate only, modulo q */ ecc->h_to_a (ecc, 2, rp, P, P + 3*ecc->p.size); - /* Invert k, uses 5 * ecc->p.size including scratch */ - mpn_copyi (hp, kp, ecc->p.size); - ecc->q.invert (&ecc->q, kinv, hp, tp); + /* Invert k, uses 4 * ecc->p.size including scratch */ + ecc->q.invert (&ecc->q, kinv, kp, tp); /* NOTE: Also clobbers hp */ /* Process hash digest */ ecc_hash (ecc, hp, length, digest); diff --git a/ecc-ecdsa-verify.c b/ecc-ecdsa-verify.c index a259962e..a60c89d6 100644 --- a/ecc-ecdsa-verify.c +++ b/ecc-ecdsa-verify.c @@ -92,11 +92,12 @@ ecc_ecdsa_verify (const struct ecc_curve *ecc, */ #define P2 scratch -#define P1 (scratch + 3*ecc->p.size) -#define sinv (scratch + 3*ecc->p.size) +#define u1 (scratch + 3*ecc->p.size) #define u2 (scratch + 4*ecc->p.size) -#define hp (scratch + 4*ecc->p.size) -#define u1 (scratch + 6*ecc->p.size) + +#define P1 (scratch + 4*ecc->p.size) +#define sinv (scratch) +#define hp (scratch + ecc->p.size) if (! (ecdsa_in_range (ecc, rp) && ecdsa_in_range (ecc, sp))) @@ -105,10 +106,13 @@ ecc_ecdsa_verify (const struct ecc_curve *ecc, /* FIXME: Micro optimizations: Either simultaneous multiplication. Or convert to projective coordinates (can be done without division, I think), and write an ecc_add_ppp. */ - - /* Compute sinv, use P2 as scratch */ - mpn_copyi (sinv + ecc->p.size, sp, ecc->p.size); - ecc->q.invert (&ecc->q, sinv, sinv + ecc->p.size, P2); + + /* Compute sinv */ + ecc->q.invert (&ecc->q, sinv, sp, sinv + 2*ecc->p.size); + + /* u1 = h / s, P1 = u1 * G */ + ecc_hash (ecc, hp, length, digest); + ecc_modq_mul (ecc, u1, hp, sinv); /* u2 = r / s, P2 = u2 * Y */ ecc_modq_mul (ecc, u2, rp, sinv); @@ -116,16 +120,12 @@ ecc_ecdsa_verify (const struct ecc_curve *ecc, /* Total storage: 5*ecc->p.size + ecc->mul_itch */ ecc->mul (ecc, P2, u2, pp, u2 + ecc->p.size); - /* u1 = h / s, P1 = u1 * G */ - ecc_hash (ecc, hp, length, digest); - ecc_modq_mul (ecc, u1, hp, sinv); - /* u = 0 can happen only if h = 0 or h = q, which is extremely unlikely. */ if (!zero_p (u1, ecc->p.size)) { - /* Total storage: 6*ecc->p.size + ecc->mul_g_itch (ecc->p.size) */ - ecc->mul_g (ecc, P1, u1, u1 + ecc->p.size); + /* Total storage: 7*ecc->p.size + ecc->mul_g_itch (ecc->p.size) */ + ecc->mul_g (ecc, P1, u1, P1 + 3*ecc->p.size); /* NOTE: ecc_add_jjj and/or ecc_j_to_a will produce garbage in case u1 G = +/- u2 V. However, anyone who gets his or her @@ -142,10 +142,10 @@ ecc_ecdsa_verify (const struct ecc_curve *ecc, private key by guessing. */ /* Total storage: 6*ecc->p.size + ecc->add_hhh_itch */ - ecc->add_hhh (ecc, P1, P1, P2, u1); + ecc->add_hhh (ecc, P1, P1, P2, P1 + 3*ecc->p.size); } /* x coordinate only, modulo q */ - ecc->h_to_a (ecc, 2, P2, P1, u1); + ecc->h_to_a (ecc, 2, P2, P1, P1 + 3*ecc->p.size); return (mpn_cmp (rp, P2, ecc->p.size) == 0); #undef P2 diff --git a/ecc-eh-to-a.c b/ecc-eh-to-a.c index a0182308..4cfcad3b 100644 --- a/ecc-eh-to-a.c +++ b/ecc-eh-to-a.c @@ -63,9 +63,8 @@ ecc_eh_to_a (const struct ecc_curve *ecc, mp_limb_t cy; - mpn_copyi (tp, zp, ecc->p.size); - /* Needs 3*size scratch */ - ecc->p.invert (&ecc->p, izp, tp, tp + ecc->p.size); + /* Needs 2*size scratch */ + ecc->p.invert (&ecc->p, izp, zp, tp + ecc->p.size); ecc_modp_mul (ecc, tp, xp, izp); cy = mpn_sub_n (r, tp, ecc->p.m, ecc->p.size); diff --git a/ecc-internal.h b/ecc-internal.h index b96751f8..fe9acfb3 100644 --- a/ecc-internal.h +++ b/ecc-internal.h @@ -80,7 +80,7 @@ struct ecc_modulo; typedef void ecc_mod_func (const struct ecc_modulo *m, mp_limb_t *rp); typedef void ecc_mod_inv_func (const struct ecc_modulo *m, - mp_limb_t *vp, mp_limb_t *ap, + mp_limb_t *vp, const mp_limb_t *ap, mp_limb_t *scratch); typedef void ecc_add_func (const struct ecc_curve *ecc, @@ -263,7 +263,7 @@ curve25519_eh_to_x (mp_limb_t *xp, const mp_limb_t *p, mp_limb_t *scratch); /* Current scratch needs: */ -#define ECC_MODINV_ITCH(size) (3*(size)) +#define ECC_MOD_INV_ITCH(size) (2*(size)) #define ECC_J_TO_A_ITCH(size) (5*(size)) #define ECC_EH_TO_A_ITCH(size) (4*(size)) #define ECC_DUP_JJ_ITCH(size) (5*(size)) diff --git a/ecc-j-to-a.c b/ecc-j-to-a.c index 8c52eb36..2e48b94d 100644 --- a/ecc-j-to-a.c +++ b/ecc-j-to-a.c @@ -52,7 +52,7 @@ ecc_j_to_a (const struct ecc_curve *ecc, mp_limb_t *scratch) { #define izp scratch -#define up (scratch + ecc->p.size) +#define up (scratch + 2*ecc->p.size) #define iz2p (scratch + ecc->p.size) #define iz3p (scratch + 2*ecc->p.size) #define izBp (scratch + 3*ecc->p.size) @@ -65,10 +65,7 @@ ecc_j_to_a (const struct ecc_curve *ecc, /* Set v = (r_z / B^2)^-1, r_x = p_x v^2 / B^3 = ((v/B * v)/B * p_x)/B - r_y = p_y v^3 / B^4 = (((v/B * v)/B * v)/B * p_x)/B - - Skip the first redc, if we want to stay in Montgomery - representation. + r_y = p_y v^3 / B^4 = (((v/B * v)/B * v)/B * p_y)/B */ mpn_copyi (up, p + 2*ecc->p.size, ecc->p.size); diff --git a/ecc-mod-inv.c b/ecc-mod-inv.c index 22e533f2..f65c9da4 100644 --- a/ecc-mod-inv.c +++ b/ecc-mod-inv.c @@ -56,18 +56,21 @@ cnd_neg (int cnd, mp_limb_t *rp, const mp_limb_t *ap, mp_size_t n) /* Compute a^{-1} mod m, with running time depending only on the size. Returns zero if a == 0 (mod m), to be consistent with a^{phi(m)-1}. - Also needs (m+1)/2, and m must be odd. */ + Also needs (m+1)/2, and m must be odd. + + Needs 2n limbs available at rp, and 2n additional scratch limbs. +*/ /* FIXME: Could use mpn_sec_invert (in GMP-6), but with a bit more scratch need since it doesn't precompute (m+1)/2. */ void ecc_mod_inv (const struct ecc_modulo *m, - mp_limb_t *vp, mp_limb_t *ap, + mp_limb_t *vp, const mp_limb_t *in_ap, mp_limb_t *scratch) { -#define bp scratch -#define dp (scratch + n) -#define up (scratch + 2*n) +#define ap scratch +#define bp (scratch + n) +#define up (vp + n) mp_size_t n = m->size; /* Avoid the mp_bitcnt_t type for compatibility with older GMP @@ -91,6 +94,7 @@ ecc_mod_inv (const struct ecc_modulo *m, mpn_zero (up+1, n - 1); mpn_copyi (bp, m->m, n); mpn_zero (vp, n); + mpn_copyi (ap, in_ap, n); for (i = m->bit_size + GMP_NUMB_BITS * n; i-- > 0; ) { @@ -134,29 +138,14 @@ ecc_mod_inv (const struct ecc_modulo *m, assert (bp[0] & 1); odd = ap[0] & 1; - /* Which variant is fastest depends on the speed of the various - cnd_* functions. Assembly implementation would help. */ -#if 1 swap = cnd_sub_n (odd, ap, bp, n); cnd_add_n (swap, bp, ap, n); cnd_neg (swap, ap, ap, n); -#else - swap = odd & mpn_sub_n (dp, ap, bp, n); - cnd_copy (swap, bp, ap, n); - cnd_neg (swap, dp, dp, n); - cnd_copy (odd, ap, dp, n); -#endif -#if 1 cnd_swap (swap, up, vp, n); cy = cnd_sub_n (odd, up, vp, n); cy -= cnd_add_n (cy, up, m->m, n); -#else - cy = cnd_sub_n (odd, up, vp, n); - cnd_add_n (swap, vp, up, n); - cnd_neg (swap, up, up, n); - cnd_add_n (cy ^ swap, up, m->p, n); -#endif + cy = mpn_rshift (ap, ap, n, 1); assert (cy == 0); cy = mpn_rshift (up, up, n, 1); @@ -164,7 +153,7 @@ ecc_mod_inv (const struct ecc_modulo *m, assert (cy == 0); } assert ( (ap[0] | ap[n-1]) == 0); +#undef ap #undef bp -#undef dp #undef up } diff --git a/examples/ecc-benchmark.c b/examples/ecc-benchmark.c index 0ecf3658..edf30fc2 100644 --- a/examples/ecc-benchmark.c +++ b/examples/ecc-benchmark.c @@ -173,8 +173,7 @@ static void bench_modinv (void *p) { struct ecc_ctx *ctx = (struct ecc_ctx *) p; - mpn_copyi (ctx->rp + ctx->ecc->p.size, ctx->ap, ctx->ecc->p.size); - ctx->ecc->p.invert (&ctx->ecc->p, ctx->rp, ctx->rp + ctx->ecc->p.size, ctx->tp); + ctx->ecc->p.invert (&ctx->ecc->p, ctx->rp, ctx->ap, ctx->tp); } #if !NETTLE_USE_MINI_GMP diff --git a/testsuite/ecc-modinv-test.c b/testsuite/ecc-modinv-test.c index ea49c2b6..2faefc86 100644 --- a/testsuite/ecc-modinv-test.c +++ b/testsuite/ecc-modinv-test.c @@ -42,9 +42,9 @@ test_modulo (gmp_randstate_t rands, const char *name, const struct ecc_modulo *m) { mp_limb_t a[MAX_ECC_SIZE]; - mp_limb_t ai[MAX_ECC_SIZE]; + mp_limb_t ai[2*MAX_ECC_SIZE]; mp_limb_t ref[MAX_ECC_SIZE]; - mp_limb_t scratch[ECC_MODINV_ITCH (MAX_ECC_SIZE)]; + mp_limb_t scratch[ECC_MOD_INV_ITCH (MAX_ECC_SIZE)]; unsigned j; mpz_t r; @@ -66,9 +66,8 @@ test_modulo (gmp_randstate_t rands, const char *name, } /* Check behaviour for a = m */ - mpn_copyi (a, m->m, m->size); memset (ai, 17, m->size * sizeof(*ai)); - m->invert (m, ai, a, scratch); + m->invert (m, ai, m->m, scratch); if (!mpn_zero_p (ai, m->size)) { fprintf (stderr, "%s->invert failed for a = p input (bit size %u):\n", -- GitLab