Commit a3328c58 authored by Niels Möller

ecc_mod_inv interface and allocation changes.

parent 64b9a7f8
2014-09-23 Niels Möller <nisse@lysator.liu.se>
* ecc-mod-inv.c (ecc_mod_inv): Interface change, make ap input
const, and require 2n limbs at rp. Preparing for powm-based
alternative implementations. Drop #if:ed out code and dp
temporary. Updated all callers, more complicated cases described
below.
* ecc-internal.h (typedef ecc_mod_inv_func): Added const to input
argument.
(ECC_MOD_INV_ITCH): Renamed, was ECC_MODINV_ITCH, and reduced to
2*n.
* ecc-ecdsa-verify.c (ecc_ecdsa_verify): Overhauled allocation,
putting mod_inv scratch at the end.
2014-09-22 Niels Möller <nisse@lysator.liu.se>
* ecc-random.c (ecc_mod_random): Renamed, and take a const struct
......
......@@ -65,8 +65,8 @@ curve25519_eh_to_x (mp_limb_t *xp, const mp_limb_t *p,
x = 0, and we should be fine, since ecc_modp_inv returns 0
in this case. */
ecc_modp_sub (ecc, t0, wp, vp);
/* Needs 3*size scratch, for a total of 5*size */
ecc->p.invert (&ecc->p, t1, t0, t2);
/* Needs a total of 5*size storage. */
ecc->p.invert (&ecc->p, t1, t0, t2 + ecc->p.size);
ecc_modp_add (ecc, t0, wp, vp);
ecc_modp_mul (ecc, t2, t0, t1);
......
......@@ -132,7 +132,7 @@ curve25519_mul (uint8_t *q, const uint8_t *n, const uint8_t *p)
ecc_modp_addmul_1 (ecc, AA, E, 121665);
ecc_modp_mul (ecc, z2, E, AA);
}
ecc->p.invert (&ecc->p, x3, z2, z3);
ecc->p.invert (&ecc->p, x3, z2, z3 + ecc->p.size);
ecc_modp_mul (ecc, z3, x2, x3);
cy = mpn_sub_n (x2, z3, ecc->p.m, ecc->p.size);
cnd_copy (cy, x2, z3, ecc->p.size);
......
......@@ -82,9 +82,8 @@ ecc_ecdsa_sign (const struct ecc_curve *ecc,
/* x coordinate only, modulo q */
ecc->h_to_a (ecc, 2, rp, P, P + 3*ecc->p.size);
/* Invert k, uses 5 * ecc->p.size including scratch */
mpn_copyi (hp, kp, ecc->p.size);
ecc->q.invert (&ecc->q, kinv, hp, tp);
/* Invert k, uses 4 * ecc->p.size including scratch */
ecc->q.invert (&ecc->q, kinv, kp, tp); /* NOTE: Also clobbers hp */
/* Process hash digest */
ecc_hash (ecc, hp, length, digest);
......
......@@ -92,11 +92,12 @@ ecc_ecdsa_verify (const struct ecc_curve *ecc,
*/
#define P2 scratch
#define P1 (scratch + 3*ecc->p.size)
#define sinv (scratch + 3*ecc->p.size)
#define u1 (scratch + 3*ecc->p.size)
#define u2 (scratch + 4*ecc->p.size)
#define hp (scratch + 4*ecc->p.size)
#define u1 (scratch + 6*ecc->p.size)
#define P1 (scratch + 4*ecc->p.size)
#define sinv (scratch)
#define hp (scratch + ecc->p.size)
if (! (ecdsa_in_range (ecc, rp)
&& ecdsa_in_range (ecc, sp)))
......@@ -105,10 +106,13 @@ ecc_ecdsa_verify (const struct ecc_curve *ecc,
/* FIXME: Micro optimizations: Either simultaneous multiplication.
Or convert to projective coordinates (can be done without
division, I think), and write an ecc_add_ppp. */
/* Compute sinv, use P2 as scratch */
mpn_copyi (sinv + ecc->p.size, sp, ecc->p.size);
ecc->q.invert (&ecc->q, sinv, sinv + ecc->p.size, P2);
/* Compute sinv */
ecc->q.invert (&ecc->q, sinv, sp, sinv + 2*ecc->p.size);
/* u1 = h / s, P1 = u1 * G */
ecc_hash (ecc, hp, length, digest);
ecc_modq_mul (ecc, u1, hp, sinv);
/* u2 = r / s, P2 = u2 * Y */
ecc_modq_mul (ecc, u2, rp, sinv);
......@@ -116,16 +120,12 @@ ecc_ecdsa_verify (const struct ecc_curve *ecc,
/* Total storage: 5*ecc->p.size + ecc->mul_itch */
ecc->mul (ecc, P2, u2, pp, u2 + ecc->p.size);
/* u1 = h / s, P1 = u1 * G */
ecc_hash (ecc, hp, length, digest);
ecc_modq_mul (ecc, u1, hp, sinv);
/* u = 0 can happen only if h = 0 or h = q, which is extremely
unlikely. */
if (!zero_p (u1, ecc->p.size))
{
/* Total storage: 6*ecc->p.size + ecc->mul_g_itch (ecc->p.size) */
ecc->mul_g (ecc, P1, u1, u1 + ecc->p.size);
/* Total storage: 7*ecc->p.size + ecc->mul_g_itch (ecc->p.size) */
ecc->mul_g (ecc, P1, u1, P1 + 3*ecc->p.size);
/* NOTE: ecc_add_jjj and/or ecc_j_to_a will produce garbage in
case u1 G = +/- u2 V. However, anyone who gets his or her
......@@ -142,10 +142,10 @@ ecc_ecdsa_verify (const struct ecc_curve *ecc,
private key by guessing.
*/
/* Total storage: 7*ecc->p.size + ecc->add_hhh_itch */
ecc->add_hhh (ecc, P1, P1, P2, u1);
ecc->add_hhh (ecc, P1, P1, P2, P1 + 3*ecc->p.size);
}
/* x coordinate only, modulo q */
ecc->h_to_a (ecc, 2, P2, P1, u1);
ecc->h_to_a (ecc, 2, P2, P1, P1 + 3*ecc->p.size);
return (mpn_cmp (rp, P2, ecc->p.size) == 0);
#undef P2
......
......@@ -63,9 +63,8 @@ ecc_eh_to_a (const struct ecc_curve *ecc,
mp_limb_t cy;
mpn_copyi (tp, zp, ecc->p.size);
/* Needs 3*size scratch */
ecc->p.invert (&ecc->p, izp, tp, tp + ecc->p.size);
/* Needs 2*size scratch */
ecc->p.invert (&ecc->p, izp, zp, tp + ecc->p.size);
ecc_modp_mul (ecc, tp, xp, izp);
cy = mpn_sub_n (r, tp, ecc->p.m, ecc->p.size);
......
......@@ -80,7 +80,7 @@ struct ecc_modulo;
typedef void ecc_mod_func (const struct ecc_modulo *m, mp_limb_t *rp);
typedef void ecc_mod_inv_func (const struct ecc_modulo *m,
mp_limb_t *vp, mp_limb_t *ap,
mp_limb_t *vp, const mp_limb_t *ap,
mp_limb_t *scratch);
typedef void ecc_add_func (const struct ecc_curve *ecc,
......@@ -263,7 +263,7 @@ curve25519_eh_to_x (mp_limb_t *xp, const mp_limb_t *p,
mp_limb_t *scratch);
/* Current scratch needs: */
#define ECC_MODINV_ITCH(size) (3*(size))
#define ECC_MOD_INV_ITCH(size) (2*(size))
#define ECC_J_TO_A_ITCH(size) (5*(size))
#define ECC_EH_TO_A_ITCH(size) (4*(size))
#define ECC_DUP_JJ_ITCH(size) (5*(size))
......
......@@ -52,7 +52,7 @@ ecc_j_to_a (const struct ecc_curve *ecc,
mp_limb_t *scratch)
{
#define izp scratch
#define up (scratch + ecc->p.size)
#define up (scratch + 2*ecc->p.size)
#define iz2p (scratch + ecc->p.size)
#define iz3p (scratch + 2*ecc->p.size)
#define izBp (scratch + 3*ecc->p.size)
......@@ -65,10 +65,7 @@ ecc_j_to_a (const struct ecc_curve *ecc,
/* Set v = (r_z / B^2)^-1,
r_x = p_x v^2 / B^3 = ((v/B * v)/B * p_x)/B
r_y = p_y v^3 / B^4 = (((v/B * v)/B * v)/B * p_x)/B
Skip the first redc, if we want to stay in Montgomery
representation.
r_y = p_y v^3 / B^4 = (((v/B * v)/B * v)/B * p_y)/B
*/
mpn_copyi (up, p + 2*ecc->p.size, ecc->p.size);
......
......@@ -56,18 +56,21 @@ cnd_neg (int cnd, mp_limb_t *rp, const mp_limb_t *ap, mp_size_t n)
/* Compute a^{-1} mod m, with running time depending only on the size.
Returns zero if a == 0 (mod m), to be consistent with a^{phi(m)-1}.
Also needs (m+1)/2, and m must be odd. */
Also needs (m+1)/2, and m must be odd.
Needs 2n limbs available at vp, and 2n additional scratch limbs.
*/
/* FIXME: Could use mpn_sec_invert (in GMP-6), but with a bit more
scratch need since it doesn't precompute (m+1)/2. */
void
ecc_mod_inv (const struct ecc_modulo *m,
mp_limb_t *vp, mp_limb_t *ap,
mp_limb_t *vp, const mp_limb_t *in_ap,
mp_limb_t *scratch)
{
#define bp scratch
#define dp (scratch + n)
#define up (scratch + 2*n)
#define ap scratch
#define bp (scratch + n)
#define up (vp + n)
mp_size_t n = m->size;
/* Avoid the mp_bitcnt_t type for compatibility with older GMP
......@@ -91,6 +94,7 @@ ecc_mod_inv (const struct ecc_modulo *m,
mpn_zero (up+1, n - 1);
mpn_copyi (bp, m->m, n);
mpn_zero (vp, n);
mpn_copyi (ap, in_ap, n);
for (i = m->bit_size + GMP_NUMB_BITS * n; i-- > 0; )
{
......@@ -134,29 +138,14 @@ ecc_mod_inv (const struct ecc_modulo *m,
assert (bp[0] & 1);
odd = ap[0] & 1;
/* Which variant is fastest depends on the speed of the various
cnd_* functions. Assembly implementation would help. */
#if 1
swap = cnd_sub_n (odd, ap, bp, n);
cnd_add_n (swap, bp, ap, n);
cnd_neg (swap, ap, ap, n);
#else
swap = odd & mpn_sub_n (dp, ap, bp, n);
cnd_copy (swap, bp, ap, n);
cnd_neg (swap, dp, dp, n);
cnd_copy (odd, ap, dp, n);
#endif
#if 1
cnd_swap (swap, up, vp, n);
cy = cnd_sub_n (odd, up, vp, n);
cy -= cnd_add_n (cy, up, m->m, n);
#else
cy = cnd_sub_n (odd, up, vp, n);
cnd_add_n (swap, vp, up, n);
cnd_neg (swap, up, up, n);
cnd_add_n (cy ^ swap, up, m->p, n);
#endif
cy = mpn_rshift (ap, ap, n, 1);
assert (cy == 0);
cy = mpn_rshift (up, up, n, 1);
......@@ -164,7 +153,7 @@ ecc_mod_inv (const struct ecc_modulo *m,
assert (cy == 0);
}
assert ( (ap[0] | ap[n-1]) == 0);
#undef ap
#undef bp
#undef dp
#undef up
}
......@@ -173,8 +173,7 @@ static void
bench_modinv (void *p)
{
struct ecc_ctx *ctx = (struct ecc_ctx *) p;
mpn_copyi (ctx->rp + ctx->ecc->p.size, ctx->ap, ctx->ecc->p.size);
ctx->ecc->p.invert (&ctx->ecc->p, ctx->rp, ctx->rp + ctx->ecc->p.size, ctx->tp);
ctx->ecc->p.invert (&ctx->ecc->p, ctx->rp, ctx->ap, ctx->tp);
}
#if !NETTLE_USE_MINI_GMP
......
......@@ -42,9 +42,9 @@ test_modulo (gmp_randstate_t rands, const char *name,
const struct ecc_modulo *m)
{
mp_limb_t a[MAX_ECC_SIZE];
mp_limb_t ai[MAX_ECC_SIZE];
mp_limb_t ai[2*MAX_ECC_SIZE];
mp_limb_t ref[MAX_ECC_SIZE];
mp_limb_t scratch[ECC_MODINV_ITCH (MAX_ECC_SIZE)];
mp_limb_t scratch[ECC_MOD_INV_ITCH (MAX_ECC_SIZE)];
unsigned j;
mpz_t r;
......@@ -66,9 +66,8 @@ test_modulo (gmp_randstate_t rands, const char *name,
}
/* Check behaviour for a = m */
mpn_copyi (a, m->m, m->size);
memset (ai, 17, m->size * sizeof(*ai));
m->invert (m, ai, a, scratch);
m->invert (m, ai, m->m, scratch);
if (!mpn_zero_p (ai, m->size))
{
fprintf (stderr, "%s->invert failed for a = p input (bit size %u):\n",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment