Commit b33eea3b authored by Niels Möller's avatar Niels Möller

New function ecc_mul_m.

* curve25519-mul.c (curve25519_mul): Use ecc_mul_m.
* curve448-mul.c (curve448_mul): Likewise.

* ecc-mul-m.c (ecc_mul_m): New file and function. Implements
multipliction for curves in Montgomery representation, as used for
curve25519 and curve448. Extracted from curve25519_mul.
* ecc-internal.h (ecc_mul_m): Declare.
(ECC_MUL_M_ITCH): New macro.
* Makefile.in (hogweed_SOURCES): Add ecc-mul-m.c.
parent 39af4a6a
2019-12-14 Niels Möller <nisse@lysator.liu.se>
* ecc-mul-m.c (ecc_mul_m): New file and function. Implements
multipliction for curves in Montgomery representation, as used for
curve25519 and curve448. Extracted from curve25519_mul.
* ecc-internal.h (ecc_mul_m): Declare.
(ECC_MUL_M_ITCH): New macro.
* Makefile.in (hogweed_SOURCES): Add ecc-mul-m.c.
* curve25519-mul.c (curve25519_mul): Use ecc_mul_m.
* curve448-mul.c (curve448_mul): Likewise.
2019-12-13 Niels Möller <nisse@lysator.liu.se>
* Merge curve448 implementation.
......
......@@ -180,7 +180,7 @@ hogweed_SOURCES = sexp.c sexp-format.c \
ecc-dup-jj.c ecc-add-jja.c ecc-add-jjj.c \
ecc-eh-to-a.c \
ecc-dup-eh.c ecc-add-eh.c ecc-add-ehh.c \
ecc-mul-g-eh.c ecc-mul-a-eh.c \
ecc-mul-g-eh.c ecc-mul-a-eh.c ecc-mul-m.c \
ecc-mul-g.c ecc-mul-a.c ecc-hash.c ecc-random.c \
ecc-point.c ecc-scalar.c ecc-point-mul.c ecc-point-mul-g.c \
ecc-ecdsa-sign.c ecdsa-sign.c \
......
......@@ -44,104 +44,21 @@
void
curve25519_mul (uint8_t *q, const uint8_t *n, const uint8_t *p)
{
const struct ecc_curve *ecc = &_nettle_curve25519;
const struct ecc_modulo *m = &_nettle_curve25519.p;
mp_size_t itch;
mp_limb_t *scratch;
int i;
mp_limb_t cy;
/* FIXME: Could save some more scratch space, e.g., by letting BB
overlap C, D, and CB overlap A, D. And possibly reusing some of
x2, z2, x3, z3. */
#define x1 scratch
#define x2 (scratch + ecc->p.size)
#define z2 (scratch + 2*ecc->p.size)
#define x3 (scratch + 3*ecc->p.size)
#define z3 (scratch + 4*ecc->p.size)
#define A (scratch + 5*ecc->p.size)
#define B (scratch + 6*ecc->p.size)
#define C (scratch + 7*ecc->p.size)
#define D (scratch + 8*ecc->p.size)
#define AA (scratch + 9*ecc->p.size)
#define BB (scratch +10*ecc->p.size)
#define E (scratch + 10*ecc->p.size) /* Overlap BB */
#define DA (scratch + 9*ecc->p.size) /* Overlap AA */
#define CB (scratch + 10*ecc->p.size) /* Overlap BB */
itch = ecc->p.size * 12;
scratch = gmp_alloc_limbs (itch);
/* Note that 255 % GMP_NUMB_BITS == 0 isn't supported, so x1 always
mp_limb_t *x;
itch = m->size + ECC_MUL_M_ITCH(m->size);
x = gmp_alloc_limbs (itch);
/* Note that 255 % GMP_NUMB_BITS == 0 isn't supported, so x always
holds at least 256 bits. */
mpn_set_base256_le (x1, ecc->p.size, p, CURVE25519_SIZE);
mpn_set_base256_le (x, m->size, p, CURVE25519_SIZE);
/* Clear bit 255, as required by RFC 7748. */
x1[255/GMP_NUMB_BITS] &= ~((mp_limb_t) 1 << (255 % GMP_NUMB_BITS));
/* Initialize, x2 = x1, z2 = 1 */
mpn_copyi (x2, x1, ecc->p.size);
z2[0] = 1;
mpn_zero (z2+1, ecc->p.size - 1);
/* Get x3, z3 from doubling. Since bit 254 is forced to 1. */
ecc_modp_add (ecc, A, x2, z2);
ecc_modp_sub (ecc, B, x2, z2);
ecc_modp_sqr (ecc, AA, A);
ecc_modp_sqr (ecc, BB, B);
ecc_modp_mul (ecc, x3, AA, BB);
ecc_modp_sub (ecc, E, AA, BB);
ecc_modp_addmul_1 (ecc, AA, E, 121665);
ecc_modp_mul (ecc, z3, E, AA);
for (i = 253; i >= 3; i--)
{
int bit = (n[i/8] >> (i & 7)) & 1;
cnd_swap (bit, x2, x3, 2*ecc->p.size);
/* Formulas from draft-turner-thecurve25519function-00-Mont. We
compute new coordinates in memory-address order, since mul
and sqr clobbers higher limbs. */
ecc_modp_add (ecc, A, x2, z2);
ecc_modp_sub (ecc, B, x2, z2);
ecc_modp_sqr (ecc, AA, A);
ecc_modp_sqr (ecc, BB, B);
ecc_modp_mul (ecc, x2, AA, BB); /* Last use of BB */
ecc_modp_sub (ecc, E, AA, BB);
ecc_modp_addmul_1 (ecc, AA, E, 121665);
ecc_modp_add (ecc, C, x3, z3);
ecc_modp_sub (ecc, D, x3, z3);
ecc_modp_mul (ecc, z2, E, AA); /* Last use of E and AA */
ecc_modp_mul (ecc, DA, D, A); /* Last use of D, A. FIXME: could
let CB overlap. */
ecc_modp_mul (ecc, CB, C, B);
ecc_modp_add (ecc, C, DA, CB);
ecc_modp_sqr (ecc, x3, C);
ecc_modp_sub (ecc, C, DA, CB);
ecc_modp_sqr (ecc, DA, C);
ecc_modp_mul (ecc, z3, DA, x1);
/* FIXME: Could be combined with the loop's initial cnd_swap. */
cnd_swap (bit, x2, x3, 2*ecc->p.size);
}
/* Do the 3 low zero bits, just duplicating x2 */
for ( ; i >= 0; i--)
{
ecc_modp_add (ecc, A, x2, z2);
ecc_modp_sub (ecc, B, x2, z2);
ecc_modp_sqr (ecc, AA, A);
ecc_modp_sqr (ecc, BB, B);
ecc_modp_mul (ecc, x2, AA, BB);
ecc_modp_sub (ecc, E, AA, BB);
ecc_modp_addmul_1 (ecc, AA, E, 121665);
ecc_modp_mul (ecc, z2, E, AA);
}
ecc->p.invert (&ecc->p, x3, z2, z3 + ecc->p.size);
ecc_modp_mul (ecc, z3, x2, x3);
cy = mpn_sub_n (x2, z3, ecc->p.m, ecc->p.size);
cnd_copy (cy, x2, z3, ecc->p.size);
mpn_get_base256_le (q, CURVE25519_SIZE, x2, ecc->p.size);
gmp_free_limbs (scratch, itch);
x[255/GMP_NUMB_BITS] &= ~((mp_limb_t) 1 << (255 % GMP_NUMB_BITS));
ecc_mul_m (m, 121665, 3, 253, x, n, x, x + m->size);
mpn_get_base256_le (q, CURVE25519_SIZE, x, m->size);
gmp_free_limbs (x, itch);
}
......@@ -46,105 +46,16 @@
void
curve448_mul (uint8_t *q, const uint8_t *n, const uint8_t *p)
{
const struct ecc_curve *ecc = &_nettle_curve448;
const struct ecc_modulo *m = &_nettle_curve448.p;
mp_size_t itch;
mp_limb_t *scratch;
int i;
mp_limb_t cy;
/* FIXME: Could save some more scratch space, e.g., by letting BB
overlap C, D, and CB overlap A, D. And possibly reusing some of
x2, z2, x3, z3. */
#define x1 scratch
#define x2 (scratch + ecc->p.size)
#define z2 (scratch + 2*ecc->p.size)
#define x3 (scratch + 3*ecc->p.size)
#define z3 (scratch + 4*ecc->p.size)
#define A (scratch + 5*ecc->p.size)
#define B (scratch + 6*ecc->p.size)
#define C (scratch + 7*ecc->p.size)
#define D (scratch + 8*ecc->p.size)
#define AA (scratch + 9*ecc->p.size)
#define BB (scratch + 10*ecc->p.size)
#define E (scratch + 10*ecc->p.size) /* Overlap BB */
#define DA (scratch + 9*ecc->p.size) /* Overlap AA */
#define CB (scratch + 10*ecc->p.size) /* Overlap BB */
#define a24 39081
itch = ecc->p.size * 12;
assert (ecc->p.invert_itch + 5*ecc->p.size <= itch);
scratch = gmp_alloc_limbs (itch);
/* Note that 255 % GMP_NUMB_BITS == 0 isn't supported, so x1 always
holds at least 256 bits. */
mpn_set_base256_le (x1, ecc->p.size, p, CURVE448_SIZE);
/* Initialize, x2 = x1, z2 = 1 */
mpn_copyi (x2, x1, ecc->p.size);
z2[0] = 1;
mpn_zero (z2+1, ecc->p.size - 1);
/* Get x3, z3 from doubling. Since bit 447 is forced to 1. */
ecc_modp_add (ecc, A, x2, z2);
ecc_modp_sub (ecc, B, x2, z2);
ecc_modp_sqr (ecc, AA, A);
ecc_modp_sqr (ecc, BB, B);
ecc_modp_mul (ecc, x3, AA, BB);
ecc_modp_sub (ecc, E, AA, BB);
ecc_modp_addmul_1 (ecc, AA, E, a24);
ecc_modp_mul (ecc, z3, E, AA);
for (i = 446; i >= 2; i--)
{
int bit = (n[i/8] >> (i & 7)) & 1;
cnd_swap (bit, x2, x3, 2*ecc->p.size);
/* Formulas from RFC 7748. We compute new coordinates in
memory-address order, since mul and sqr clobbers higher
limbs. */
ecc_modp_add (ecc, A, x2, z2);
ecc_modp_sub (ecc, B, x2, z2);
ecc_modp_sqr (ecc, AA, A);
ecc_modp_sqr (ecc, BB, B);
ecc_modp_mul (ecc, x2, AA, BB);
ecc_modp_sub (ecc, E, AA, BB); /* Last use of BB */
ecc_modp_addmul_1 (ecc, AA, E, a24);
ecc_modp_add (ecc, C, x3, z3);
ecc_modp_sub (ecc, D, x3, z3);
ecc_modp_mul (ecc, z2, E, AA); /* Last use of E and AA */
ecc_modp_mul (ecc, DA, D, A); /* Last use of D, A. FIXME: could
let CB overlap. */
ecc_modp_mul (ecc, CB, C, B);
ecc_modp_add (ecc, C, DA, CB);
ecc_modp_sqr (ecc, x3, C);
ecc_modp_sub (ecc, C, DA, CB);
ecc_modp_sqr (ecc, DA, C);
ecc_modp_mul (ecc, z3, DA, x1);
/* FIXME: Could be combined with the loop's initial cnd_swap. */
cnd_swap (bit, x2, x3, 2*ecc->p.size);
}
/* Do the 2 low zero bits, just duplicating x2 */
for ( ; i >= 0; i--)
{
ecc_modp_add (ecc, A, x2, z2);
ecc_modp_sub (ecc, B, x2, z2);
ecc_modp_sqr (ecc, AA, A);
ecc_modp_sqr (ecc, BB, B);
ecc_modp_mul (ecc, x2, AA, BB);
ecc_modp_sub (ecc, E, AA, BB);
ecc_modp_addmul_1 (ecc, AA, E, a24);
ecc_modp_mul (ecc, z2, E, AA);
}
ecc->p.invert (&ecc->p, x3, z2, z3 + ecc->p.size);
ecc_modp_mul (ecc, z3, x2, x3);
cy = mpn_sub_n (x2, z3, ecc->p.m, ecc->p.size);
cnd_copy (cy, x2, z3, ecc->p.size);
mpn_get_base256_le (q, CURVE448_SIZE, x2, ecc->p.size);
gmp_free_limbs (scratch, itch);
mp_limb_t *x;
itch = m->size + ECC_MUL_M_ITCH(m->size);
x = gmp_alloc_limbs (itch);
mpn_set_base256_le (x, m->size, p, CURVE448_SIZE);
ecc_mul_m (m, 39081, 2, 446, x, n, x, x + m->size);
mpn_get_base256_le (q, CURVE448_SIZE, x, m->size);
gmp_free_limbs (x, itch);
}
......@@ -69,6 +69,7 @@
#define ecc_mul_a _nettle_ecc_mul_a
#define ecc_mul_g_eh _nettle_ecc_mul_g_eh
#define ecc_mul_a_eh _nettle_ecc_mul_a_eh
#define ecc_mul_m _nettle_ecc_mul_m
#define cnd_copy _nettle_cnd_copy
#define sec_add_1 _nettle_sec_add_1
#define sec_sub_1 _nettle_sec_sub_1
......@@ -393,6 +394,13 @@ ecc_mul_a_eh (const struct ecc_curve *ecc,
const mp_limb_t *np, const mp_limb_t *p,
mp_limb_t *scratch);
void
ecc_mul_m (const struct ecc_modulo *m,
mp_limb_t a24,
unsigned bit_low, unsigned bit_high,
mp_limb_t *qx, const uint8_t *n, const mp_limb_t *px,
mp_limb_t *scratch);
void
cnd_copy (int cnd, mp_limb_t *rp, const mp_limb_t *ap, mp_size_t n);
......@@ -439,6 +447,7 @@ curve448_eh_to_x (mp_limb_t *xp, const mp_limb_t *p,
#define ECC_MUL_A_EH_ITCH(size) \
(((3 << ECC_MUL_A_EH_WBITS) + 10) * (size))
#endif
#define ECC_MUL_M_ITCH(size) (11*(size))
#define ECC_ECDSA_SIGN_ITCH(size) (12*(size))
#define ECC_MOD_RANDOM_ITCH(size) (size)
#define ECC_HASH_ITCH(size) (1+(size))
......
/* ecc-mul-m.c
Point multiplication using Montgomery curve representation.
Copyright (C) 2014 Niels Möller
This file is part of GNU Nettle.
GNU Nettle is free software: you can redistribute it and/or
modify it under the terms of either:
* the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your
option) any later version.
or
* the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your
option) any later version.
or both in parallel, as here.
GNU Nettle is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
*/
#if HAVE_CONFIG_H
# include "config.h"
#endif
#include <assert.h>
#include "ecc.h"
#include "ecc-internal.h"
void
ecc_mul_m (const struct ecc_modulo *m,
mp_limb_t a24,
unsigned bit_low, unsigned bit_high,
mp_limb_t *qx, const uint8_t *n, const mp_limb_t *px,
mp_limb_t *scratch)
{
unsigned i;
mp_limb_t cy;
/* FIXME: Could save some more scratch space, e.g., by letting BB
overlap C, D, and CB overlap A, D. And possibly reusing some of
x2, z2, x3, z3. */
#define x2 (scratch)
#define z2 (scratch + m->size)
#define x3 (scratch + 2*m->size)
#define z3 (scratch + 3*m->size)
#define A (scratch + 4*m->size)
#define B (scratch + 5*m->size)
#define C (scratch + 6*m->size)
#define D (scratch + 7*m->size)
#define AA (scratch + 8*m->size)
#define BB (scratch +9*m->size)
#define E (scratch + 9*m->size) /* Overlap BB */
#define DA (scratch + 8*m->size) /* Overlap AA */
#define CB (scratch + 9*m->size) /* Overlap BB */
/* Initialize, x2 = px, z2 = 1 */
mpn_copyi (x2, px, m->size);
z2[0] = 1;
mpn_zero (z2+1, m->size - 1);
/* Get x3, z3 from doubling. Since most significant bit is forced to 1. */
ecc_mod_add (m, A, x2, z2);
ecc_mod_sub (m, B, x2, z2);
ecc_mod_sqr (m, AA, A);
ecc_mod_sqr (m, BB, B);
ecc_mod_mul (m, x3, AA, BB);
ecc_mod_sub (m, E, AA, BB);
ecc_mod_addmul_1 (m, AA, E, a24);
ecc_mod_mul (m, z3, E, AA);
for (i = bit_high; i >= bit_low; i--)
{
int bit = (n[i/8] >> (i & 7)) & 1;
cnd_swap (bit, x2, x3, 2*m->size);
/* Formulas from RFC 7748. We compute new coordinates in
memory-address order, since mul and sqr clobbers higher
limbs. */
ecc_mod_add (m, A, x2, z2);
ecc_mod_sub (m, B, x2, z2);
ecc_mod_sqr (m, AA, A);
ecc_mod_sqr (m, BB, B);
ecc_mod_mul (m, x2, AA, BB); /* Last use of BB */
ecc_mod_sub (m, E, AA, BB);
ecc_mod_addmul_1 (m, AA, E, a24);
ecc_mod_add (m, C, x3, z3);
ecc_mod_sub (m, D, x3, z3);
ecc_mod_mul (m, z2, E, AA); /* Last use of E and AA */
ecc_mod_mul (m, DA, D, A); /* Last use of D, A. FIXME: could
let CB overlap. */
ecc_mod_mul (m, CB, C, B);
ecc_mod_add (m, C, DA, CB);
ecc_mod_sqr (m, x3, C);
ecc_mod_sub (m, C, DA, CB);
ecc_mod_sqr (m, DA, C);
ecc_mod_mul (m, z3, DA, px);
/* FIXME: Could be combined with the loop's initial cnd_swap. */
cnd_swap (bit, x2, x3, 2*m->size);
}
/* Do the low zero bits, just duplicating x2 */
for (i = 0; i < bit_low; i++)
{
ecc_mod_add (m, A, x2, z2);
ecc_mod_sub (m, B, x2, z2);
ecc_mod_sqr (m, AA, A);
ecc_mod_sqr (m, BB, B);
ecc_mod_mul (m, x2, AA, BB);
ecc_mod_sub (m, E, AA, BB);
ecc_mod_addmul_1 (m, AA, E, a24);
ecc_mod_mul (m, z2, E, AA);
}
assert (m->invert_itch <= 7 * m->size);
m->invert (m, x3, z2, z3 + m->size);
ecc_mod_mul (m, z3, x2, x3);
cy = mpn_sub_n (qx, z3, m->m, m->size);
cnd_copy (cy, qx, z3, m->size);
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment