From a065a3559e5ee13161736262287b02537c4a02c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se>
Date: Sun, 8 Nov 2020 21:58:32 +0100
Subject: [PATCH] Eliminate one mpn_cnd_swap call in ecc_mul_m.

---
 ChangeLog   |  3 +++
 ecc-mul-m.c | 14 +++++++-------
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index cc06e5a3..3998f5d7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,9 @@
 2020-11-08  Niels Möller  <nisse@lysator.liu.se>
 
 	* ecc-mul-m.c (ecc_mul_m): Reduce scratch need.
+	(ecc_mul_m): Optimize swapping, with only a single mpn_cnd_swap
+	per iteration.
+
 	* ecc-add-jja.c (ecc_add_jja): Reduce scratch need.
 	* ecc-add-jjj.c (ecc_add_jjj): Reduce scratch need.
 	* ecc-internal.h (ECC_ADD_JJA_ITCH, ECC_ADD_JJJ_ITCH): Now 5*size.
diff --git a/ecc-mul-m.c b/ecc-mul-m.c
index 820258ca..a3355751 100644
--- a/ecc-mul-m.c
+++ b/ecc-mul-m.c
@@ -48,7 +48,7 @@ ecc_mul_m (const struct ecc_modulo *m,
 	   mp_limb_t *scratch)
 {
   unsigned i;
-  mp_limb_t cy;
+  mp_limb_t cy, swap;
 
 #define x2 (scratch)
 #define z2 (scratch + m->size)
@@ -109,11 +109,12 @@ ecc_mul_m (const struct ecc_modulo *m,
   ecc_mod_addmul_1 (m, AA, E, a24);
   ecc_mod_mul (m, z3, E, AA, tp);
 
-  for (i = bit_high; i >= bit_low; i--)
+  for (i = bit_high, swap = 0; i >= bit_low; i--)
     {
-      int bit = (n[i/8] >> (i & 7)) & 1;
+      mp_limb_t bit = (n[i/8] >> (i & 7)) & 1;
 
-      mpn_cnd_swap (bit, x2, x3, 2*m->size);
+      mpn_cnd_swap (swap ^ bit, x2, x3, 2*m->size);
+      swap = bit;
 
       ecc_mod_add (m, A, x2, z2);
       ecc_mod_sub (m, D, x3, z3);
@@ -144,10 +145,9 @@ ecc_mul_m (const struct ecc_modulo *m,
       ecc_mod_sub (m, z3, DA, z3); /* DA - CB */
       ecc_mod_sqr (m, z3, z3, tp);
       ecc_mod_mul (m, z3, z3, px, tp);
-
-      /* FIXME: Could be combined with the loop's initial mpn_cnd_swap. */
-      mpn_cnd_swap (bit, x2, x3, 2*m->size);
     }
+  mpn_cnd_swap (swap, x2, x3, 2*m->size);
+
   /* Do the low zero bits, just duplicating x2 */
   for (i = 0; i < bit_low; i++)
     {
-- 
GitLab