diff --git a/ChangeLog b/ChangeLog
index 278de08ba7cdc1193a3fb68e145a0bc818dcae0c..4d53b8969fb386a4fe6b962d95c602ad935882cd 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,28 @@
+2023-11-06  Niels Möller  <nisse@lysator.liu.se>
+
+	Avoid comparison like cnd = (x == 0) in code intended to be
+	side-channel silent, since to eliminate branches with some
+	compilers/architectures, in particular 32-bit x86 and the msvc compiler.
+	* nettle-internal.h (IS_ZERO_SMALL): New macro.
+	* memeql-sec.c (memeql_sec): Use IS_ZERO_SMALL.
+	* pkcs1-sec-decrypt.c (EQUAL): Likewise.
+
+	* cnd-copy.c (cnd_copy): Require that cnd argument is 1 or 0.
+	* ecc-mul-a.c (ecc_mul_a) [ECC_MUL_A_WBITS == 0]:
+	Rearrange loop to pass 0 or 1 to cnd_copy.
+	* ecc-mul-a-eh.c (ecc_mul_a_eh) [ECC_MUL_A_EH_WBITS == 0]:
+	Likewise.
+	* ecc-mul-a.c (ecc_mul_a) [ECC_MUL_A_WBITS > 0]: Use
+	IS_ZERO_SMALL, and pass 0 or 1 to cnd_copy.
+	* ecc-mul-g.c (ecc_mul_g): Likewise.
+
+	* ecc-internal.h (is_zero_limb): New inline function.
+	* eddsa-decompress.c (_eddsa_decompress): Likewise.
+	* ecc-gostdsa-sign.c (ecc_gostdsa_sign): Likewise.
+	* ecc-mod-arith.c (ecc_mod_zero_p): Likewise.
+	(ecc_mod_equal_p): Avoid comparison cy == 0.
+	* ecc-j-to-a.c (ecc_j_to_a): Avoid comparison cy == 0.
+
 2023-10-06  Niels Möller  <nisse@lysator.liu.se>
 
 	* testsuite/rsa-sec-decrypt-test.c (test_main): Skip side-channel
diff --git a/cnd-copy.c b/cnd-copy.c
index d24da3d06366bcbb1c0d5b7f4de43c377d332ef0..878cd3206446515d291334eedfa554a796e18f67 100644
--- a/cnd-copy.c
+++ b/cnd-copy.c
@@ -43,7 +43,7 @@ cnd_copy (int cnd, mp_limb_t *rp, const mp_limb_t *ap, mp_size_t n)
   mp_limb_t mask, keep;
   mp_size_t i;
 
-  mask = -(mp_limb_t) (cnd !=0);
+  mask = -(mp_limb_t) cnd;
   keep = ~mask;
 
   for (i = 0; i < n; i++)
diff --git a/ecc-gostdsa-sign.c b/ecc-gostdsa-sign.c
index 491a228157783304096375285d7a68ad0d97c561..c811c87ef43234382cae86f4d9b2c725ef5b316e 100644
--- a/ecc-gostdsa-sign.c
+++ b/ecc-gostdsa-sign.c
@@ -91,7 +91,7 @@ ecc_gostdsa_sign (const struct ecc_curve *ecc,
    * so one subtraction should suffice. */
 
   *scratch = mpn_sub_n (tp, sp, ecc->q.m, ecc->p.size);
-  cnd_copy (*scratch == 0, sp, tp, ecc->p.size);
+  cnd_copy (is_zero_limb (*scratch), sp, tp, ecc->p.size);
 
 #undef P
 #undef hp
diff --git a/ecc-internal.h b/ecc-internal.h
index be02de5f534d4e4f7660e721f30245e6aaa6a51e..2a5e3ae10f0049586af4f3e42c288bf57c59b6d8 100644
--- a/ecc-internal.h
+++ b/ecc-internal.h
@@ -85,6 +85,13 @@
 #define curve25519_eh_to_x _nettle_curve25519_eh_to_x
 #define curve448_eh_to_x _nettle_curve448_eh_to_x
 
+inline int
+is_zero_limb (mp_limb_t x)
+{
+  x |= (x << 1);
+  return ((x >> 1) - 1) >> (GMP_LIMB_BITS - 1);
+}
+
 extern const struct ecc_curve _nettle_secp_192r1;
 extern const struct ecc_curve _nettle_secp_224r1;
 extern const struct ecc_curve _nettle_secp_256r1;
@@ -464,6 +471,7 @@ ecc_mul_m (const struct ecc_modulo *m,
 	   mp_limb_t *qx, const uint8_t *n, const mp_limb_t *px,
 	   mp_limb_t *scratch);
 
+/* The cnd argument must be 1 or 0. */
 void
 cnd_copy (int cnd, mp_limb_t *rp, const mp_limb_t *ap, mp_size_t n);
 
diff --git a/ecc-j-to-a.c b/ecc-j-to-a.c
index ac134b5166d5018a76e675ecea0c4d0123452d64..3196cd6fa38289f69889fa10fc5a9a33ebbe1379 100644
--- a/ecc-j-to-a.c
+++ b/ecc-j-to-a.c
@@ -72,7 +72,7 @@ ecc_j_to_a (const struct ecc_curve *ecc,
 	     already be < 2*ecc->q, so one subtraction should
 	     suffice. */
 	  cy = mpn_sub_n (scratch, r, ecc->q.m, ecc->p.size);
-	  cnd_copy (cy == 0, r, scratch, ecc->p.size);
+	  cnd_copy (1 - cy, r, scratch, ecc->p.size);
 	}
       return;
     }
diff --git a/ecc-mod-arith.c b/ecc-mod-arith.c
index d01378643c3b412697ccdc3661ba4148271b67ec..3b9bcb47cee360f5a534755021c854490d814604 100644
--- a/ecc-mod-arith.c
+++ b/ecc-mod-arith.c
@@ -55,7 +55,7 @@ ecc_mod_zero_p (const struct ecc_modulo *m, const mp_limb_t *xp_in)
       is_not_p |= (xp[i] ^ m->m[i]);
     }
 
-  return (is_non_zero == 0) | (is_not_p == 0);
+  return is_zero_limb (is_non_zero) | is_zero_limb (is_not_p);
 }
 
 int
@@ -65,7 +65,7 @@ ecc_mod_equal_p (const struct ecc_modulo *m, const mp_limb_t *a,
   mp_limb_t cy;
   cy = mpn_sub_n (scratch, a, ref, m->size);
   /* If cy > 0, i.e., a < ref, then they can't be equal mod m. */
-  return (cy == 0) & ecc_mod_zero_p (m, scratch);
+  return (1 - cy) & ecc_mod_zero_p (m, scratch);
 }
 
 void
diff --git a/ecc-mul-a-eh.c b/ecc-mul-a-eh.c
index 980fec3f715e7eb5abd09de6ae5b218c45e5e2b1..30130aeab5a15338c4a0b0af97020d580f60ad68 100644
--- a/ecc-mul-a-eh.c
+++ b/ecc-mul-a-eh.c
@@ -66,21 +66,18 @@ ecc_mul_a_eh (const struct ecc_curve *ecc,
   
   for (i = ecc->p.size; i-- > 0; )
     {
-      mp_limb_t w = np[i];
-      mp_limb_t bit;
+      mp_limb_t w = np[i] << (GMP_LIMB_BITS - GMP_NUMB_BITS);
+      unsigned j;
 
-      for (bit = (mp_limb_t) 1 << (GMP_NUMB_BITS - 1);
-	   bit > 0;
-	   bit >>= 1)
+      for (j = 0; j < GMP_NUMB_BITS; j++, w <<= 1)
 	{
-	  int digit;
-
+	  int bit;
 	  ecc->dup (ecc, r, r, scratch_out);
 	  ecc->add_hh (ecc, tp, r, pe, scratch_out);
 
-	  digit = (w & bit) > 0;
+	  bit = w >> (GMP_LIMB_BITS - 1);
 	  /* If we had a one-bit, use the sum. */
-	  cnd_copy (digit, r, tp, 3*ecc->p.size);
+	  cnd_copy (bit, r, tp, 3*ecc->p.size);
 	}
     }
 }
diff --git a/ecc-mul-a.c b/ecc-mul-a.c
index 8e1355ebc933457cfa9b0da797477c9c2f8b1155..cc2a7960a1395abd8919d04717999efffb3d9af4 100644
--- a/ecc-mul-a.c
+++ b/ecc-mul-a.c
@@ -39,6 +39,7 @@
 
 #include "ecc.h"
 #include "ecc-internal.h"
+#include "nettle-internal.h"
 
 /* Binary algorithm needs 6*ecc->p.size + scratch for ecc_add_jja.
    Current total is 12 ecc->p.size, at most 864 bytes.
@@ -67,25 +68,23 @@ ecc_mul_a (const struct ecc_curve *ecc,
   
   for (i = ecc->p.size, is_zero = 1; i-- > 0; )
     {
-      mp_limb_t w = np[i];
-      mp_limb_t bit;
+      mp_limb_t w = np[i] << (GMP_LIMB_BITS - GMP_NUMB_BITS);
+      unsigned j;
 
-      for (bit = (mp_limb_t) 1 << (GMP_NUMB_BITS - 1);
-	   bit > 0;
-	   bit >>= 1)
+      for (j = 0; j < GMP_NUMB_BITS; j++, w <<= 1)
 	{
-	  int digit;
+	  int bit;
 
 	  ecc_dup_jj (ecc, r, r, scratch_out);
 	  ecc_add_jja (ecc, tp, r, pj, scratch_out);
 
-	  digit = (w & bit) > 0;
+	  bit = w >> (GMP_LIMB_BITS - 1);
 	  /* If is_zero is set, r is the zero point,
 	     and ecc_add_jja produced garbage. */
 	  cnd_copy (is_zero, tp, pj, 3*ecc->p.size);
-	  is_zero &= ~digit;
+	  is_zero &= 1 - bit;
 	  /* If we had a one-bit, use the sum. */
-	  cnd_copy (digit, r, tp, 3*ecc->p.size);
+	  cnd_copy (bit, r, tp, 3*ecc->p.size);
 	}
     }
 }
@@ -145,10 +144,11 @@ ecc_mul_a (const struct ecc_curve *ecc,
   assert (bits < TABLE_SIZE);
 
   mpn_sec_tabselect (r, table, 3*ecc->p.size, TABLE_SIZE, bits);
-  is_zero = (bits == 0);
+  is_zero = IS_ZERO_SMALL (bits);
 
   for (;;)
     {
+      int bits_is_zero;
       unsigned j;
       if (shift >= ECC_MUL_A_WBITS)
 	{
@@ -174,11 +174,12 @@ ecc_mul_a (const struct ecc_curve *ecc,
       mpn_sec_tabselect (tp, table, 3*ecc->p.size, TABLE_SIZE, bits);
       cnd_copy (is_zero, r, tp, 3*ecc->p.size);
       ecc_add_jjj (ecc, tp, tp, r, scratch_out);
+      bits_is_zero = IS_ZERO_SMALL (bits);
 
       /* Use the sum when valid. ecc_add_jja produced garbage if
-	 is_zero != 0 or bits == 0, . */	  
-      cnd_copy (bits & (is_zero - 1), r, tp, 3*ecc->p.size);
-      is_zero &= (bits == 0);
+	 is_zero or bits_is_zero. */
+      cnd_copy (1 - (bits_is_zero | is_zero), r, tp, 3*ecc->p.size);
+      is_zero &= bits_is_zero;
     }
 #undef table
 #undef tp
diff --git a/ecc-mul-g.c b/ecc-mul-g.c
index 677a37e7a01979b516a5ce2007e5fae5e84d02d1..97bbabad6dd0a4393312d7d671ee108032337a14 100644
--- a/ecc-mul-g.c
+++ b/ecc-mul-g.c
@@ -39,6 +39,7 @@
 
 #include "ecc.h"
 #include "ecc-internal.h"
+#include "nettle-internal.h"
 
 void
 ecc_mul_g (const struct ecc_curve *ecc, mp_limb_t *r,
@@ -71,7 +72,8 @@ ecc_mul_g (const struct ecc_curve *ecc, mp_limb_t *r,
 	  /* Avoid the mp_bitcnt_t type for compatibility with older GMP
 	     versions. */
 	  unsigned bit_index;
-	  
+	  int bits_is_zero;
+
 	  /* Extract c bits from n, stride k, starting at i + kcj,
 	     ending at i + k (cj + c - 1)*/
 	  for (bits = 0, bit_index = i + k*(c*j+c); bit_index > i + k*c*j; )
@@ -96,10 +98,12 @@ ecc_mul_g (const struct ecc_curve *ecc, mp_limb_t *r,
 	  cnd_copy (is_zero, r + 2*ecc->p.size, ecc->unit, ecc->p.size);
 	  
 	  ecc_add_jja (ecc, tp, r, tp, scratch_out);
+	  bits_is_zero = IS_ZERO_SMALL (bits);
+
 	  /* Use the sum when valid. ecc_add_jja produced garbage if
-	     is_zero != 0 or bits == 0, . */	  
-	  cnd_copy (bits & (is_zero - 1), r, tp, 3*ecc->p.size);
-	  is_zero &= (bits == 0);
+	     is_zero or bits_is_zero. */
+	  cnd_copy (1 - (bits_is_zero | is_zero), r, tp, 3*ecc->p.size);
+	  is_zero &= bits_is_zero;
 	}
     }
 #undef tp
diff --git a/eddsa-decompress.c b/eddsa-decompress.c
index 8517fb7bbd3490dce3272614efee42c823d775a2..0718c1a0c44998f545fc1bce1937c8b372202067 100644
--- a/eddsa-decompress.c
+++ b/eddsa-decompress.c
@@ -83,7 +83,7 @@ _eddsa_decompress (const struct ecc_curve *ecc, mp_limb_t *p,
 
   /* Check range. */
   if (nlimbs > ecc->p.size)
-    res = (scratch[nlimbs - 1] == 0);
+    res = is_zero_limb (scratch[nlimbs - 1]);
   else
     res = 1;
 
diff --git a/memeql-sec.c b/memeql-sec.c
index b19052eda4a13312662ff77439e6f5fbaf34cbc2..ba8c5dd6950a4933a9cba0cf97f98547fda99650 100644
--- a/memeql-sec.c
+++ b/memeql-sec.c
@@ -34,6 +34,7 @@
 #endif
 
 #include "memops.h"
+#include "nettle-internal.h"
 
 int
 memeql_sec (const void *a, const void *b, size_t n)
@@ -47,5 +48,5 @@ memeql_sec (const void *a, const void *b, size_t n)
   for (i = diff = 0; i < n; i++)
     diff |= (ap[i] ^ bp[i]);
 
-  return diff == 0;
+  return IS_ZERO_SMALL (diff);
 }
diff --git a/nettle-internal.h b/nettle-internal.h
index c41f3ee08516cc9ca081250acf512b9a8f90407b..2b7dc8164e00c1da8b37a161d9acc143fe4b8cac 100644
--- a/nettle-internal.h
+++ b/nettle-internal.h
@@ -84,6 +84,10 @@
 #define NETTLE_MAX_CIPHER_BLOCK_SIZE 32
 #define NETTLE_MAX_CIPHER_KEY_SIZE 32
 
+/* Equivalent to x == 0, but with an expression that should compile to
+   branch free code on all compilers. Requires that x is at most 31 bits. */
+#define IS_ZERO_SMALL(x) (((uint32_t) (x) - 1U) >> 31)
+
 /* Doesn't quite fit with the other algorithms, because of the weak
  * keys. Weak keys are not reported, the functions will simply crash
  * if you try to use a weak key. */
@@ -103,7 +107,7 @@ extern const struct nettle_aead nettle_chacha;
 extern const struct nettle_aead nettle_salsa20;
 extern const struct nettle_aead nettle_salsa20r12;
 
-/* All-in-one CBC encrypt fucntinos treated as AEAD with no
+/* All-in-one CBC encrypt functinos treated as AEAD with no
    authentication and no decrypt method. */
 extern const struct nettle_aead nettle_cbc_aes128;
 extern const struct nettle_aead nettle_cbc_aes192;
diff --git a/pkcs1-sec-decrypt.c b/pkcs1-sec-decrypt.c
index 942a2bd39633b92296ee192e8db19bac8ce76ff4..7cf56fe4b776bc6a6b870d999cdcdb4a91200ff5 100644
--- a/pkcs1-sec-decrypt.c
+++ b/pkcs1-sec-decrypt.c
@@ -44,14 +44,14 @@
 
 #include "gmp-glue.h"
 #include "pkcs1-internal.h"
+#include "nettle-internal.h"
 
 /* Inputs are always cast to uint32_t values. But all values used in this
  * function should never exceed the maximum value of a uint32_t anyway.
  * these macros returns 1 on success, 0 on failure */
 #define NOT_EQUAL(a, b) \
     ((0U - ((uint32_t)(a) ^ (uint32_t)(b))) >> 31)
-#define EQUAL(a, b) \
-    ((((uint32_t)(a) ^ (uint32_t)(b)) - 1U) >> 31)
+#define EQUAL(a, b) (IS_ZERO_SMALL ((a) ^ (b)))
 #define GREATER_OR_EQUAL(a, b) \
     (1U - (((uint32_t)(a) - (uint32_t)(b)) >> 31))