diff --git a/ChangeLog b/ChangeLog
index 6fcc42a81facc296e64d3c45ad8eb7a94ec6057f..9d8eab6645ad72a8978fabf8aa7c224ed7dfa11c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2014-08-02  Niels Möller  <nisse@lysator.liu.se>
+
+	* ecc-internal.h (ECC_MUL_A_EH_WBITS): Set to 4, to enable
+	window-based scalar multiplication.
+
+	* ecc-mul-a-eh.c (table_init) [ECC_MUL_A_EH_WBITS > 0]: Fixed
+	TABLE(1) setup; deleted mpn_copyi that overwrote ecc_a_to_eh result.
+
 2014-07-29  Niels Möller  <nisse@lysator.liu.se>
 
 	* ecc-internal.h (ECC_MUL_A_EH_WBITS): New constant.
diff --git a/ecc-internal.h b/ecc-internal.h
index e233b64f75feba275d01533a224ad167c469878d..2ed15ca7e6b24eeacc10c2b908e6bd954aa951d4 100644
--- a/ecc-internal.h
+++ b/ecc-internal.h
@@ -68,11 +68,12 @@
 
 /* Window size for ecc_mul_a. Using 4 bits seems like a good choice,
    for both Intel x86_64 and ARM Cortex A9. For the larger curves, of
-   384 and 521 bits, we could improve seepd by a few percent if we go
+   384 and 521 bits, we could improve speed by a few percent if we go
    up to 5 bits, but I don't think that's worth doubling the
    storage. */
 #define ECC_MUL_A_WBITS 4
-#define ECC_MUL_A_EH_WBITS 0
+/* Window size for ecc_mul_a_eh; same 4-bit choice as ecc_mul_a. */
+#define ECC_MUL_A_EH_WBITS 4
 
 
 /* Reduces from 2*ecc->size to ecc->size. */
diff --git a/ecc-mul-a-eh.c b/ecc-mul-a-eh.c
index ad017565be11d2af518c85a4cd01e854f63cc1fd..1e9f4fc762927f9391380fd05a6c2cb68064f7ec 100644
--- a/ecc-mul-a-eh.c
+++ b/ecc-mul-a-eh.c
@@ -91,8 +91,6 @@ ecc_mul_a_eh (const struct ecc_curve *ecc,
 }
 #else /* ECC_MUL_A_EH_WBITS > 1 */
 
-#error Not yet working
-
 #define TABLE_SIZE (1U << ECC_MUL_A_EH_WBITS)
 #define TABLE_MASK (TABLE_SIZE - 1)
 
@@ -111,7 +109,6 @@ table_init (const struct ecc_curve *ecc,
   TABLE(0)[ecc->size] = TABLE(0)[2*ecc->size] = 1;
 
   ecc_a_to_eh (ecc, TABLE(1), p, scratch);
-  mpn_copyi (TABLE(1), p, 3*ecc->size);
 
   for (j = 2; j < size; j += 2)
     {