diff --git a/ChangeLog b/ChangeLog
index 592f22b8182e7ba94d2a1a3f6ffbab1fadbc1e77..b6be1848b0b8da981c6d7cca5eca76909337f446 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2010-09-24  Niels Möller  <nisse@lysator.liu.se>
+
+	Partial revert of 2010-09-20 changes.
+	* camellia-set-encrypt-key.c (camellia_set_encrypt_key):
+	Reintroduce CAMELLIA_F_HALF_INV, for 32-bit machines.
+	* camellia-crypt-internal.c (CAMELLIA_ROUNDSM): Two variants,
+	differing in where addition of the key is done.
+	* x86/camellia-crypt-internal.asm: Moved addition of key.
+
 2010-09-22  Niels Möller  <nisse@turmalin.hack.org>
 
 	* examples/nettle-benchmark.c (BENCH_INTERVAL): Changed unit to
diff --git a/camellia-crypt-internal.c b/camellia-crypt-internal.c
index 679c4db169288da9955a1cdfd941e07ad6776c16..8a9296e812efe021a3ad520d4ad126cd32a0292a 100644
--- a/camellia-crypt-internal.c
+++ b/camellia-crypt-internal.c
@@ -33,6 +33,7 @@
 #endif
 
 #include <assert.h>
+#include <limits.h>
 
 #include "camellia-internal.h"
 
@@ -62,33 +63,64 @@
   (x) = ((uint64_t) __xl << 32) | __xr;		\
 } while (0)
 
+#if HAVE_NATIVE_64_BIT
 #define CAMELLIA_ROUNDSM(T, x, k, y) do {			\
     uint32_t __il, __ir;					\
     __ir							\
-      = T->sp1110[(x) & 0xff]				\
-      ^ T->sp0222[((x) >> 24) & 0xff]			\
-      ^ T->sp3033[((x) >> 16) & 0xff]			\
-      ^ T->sp4404[((x) >> 8) & 0xff];			\
+      = T->sp1110[(x) & 0xff]					\
+      ^ T->sp0222[((x) >> 24) & 0xff]				\
+      ^ T->sp3033[((x) >> 16) & 0xff]				\
+      ^ T->sp4404[((x) >> 8) & 0xff];				\
     /* ir == (t6^t7^t8),(t5^t7^t8),(t5^t6^t8),(t5^t6^t7) */	\
     __il							\
-      = T->sp1110[ (x) >> 56]				\
-      ^ T->sp0222[((x) >> 48) & 0xff]			\
-      ^ T->sp3033[((x) >> 40) & 0xff]			\
-      ^ T->sp4404[((x) >> 32) & 0xff];			\
+      = T->sp1110[ (x) >> 56]					\
+      ^ T->sp0222[((x) >> 48) & 0xff]				\
+      ^ T->sp3033[((x) >> 40) & 0xff]				\
+      ^ T->sp4404[((x) >> 32) & 0xff];				\
     /* il == (t1^t3^t4),(t1^t2^t4),(t1^t2^t3),(t2^t3^t4) */	\
     __ir ^= __il;						\
     /* ir == (t1^t3^t4^t6^t7^t8),(t1^t2^t4^t5^t7^t8),		\
-             (t1^t2^t3^t5^t6^t8),(t2^t3^t4^t5^t6^t7)		\
-          == y1,y2,y3,y4 */					\
+       (t1^t2^t3^t5^t6^t8),(t2^t3^t4^t5^t6^t7)			\
+       == y1,y2,y3,y4 */					\
     __il = ROL32(24, __il);					\
     /* il == (t2^t3^t4),(t1^t3^t4),(t1^t2^t4),(t1^t2^t3) */	\
     __il ^= __ir;						\
     /* il == (t1^t2^t6^t7^t8),(t2^t3^t5^t7^t8),			\
-             (t3^t4^t5^t6^t8),(t1^t4^t5^t6^t7)			\
-          == y5,y6,y7,y8 */					\
+       (t3^t4^t5^t6^t8),(t1^t4^t5^t6^t7)			\
+       == y5,y6,y7,y8 */					\
     y ^= (k);							\
     y ^= ((uint64_t) __ir << 32) | __il;			\
   } while (0)
+#else /* !HAVE_NATIVE_64_BIT */
+#define CAMELLIA_ROUNDSM(T, x, k, y) do {	/* variant used when !HAVE_NATIVE_64_BIT */	\
+    uint32_t __il, __ir;	/* the two 32-bit halves of the value xored into y at the end */	\
+    __ir							\
+      = T->sp1110[(x) & 0xff]					\
+      ^ T->sp0222[((x) >> 24) & 0xff]				\
+      ^ T->sp3033[((x) >> 16) & 0xff]				\
+      ^ T->sp4404[((x) >> 8) & 0xff];				\
+    /* ir == (t6^t7^t8),(t5^t7^t8),(t5^t6^t8),(t5^t6^t7) */	\
+    __il							\
+      = T->sp1110[ (x) >> 56]					\
+      ^ T->sp0222[((x) >> 48) & 0xff]				\
+      ^ T->sp3033[((x) >> 40) & 0xff]				\
+      ^ T->sp4404[((x) >> 32) & 0xff];				\
+    /* il == (t1^t3^t4),(t1^t2^t4),(t1^t2^t3),(t2^t3^t4) */	\
+    __il ^= (k) >> 32;	/* high key word is added before the xor/rotate mixing below, */	\
+    __ir ^= (k) & 0xffffffff;	/* so k must already have been run through CAMELLIA_F_HALF_INV */	\
+    __ir ^= __il;	/* the y1..y8 notes below leave out the key terms added above */	\
+    /* ir == (t1^t3^t4^t6^t7^t8),(t1^t2^t4^t5^t7^t8),		\
+       (t1^t2^t3^t5^t6^t8),(t2^t3^t4^t5^t6^t7)			\
+       == y1,y2,y3,y4 */					\
+    __il = ROL32(24, __il);					\
+    /* il == (t2^t3^t4),(t1^t3^t4),(t1^t2^t4),(t1^t2^t3) */	\
+    __il ^= __ir;						\
+    /* il == (t1^t2^t6^t7^t8),(t2^t3^t5^t7^t8),			\
+       (t3^t4^t5^t6^t8),(t1^t4^t5^t6^t7)			\
+       == y5,y6,y7,y8 */					\
+    y ^= ((uint64_t) __ir << 32) | __il;			\
+  } while (0)
+#endif
 
 void
 _camellia_crypt(const struct camellia_ctx *ctx,
diff --git a/camellia-set-encrypt-key.c b/camellia-set-encrypt-key.c
index ee3799466bb95dd48be13fabcdb386dd487983c2..989e3c7e79df2593d46eb1596ce4cc81ad15d2cc 100644
--- a/camellia-set-encrypt-key.c
+++ b/camellia-set-encrypt-key.c
@@ -36,6 +36,7 @@
 #endif
 
 #include <assert.h>
+#include <limits.h>
 
 #include "camellia-internal.h"
 
@@ -74,6 +75,16 @@
     (y) = ((uint64_t) __yl << 32) | __yr;	\
   } while (0)
 
+#if ! HAVE_NATIVE_64_BIT
+#define CAMELLIA_F_HALF_INV(x) do { /* In place on a 64-bit lvalue x: inverse of the last half of the F-function */ \
+    uint32_t __t, __w; /* x is evaluated several times; pass an lvalue without side effects */ \
+    __t = (x) >> 32; /* high 32 bits of x */ \
+    __w = __t ^(x); /* high word xor low word (x is truncated to 32 bits by the assignment) */ \
+    __w = ROL32(8, __w); /* presumably a left-rotate by 8; ROL32 is defined elsewhere - confirm */ \
+    (x) = ((uint64_t) __w << 32) | (__t ^ __w); /* new high = w, new low = old high ^ w */ \
+  } while (0)
+#endif
+
 void
 camellia_set_encrypt_key(struct camellia_ctx *ctx,
 			 unsigned length, const uint8_t *key)
@@ -309,4 +320,17 @@ camellia_set_encrypt_key(struct camellia_ctx *ctx,
     }
   ctx->keys[i-2] = subkey[i-2];
   ctx->keys[i-1] = subkey[i] ^ subkey[i-1];
+
+#if !HAVE_NATIVE_64_BIT /* only for the CAMELLIA_ROUNDSM variant that xors the key into the 32-bit halves */
+  for (i = 0; i < ctx->nkeys; i += 8) /* walk the subkey array in groups of eight */
+    { /* keys[i] and keys[i+7] are left as is; presumably they are not used as CAMELLIA_ROUNDSM keys - confirm against _camellia_crypt */
+      /* Apply the inverse of the last half of the F-function, in place, to keys[i+1] .. keys[i+6]. */
+      CAMELLIA_F_HALF_INV(ctx->keys[i+1]);
+      CAMELLIA_F_HALF_INV(ctx->keys[i+2]);
+      CAMELLIA_F_HALF_INV(ctx->keys[i+3]);
+      CAMELLIA_F_HALF_INV(ctx->keys[i+4]);
+      CAMELLIA_F_HALF_INV(ctx->keys[i+5]);
+      CAMELLIA_F_HALF_INV(ctx->keys[i+6]);
+    }
+#endif /* !HAVE_NATIVE_64_BIT */
 }