From b6a47ff58a7831503c6fb53f0820773594fd12d5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se>
Date: Thu, 23 Oct 2014 13:04:30 +0200
Subject: [PATCH] Change loop order in memxor.

---
 ChangeLog |  3 +++
 memxor.c  | 65 +++++++++++++++++++++++++++++++------------------------
 2 files changed, 40 insertions(+), 28 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index f611ea2f..8311fec9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,8 @@
 2014-10-23  Niels Möller  <nisse@lysator.liu.se>
 
+	* memxor.c (memxor_common_alignment, memxor_different_alignment)
+	(memxor): Change loop order, iterate from the end.
+
 	* examples/nettle-benchmark.c (time_memxor): Allocate buffers as
 	arrays of unsigned long, for more reliable alignment.
 
diff --git a/memxor.c b/memxor.c
index 99f46f37..e205aba8 100644
--- a/memxor.c
+++ b/memxor.c
@@ -70,13 +70,14 @@ memxor_common_alignment (word_t *dst, const word_t *src, size_t n)
 
   if (n & 1)
     {
-      *dst++ ^= *src++;
       n--;
+      dst[n] ^= src[n];
     }
-  for (; n >= 2; dst += 2, src += 2, n -= 2)
+  while (n >= 2)
     {
-      dst[0] ^= src[0];
-      dst[1] ^= src[1];
+      n -= 2;
+      dst[n+1] ^= src[n+1];
+      dst[n] ^= src[n];
     }
 }
 
@@ -86,7 +87,6 @@ memxor_common_alignment (word_t *dst, const word_t *src, size_t n)
 static void
 memxor_different_alignment (word_t *dst, const char *src, size_t n)
 {
-  size_t i;
   int shl, shr;
   const word_t *src_word;
   unsigned offset = ALIGN_OFFSET (src);
@@ -97,21 +97,23 @@ memxor_different_alignment (word_t *dst, const char *src, size_t n)
 
   src_word = (const word_t *) ((uintptr_t) src & -SIZEOF_LONG);
 
-  /* FIXME: Unroll four times, like memcmp? */
-  i = n & 1;
-  s0 = src_word[i];
-  if (i)
+  if (n & 1)
     {
-      s1 = src_word[0];
-      dst[0] ^= MERGE (s1, shl, s0, shr);
+      n--;
+      s1 = src_word[n];
+      s0 = src_word[n+1]; /* FIXME: Overread */
+      dst[n] ^= MERGE (s1, shl, s0, shr);
     }
+  else
+    s1 = src_word[n]; /* FIXME: Overread */
 
-  for (; i < n; i += 2)
+  while (n > 0)
     {
-      s1 = src_word[i+1];
-      dst[i] ^= MERGE(s0, shl, s1, shr);
-      s0 = src_word[i+2];
-      dst[i+1] ^= MERGE(s1, shl, s0, shr);
+      n -= 2;
+      s0 = src_word[n+1];
+      dst[n+1] ^= MERGE(s0, shl, s1, shr);
+      s1 = src_word[n]; /* FIXME: Overread on last iteration */
+      dst[n] ^= MERGE(s1, shl, s0, shr);
     }
 }
 
@@ -128,26 +130,33 @@ memxor(void *dst_in, const void *src_in, size_t n)
 
   if (n >= WORD_T_THRESH)
     {
+      unsigned i;
+      unsigned offset;
+      size_t nwords;
       /* There are at least some bytes to compare.  No need to test
 	 for N == 0 in this alignment loop.  */
-      while (ALIGN_OFFSET (dst))
+      for (i = ALIGN_OFFSET(dst + n); i > 0; i--)
 	{
-	  *dst++ ^= *src++;
 	  n--;
+	  dst[n] ^= src[n];
 	}
-      if (ALIGN_OFFSET (src))
-	memxor_different_alignment ((word_t *) dst, src, n / sizeof(word_t));
-      else
-	memxor_common_alignment ((word_t *) dst, (const word_t *) src, n / sizeof(word_t));
+      offset = ALIGN_OFFSET(src + n);
+      nwords = n / sizeof (word_t);
+      n %= sizeof (word_t);
 
-      dst += n & -SIZEOF_LONG;
-      src += n & -SIZEOF_LONG;
-      n = n & (SIZEOF_LONG - 1);
+      if (offset)
+	memxor_different_alignment ((word_t *) (dst+n), (src+n), nwords);
+      else
+	memxor_common_alignment ((word_t *) (dst+n),
+				 (const word_t *) (src+n), nwords);
+    }
+  while (n > 0)
+    {
+      n--;
+      dst[n] ^= src[n];
     }
-  for (; n > 0; n--)
-    *dst++ ^= *src++;
 
-  return dst_in;
+  return dst;
 }
 
 
-- 
GitLab