diff --git a/ChangeLog b/ChangeLog
index a79ed6161d25eb7f0e9807c5f87739507a22e4c5..653640687fbf55a6a6cbab006cfb9f85ff7d3b0e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,8 @@
 2014-10-23  Niels Möller  <nisse@lysator.liu.se>
 
+	* memxor.c (memxor_different_alignment): Avoid out-of-bounds
+	reads, corresponding to valgrind's --partial-loads-ok.
+
 	* configure.ac (asm_replace_list): Deleted memxor.asm, now
 	incompatible with the memxor/memxor3 split.
 
diff --git a/memxor.c b/memxor.c
index a252a13deecc884b663d87158f225a7cc4dc048e..e2e300d3eb1508dfc13d9c8a4a1043ad06e2910e 100644
--- a/memxor.c
+++ b/memxor.c
@@ -37,6 +37,7 @@
 # include "config.h"
 #endif
 
+#include <assert.h>
 #include <limits.h>
 
 #include "memxor.h"
@@ -69,29 +70,44 @@ memxor_common_alignment (word_t *dst, const word_t *src, size_t n)
    words, not bytes. Assumes we can read complete words at the start
    and end of the src operand. */
 static void
-memxor_different_alignment (word_t *dst, const char *src, size_t n)
+memxor_different_alignment (word_t *dst, const unsigned char *src, size_t n)
 {
   int shl, shr;
   const word_t *src_word;
   unsigned offset = ALIGN_OFFSET (src);
   word_t s0, s1;
+  const unsigned char *part;
+  unsigned i;
 
+  assert (n > 0);
   shl = CHAR_BIT * offset;
   shr = CHAR_BIT * (sizeof(word_t) - offset);
 
   src_word = (const word_t *) ((uintptr_t) src & -sizeof(word_t));
 
+  /* Read top offset bytes, in native byte order. */
+  part = src + n*sizeof(word_t) - offset;
+#if WORDS_BIGENDIAN
+  for (s0 = part[0], i = 1; i < offset; i++)
+    s0 = (s0 << CHAR_BIT) | part[i];
+  s0 <<= shr; /* FIXME: Eliminate this shift? */
+#else /* !WORDS_BIGENDIAN */
+  for (i = offset, s0 = part[--i]; i > 0 ; )
+    s0 = (s0 << CHAR_BIT) | part[--i];
+#endif /* !WORDS_BIGENDIAN */
+
+  /* Do n-1 regular iterations */
   if (n & 1)
+    s1 = s0;
+  else
     {
       n--;
       s1 = src_word[n];
-      s0 = src_word[n+1]; /* FIXME: Overread */
       dst[n] ^= MERGE (s1, shl, s0, shr);
     }
-  else
-    s1 = src_word[n]; /* FIXME: Overread */
 
-  while (n > 0)
+  assert (n & 1);
+  while (n > 2)
     {
       n -= 2;
       s0 = src_word[n+1];
@@ -99,6 +115,17 @@ memxor_different_alignment (word_t *dst, const char *src, size_t n)
       s1 = src_word[n]; /* FIXME: Overread on last iteration */
       dst[n] ^= MERGE(s1, shl, s0, shr);
     }
+  assert (n == 1);
+  /* Read low wordsize - offset bytes */
+#if WORDS_BIGENDIAN
+  for (s0 = src[0], i = 1; i < sizeof(word_t) - offset; i++)
+    s0 = (s0 << CHAR_BIT) | src[i];
+#else /* !WORDS_BIGENDIAN */
+  for (i = sizeof(word_t) - offset, s0 = src[--i]; i > 0 ; )
+    s0 = (s0 << CHAR_BIT) | src[--i];
+  s0 <<= shl; /* FIXME: eliminate shift? */
+#endif /* !WORDS_BIGENDIAN */
+  dst[0] ^= MERGE(s0, shl, s1, shr);
 }
 
 /* Performance, Intel SU1400 (x86_64): 0.25 cycles/byte aligned, 0.45
@@ -109,8 +136,8 @@ memxor_different_alignment (word_t *dst, const char *src, size_t n)
 void *
 memxor(void *dst_in, const void *src_in, size_t n)
 {
-  char *dst = dst_in;
-  const char *src = src_in;
+  unsigned char *dst = dst_in;
+  const unsigned char *src = src_in;
 
   if (n >= WORD_T_THRESH)
     {
@@ -129,7 +156,7 @@ memxor(void *dst_in, const void *src_in, size_t n)
       n %= sizeof (word_t);
 
       if (offset)
-	memxor_different_alignment ((word_t *) (dst+n), (src+n), nwords);
+	memxor_different_alignment ((word_t *) (dst+n), src+n, nwords);
       else
 	memxor_common_alignment ((word_t *) (dst+n),
 				 (const word_t *) (src+n), nwords);