memxor.c 4.39 KB
Newer Older
Niels Möller's avatar
Niels Möller committed
1 2
/* memxor.c

3
   Copyright (C) 2010, 2014 Niels Möller
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30

   This file is part of GNU Nettle.

   GNU Nettle is free software: you can redistribute it and/or
   modify it under the terms of either:

     * the GNU Lesser General Public License as published by the Free
       Software Foundation; either version 3 of the License, or (at your
       option) any later version.

   or

     * the GNU General Public License as published by the Free
       Software Foundation; either version 2 of the License, or (at your
       option) any later version.

   or both in parallel, as here.

   GNU Nettle is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received copies of the GNU General Public License and
   the GNU Lesser General Public License along with this program.  If
   not, see http://www.gnu.org/licenses/.
*/
31

Niels Möller's avatar
Niels Möller committed
32 33
/* Implementation inspired by memcmp in glibc, contributed to the FSF
   by Torbjorn Granlund.
34
 */
Niels Möller's avatar
Niels Möller committed
35

36
#if HAVE_CONFIG_H
37
# include "config.h"
38 39
#endif

40
#include <assert.h>
41 42
#include <limits.h>

Niels Möller's avatar
Niels Möller committed
43
#include "memxor.h"
44
#include "memxor-internal.h"
45 46 47 48 49 50 51

#define WORD_T_THRESH 16

/* XOR word-aligned areas. n is the number of words, not bytes. */
static void
memxor_common_alignment (word_t *dst, const word_t *src, size_t n)
{
Niels Möller's avatar
Niels Möller committed
52 53 54
  /* FIXME: Require n > 0? */
  /* FIXME: Unroll four times, like memcmp? Probably not worth the
     effort. */
55

Niels Möller's avatar
Niels Möller committed
56
  if (n & 1)
57
    {
Niels Möller's avatar
Niels Möller committed
58
      n--;
Niels Möller's avatar
Niels Möller committed
59
      dst[n] ^= src[n];
Niels Möller's avatar
Niels Möller committed
60
    }
Niels Möller's avatar
Niels Möller committed
61
  while (n >= 2)
Niels Möller's avatar
Niels Möller committed
62
    {
Niels Möller's avatar
Niels Möller committed
63 64 65
      n -= 2;
      dst[n+1] ^= src[n+1];
      dst[n] ^= src[n];
66 67 68
    }
}

Niels Möller's avatar
Niels Möller committed
69
/* XOR *un-aligned* src-area onto aligned dst area. n is number of
70 71 72
   words, not bytes. Assumes we can read complete words at the start
   and end of the src operand. */
static void
73
memxor_different_alignment (word_t *dst, const unsigned char *src, size_t n)
74 75 76 77 78
{
  int shl, shr;
  const word_t *src_word;
  unsigned offset = ALIGN_OFFSET (src);
  word_t s0, s1;
79 80
  const unsigned char *part;
  unsigned i;
81

82
  assert (n > 0);
83 84 85
  shl = CHAR_BIT * offset;
  shr = CHAR_BIT * (sizeof(word_t) - offset);

86
  src_word = (const word_t *) ((uintptr_t) src & -sizeof(word_t));
87

88 89 90 91 92 93 94 95 96 97 98 99
  /* Read top offset bytes, in native byte order. */
  part = src + n*sizeof(word_t) - offset;
#if WORDS_BIGENDIAN
  for (s0 = part[0], i = 1; i < offset; i++)
    s0 = (s0 << CHAR_BIT) | part[i];
  s0 <<= shr; /* FIXME: Eliminate this shift? */
#else /* !WORDS_BIGENDIAN */
  for (i = offset, s0 = part[--i]; i > 0 ; )
    s0 = (s0 << CHAR_BIT) | part[--i];
#endif /* !WORDS_BIGENDIAN */

  /* Do n-1 regular iterations */
Niels Möller's avatar
Niels Möller committed
100
  if (n & 1)
101 102
    s1 = s0;
  else
103
    {
Niels Möller's avatar
Niels Möller committed
104 105 106
      n--;
      s1 = src_word[n];
      dst[n] ^= MERGE (s1, shl, s0, shr);
107 108
    }

109 110
  assert (n & 1);
  while (n > 2)
111
    {
Niels Möller's avatar
Niels Möller committed
112 113 114 115 116
      n -= 2;
      s0 = src_word[n+1];
      dst[n+1] ^= MERGE(s0, shl, s1, shr);
      s1 = src_word[n]; /* FIXME: Overread on last iteration */
      dst[n] ^= MERGE(s1, shl, s0, shr);
117
    }
118 119 120 121 122 123 124 125 126 127 128
  assert (n == 1);
  /* Read low wordsize - offset bytes */
#if WORDS_BIGENDIAN
  for (s0 = src[0], i = 1; i < sizeof(word_t) - offset; i++)
    s0 = (s0 << CHAR_BIT) | src[i];
#else /* !WORDS_BIGENDIAN */
  for (i = sizeof(word_t) - offset, s0 = src[--i]; i > 0 ; )
    s0 = (s0 << CHAR_BIT) | src[--i];
  s0 <<= shl; /* FIXME: eliminate shift? */
#endif /* !WORDS_BIGENDIAN */
  dst[0] ^= MERGE(s0, shl, s1, shr);
129 130
}

Niels Möller's avatar
Niels Möller committed
131 132 133
/* Performance, Intel SU1400 (x86_64): 0.25 cycles/byte aligned, 0.45
   cycles/byte unaligned. */

134 135
/* XOR LEN bytes starting at SRCADDR onto DESTADDR. Result undefined
   if the source overlaps with the destination. Return DESTADDR. */
136 137
void *
memxor(void *dst_in, const void *src_in, size_t n)
138
{
139 140
  unsigned char *dst = dst_in;
  const unsigned char *src = src_in;
141 142 143

  if (n >= WORD_T_THRESH)
    {
Niels Möller's avatar
Niels Möller committed
144 145 146
      unsigned i;
      unsigned offset;
      size_t nwords;
147 148
      /* There are at least some bytes to compare.  No need to test
	 for N == 0 in this alignment loop.  */
Niels Möller's avatar
Niels Möller committed
149
      for (i = ALIGN_OFFSET(dst + n); i > 0; i--)
150 151
	{
	  n--;
Niels Möller's avatar
Niels Möller committed
152
	  dst[n] ^= src[n];
153
	}
Niels Möller's avatar
Niels Möller committed
154 155 156
      offset = ALIGN_OFFSET(src + n);
      nwords = n / sizeof (word_t);
      n %= sizeof (word_t);
157

Niels Möller's avatar
Niels Möller committed
158
      if (offset)
159
	memxor_different_alignment ((word_t *) (dst+n), src+n, nwords);
Niels Möller's avatar
Niels Möller committed
160 161 162 163 164 165 166 167
      else
	memxor_common_alignment ((word_t *) (dst+n),
				 (const word_t *) (src+n), nwords);
    }
  while (n > 0)
    {
      n--;
      dst[n] ^= src[n];
168 169
    }

Niels Möller's avatar
Niels Möller committed
170
  return dst;
171
}