Skip to content
Snippets Groups Projects
Commit 56d0bc72 authored by Niels Möller's avatar Niels Möller
Browse files

(memxor3): Optimized.

(memxor3_common_alignment): New function.
(memxor3_different_alignment_b): New function.
(memxor3_different_alignment_ab): New function.
(memxor3_different_alignment_all): New function.

Rev: nettle/memxor.c:1.3
parent 5a3187a3
Branches
Tags
No related merge requests found
......@@ -23,7 +23,8 @@
* MA 02111-1307, USA.
*/
/* Implementation inspired by memcmp in glibc, contributed to the FSF by Torbjorn Granlund.
/* Implementation inspired by memcmp in glibc, contributed to the FSF
by Torbjorn Granlund.
*/
#if HAVE_CONFIG_H
......@@ -34,7 +35,6 @@
#include "memxor.h"
#if 1
typedef unsigned long int word_t;
#if SIZEOF_LONG & (SIZEOF_LONG - 1)
......@@ -55,16 +55,19 @@ typedef unsigned long int word_t;
static void
memxor_common_alignment (word_t *dst, const word_t *src, size_t n)
{
size_t i;
/* FIXME: Unroll four times, like memcmp? */
i = n & 1;
if (i)
dst[0] ^= src[0];
/* FIXME: Require n > 0? */
/* FIXME: Unroll four times, like memcmp? Probably not worth the
effort. */
for (; i < n; i += 2)
if (n & 1)
{
dst[i] ^= src[i];
dst[i+1] ^= src[i+1];
*dst++ ^= *src++;
n--;
}
for (; n >= 2; dst += 2, src += 2, n -= 2)
{
dst[0] ^= src[0];
dst[1] ^= src[1];
}
}
......@@ -83,7 +86,7 @@ memxor_different_alignment (word_t *dst, const uint8_t *src, size_t n)
shl = CHAR_BIT * offset;
shr = CHAR_BIT * (sizeof(word_t) - offset);
src_word = (const word_t *) (src - offset);
src_word = (const word_t *) ((uintptr_t) src & -SIZEOF_LONG);
/* FIXME: Unroll four times, like memcmp? */
i = n & 1;
......@@ -91,7 +94,6 @@ memxor_different_alignment (word_t *dst, const uint8_t *src, size_t n)
if (i)
{
s1 = src_word[0];
s0 = src_word[1];
dst[0] ^= MERGE (s1, shl, s0, shr);
}
......@@ -104,6 +106,9 @@ memxor_different_alignment (word_t *dst, const uint8_t *src, size_t n)
}
}
/* Performance, Intel SU1400 (x86_64): 0.25 cycles/byte aligned, 0.45
cycles/byte unaligned. */
/* XOR LEN bytes starting at SRCADDR onto DESTADDR. Result undefined
if the source overlaps with the destination. Return DESTADDR. */
uint8_t *
......@@ -113,8 +118,6 @@ memxor(uint8_t *dst, const uint8_t *src, size_t n)
if (n >= WORD_T_THRESH)
{
size_t left_over;
/* There are at least some bytes to compare. No need to test
for N == 0 in this alignment loop. */
while (ALIGN_OFFSET (dst))
......@@ -127,35 +130,195 @@ memxor(uint8_t *dst, const uint8_t *src, size_t n)
else
memxor_common_alignment ((word_t *) dst, (const word_t *) src, n / sizeof(word_t));
left_over = n % sizeof(word_t);
dst += n - left_over;
src += n - left_over;
n = left_over;
dst += n & -SIZEOF_LONG;
src += n & -SIZEOF_LONG;
n = n & (SIZEOF_LONG - 1);
}
for (; n > 0; n--)
*dst++ ^= *src++;
return orig_dst;
}
#else
uint8_t *
memxor(uint8_t *dst, const uint8_t *src, size_t n)
/* XOR word-aligned areas. n is the number of words, not bytes. */
static void
memxor3_common_alignment (word_t *dst,
const word_t *a, const word_t *b, size_t n)
{
size_t i;
for (i = 0; i<n; i++)
dst[i] ^= src[i];
/* FIXME: Require n > 0? */
while (n-- > 0)
dst[n] = a[n] ^ b[n];
}
return dst;
static void
memxor3_different_alignment_b (word_t *dst,
const word_t *a, const uint8_t *b, unsigned offset, size_t n)
{
int shl, shr;
const word_t *b_word;
word_t s0, s1;
shl = CHAR_BIT * offset;
shr = CHAR_BIT * (sizeof(word_t) - offset);
b_word = (const word_t *) ((uintptr_t) b & -SIZEOF_LONG);
if (n & 1)
{
n--;
s1 = b_word[n];
s0 = b_word[n+1];
dst[n] = a[n] ^ MERGE (s1, shl, s0, shr);
}
#endif
else
s1 = b_word[n];
while (n > 0)
{
n -= 2;
s0 = b_word[n+1];
dst[n+1] = a[n+1] ^ MERGE(s0, shl, s1, shr);
s1 = b_word[n];
dst[n] = a[n] ^ MERGE(s1, shl, s0, shr);
}
}
static void
memxor3_different_alignment_ab (word_t *dst,
const uint8_t *a, const uint8_t *b,
unsigned offset, size_t n)
{
int shl, shr;
const word_t *a_word;
const word_t *b_word;
word_t s0, s1;
shl = CHAR_BIT * offset;
shr = CHAR_BIT * (sizeof(word_t) - offset);
a_word = (const word_t *) ((uintptr_t) a & -SIZEOF_LONG);
b_word = (const word_t *) ((uintptr_t) b & -SIZEOF_LONG);
if (n & 1)
{
n--;
s1 = a_word[n] ^ b_word[n];
s0 = a_word[n+1] ^ b_word[n+1];
dst[n] = MERGE (s1, shl, s0, shr);
}
else
s1 = a_word[n] ^ b_word[n];
while (n > 0)
{
n -= 2;
s0 = a_word[n+1] ^ b_word[n+1];
dst[n+1] = MERGE(s0, shl, s1, shr);
s1 = a_word[n] ^ b_word[n];
dst[n] = MERGE(s1, shl, s0, shr);
}
}
static void
memxor3_different_alignment_all (word_t *dst,
const uint8_t *a, const uint8_t *b,
unsigned a_offset, unsigned b_offset,
size_t n)
{
int al, ar, bl, br;
const word_t *a_word;
const word_t *b_word;
word_t a0, a1, b0, b1;
al = CHAR_BIT * a_offset;
ar = CHAR_BIT * (sizeof(word_t) - a_offset);
bl = CHAR_BIT * b_offset;
br = CHAR_BIT * (sizeof(word_t) - b_offset);
a_word = (const word_t *) ((uintptr_t) a & -SIZEOF_LONG);
b_word = (const word_t *) ((uintptr_t) b & -SIZEOF_LONG);
if (n & 1)
{
n--;
a1 = a_word[n]; a0 = a_word[n+1];
b1 = b_word[n]; b0 = b_word[n+1];
dst[n] = MERGE (a1, al, a0, ar) ^ MERGE (b1, bl, b0, br);
}
else
{
a1 = a_word[n];
b1 = b_word[n];
}
while (n > 0)
{
n -= 2;
a0 = a_word[n+1]; b0 = b_word[n+1];
dst[n+1] = MERGE(a0, al, a1, ar) ^ MERGE(b0, bl, b1, br);
a1 = a_word[n]; b1 = b_word[n];
dst[n] = MERGE(a1, al, a0, ar) ^ MERGE(b1, bl, b0, br);
}
}
/* Current implementation processes data in descending order, to
support overlapping operation with one of the sources overlapping
the start of the destination area. This feature is used only
internally by cbc decrypt, and it is not advertised or documented
to nettle users. */
uint8_t *
memxor3(uint8_t *dst, const uint8_t *a, const uint8_t *b, size_t n)
{
size_t i;
for (i = 0; i<n; i++)
dst[i] = a[i] ^ b[i];
if (n >= WORD_T_THRESH)
{
unsigned i;
unsigned a_offset;
unsigned b_offset;
size_t nwords;
return dst;
for (i = ALIGN_OFFSET(dst + n); i > 0; i--)
{
n--;
dst[n] = a[n] ^ b[n];
}
a_offset = ALIGN_OFFSET(a + n);
b_offset = ALIGN_OFFSET(b + n);
nwords = n / sizeof (word_t);
n %= sizeof (word_t);
if (a_offset == b_offset)
{
if (!a_offset)
memxor3_common_alignment((word_t *) (dst + n),
(const word_t *) (a + n),
(const word_t *) (b + n), nwords);
else
memxor3_different_alignment_ab((word_t *) (dst + n),
a + n, b + n, a_offset,
nwords);
}
else if (!a_offset)
memxor3_different_alignment_b((word_t *) (dst + n),
(const word_t *) (a + n), b + n,
b_offset, nwords);
else if (!b_offset)
memxor3_different_alignment_b((word_t *) (dst + n),
(const word_t *) (b + n), a + n,
a_offset, nwords);
else
memxor3_different_alignment_all((word_t *) (dst + n), a + n, b + n,
a_offset, b_offset, nwords);
}
while (n-- > 0)
dst[n] = a[n] ^ b[n];
return dst;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment