Commit 00a6c2d1 authored by Niels Möller
Browse files

* Makefile.in (DISTFILES): Added serpent-internal.h.

(nettle_SOURCES): Replaced serpent.c by serpent-set-key.c,
serpent-encrypt.c, and serpent-decrypt.c.

* serpent.c: Replaced by several new files.
* serpent-set-key.c: New file.
* serpent-encrypt.c: New file.
* serpent-decrypt.c: New file.
* serpent-internal.h: New file.

Rev: nettle/ChangeLog:1.176
Rev: nettle/Makefile.in:1.34
Rev: nettle/serpent-decrypt.c:1.1
Rev: nettle/serpent-encrypt.c:1.1
Rev: nettle/serpent-internal.h:1.1
Rev: nettle/serpent-set-key.c:1.1
Rev: nettle/serpent.c:1.9(DEAD)
parent 229f766b
2011-06-06  Niels Möller  <nisse@lysator.liu.se>
* Makefile.in (DISTFILES): Added serpent-internal.h.
(nettle_SOURCES): Replaced serpent.c by serpent-set-key.c,
serpent-encrypt.c, and serpent-decrypt.c.
* serpent.c: Replaced by several new files.
* serpent-set-key.c: New file.
* serpent-encrypt.c: New file.
* serpent-decrypt.c: New file.
* serpent-internal.h: New file.
* serpent.c [HAVE_NATIVE_64_BIT]: Process two blocks at a time in
parallel. Measured speedup of 10%--25% (higher for encryption) on
x86_64.
......
...@@ -71,7 +71,8 @@ nettle_SOURCES = aes-decrypt-internal.c aes-decrypt.c \ ...@@ -71,7 +71,8 @@ nettle_SOURCES = aes-decrypt-internal.c aes-decrypt.c \
sha1.c sha1-compress.c sha1-meta.c \ sha1.c sha1-compress.c sha1-meta.c \
sha256.c sha256-compress.c sha224-meta.c sha256-meta.c \ sha256.c sha256-compress.c sha224-meta.c sha256-meta.c \
sha512.c sha512-compress.c sha384-meta.c sha512-meta.c \ sha512.c sha512-compress.c sha384-meta.c sha512-meta.c \
serpent.c serpent-meta.c \ serpent-set-key.c serpent-encrypt.c serpent-decrypt.c \
serpent-meta.c \
twofish.c twofish-meta.c \ twofish.c twofish-meta.c \
yarrow256.c yarrow_key_event.c \ yarrow256.c yarrow_key_event.c \
buffer.c buffer-init.c realloc.c \ buffer.c buffer-init.c realloc.c \
...@@ -127,7 +128,8 @@ DISTFILES = $(SOURCES) $(HEADERS) .bootstrap aclocal.m4 configure.ac \ ...@@ -127,7 +128,8 @@ DISTFILES = $(SOURCES) $(HEADERS) .bootstrap aclocal.m4 configure.ac \
config.h.in config.m4.in config.make.in Makefile.in \ config.h.in config.m4.in config.make.in Makefile.in \
README AUTHORS COPYING COPYING.LIB INSTALL NEWS TODO ChangeLog \ README AUTHORS COPYING COPYING.LIB INSTALL NEWS TODO ChangeLog \
memxor.c $(des_headers) descore.README \ memxor.c $(des_headers) descore.README \
aes-internal.h camellia-internal.h cast128_sboxes.h desinfo.h desCode.h \ aes-internal.h camellia-internal.h serpent-internal.h \
cast128_sboxes.h desinfo.h desCode.h \
nettle-internal.h nettle-write.h prime-list.h \ nettle-internal.h nettle-write.h prime-list.h \
asm.m4 \ asm.m4 \
nettle.texinfo nettle.info nettle.html nettle.pdf sha-example.c nettle.texinfo nettle.info nettle.html nettle.pdf sha-example.c
......
/* serpent.c /* serpent-decrypt.c
* *
* The serpent block cipher. * The serpent block cipher.
* *
...@@ -48,12 +48,7 @@ ...@@ -48,12 +48,7 @@
#include "serpent.h" #include "serpent.h"
#include "macros.h" #include "macros.h"
#include "serpent-internal.h"
/* Magic number, used during generating of the subkeys.  This is the
   Serpent key-schedule constant (the fractional part of the golden
   ratio, 2^32/phi, per the Serpent specification). */
#define PHI 0x9E3779B9
/* FIXME: Unify ROL macros used here, in camellia.c and cast128.c. */
/* Rotate the 32-bit word x left by n bits.  Valid only for n in
   1..31: n == 0 would produce a right shift by 32, which is
   undefined behavior in C. */
#define ROL32(x,n) ((((x))<<(n)) | (((x))>>(32-(n))))
/* These are the S-Boxes of Serpent. They are copied from Serpents /* These are the S-Boxes of Serpent. They are copied from Serpents
reference implementation (the optimized one, contained in reference implementation (the optimized one, contained in
...@@ -74,29 +69,6 @@ ...@@ -74,29 +69,6 @@
/* FIXME: Except when used within the key schedule, the inputs are not /* FIXME: Except when used within the key schedule, the inputs are not
used after the substitution, and hence we could allow them to be used after the substitution, and hence we could allow them to be
destroyed. Can this freedom be used to optimize the sboxes? */ destroyed. Can this freedom be used to optimize the sboxes? */
/* Serpent S-box 0, bitsliced: substitutes inputs (a,b,c,d) into
   outputs (w,x,y,z); `type' is the word type -- uint32_t for one
   block, uint64_t when two blocks are processed in parallel.  The
   t* variables are intermediate gate values. */
#define SBOX0(type, a, b, c, d, w, x, y, z) \
  do { \
    type t02, t03, t05, t06, t07, t08, t09; \
    type t11, t12, t13, t14, t15, t17, t01; \
    t01 = b ^ c ; \
    t02 = a | d ; \
    t03 = a ^ b ; \
    z = t02 ^ t01; \
    t05 = c | z ; \
    t06 = a ^ d ; \
    t07 = b | c ; \
    t08 = d & t05; \
    t09 = t03 & t07; \
    y = t09 ^ t08; \
    t11 = t09 & y ; \
    t12 = c ^ d ; \
    t13 = t07 ^ t11; \
    t14 = b & t06; \
    t15 = t06 ^ t13; \
    w = ~ t15; \
    t17 = w ^ t14; \
    x = t12 ^ t17; \
  } while (0)
#define SBOX0_INVERSE(type, a, b, c, d, w, x, y, z) \ #define SBOX0_INVERSE(type, a, b, c, d, w, x, y, z) \
do { \ do { \
...@@ -123,30 +95,6 @@ ...@@ -123,30 +95,6 @@
w = t15 ^ t18; \ w = t15 ^ t18; \
} while (0) } while (0)
/* Serpent S-box 1, bitsliced: substitutes inputs (a,b,c,d) into
   outputs (w,x,y,z); `type' is the word type (uint32_t, or uint64_t
   for the two-block parallel path). */
#define SBOX1(type, a, b, c, d, w, x, y, z) \
  do { \
    type t02, t03, t04, t05, t06, t07, t08; \
    type t10, t11, t12, t13, t16, t17, t01; \
    t01 = a | d ; \
    t02 = c ^ d ; \
    t03 = ~ b ; \
    t04 = a ^ c ; \
    t05 = a | t03; \
    t06 = d & t04; \
    t07 = t01 & t02; \
    t08 = b | t06; \
    y = t02 ^ t05; \
    t10 = t07 ^ t08; \
    t11 = t01 ^ t10; \
    t12 = y ^ t11; \
    t13 = b & d ; \
    z = ~ t10; \
    x = t13 ^ t12; \
    t16 = t10 | x ; \
    t17 = t05 & t16; \
    w = c ^ t17; \
  } while (0)
#define SBOX1_INVERSE(type, a, b, c, d, w, x, y, z) \ #define SBOX1_INVERSE(type, a, b, c, d, w, x, y, z) \
do { \ do { \
type t02, t03, t04, t05, t06, t07, t08; \ type t02, t03, t04, t05, t06, t07, t08; \
...@@ -171,28 +119,6 @@ ...@@ -171,28 +119,6 @@
w = t14 ^ t17; \ w = t14 ^ t17; \
} while (0) } while (0)
/* Serpent S-box 2, bitsliced: substitutes inputs (a,b,c,d) into
   outputs (w,x,y,z); `type' is the word type (uint32_t, or uint64_t
   for the two-block parallel path). */
#define SBOX2(type, a, b, c, d, w, x, y, z) \
  do { \
    type t02, t03, t05, t06, t07, t08; \
    type t09, t10, t12, t13, t14, t01; \
    t01 = a | c ; \
    t02 = a ^ b ; \
    t03 = d ^ t01; \
    w = t02 ^ t03; \
    t05 = c ^ w ; \
    t06 = b ^ t05; \
    t07 = b | t05; \
    t08 = t01 & t06; \
    t09 = t03 ^ t07; \
    t10 = t02 | t09; \
    x = t10 ^ t08; \
    t12 = a | d ; \
    t13 = t09 ^ x ; \
    t14 = b ^ t13; \
    z = ~ t09; \
    y = t12 ^ t14; \
  } while (0)
#define SBOX2_INVERSE(type, a, b, c, d, w, x, y, z) \ #define SBOX2_INVERSE(type, a, b, c, d, w, x, y, z) \
do { \ do { \
type t02, t03, t04, t06, t07, t08, t09; \ type t02, t03, t04, t06, t07, t08, t09; \
...@@ -217,30 +143,6 @@ ...@@ -217,30 +143,6 @@
y = t16 ^ t17; \ y = t16 ^ t17; \
} while (0) } while (0)
/* Serpent S-box 3, bitsliced: substitutes inputs (a,b,c,d) into
   outputs (w,x,y,z); `type' is the word type (uint32_t, or uint64_t
   for the two-block parallel path). */
#define SBOX3(type, a, b, c, d, w, x, y, z) \
  do { \
    type t02, t03, t04, t05, t06, t07, t08; \
    type t09, t10, t11, t13, t14, t15, t01; \
    t01 = a ^ c ; \
    t02 = a | d ; \
    t03 = a & d ; \
    t04 = t01 & t02; \
    t05 = b | t03; \
    t06 = a & b ; \
    t07 = d ^ t04; \
    t08 = c | t06; \
    t09 = b ^ t07; \
    t10 = d & t05; \
    t11 = t02 ^ t10; \
    z = t08 ^ t09; \
    t13 = d | z ; \
    t14 = a | t07; \
    t15 = b & t13; \
    y = t08 ^ t11; \
    w = t14 ^ t15; \
    x = t05 ^ t04; \
  } while (0)
#define SBOX3_INVERSE(type, a, b, c, d, w, x, y, z) \ #define SBOX3_INVERSE(type, a, b, c, d, w, x, y, z) \
do { \ do { \
type t02, t03, t04, t05, t06, t07, t09; \ type t02, t03, t04, t05, t06, t07, t09; \
...@@ -264,31 +166,6 @@ ...@@ -264,31 +166,6 @@
z = t14 ^ t16; \ z = t14 ^ t16; \
} while (0) } while (0)
/* Serpent S-box 4, bitsliced: substitutes inputs (a,b,c,d) into
   outputs (w,x,y,z); `type' is the word type (uint32_t, or uint64_t
   for the two-block parallel path). */
#define SBOX4(type, a, b, c, d, w, x, y, z) \
  do { \
    type t02, t03, t04, t05, t06, t08, t09; \
    type t10, t11, t12, t13, t14, t15, t16, t01; \
    t01 = a | b ; \
    t02 = b | c ; \
    t03 = a ^ t02; \
    t04 = b ^ d ; \
    t05 = d | t03; \
    t06 = d & t01; \
    z = t03 ^ t06; \
    t08 = z & t04; \
    t09 = t04 & t05; \
    t10 = c ^ t06; \
    t11 = b & c ; \
    t12 = t04 ^ t08; \
    t13 = t11 | t03; \
    t14 = t10 ^ t09; \
    t15 = a & t05; \
    t16 = t11 | t12; \
    y = t13 ^ t08; \
    x = t15 ^ t16; \
    w = ~ t14; \
  } while (0)
#define SBOX4_INVERSE(type, a, b, c, d, w, x, y, z) \ #define SBOX4_INVERSE(type, a, b, c, d, w, x, y, z) \
do { \ do { \
type t02, t03, t04, t05, t06, t07, t09; \ type t02, t03, t04, t05, t06, t07, t09; \
...@@ -312,29 +189,6 @@ ...@@ -312,29 +189,6 @@
w = t15 ^ t09; \ w = t15 ^ t09; \
} while (0) } while (0)
/* Serpent S-box 5, bitsliced: substitutes inputs (a,b,c,d) into
   outputs (w,x,y,z); `type' is the word type (uint32_t, or uint64_t
   for the two-block parallel path). */
#define SBOX5(type, a, b, c, d, w, x, y, z) \
  do { \
    type t02, t03, t04, t05, t07, t08, t09; \
    type t10, t11, t12, t13, t14, t01; \
    t01 = b ^ d ; \
    t02 = b | d ; \
    t03 = a & t01; \
    t04 = c ^ t02; \
    t05 = t03 ^ t04; \
    w = ~ t05; \
    t07 = a ^ t01; \
    t08 = d | w ; \
    t09 = b | t05; \
    t10 = d ^ t08; \
    t11 = b | t07; \
    t12 = t03 | w ; \
    t13 = t07 | t10; \
    t14 = t01 ^ t11; \
    y = t09 ^ t13; \
    x = t07 ^ t08; \
    z = t12 ^ t14; \
  } while (0)
#define SBOX5_INVERSE(type, a, b, c, d, w, x, y, z) \ #define SBOX5_INVERSE(type, a, b, c, d, w, x, y, z) \
do { \ do { \
type t02, t03, t04, t05, t07, t08, t09; \ type t02, t03, t04, t05, t07, t08, t09; \
...@@ -358,31 +212,6 @@ ...@@ -358,31 +212,6 @@
y = t16 ^ t15; \ y = t16 ^ t15; \
} while (0) } while (0)
/* Serpent S-box 6, bitsliced: substitutes inputs (a,b,c,d) into
   outputs (w,x,y,z); `type' is the word type (uint32_t, or uint64_t
   for the two-block parallel path).  Note that outputs x and y are
   produced early and reused as intermediates below. */
#define SBOX6(type, a, b, c, d, w, x, y, z) \
  do { \
    type t02, t03, t04, t05, t07, t08, t09, t10; \
    type t11, t12, t13, t15, t17, t18, t01; \
    t01 = a & d ; \
    t02 = b ^ c ; \
    t03 = a ^ d ; \
    t04 = t01 ^ t02; \
    t05 = b | c ; \
    x = ~ t04; \
    t07 = t03 & t05; \
    t08 = b & x ; \
    t09 = a | c ; \
    t10 = t07 ^ t08; \
    t11 = b | d ; \
    t12 = c ^ t11; \
    t13 = t09 ^ t10; \
    y = ~ t13; \
    t15 = x & t03; \
    z = t12 ^ t07; \
    t17 = a ^ b ; \
    t18 = y ^ t15; \
    w = t17 ^ t18; \
  } while (0)
#define SBOX6_INVERSE(type, a, b, c, d, w, x, y, z) \ #define SBOX6_INVERSE(type, a, b, c, d, w, x, y, z) \
do { \ do { \
type t02, t03, t04, t05, t06, t07, t08, t09; \ type t02, t03, t04, t05, t06, t07, t08, t09; \
...@@ -408,31 +237,6 @@ ...@@ -408,31 +237,6 @@
y = t16 ^ t14; \ y = t16 ^ t14; \
} while (0) } while (0)
/* Serpent S-box 7, bitsliced: substitutes inputs (a,b,c,d) into
   outputs (w,x,y,z); `type' is the word type (uint32_t, or uint64_t
   for the two-block parallel path).  Also used directly for the
   special final encryption round. */
#define SBOX7(type, a, b, c, d, w, x, y, z) \
  do { \
    type t02, t03, t04, t05, t06, t08, t09, t10; \
    type t11, t13, t14, t15, t16, t17, t01; \
    t01 = a & c ; \
    t02 = ~ d ; \
    t03 = a & t02; \
    t04 = b | t01; \
    t05 = a & b ; \
    t06 = c ^ t04; \
    z = t03 ^ t06; \
    t08 = c | z ; \
    t09 = d | t05; \
    t10 = a ^ t08; \
    t11 = t04 & z ; \
    x = t09 ^ t10; \
    t13 = b ^ x ; \
    t14 = t01 ^ x ; \
    t15 = c ^ t05; \
    t16 = t11 | t13; \
    t17 = t02 | t14; \
    w = t15 ^ t17; \
    y = a ^ t16; \
  } while (0)
#define SBOX7_INVERSE(type, a, b, c, d, w, x, y, z) \ #define SBOX7_INVERSE(type, a, b, c, d, w, x, y, z) \
do { \ do { \
type t02, t03, t04, t06, t07, t08, t09; \ type t02, t03, t04, t06, t07, t08, t09; \
...@@ -457,21 +261,6 @@ ...@@ -457,21 +261,6 @@
y = t14 ^ t16; \ y = t14 ^ t16; \
} while (0) } while (0)
/* In-place linear transformation: the mixing layer applied after
   each S-box, a fixed sequence of 32-bit rotations, left shifts and
   XORs over the four state words x0..x3. */
#define LINEAR_TRANSFORMATION(x0,x1,x2,x3) \
  do { \
    x0 = ROL32 (x0, 13); \
    x2 = ROL32 (x2, 3); \
    x1 = x1 ^ x0 ^ x2; \
    x3 = x3 ^ x2 ^ (x0 << 3); \
    x1 = ROL32 (x1, 1); \
    x3 = ROL32 (x3, 7); \
    x0 = x0 ^ x1 ^ x3; \
    x2 = x2 ^ x3 ^ (x1 << 7); \
    x0 = ROL32 (x0, 5); \
    x2 = ROL32 (x2, 22); \
  } while (0)
/* In-place inverse linear transformation. */ /* In-place inverse linear transformation. */
#define LINEAR_TRANSFORMATION_INVERSE(x0,x1,x2,x3) \ #define LINEAR_TRANSFORMATION_INVERSE(x0,x1,x2,x3) \
do { \ do { \
...@@ -487,23 +276,6 @@ ...@@ -487,23 +276,6 @@
x0 = ROL32 (x0, 19); \ x0 = ROL32 (x0, 19); \
} while (0) } while (0)
/* XOR one 128-bit subkey (an array of four 32-bit words) into the
   state words x0..x3. */
#define KEYXOR(x0,x1,x2,x3, subkey) \
  do { \
    (x0) ^= (subkey)[0]; \
    (x1) ^= (subkey)[1]; \
    (x2) ^= (subkey)[2]; \
    (x3) ^= (subkey)[3]; \
  } while (0)
/* One full encryption round: subkey XOR, S-box number WHICH (token
   pasting selects SBOX0..SBOX7), then the linear transformation.
   Round inputs are x0,x1,x2,x3 (destroyed), and round outputs are
   y0,y1,y2,y3. */
#define ROUND(which, subkey, x0,x1,x2,x3, y0,y1,y2,y3) \
  do { \
    KEYXOR(x0,x1,x2,x3, subkey); \
    SBOX##which(uint32_t, x0,x1,x2,x3, y0,y1,y2,y3); \
    LINEAR_TRANSFORMATION(y0,y1,y2,y3); \
  } while (0)
/* Round inputs are x0,x1,x2,x3 (destroyed), and round outputs are /* Round inputs are x0,x1,x2,x3 (destroyed), and round outputs are
y0,y1,y2,y3. */ y0,y1,y2,y3. */
#define ROUND_INVERSE(which, subkey, x0,x1,x2,x3, y0,y1,y2,y3) \ #define ROUND_INVERSE(which, subkey, x0,x1,x2,x3, y0,y1,y2,y3) \
...@@ -514,36 +286,6 @@ ...@@ -514,36 +286,6 @@
} while (0) } while (0)
#if HAVE_NATIVE_64_BIT #if HAVE_NATIVE_64_BIT
/* Operate independently on both halves of a 64-bit word: rotate each
   32-bit half left by n bits (1 <= n <= 31).  The masks discard the
   bits that would otherwise spill between the halves.  The constants
   are uint64_t so the shifts stay well-defined even where long is
   only 32 bits (the previous 1L-based masks made e.g. 1L << 31
   followed by << 32 undefined behavior on such platforms). */
#define ROL64(x,n) \
  (((x) << (n) & ~((((uint64_t) 1 << (n))-1) << 32)) \
   |(((x) >> (32-(n))) & ~((((uint64_t) 1 << (32-(n)))-1) << (n))))
/* XOR one subkey into a two-block state: each 32-bit subkey word is
   first replicated into both halves of a 64-bit word, so the same
   key material is applied to both packed blocks. */
#define KEYXOR64(x0,x1,x2,x3, subkey) \
  do { \
    uint64_t _sk; \
    _sk = (subkey)[0]; _sk |= _sk << 32; (x0) ^= _sk; \
    _sk = (subkey)[1]; _sk |= _sk << 32; (x1) ^= _sk; \
    _sk = (subkey)[2]; _sk |= _sk << 32; (x2) ^= _sk; \
    _sk = (subkey)[3]; _sk |= _sk << 32; (x3) ^= _sk; \
  } while (0)
/* Despite the name, this is a LEFT shift of each 32-bit half of x by
   n bits, matching the plain "x << n" of the 32-bit linear
   transformation applied to two packed words: the mask clears the
   bits the low half would otherwise shift into the high half.  The
   constant is uint64_t so "<< 32" stays well-defined even where long
   is only 32 bits (the previous 1L-based mask was undefined behavior
   on such platforms). */
#define RSHIFT64(x,n) \
  ( ((x) << (n)) & ~((((uint64_t) 1 << (n)) - 1) << 32))
/* In-place linear transformation on two blocks in parallel: the
   64-bit analogue of LINEAR_TRANSFORMATION, with ROL64/RSHIFT64
   operating independently on the two 32-bit halves of each word. */
#define LINEAR_TRANSFORMATION64(x0,x1,x2,x3) \
  do { \
    x0 = ROL64 (x0, 13); \
    x2 = ROL64 (x2, 3); \
    x1 = x1 ^ x0 ^ x2; \
    x3 = x3 ^ x2 ^ RSHIFT64(x0, 3); \
    x1 = ROL64 (x1, 1); \
    x3 = ROL64 (x3, 7); \
    x0 = x0 ^ x1 ^ x3; \
    x2 = x2 ^ x3 ^ RSHIFT64(x1, 7); \
    x0 = ROL64 (x0, 5); \
    x2 = ROL64 (x2, 22); \
  } while (0)
/* In-place inverse linear transformation. */ /* In-place inverse linear transformation. */
#define LINEAR_TRANSFORMATION64_INVERSE(x0,x1,x2,x3) \ #define LINEAR_TRANSFORMATION64_INVERSE(x0,x1,x2,x3) \
...@@ -560,13 +302,6 @@ ...@@ -560,13 +302,6 @@
x0 = ROL64 (x0, 19); \ x0 = ROL64 (x0, 19); \
} while (0) } while (0)
/* One full encryption round on two blocks packed into 64-bit words:
   subkey XOR, S-box WHICH instantiated at uint64_t, then the 64-bit
   linear transformation.  Inputs x0..x3 are destroyed; outputs are
   written to y0..y3. */
#define ROUND64(which, subkey, x0,x1,x2,x3, y0,y1,y2,y3) \
  do { \
    KEYXOR64(x0,x1,x2,x3, subkey); \
    SBOX##which(uint64_t, x0,x1,x2,x3, y0,y1,y2,y3); \
    LINEAR_TRANSFORMATION64(y0,y1,y2,y3); \
  } while (0)
#define ROUND64_INVERSE(which, subkey, x0,x1,x2,x3, y0,y1,y2,y3) \ #define ROUND64_INVERSE(which, subkey, x0,x1,x2,x3, y0,y1,y2,y3) \
do { \ do { \
LINEAR_TRANSFORMATION64_INVERSE (x0,x1,x2,x3); \ LINEAR_TRANSFORMATION64_INVERSE (x0,x1,x2,x3); \
...@@ -574,188 +309,7 @@ ...@@ -574,188 +309,7 @@
KEYXOR64(y0,y1,y2,y3, subkey); \ KEYXOR64(y0,y1,y2,y3, subkey); \
} while (0) } while (0)
#endif
/* Key schedule */
/* One step of the key-schedule recurrence.  W is treated as a
   circular buffer of 8 words (indices reduced mod 8 via "& 7") and
   updated in place:
     w[i] = ROL32(w[i] ^ w[i+3] ^ w[i+5] ^ w[i+7] ^ PHI ^ k, 11)
   where k is the running subkey-word index.
   Note: Increments k as a side effect. */
#define KS_RECURRENCE(w, i, k) \
  do { \
    uint32_t _wn = (w)[(i)] ^ (w)[((i)+3)&7] ^ w[((i)+5)&7] \
      ^ w[((i)+7)&7] ^ PHI ^ (k)++; \
    ((w)[(i)] = ROL32(_wn, 11)); \
  } while (0)
/* Produce one 128-bit subkey: run the recurrence for four consecutive
   words of W, push them through S-box number S, and store the result
   through *keys.
   Note: Increments k four times and keys once, as side effects. */
#define KS(keys, s, w, i, k) \
  do { \
    KS_RECURRENCE(w, (i), (k)); \
    KS_RECURRENCE(w, (i)+1, (k)); \
    KS_RECURRENCE(w, (i)+2, (k)); \
    KS_RECURRENCE(w, (i)+3, (k)); \
    SBOX##s(uint32_t, w[(i)],w[(i)+1],w[(i)+2],w[(i)+3], \
            (*keys)[0],(*keys)[1],(*keys)[2],(*keys)[3]); \
    (keys)++; \
  } while (0)
/* Convert a user key of KEY_LENGTH bytes into eight little-endian
   32-bit words stored in W, applying the Serpent padding rule when
   the key is shorter than the maximum. */
static void
serpent_key_pad (const uint8_t *key, unsigned int key_length,
                 uint32_t *w)
{
  unsigned int n = 0;

  assert (key_length <= SERPENT_MAX_KEY_SIZE);

  /* Consume whole 32-bit words of key material, little-endian. */
  while (key_length >= 4)
    {
      w[n++] = LE_READ_UINT32 (key);
      key += 4;
      key_length -= 4;
    }

  if (n < 8)
    {
      /* Key must be padded according to the Serpent specification:
         "aabbcc" -> "aabbcc0100...00" -> 0x01ccbbaa.  Fold the
         remaining bytes together with the 0x01 marker into one
         word... */
      uint32_t last = 0x01;

      while (key_length > 0)
        last = (last << 8) | key[--key_length];
      w[n++] = last;

      /* ...and zero-fill the rest of the eight words. */
      while (n < 8)
        w[n++] = 0;
    }
}
/* Initialize CONTEXT with the key KEY of LENGTH bytes.  (Note:
   LENGTH is a byte count, not bits -- serpent_key_pad consumes the
   key four bytes at a time.) */
void
serpent_set_key (struct serpent_ctx *ctx,
                 unsigned length, const uint8_t * key)
{
  uint32_t w[8];         /* Circular buffer for the recurrence. */
  uint32_t (*keys)[4];   /* Cursor into the 33 subkeys. */
  unsigned k;            /* Running subkey-word index, 0..131. */

  serpent_key_pad (key, length, w);

  /* Derive the 33 subkeys from KEY and store them in SUBKEYS. We do
     the recurrence in the key schedule using W as a circular buffer
     of just 8 uint32_t. */
  /* FIXME: Would be better to invoke SBOX with scalar variables as
     arguments, no arrays. To do that, unpack w into separate
     variables, use temporary variables as the SBOX destination. */
  keys = ctx->keys;
  k = 0;
  for (;;)
    {
      /* Each KS emits one subkey, advancing k by 4 and keys by 1.
         S-boxes are applied in the repeating order 3,2,1,0,7,6,5,4;
         the loop exits once k reaches 132 = 33 * 4, i.e. after 33
         subkeys. */
      KS(keys, 3, w, 0, k);
      if (k == 132)
        break;
      KS(keys, 2, w, 4, k);
      KS(keys, 1, w, 0, k);
      KS(keys, 0, w, 4, k);
      KS(keys, 7, w, 0, k);
      KS(keys, 6, w, 4, k);
      KS(keys, 5, w, 0, k);
      KS(keys, 4, w, 4, k);
    }
  assert (keys == ctx->keys + 33);
}
void
serpent_encrypt (const struct serpent_ctx *ctx,
unsigned length, uint8_t * dst, const uint8_t * src)
{
assert( !(length % SERPENT_BLOCK_SIZE));
#if HAVE_NATIVE_64_BIT
if (length & SERPENT_BLOCK_SIZE)
#else
while (length >= SERPENT_BLOCK_SIZE)
#endif
{
uint32_t x0,x1,x2,x3, y0,y1,y2,y3;
unsigned k;
x0 = LE_READ_UINT32 (src);
x1 = LE_READ_UINT32 (src + 4);
x2 = LE_READ_UINT32 (src + 8);
x3 = LE_READ_UINT32 (src + 12);
for (k = 0; ; k += 8)
{
ROUND (0, ctx->keys[k+0], x0,x1,x2,x3, y0,y1,y2,y3);
ROUND (1, ctx->keys[k+1], y0,y1,y2,y3, x0,x1,x2,x3);
ROUND (2, ctx->keys[k+2], x0,x1,x2,x3, y0,y1,y2,y3);
ROUND (3, ctx->keys[k+3], y0,y1,y2,y3, x0,x1,x2,x3);
ROUND (4, ctx->keys[k+4], x0,x1,x2,x3, y0,y1,y2,y3);
ROUND (5, ctx->keys[k+5], y0,y1,y2,y3, x0,x1,x2,x3);
ROUND (6, ctx->keys[k+6], x0,x1,x2,x3, y0,y1,y2,y3);
if (k == 24)
break;
ROUND (7, ctx->keys[k+7], y0,y1,y2,y3, x0,x1,x2,x3);
}
/* Special final round, using two subkeys. */
KEYXOR (y0,y1,y2,y3, ctx->keys[31]);
SBOX7 (uint32_t, y0,y1,y2,y3, x0,x1,x2,x3);
KEYXOR (x0,x1,x2,x3, ctx->keys[32]);
LE_WRITE_UINT32 (dst, x0);
LE_WRITE_UINT32 (dst + 4, x1);
LE_WRITE_UINT32 (dst + 8, x2);
LE_WRITE_UINT32 (dst + 12, x3);
src += SERPENT_BLOCK_SIZE;
dst += SERPENT_BLOCK_SIZE;
length -= SERPENT_BLOCK_SIZE;
}
#if HAVE_NATIVE_64_BIT
FOR_BLOCKS(length, dst, src, 2*SERPENT_BLOCK_SIZE)
{
uint64_t x0,x1,x2,x3, y0,y1,y2,y3;
unsigned k;
x0 = LE_READ_UINT32 (src);
x1 = LE_READ_UINT32 (src + 4);
x2 = LE_READ_UINT32 (src + 8);
x3 = LE_READ_UINT32 (src + 12);
x0 <<= 32; x0 |= LE_READ_UINT32 (src + 16);
x1 <<= 32; x1 |= LE_READ_UINT32 (src + 20);
x2 <<= 32; x2 |= LE_READ_UINT32 (src + 24);
x3 <<= 32; x3 |= LE_READ_UINT32 (src + 28);
for (k = 0; ; k += 8)
{
ROUND64 (0, ctx->keys[k+0], x0,x1,x2,x3, y0,y1,y2,y3);
ROUND64 (1, ctx->keys[k+1], y0,y1,y2,y3, x0,x1,x2,x3);
ROUND64 (2, ctx->keys[k+2], x0,x1,x2,x3, y0,y1,y2,y3);
ROUND64 (3, ctx->keys[k+3], y0,y1,y2,y3, x0,x1,x2,x3);
ROUND64 (4, ctx->keys[k+4], x0,x1,x2,x3, y0,y1,y2,y3);
ROUND64 (5, ctx->keys[k+5], y0,y1,y2,y3, x0,x1,x2,x3);
ROUND64 (6, ctx->keys[k+6], x0,x1,x2,x3, y0,y1,y2,y3);
if (k == 24)
break;
ROUND64 (7, ctx->keys[k+7], y0,y1,y2,y3, x0,x1,x2,x3);
}
/* Special final round, using two subkeys. */
KEYXOR64 (y0,y1,y2,y3, ctx->keys[31]);
SBOX7 (uint64_t, y0,y1,y2,y3, x0,x1,x2,x3);
KEYXOR64 (x0,x1,x2,x3, ctx->keys[32]);
LE_WRITE_UINT32 (dst + 16, x0);
LE_WRITE_UINT32 (dst + 20, x1);
LE_WRITE_UINT32 (dst + 24, x2);
LE_WRITE_UINT32 (dst + 28, x3);
x0 >>= 32; LE_WRITE_UINT32 (dst, x0);
x1 >>= 32; LE_WRITE_UINT32 (dst + 4, x1);
x2 >>= 32; LE_WRITE_UINT32 (dst + 8, x2);
x3 >>= 32; LE_WRITE_UINT32 (dst + 12, x3);
}
#endif /* HAVE_NATIVE_64_BIT */ #endif /* HAVE_NATIVE_64_BIT */