diff --git a/ChangeLog b/ChangeLog index df345cae5264a23dbeffa5b661593b5dbc344c81..54581dbad62fd15857f2796aa4d3fa96346d245f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +2011-06-01 Niels Möller <nisse@lysator.liu.se> + + * serpent.c: Added do { ... } while (0) around block macros. + (serpent_key_t): Deleted array typedef. + (ROL32, ROR32): Renamed macros, were rol and ror. + (KS_RECURRENCE, KS): New macros. + (serpent_key_pad): Renamed, from... + (serpent_key_prepare): ...old name. + (serpent_subkeys_generate): Deleted function. + (serpent_set_key): Rewrote the generation of subkeys. Reduced both + temporary storage and code size (less unrolling) + 2011-05-31 Niels Möller <nisse@lysator.liu.se> * testsuite/serpent-test.c (test_main): Enabled test with short, diff --git a/serpent.c b/serpent.c index 6c4ed96a7e63bf306da2f487aff05573e4f9961f..aef5f0994c94adc180c697a1b62ebd9c6f261305 100644 --- a/serpent.c +++ b/serpent.c @@ -8,6 +8,7 @@ /* nettle, low-level cryptographics library * + * Copyright (C) 2011 Niels Möller * Copyright (C) 2010, 2011 Simon Josefsson * Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc. * @@ -56,13 +57,10 @@ /* Serpent works on 128 bit blocks. */ typedef uint32_t serpent_block_t[4]; -/* Serpent key, provided by the user. If the original key is shorter - than 256 bits, it is padded. */ -typedef uint32_t serpent_key_t[8]; - -#define rol(x,n) ((((uint32_t)(x))<<(n))| \ +/* FIXME: Unify ROL macros used here, in camellia.c and cast128.c. */ +#define ROL32(x,n) ((((uint32_t)(x))<<(n))| \ (((uint32_t)(x))>>(32-(n)))) -#define ror(x,n) ((((uint32_t)(x))<<(32-(n)))| \ +#define ROR32(x,n) ((((uint32_t)(x))<<(32-(n)))| \ (((uint32_t)(x))>>(n))) /* These are the S-Boxes of Serpent. They are copied from Serpents @@ -82,7 +80,7 @@ typedef uint32_t serpent_key_t[8]; are welcome to use Serpent for any application." 
*/ #define SBOX0(a, b, c, d, w, x, y, z) \ - { \ + do { \ uint32_t t02, t03, t05, t06, t07, t08, t09; \ uint32_t t11, t12, t13, t14, t15, t17, t01; \ t01 = b ^ c ; \ @@ -103,10 +101,10 @@ typedef uint32_t serpent_key_t[8]; w = ~ t15; \ t17 = w ^ t14; \ x = t12 ^ t17; \ - } + } while (0) #define SBOX0_INVERSE(a, b, c, d, w, x, y, z) \ - { \ + do { \ uint32_t t02, t03, t04, t05, t06, t08, t09, t10; \ uint32_t t12, t13, t14, t15, t17, t18, t01; \ t01 = c ^ d ; \ @@ -128,10 +126,10 @@ typedef uint32_t serpent_key_t[8]; t17 = t05 & t13; \ t18 = t14 | t17; \ w = t15 ^ t18; \ - } + } while (0) #define SBOX1(a, b, c, d, w, x, y, z) \ - { \ + do { \ uint32_t t02, t03, t04, t05, t06, t07, t08; \ uint32_t t10, t11, t12, t13, t16, t17, t01; \ t01 = a | d ; \ @@ -152,10 +150,10 @@ typedef uint32_t serpent_key_t[8]; t16 = t10 | x ; \ t17 = t05 & t16; \ w = c ^ t17; \ - } + } while (0) #define SBOX1_INVERSE(a, b, c, d, w, x, y, z) \ - { \ + do { \ uint32_t t02, t03, t04, t05, t06, t07, t08; \ uint32_t t09, t10, t11, t14, t15, t17, t01; \ t01 = a ^ b ; \ @@ -176,10 +174,10 @@ typedef uint32_t serpent_key_t[8]; z = t01 ^ t04; \ t17 = c ^ t15; \ w = t14 ^ t17; \ - } + } while (0) #define SBOX2(a, b, c, d, w, x, y, z) \ - { \ + do { \ uint32_t t02, t03, t05, t06, t07, t08; \ uint32_t t09, t10, t12, t13, t14, t01; \ t01 = a | c ; \ @@ -198,10 +196,10 @@ typedef uint32_t serpent_key_t[8]; t14 = b ^ t13; \ z = ~ t09; \ y = t12 ^ t14; \ - } + } while (0) #define SBOX2_INVERSE(a, b, c, d, w, x, y, z) \ - { \ + do { \ uint32_t t02, t03, t04, t06, t07, t08, t09; \ uint32_t t10, t11, t12, t15, t16, t17, t01; \ t01 = a ^ d ; \ @@ -222,10 +220,10 @@ typedef uint32_t serpent_key_t[8]; t16 = w ^ x ; \ t17 = t10 ^ t15; \ y = t16 ^ t17; \ - } + } while (0) #define SBOX3(a, b, c, d, w, x, y, z) \ - { \ + do { \ uint32_t t02, t03, t04, t05, t06, t07, t08; \ uint32_t t09, t10, t11, t13, t14, t15, t01; \ t01 = a ^ c ; \ @@ -246,10 +244,10 @@ typedef uint32_t serpent_key_t[8]; y = t08 ^ t11; \ w = t14 
^ t15; \ x = t05 ^ t04; \ - } + } while (0) #define SBOX3_INVERSE(a, b, c, d, w, x, y, z) \ - { \ + do { \ uint32_t t02, t03, t04, t05, t06, t07, t09; \ uint32_t t11, t12, t13, t14, t16, t01; \ t01 = c | d ; \ @@ -269,10 +267,10 @@ typedef uint32_t serpent_key_t[8]; x = b ^ t12; \ t16 = b | t13; \ z = t14 ^ t16; \ - } + } while (0) #define SBOX4(a, b, c, d, w, x, y, z) \ - { \ + do { \ uint32_t t02, t03, t04, t05, t06, t08, t09; \ uint32_t t10, t11, t12, t13, t14, t15, t16, t01; \ t01 = a | b ; \ @@ -294,10 +292,10 @@ typedef uint32_t serpent_key_t[8]; y = t13 ^ t08; \ x = t15 ^ t16; \ w = ~ t14; \ - } + } while (0) #define SBOX4_INVERSE(a, b, c, d, w, x, y, z) \ - { \ + do { \ uint32_t t02, t03, t04, t05, t06, t07, t09; \ uint32_t t10, t11, t12, t13, t15, t01; \ t01 = b | d ; \ @@ -317,10 +315,10 @@ typedef uint32_t serpent_key_t[8]; t15 = a ^ t04; \ y = t11 ^ t13; \ w = t15 ^ t09; \ - } + } while (0) #define SBOX5(a, b, c, d, w, x, y, z) \ - { \ + do { \ uint32_t t02, t03, t04, t05, t07, t08, t09; \ uint32_t t10, t11, t12, t13, t14, t01; \ t01 = b ^ d ; \ @@ -340,10 +338,10 @@ typedef uint32_t serpent_key_t[8]; y = t09 ^ t13; \ x = t07 ^ t08; \ z = t12 ^ t14; \ - } + } while (0) #define SBOX5_INVERSE(a, b, c, d, w, x, y, z) \ - { \ + do { \ uint32_t t02, t03, t04, t05, t07, t08, t09; \ uint32_t t10, t12, t13, t15, t16, t01; \ t01 = a & d ; \ @@ -363,10 +361,10 @@ typedef uint32_t serpent_key_t[8]; t15 = t02 ^ t13; \ t16 = b ^ d ; \ y = t16 ^ t15; \ - } + } while (0) #define SBOX6(a, b, c, d, w, x, y, z) \ - { \ + do { \ uint32_t t02, t03, t04, t05, t07, t08, t09, t10; \ uint32_t t11, t12, t13, t15, t17, t18, t01; \ t01 = a & d ; \ @@ -388,10 +386,10 @@ typedef uint32_t serpent_key_t[8]; t17 = a ^ b ; \ t18 = y ^ t15; \ w = t17 ^ t18; \ - } + } while (0) #define SBOX6_INVERSE(a, b, c, d, w, x, y, z) \ - { \ + do { \ uint32_t t02, t03, t04, t05, t06, t07, t08, t09; \ uint32_t t12, t13, t14, t15, t16, t17, t01; \ t01 = a ^ c ; \ @@ -413,10 +411,10 @@ typedef 
uint32_t serpent_key_t[8]; t17 = a ^ x ; \ z = t17 ^ t15; \ y = t16 ^ t14; \ - } + } while (0) #define SBOX7(a, b, c, d, w, x, y, z) \ - { \ + do { \ uint32_t t02, t03, t04, t05, t06, t08, t09, t10; \ uint32_t t11, t13, t14, t15, t16, t17, t01; \ t01 = a & c ; \ @@ -438,10 +436,10 @@ typedef uint32_t serpent_key_t[8]; t17 = t02 | t14; \ w = t15 ^ t17; \ y = a ^ t16; \ - } + } while (0) #define SBOX7_INVERSE(a, b, c, d, w, x, y, z) \ - { \ + do { \ uint32_t t02, t03, t04, t06, t07, t08, t09; \ uint32_t t10, t11, t13, t14, t15, t16, t01; \ t01 = a & b ; \ @@ -462,7 +460,7 @@ typedef uint32_t serpent_key_t[8]; t16 = t01 | t10; \ w = t13 ^ t15; \ y = t14 ^ t16; \ - } + } while (0) /* XOR BLOCK1 into BLOCK0. */ #define BLOCK_XOR(block0, block1) \ @@ -502,97 +500,115 @@ typedef uint32_t serpent_key_t[8]; /* Apply the linear transformation to BLOCK. */ #define LINEAR_TRANSFORMATION(block) \ - { \ - block[0] = rol (block[0], 13); \ - block[2] = rol (block[2], 3); \ + do { \ + block[0] = ROL32 (block[0], 13); \ + block[2] = ROL32 (block[2], 3); \ block[1] = block[1] ^ block[0] ^ block[2]; \ block[3] = block[3] ^ block[2] ^ (block[0] << 3); \ - block[1] = rol (block[1], 1); \ - block[3] = rol (block[3], 7); \ + block[1] = ROL32 (block[1], 1); \ + block[3] = ROL32 (block[3], 7); \ block[0] = block[0] ^ block[1] ^ block[3]; \ block[2] = block[2] ^ block[3] ^ (block[1] << 7); \ - block[0] = rol (block[0], 5); \ - block[2] = rol (block[2], 22); \ - } + block[0] = ROL32 (block[0], 5); \ + block[2] = ROL32 (block[2], 22); \ + } while (0) /* Apply the inverse linear transformation to BLOCK. 
*/ #define LINEAR_TRANSFORMATION_INVERSE(block) \ - { \ - block[2] = ror (block[2], 22); \ - block[0] = ror (block[0] , 5); \ + do { \ + block[2] = ROR32 (block[2], 22); \ + block[0] = ROR32 (block[0] , 5); \ block[2] = block[2] ^ block[3] ^ (block[1] << 7); \ block[0] = block[0] ^ block[1] ^ block[3]; \ - block[3] = ror (block[3], 7); \ - block[1] = ror (block[1], 1); \ + block[3] = ROR32 (block[3], 7); \ + block[1] = ROR32 (block[1], 1); \ block[3] = block[3] ^ block[2] ^ (block[0] << 3); \ block[1] = block[1] ^ block[0] ^ block[2]; \ - block[2] = ror (block[2], 3); \ - block[0] = ror (block[0], 13); \ - } + block[2] = ROR32 (block[2], 3); \ + block[0] = ROR32 (block[0], 13); \ + } while (0) /* Apply a Serpent round to BLOCK, using the SBOX number WHICH and the subkeys contained in SUBKEYS. Use BLOCK_TMP as temporary storage. This macro increments `round'. */ #define ROUND(which, subkeys, block, block_tmp) \ - { \ + do { \ BLOCK_XOR (block, subkeys[round]); \ round++; \ SBOX (which, block, block_tmp, 0); \ LINEAR_TRANSFORMATION (block_tmp); \ BLOCK_COPY (block, block_tmp); \ - } + } while (0) /* Apply the last Serpent round to BLOCK, using the SBOX number WHICH and the subkeys contained in SUBKEYS. Use BLOCK_TMP as temporary storage. The result will be stored in BLOCK_TMP. This macro increments `round'. */ #define ROUND_LAST(which, subkeys, block, block_tmp) \ - { \ + do { \ BLOCK_XOR (block, subkeys[round]); \ round++; \ SBOX (which, block, block_tmp, 0); \ BLOCK_XOR (block_tmp, subkeys[round]); \ round++; \ - } + } while (0) /* Apply an inverse Serpent round to BLOCK, using the SBOX number WHICH and the subkeys contained in SUBKEYS. Use BLOCK_TMP as temporary storage. This macro increments `round'. 
*/ #define ROUND_INVERSE(which, subkey, block, block_tmp) \ - { \ + do { \ LINEAR_TRANSFORMATION_INVERSE (block); \ SBOX_INVERSE (which, block, block_tmp, 0); \ BLOCK_XOR (block_tmp, subkey[round]); \ round--; \ BLOCK_COPY (block, block_tmp); \ - } + } while (0) /* Apply the first Serpent round to BLOCK, using the SBOX number WHICH and the subkeys contained in SUBKEYS. Use BLOCK_TMP as temporary storage. The result will be stored in BLOCK_TMP. This macro increments `round'. */ #define ROUND_FIRST_INVERSE(which, subkeys, block, block_tmp) \ - { \ + do { \ BLOCK_XOR (block, subkeys[round]); \ round--; \ SBOX_INVERSE (which, block, block_tmp, 0); \ BLOCK_XOR (block_tmp, subkeys[round]); \ round--; \ - } - -/* Convert the user provided key KEY of KEY_LENGTH bytes into the - internally used format. */ + } while (0) + +/* Note: Increments k */ +#define KS_RECURRENCE(w, i, k) \ + do { \ + uint32_t _wn = (w)[(i)] ^ (w)[((i)+3)&7] ^ w[((i)+5)&7] \ + ^ w[((i)+7)&7] ^ PHI ^ (k)++; \ + ((w)[(i)] = ROL32(_wn, 11)); \ + } while (0) + +/* Note: Increments k four times and keys once */ +#define KS(keys, s, w, i, k) \ + do { \ + KS_RECURRENCE(w, (i), (k)); \ + KS_RECURRENCE(w, (i)+1, (k)); \ + KS_RECURRENCE(w, (i)+2, (k)); \ + KS_RECURRENCE(w, (i)+3, (k)); \ + SBOX##s(w[(i)],w[(i)+1],w[(i)+2],w[(i)+3], \ + (*keys)[0],(*keys)[1],(*keys)[2],(*keys)[3]); \ + (keys)++; \ + } while (0) + +/* Pad user key and convert to an array of 8 uint32_t. */ static void -serpent_key_prepare (const uint8_t * key, unsigned int key_length, - serpent_key_t key_prepared) +serpent_key_pad (const uint8_t *key, unsigned int key_length, + uint32_t *w) { unsigned int i; assert (key_length <= SERPENT_MAX_KEY_SIZE); - /* Copy key. 
*/ for (i = 0; key_length >= 4; key_length -=4, key += 4) - key_prepared[i++] = LE_READ_UINT32(key); + w[i++] = LE_READ_UINT32(key); if (i < 8) { @@ -603,80 +619,46 @@ serpent_key_prepare (const uint8_t * key, unsigned int key_length, while (key_length > 0) pad = pad << 8 | key[--key_length]; - key_prepared[i++] = pad; + w[i++] = pad; while (i < 8) - key_prepared[i++] = 0; + w[i++] = 0; } } -/* Derive the 33 subkeys from KEY and store them in SUBKEYS. */ -static void -serpent_subkeys_generate (serpent_key_t key, struct serpent_ctx *ctx) -{ - uint32_t w_real[140]; /* The `prekey'. */ - uint32_t k[132]; - uint32_t *w = &w_real[8]; - int i, j; - - /* Initialize with key values. */ - for (i = 0; i < 8; i++) - w[i - 8] = key[i]; - - /* Expand to intermediate key using the affine recurrence. */ - for (i = 0; i < 132; i++) - w[i] = rol (w[i - 8] ^ w[i - 5] ^ w[i - 3] ^ w[i - 1] ^ PHI ^ i, 11); - - /* Calculate subkeys via S-Boxes, in bitslice mode. */ - SBOX (3, w, k, 0); - SBOX (2, w, k, 4); - SBOX (1, w, k, 8); - SBOX (0, w, k, 12); - SBOX (7, w, k, 16); - SBOX (6, w, k, 20); - SBOX (5, w, k, 24); - SBOX (4, w, k, 28); - SBOX (3, w, k, 32); - SBOX (2, w, k, 36); - SBOX (1, w, k, 40); - SBOX (0, w, k, 44); - SBOX (7, w, k, 48); - SBOX (6, w, k, 52); - SBOX (5, w, k, 56); - SBOX (4, w, k, 60); - SBOX (3, w, k, 64); - SBOX (2, w, k, 68); - SBOX (1, w, k, 72); - SBOX (0, w, k, 76); - SBOX (7, w, k, 80); - SBOX (6, w, k, 84); - SBOX (5, w, k, 88); - SBOX (4, w, k, 92); - SBOX (3, w, k, 96); - SBOX (2, w, k, 100); - SBOX (1, w, k, 104); - SBOX (0, w, k, 108); - SBOX (7, w, k, 112); - SBOX (6, w, k, 116); - SBOX (5, w, k, 120); - SBOX (4, w, k, 124); - SBOX (3, w, k, 128); - - /* Renumber subkeys. */ - for (i = 0; i < ROUNDS + 1; i++) - for (j = 0; j < 4; j++) - ctx->keys[i][j] = k[4 * i + j]; -} - /* Initialize CONTEXT with the key KEY of KEY_LENGTH bits. 
*/ void serpent_set_key (struct serpent_ctx *ctx, unsigned length, const uint8_t * key) { - serpent_key_t key_prepared; + uint32_t w[8]; + uint32_t (*keys)[4]; + unsigned k; + + serpent_key_pad (key, length, w); - serpent_key_prepare (key, length, key_prepared); - serpent_subkeys_generate (key_prepared, ctx); + /* Derive the 33 subkeys from KEY and store them in SUBKEYS. We do + the recurrence in the key schedule using W as a circular buffer + of just 8 uint32_t. */ + + /* FIXME: Would be better to invoke SBOX with scalar variables as + arguments, no arrays. To do that, unpack w into separate + variables, use temporary variables as the SBOX destination. */ + + for (keys = ctx->keys, k = 0; k < 128;) + { + KS(keys, 3, w, 0, k); + KS(keys, 2, w, 4, k); + KS(keys, 1, w, 0, k); + KS(keys, 0, w, 4, k); + KS(keys, 7, w, 0, k); + KS(keys, 6, w, 4, k); + KS(keys, 5, w, 0, k); + KS(keys, 4, w, 4, k); + } + KS(keys, 3, w, 0, k); + assert (k == 132); + assert (keys == ctx->keys + 33); } void