diff --git a/ChangeLog b/ChangeLog index 10daf7de632b5e9bd107280673ee2a50b9eef2f0..2685b5f99932025a37366cb6dfb0e0a4a9d5ce07 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,12 @@ 2010-03-24 Niels M�ller <nisse@lysator.liu.se> + * Makefile.in (nettle_SOURCES): Added sha256-compress.c. + + * sha256.c: Reorganized to use _nettle_sha256_compress. + + * sha256-compress.c (_nettle_sha256_compress): Compression + function extracted from sha256.c to a new file. + * examples/nettle-benchmark.c (main): Benchmark sha512. * rsa-keygen.c (rsa_generate_keypair): Ensure that bit size of e diff --git a/Makefile.in b/Makefile.in index ed364d27886f8364c47015aa0939d684c4c9da1c..f2d82d950a93af26b71f5af1be59bf6781be1387 100644 --- a/Makefile.in +++ b/Makefile.in @@ -64,7 +64,7 @@ nettle_SOURCES = aes-decrypt-internal.c aes-decrypt.c \ knuth-lfib.c \ md2.c md2-meta.c md4.c md4-meta.c \ md5.c md5-compress.c md5-compat.c md5-meta.c \ - sha1.c sha1-compress.c sha1-meta.c sha256.c sha256-meta.c \ + sha1.c sha1-compress.c sha1-meta.c sha256.c sha256-compress.c sha256-meta.c \ sha512.c sha512-meta.c \ serpent.c serpent-meta.c \ twofish.c twofish-meta.c \ diff --git a/sha.h b/sha.h index 5040e705f127892aa0a80f147f96b4d2ac8fa54a..c95dffdaa29c2f4dad455faca102381cdccec0be 100644 --- a/sha.h +++ b/sha.h @@ -106,6 +106,12 @@ sha256_digest(struct sha256_ctx *ctx, unsigned length, uint8_t *digest); +/* Internal compression function. STATE points to 8 uint32_t words, + DATA points to 64 bytes of input data, possibly unaligned, and K + points to the table of constants. */ +void +_nettle_sha256_compress(uint32_t *state, const uint8_t *data, const uint32_t *k); + /* SHA512 */ #define SHA512_DIGEST_SIZE 64 diff --git a/sha256-compress.c b/sha256-compress.c new file mode 100644 index 0000000000000000000000000000000000000000..ad03039a83ca39abc9e8b372b9217b6ad53d21b3 --- /dev/null +++ b/sha256-compress.c @@ -0,0 +1,168 @@ +/* sha256-compress.c + * + * The compression function of the sha256 hash function. 
+ */ + +/* nettle, low-level cryptographics library + * + * Copyright (C) 2001, 2010 Niels Möller + * + * The nettle library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * The nettle library is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with the nettle library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + */ + +#if HAVE_CONFIG_H +# include "config.h" +#endif + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + +#include "sha.h" + +#include "macros.h" + +/* A block, treated as a sequence of 32-bit words. */ +#define SHA256_DATA_LENGTH 16 + +#define ROTR(n,x) ((x)>>(n) | ((x)<<(32-(n)))) +#define SHR(n,x) ((x)>>(n)) + +/* The SHA256 functions. The Choice function is the same as the SHA1 + function f1, and the majority function is the same as the SHA1 f3 + function. 
They can be optimized to save one boolean operation each + - thanks to Rich Schroeppel, rcs@cs.arizona.edu for discovering + this */ + +/* #define Choice(x,y,z) ( ( (x) & (y) ) | ( ~(x) & (z) ) ) */ +#define Choice(x,y,z) ( (z) ^ ( (x) & ( (y) ^ (z) ) ) ) +/* #define Majority(x,y,z) ( ((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)) ) */ +#define Majority(x,y,z) ( ((x) & (y)) ^ ((z) & ((x) ^ (y))) ) + +#define S0(x) (ROTR(2,(x)) ^ ROTR(13,(x)) ^ ROTR(22,(x))) +#define S1(x) (ROTR(6,(x)) ^ ROTR(11,(x)) ^ ROTR(25,(x))) + +#define s0(x) (ROTR(7,(x)) ^ ROTR(18,(x)) ^ SHR(3,(x))) +#define s1(x) (ROTR(17,(x)) ^ ROTR(19,(x)) ^ SHR(10,(x))) + +/* The initial expanding function. The hash function is defined over a + 64-word expanded input array W, where the first 16 are copies of the input + data, and the remaining 48 are defined by + + W[ t ] = s1(W[t-2]) + W[t-7] + s0(W[t-15]) + W[t-16] + + This implementation generates these values on the fly in a circular + buffer - thanks to Colin Plumb, colin@nyx10.cs.du.edu for this + optimization. +*/ + +#define EXPAND(W,i) \ +( W[(i) & 15 ] += (s1(W[((i)-2) & 15]) + W[((i)-7) & 15] + s0(W[((i)-15) & 15])) ) + +/* The prototype SHA sub-round. The fundamental sub-round is: + + T1 = h + S1(e) + Choice(e,f,g) + K[t] + W[t] + T2 = S0(a) + Majority(a,b,c) + a' = T1+T2 + b' = a + c' = b + d' = c + e' = d + T1 + f' = e + g' = f + h' = g + + but this is implemented by unrolling the loop 8 times and renaming + the variables + ( h, a, b, c, d, e, f, g ) = ( a, b, c, d, e, f, g, h ) each + iteration. */ + +/* It's crucial that DATA is only used once, as that argument will + * have side effects. 
*/ +#define ROUND(a,b,c,d,e,f,g,h,k,data) do { \ + uint32_t T = h + S1(e) + Choice(e,f,g) + k + data; \ + d += T; \ + h = T + S0(a) + Majority(a,b,c); \ +} while (0) + +void +_nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k) +{ + uint32_t data[SHA256_DATA_LENGTH]; + uint32_t A, B, C, D, E, F, G, H; /* Local vars */ + unsigned i; + uint32_t *d; + + for (i = 0; i < SHA256_DATA_LENGTH; i++, input+= 4) + { + data[i] = READ_UINT32(input); + } + + /* Set up first buffer and local data buffer */ + A = state[0]; + B = state[1]; + C = state[2]; + D = state[3]; + E = state[4]; + F = state[5]; + G = state[6]; + H = state[7]; + + /* Heavy mangling */ + /* First 16 subrounds that act on the original data */ + + for (i = 0, d = data; i<16; i+=8, k += 8, d+= 8) + { + ROUND(A, B, C, D, E, F, G, H, k[0], d[0]); + ROUND(H, A, B, C, D, E, F, G, k[1], d[1]); + ROUND(G, H, A, B, C, D, E, F, k[2], d[2]); + ROUND(F, G, H, A, B, C, D, E, k[3], d[3]); + ROUND(E, F, G, H, A, B, C, D, k[4], d[4]); + ROUND(D, E, F, G, H, A, B, C, k[5], d[5]); + ROUND(C, D, E, F, G, H, A, B, k[6], d[6]); + ROUND(B, C, D, E, F, G, H, A, k[7], d[7]); + } + + for (; i<64; i += 16, k+= 16) + { + ROUND(A, B, C, D, E, F, G, H, k[ 0], EXPAND(data, 0)); + ROUND(H, A, B, C, D, E, F, G, k[ 1], EXPAND(data, 1)); + ROUND(G, H, A, B, C, D, E, F, k[ 2], EXPAND(data, 2)); + ROUND(F, G, H, A, B, C, D, E, k[ 3], EXPAND(data, 3)); + ROUND(E, F, G, H, A, B, C, D, k[ 4], EXPAND(data, 4)); + ROUND(D, E, F, G, H, A, B, C, k[ 5], EXPAND(data, 5)); + ROUND(C, D, E, F, G, H, A, B, k[ 6], EXPAND(data, 6)); + ROUND(B, C, D, E, F, G, H, A, k[ 7], EXPAND(data, 7)); + ROUND(A, B, C, D, E, F, G, H, k[ 8], EXPAND(data, 8)); + ROUND(H, A, B, C, D, E, F, G, k[ 9], EXPAND(data, 9)); + ROUND(G, H, A, B, C, D, E, F, k[10], EXPAND(data, 10)); + ROUND(F, G, H, A, B, C, D, E, k[11], EXPAND(data, 11)); + ROUND(E, F, G, H, A, B, C, D, k[12], EXPAND(data, 12)); + ROUND(D, E, F, G, H, A, B, C, k[13], EXPAND(data, 13)); + 
ROUND(C, D, E, F, G, H, A, B, k[14], EXPAND(data, 14)); + ROUND(B, C, D, E, F, G, H, A, k[15], EXPAND(data, 15)); + } + + /* Update state */ + state[0] += A; + state[1] += B; + state[2] += C; + state[3] += D; + state[4] += E; + state[5] += F; + state[6] += G; + state[7] += H; +} diff --git a/sha256.c b/sha256.c index 2cf7071dd3e67b2ed0d1843b2a581f0b38aed1ee..9a3f1bb2cba48feefbb5dce09ad7013c3564a9eb 100644 --- a/sha256.c +++ b/sha256.c @@ -39,29 +39,6 @@ #include "macros.h" -/* A block, treated as a sequence of 32-bit words. */ -#define SHA256_DATA_LENGTH 16 - -#define ROTR(n,x) ((x)>>(n) | ((x)<<(32-(n)))) -#define SHR(n,x) ((x)>>(n)) - -/* The SHA256 functions. The Choice function is the same as the SHA1 - function f1, and the majority function is the same as the SHA1 f3 - function. They can be optimized to save one boolean operation each - - thanks to Rich Schroeppel, rcs@cs.arizona.edu for discovering - this */ - -/* #define Choice(x,y,z) ( ( (x) & (y) ) | ( ~(x) & (z) ) ) */ -#define Choice(x,y,z) ( (z) ^ ( (x) & ( (y) ^ (z) ) ) ) -/* #define Majority(x,y,z) ( ((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)) ) */ -#define Majority(x,y,z) ( ((x) & (y)) ^ ((z) & ((x) ^ (y))) ) - -#define S0(x) (ROTR(2,(x)) ^ ROTR(13,(x)) ^ ROTR(22,(x))) -#define S1(x) (ROTR(6,(x)) ^ ROTR(11,(x)) ^ ROTR(25,(x))) - -#define s0(x) (ROTR(7,(x)) ^ ROTR(18,(x)) ^ SHR(3,(x))) -#define s1(x) (ROTR(17,(x)) ^ ROTR(19,(x)) ^ SHR(10,(x))) - /* Generated by the shadata program. */ static const uint32_t K[64] = @@ -84,47 +61,6 @@ K[64] = 0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL, }; -/* The initial expanding function. The hash function is defined over an - 64-word expanded input array W, where the first 16 are copies of the input - data, and the remaining 64 are defined by - - W[ t ] = s1(W[t-2]) + W[t-7] + s0(W[i-15]) + W[i-16] - - This implementation generates these values on the fly in a circular - buffer - thanks to Colin Plumb, colin@nyx10.cs.du.edu for this - optimization. 
-*/ - -#define EXPAND(W,i) \ -( W[(i) & 15 ] += (s1(W[((i)-2) & 15]) + W[((i)-7) & 15] + s0(W[((i)-15) & 15])) ) - -/* The prototype SHA sub-round. The fundamental sub-round is: - - T1 = h + S1(e) + Choice(e,f,g) + K[t] + W[t] - T2 = S0(a) + Majority(a,b,c) - a' = T1+T2 - b' = a - c' = b - d' = c - e' = d + T1 - f' = e - g' = f - h' = g - - but this is implemented by unrolling the loop 8 times and renaming - the variables - ( h, a, b, c, d, e, f, g ) = ( a, b, c, d, e, f, g, h ) each - iteration. This code is then replicated 8, using the next 8 values - from the W[] array each time */ - -/* It's crucial that DATA is only used once, as that argument will - * have side effects. */ -#define ROUND(a,b,c,d,e,f,g,h,k,data) do { \ - uint32_t T = h + S1(e) + Choice(e,f,g) + k + data; \ - d += T; \ - h = T + S0(a) + Majority(a,b,c); \ -} while (0) - /* Initialize the SHA values */ void @@ -146,93 +82,7 @@ sha256_init(struct sha256_ctx *ctx) ctx->index = 0; } -/* Perform the SHA transformation. Note that this code, like MD5, seems to - break some optimizing compilers due to the complexity of the expressions - and the size of the basic block. It may be necessary to split it into - sections, e.g. 
based on the four subrounds - - Note that this function destroys the data area */ - -static void -sha256_transform(uint32_t *state, uint32_t *data) -{ - uint32_t A, B, C, D, E, F, G, H; /* Local vars */ - unsigned i; - const uint32_t *k; - uint32_t *d; - - /* Set up first buffer and local data buffer */ - A = state[0]; - B = state[1]; - C = state[2]; - D = state[3]; - E = state[4]; - F = state[5]; - G = state[6]; - H = state[7]; - - /* Heavy mangling */ - /* First 16 subrounds that act on the original data */ - - for (i = 0, k = K, d = data; i<16; i+=8, k += 8, d+= 8) - { - ROUND(A, B, C, D, E, F, G, H, k[0], d[0]); - ROUND(H, A, B, C, D, E, F, G, k[1], d[1]); - ROUND(G, H, A, B, C, D, E, F, k[2], d[2]); - ROUND(F, G, H, A, B, C, D, E, k[3], d[3]); - ROUND(E, F, G, H, A, B, C, D, k[4], d[4]); - ROUND(D, E, F, G, H, A, B, C, k[5], d[5]); - ROUND(C, D, E, F, G, H, A, B, k[6], d[6]); - ROUND(B, C, D, E, F, G, H, A, k[7], d[7]); - } - - for (; i<64; i += 16, k+= 16) - { - ROUND(A, B, C, D, E, F, G, H, k[ 0], EXPAND(data, 0)); - ROUND(H, A, B, C, D, E, F, G, k[ 1], EXPAND(data, 1)); - ROUND(G, H, A, B, C, D, E, F, k[ 2], EXPAND(data, 2)); - ROUND(F, G, H, A, B, C, D, E, k[ 3], EXPAND(data, 3)); - ROUND(E, F, G, H, A, B, C, D, k[ 4], EXPAND(data, 4)); - ROUND(D, E, F, G, H, A, B, C, k[ 5], EXPAND(data, 5)); - ROUND(C, D, E, F, G, H, A, B, k[ 6], EXPAND(data, 6)); - ROUND(B, C, D, E, F, G, H, A, k[ 7], EXPAND(data, 7)); - ROUND(A, B, C, D, E, F, G, H, k[ 8], EXPAND(data, 8)); - ROUND(H, A, B, C, D, E, F, G, k[ 9], EXPAND(data, 9)); - ROUND(G, H, A, B, C, D, E, F, k[10], EXPAND(data, 10)); - ROUND(F, G, H, A, B, C, D, E, k[11], EXPAND(data, 11)); - ROUND(E, F, G, H, A, B, C, D, k[12], EXPAND(data, 12)); - ROUND(D, E, F, G, H, A, B, C, k[13], EXPAND(data, 13)); - ROUND(C, D, E, F, G, H, A, B, k[14], EXPAND(data, 14)); - ROUND(B, C, D, E, F, G, H, A, k[15], EXPAND(data, 15)); - } - - /* Update state */ - state[0] += A; - state[1] += B; - state[2] += C; - state[3] += D; - 
state[4] += E; - state[5] += F; - state[6] += G; - state[7] += H; -} - -static void -sha256_block(struct sha256_ctx *ctx, const uint8_t *block) -{ - uint32_t data[SHA256_DATA_LENGTH]; - int i; - - /* Update block count */ - if (!++ctx->count_low) - ++ctx->count_high; - - /* Endian independent conversion */ - for (i = 0; i<SHA256_DATA_LENGTH; i++, block += 4) - data[i] = READ_UINT32(block); - - sha256_transform(ctx->state, data); -} +#define SHA256_INCR(ctx) ((ctx)->count_high += !++(ctx)->count_low) void sha256_update(struct sha256_ctx *ctx, @@ -250,14 +100,19 @@ sha256_update(struct sha256_ctx *ctx, else { memcpy(ctx->block + ctx->index, buffer, left); - sha256_block(ctx, ctx->block); + + _nettle_sha256_compress(ctx->state, ctx->block, K); + SHA256_INCR(ctx); + buffer += left; length -= left; } } while (length >= SHA256_DATA_SIZE) { - sha256_block(ctx, buffer); + _nettle_sha256_compress(ctx->state, buffer, K); + SHA256_INCR(ctx); + buffer += SHA256_DATA_SIZE; length -= SHA256_DATA_SIZE; } @@ -275,9 +130,9 @@ sha256_update(struct sha256_ctx *ctx, static void sha256_final(struct sha256_ctx *ctx) { - uint32_t data[SHA256_DATA_LENGTH]; + uint32_t bitcount_high; + uint32_t bitcount_low; int i; - int words; i = ctx->index; @@ -287,32 +142,29 @@ sha256_final(struct sha256_ctx *ctx) assert(i < SHA256_DATA_SIZE); ctx->block[i++] = 0x80; - /* Fill rest of word */ - for( ; i & 3; i++) - ctx->block[i] = 0; - - /* i is now a multiple of the word size 4 */ - words = i >> 2; - for (i = 0; i < words; i++) - data[i] = READ_UINT32(ctx->block + 4*i); - - if (words > (SHA256_DATA_LENGTH-2)) + if (i > (SHA256_DATA_SIZE - 8)) { /* No room for length in this block. 
Process it and * pad with another one */ - for (i = words ; i < SHA256_DATA_LENGTH; i++) - data[i] = 0; - sha256_transform(ctx->state, data); - for (i = 0; i < (SHA256_DATA_LENGTH-2); i++) - data[i] = 0; + memset(ctx->block + i, 0, SHA256_DATA_SIZE - i); + _nettle_sha256_compress(ctx->state, ctx->block, K); + + i = 0; } - else - for (i = words ; i < SHA256_DATA_LENGTH - 2; i++) - data[i] = 0; + + if (i < (SHA256_DATA_SIZE - 8)) + memset(ctx->block + i, 0, (SHA256_DATA_SIZE - 8) - i); /* There are 512 = 2^9 bits in one block */ - data[SHA256_DATA_LENGTH-2] = (ctx->count_high << 9) | (ctx->count_low >> 23); - data[SHA256_DATA_LENGTH-1] = (ctx->count_low << 9) | (ctx->index << 3); - sha256_transform(ctx->state, data); + bitcount_high = (ctx->count_high << 9) | (ctx->count_low >> 23); + bitcount_low = (ctx->count_low << 9) | (ctx->index << 3); + + /* This is slightly inefficient, as the numbers are converted to + big-endian format, and will be converted back by the compression + function. It's probably not worth the effort to fix this. */ + WRITE_UINT32(ctx->block + (SHA256_DATA_SIZE - 8), bitcount_high); + WRITE_UINT32(ctx->block + (SHA256_DATA_SIZE - 4), bitcount_low); + + _nettle_sha256_compress(ctx->state, ctx->block, K); } void