diff --git a/aes-decrypt-internal.c b/aes-decrypt-internal.c index fcd7289a06c630dd45fc5c26944e0181bed7b84c..47747ab868dc00dd0ce0cd1f3004fd101bfa3a82 100644 --- a/aes-decrypt-internal.c +++ b/aes-decrypt-internal.c @@ -60,22 +60,23 @@ _nettle_aes_decrypt(unsigned rounds, const uint32_t *keys, { uint32_t w0, w1, w2, w3; /* working ciphertext */ uint32_t t0, t1, t2, t3; + const uint32_t *p; unsigned i; /* Get clear text, using little-endian byte order. * Also XOR with the first subkey. */ - w0 = LE_READ_UINT32(src) ^ keys[4*rounds]; - w1 = LE_READ_UINT32(src + 4) ^ keys[4*rounds + 1]; - w2 = LE_READ_UINT32(src + 8) ^ keys[4*rounds + 2]; - w3 = LE_READ_UINT32(src + 12) ^ keys[4*rounds + 3]; + w0 = LE_READ_UINT32(src) ^ keys[0]; + w1 = LE_READ_UINT32(src + 4) ^ keys[1]; + w2 = LE_READ_UINT32(src + 8) ^ keys[2]; + w3 = LE_READ_UINT32(src + 12) ^ keys[3]; - for (i = rounds - 1; i > 0; i--) + for (i = 1, p = keys - 4; i < rounds; i++, p -= 4) { - t0 = AES_ROUND(T, w0, w3, w2, w1, keys[4*i]); - t1 = AES_ROUND(T, w1, w0, w3, w2, keys[4*i + 1]); - t2 = AES_ROUND(T, w2, w1, w0, w3, keys[4*i + 2]); - t3 = AES_ROUND(T, w3, w2, w1, w0, keys[4*i + 3]); + t0 = AES_ROUND(T, w0, w3, w2, w1, p[0]); + t1 = AES_ROUND(T, w1, w0, w3, w2, p[1]); + t2 = AES_ROUND(T, w2, w1, w0, w3, p[2]); + t3 = AES_ROUND(T, w3, w2, w1, w0, p[3]); /* We could unroll the loop twice, to avoid these assignments. If all eight variables fit in registers, @@ -88,10 +89,10 @@ _nettle_aes_decrypt(unsigned rounds, const uint32_t *keys, /* Final round */ - t0 = AES_FINAL_ROUND(T, w0, w3, w2, w1, keys[0]); - t1 = AES_FINAL_ROUND(T, w1, w0, w3, w2, keys[1]); - t2 = AES_FINAL_ROUND(T, w2, w1, w0, w3, keys[2]); - t3 = AES_FINAL_ROUND(T, w3, w2, w1, w0, keys[3]); + t0 = AES_FINAL_ROUND(T, w0, w3, w2, w1, p[0]); + t1 = AES_FINAL_ROUND(T, w1, w0, w3, w2, p[1]); + t2 = AES_FINAL_ROUND(T, w2, w1, w0, w3, p[2]); + t3 = AES_FINAL_ROUND(T, w3, w2, w1, w0, p[3]); LE_WRITE_UINT32(dst, t0); LE_WRITE_UINT32(dst + 4, t1); diff --git a/aes-internal.h b/aes-internal.h index 64cf7be56ed1d27ecc414154866770b977593e99..442b045b279121612cf3b2a17177ecf852f2a1e5 100644 --- a/aes-internal.h +++ b/aes-internal.h @@ -66,6 +66,8 @@ _nettle_aes_encrypt(unsigned rounds, const uint32_t *keys, size_t length, uint8_t *dst, const uint8_t *src); +/* The keys pointer points at the subkeys for the first decrypt round, + located at the end of the array. */ void _nettle_aes_decrypt(unsigned rounds, const uint32_t *keys, const struct aes_table *T, diff --git a/aes128-decrypt.c b/aes128-decrypt.c index 436438cc38cd36961ae01572bb629ea82a3a9762..96d6bc6ed57c64c1bd90b80266ce1e0f1f461ff6 100644 --- a/aes128-decrypt.c +++ b/aes128-decrypt.c @@ -54,6 +54,6 @@ nettle_aes128_decrypt(const struct aes128_ctx *ctx, const uint8_t *src) { assert(!(length % AES_BLOCK_SIZE) ); - _nettle_aes_decrypt(_AES128_ROUNDS, ctx->keys, &_nettle_aes_decrypt_table, - length, dst, src); + _nettle_aes_decrypt(_AES128_ROUNDS, ctx->keys + 4*_AES128_ROUNDS, + &_nettle_aes_decrypt_table, length, dst, src); } diff --git a/aes192-decrypt.c b/aes192-decrypt.c index 7746c76e574c09072c90836623e8c42d2581c82f..bf8702e4c679b0129210ad84efd3a0716dfa3562 100644 --- a/aes192-decrypt.c +++ b/aes192-decrypt.c @@ -54,6 +54,6 @@ nettle_aes192_decrypt(const struct aes192_ctx *ctx, const uint8_t *src) { assert(!(length % AES_BLOCK_SIZE) ); - _nettle_aes_decrypt(_AES192_ROUNDS, ctx->keys, &_nettle_aes_decrypt_table, - length, dst, src); + _nettle_aes_decrypt(_AES192_ROUNDS, ctx->keys + 4 * _AES192_ROUNDS, + &_nettle_aes_decrypt_table, length, dst, src); } diff --git a/aes256-decrypt.c b/aes256-decrypt.c index 89411c10ee3ecad7688271f4996b5c268b2a4826..ae050797abacdbc1b5afe6346956be12f8c76a36 100644 --- a/aes256-decrypt.c +++ b/aes256-decrypt.c @@ -54,6 +54,6 @@ nettle_aes256_decrypt(const struct aes256_ctx *ctx, const uint8_t *src) { assert(!(length % AES_BLOCK_SIZE) ); - _nettle_aes_decrypt(_AES256_ROUNDS, ctx->keys, &_nettle_aes_decrypt_table, - length, dst, src); + _nettle_aes_decrypt(_AES256_ROUNDS, ctx->keys + 4 * _AES256_ROUNDS, + &_nettle_aes_decrypt_table, length, dst, src); } diff --git a/sparc64/aes-decrypt-internal.asm b/sparc64/aes-decrypt-internal.asm index 5e7952337e2c1831b91b5ddabe0a3d195650f6eb..8918de1baf84a9f700d76ca8ff8c508651e34a76 100644 --- a/sparc64/aes-decrypt-internal.asm +++ b/sparc64/aes-decrypt-internal.asm @@ -88,9 +88,6 @@ PROLOGUE(_nettle_aes_decrypt) add T, AES_TABLE2, T2 add T, AES_TABLE3, T3 - sll ROUNDS, 4, W0 C Can use W0 as scratch - add KEYS, W0, KEYS C Point to last subkey - C Must be even, and includes the final round srl ROUNDS, 1, ROUNDS C Last two rounds handled specially diff --git a/x86/aes-decrypt-internal.asm b/x86/aes-decrypt-internal.asm index 36d6ca44dc4b50b972a054107c50d5f737f10042..d8d980da8b4880a56e1c022ba38770fa93bfdf37 100644 --- a/x86/aes-decrypt-internal.asm +++ b/x86/aes-decrypt-internal.asm @@ -90,10 +90,7 @@ PROLOGUE(_nettle_aes_decrypt) jz .Lend shrl $4, PARAM_LENGTH - movl PARAM_ROUNDS, TMP decl PARAM_ROUNDS - shll $4, TMP - addl TMP, PARAM_KEYS .Lblock_loop: movl PARAM_KEYS, KEY C address of subkeys diff --git a/x86_64/aes-decrypt-internal.asm b/x86_64/aes-decrypt-internal.asm index ed753a2c00fc596c348f009f8878fba43944e87f..afa44f3568d72bba9038d536d3d3f2c4075dfcce 100644 --- a/x86_64/aes-decrypt-internal.asm +++ b/x86_64/aes-decrypt-internal.asm @@ -83,9 +83,7 @@ PROLOGUE(_nettle_aes_decrypt) push %r15 subl $1, XREG(ROUNDS) - push ROUNDS C Rounds stored at (%rsp) - shl $4, XREG(ROUNDS) C Zero-extends - lea 16(KEYS, ROUNDS), KEYS + push ROUNDS C Rounds stored at (%rsp) mov PARAM_TABLE, TABLE mov PARAM_LENGTH, LENGTH @@ -94,8 +92,8 @@ PROLOGUE(_nettle_aes_decrypt) mov KEYS, KEY AES_LOAD(SA, SB, SC, SD, SRC, KEY) + add $16, SRC C Increment src pointer - add $16, SRC C increment src pointer movl (%rsp), XREG(ROUNDS) sub $16, KEY C point to next key