From c54e0270b010ca1a52c988835c170c6b32bd2935 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se> Date: Thu, 25 Jan 2024 20:36:28 +0100 Subject: [PATCH] Change _nettle_aes_decrypt to pass pointer to last subkey. --- aes-decrypt-internal.c | 27 ++++++++++++++------------- aes-internal.h | 2 ++ aes128-decrypt.c | 4 ++-- aes192-decrypt.c | 4 ++-- aes256-decrypt.c | 4 ++-- sparc64/aes-decrypt-internal.asm | 3 --- x86/aes-decrypt-internal.asm | 3 --- x86_64/aes-decrypt-internal.asm | 6 ++---- 8 files changed, 24 insertions(+), 29 deletions(-) diff --git a/aes-decrypt-internal.c b/aes-decrypt-internal.c index fcd7289a..47747ab8 100644 --- a/aes-decrypt-internal.c +++ b/aes-decrypt-internal.c @@ -60,22 +60,23 @@ _nettle_aes_decrypt(unsigned rounds, const uint32_t *keys, { uint32_t w0, w1, w2, w3; /* working ciphertext */ uint32_t t0, t1, t2, t3; + const uint32_t *p; unsigned i; /* Get clear text, using little-endian byte order. * Also XOR with the first subkey. */ - w0 = LE_READ_UINT32(src) ^ keys[4*rounds]; - w1 = LE_READ_UINT32(src + 4) ^ keys[4*rounds + 1]; - w2 = LE_READ_UINT32(src + 8) ^ keys[4*rounds + 2]; - w3 = LE_READ_UINT32(src + 12) ^ keys[4*rounds + 3]; + w0 = LE_READ_UINT32(src) ^ keys[0]; + w1 = LE_READ_UINT32(src + 4) ^ keys[1]; + w2 = LE_READ_UINT32(src + 8) ^ keys[2]; + w3 = LE_READ_UINT32(src + 12) ^ keys[3]; - for (i = rounds - 1; i > 0; i--) + for (i = 1, p = keys - 4; i < rounds; i++, p -= 4) { - t0 = AES_ROUND(T, w0, w3, w2, w1, keys[4*i]); - t1 = AES_ROUND(T, w1, w0, w3, w2, keys[4*i + 1]); - t2 = AES_ROUND(T, w2, w1, w0, w3, keys[4*i + 2]); - t3 = AES_ROUND(T, w3, w2, w1, w0, keys[4*i + 3]); + t0 = AES_ROUND(T, w0, w3, w2, w1, p[0]); + t1 = AES_ROUND(T, w1, w0, w3, w2, p[1]); + t2 = AES_ROUND(T, w2, w1, w0, w3, p[2]); + t3 = AES_ROUND(T, w3, w2, w1, w0, p[3]); /* We could unroll the loop twice, to avoid these assignments. If all eight variables fit in registers, @@ -88,10 +89,10 @@ _nettle_aes_decrypt(unsigned rounds, const uint32_t *keys, /* Final round */ - t0 = AES_FINAL_ROUND(T, w0, w3, w2, w1, keys[0]); - t1 = AES_FINAL_ROUND(T, w1, w0, w3, w2, keys[1]); - t2 = AES_FINAL_ROUND(T, w2, w1, w0, w3, keys[2]); - t3 = AES_FINAL_ROUND(T, w3, w2, w1, w0, keys[3]); + t0 = AES_FINAL_ROUND(T, w0, w3, w2, w1, p[0]); + t1 = AES_FINAL_ROUND(T, w1, w0, w3, w2, p[1]); + t2 = AES_FINAL_ROUND(T, w2, w1, w0, w3, p[2]); + t3 = AES_FINAL_ROUND(T, w3, w2, w1, w0, p[3]); LE_WRITE_UINT32(dst, t0); LE_WRITE_UINT32(dst + 4, t1); diff --git a/aes-internal.h b/aes-internal.h index 64cf7be5..442b045b 100644 --- a/aes-internal.h +++ b/aes-internal.h @@ -66,6 +66,8 @@ _nettle_aes_encrypt(unsigned rounds, const uint32_t *keys, size_t length, uint8_t *dst, const uint8_t *src); +/* The keys pointer points at the subkeys for the first decrypt round, + located at the end of the array. */ void _nettle_aes_decrypt(unsigned rounds, const uint32_t *keys, const struct aes_table *T, diff --git a/aes128-decrypt.c b/aes128-decrypt.c index 436438cc..96d6bc6e 100644 --- a/aes128-decrypt.c +++ b/aes128-decrypt.c @@ -54,6 +54,6 @@ nettle_aes128_decrypt(const struct aes128_ctx *ctx, const uint8_t *src) { assert(!(length % AES_BLOCK_SIZE) ); - _nettle_aes_decrypt(_AES128_ROUNDS, ctx->keys, &_nettle_aes_decrypt_table, - length, dst, src); + _nettle_aes_decrypt(_AES128_ROUNDS, ctx->keys + 4*_AES128_ROUNDS, + &_nettle_aes_decrypt_table, length, dst, src); } diff --git a/aes192-decrypt.c b/aes192-decrypt.c index 7746c76e..bf8702e4 100644 --- a/aes192-decrypt.c +++ b/aes192-decrypt.c @@ -54,6 +54,6 @@ nettle_aes192_decrypt(const struct aes192_ctx *ctx, const uint8_t *src) { assert(!(length % AES_BLOCK_SIZE) ); - _nettle_aes_decrypt(_AES192_ROUNDS, ctx->keys, &_nettle_aes_decrypt_table, - length, dst, src); + _nettle_aes_decrypt(_AES192_ROUNDS, ctx->keys + 4 * _AES192_ROUNDS, + &_nettle_aes_decrypt_table, length, dst, src); } diff --git a/aes256-decrypt.c b/aes256-decrypt.c index 89411c10..ae050797 100644 --- a/aes256-decrypt.c +++ b/aes256-decrypt.c @@ -54,6 +54,6 @@ nettle_aes256_decrypt(const struct aes256_ctx *ctx, const uint8_t *src) { assert(!(length % AES_BLOCK_SIZE) ); - _nettle_aes_decrypt(_AES256_ROUNDS, ctx->keys, &_nettle_aes_decrypt_table, - length, dst, src); + _nettle_aes_decrypt(_AES256_ROUNDS, ctx->keys + 4 * _AES256_ROUNDS, + &_nettle_aes_decrypt_table, length, dst, src); } diff --git a/sparc64/aes-decrypt-internal.asm b/sparc64/aes-decrypt-internal.asm index 5e795233..8918de1b 100644 --- a/sparc64/aes-decrypt-internal.asm +++ b/sparc64/aes-decrypt-internal.asm @@ -88,9 +88,6 @@ PROLOGUE(_nettle_aes_decrypt) add T, AES_TABLE2, T2 add T, AES_TABLE3, T3 - sll ROUNDS, 4, W0 C Can use W0 as scratch - add KEYS, W0, KEYS C Point to last subkey - C Must be even, and includes the final round srl ROUNDS, 1, ROUNDS C Last two rounds handled specially diff --git a/x86/aes-decrypt-internal.asm b/x86/aes-decrypt-internal.asm index 36d6ca44..d8d980da 100644 --- a/x86/aes-decrypt-internal.asm +++ b/x86/aes-decrypt-internal.asm @@ -90,10 +90,7 @@ PROLOGUE(_nettle_aes_decrypt) jz .Lend shrl $4, PARAM_LENGTH - movl PARAM_ROUNDS, TMP decl PARAM_ROUNDS - shll $4, TMP - addl TMP, PARAM_KEYS .Lblock_loop: movl PARAM_KEYS, KEY C address of subkeys diff --git a/x86_64/aes-decrypt-internal.asm b/x86_64/aes-decrypt-internal.asm index ed753a2c..afa44f35 100644 --- a/x86_64/aes-decrypt-internal.asm +++ b/x86_64/aes-decrypt-internal.asm @@ -83,9 +83,7 @@ PROLOGUE(_nettle_aes_decrypt) push %r15 subl $1, XREG(ROUNDS) - push ROUNDS C Rounds stored at (%rsp) - shl $4, XREG(ROUNDS) C Zero-extends - lea 16(KEYS, ROUNDS), KEYS + push ROUNDS C Rounds stored at (%rsp) mov PARAM_TABLE, TABLE mov PARAM_LENGTH, LENGTH @@ -94,8 +92,8 @@ PROLOGUE(_nettle_aes_decrypt) mov KEYS, KEY AES_LOAD(SA, SB, SC, SD, SRC, KEY) + add $16, SRC C Increment src pointer - add $16, SRC C increment src pointer movl (%rsp), XREG(ROUNDS) sub $16, KEY C point to next key -- GitLab