From ec5c42a8a28b6931a60e26fdd3373dc35c5c52a9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se>
Date: Wed, 13 Feb 2002 14:05:04 +0100
Subject: [PATCH] * aes.c (aes_encrypt): Don't unroll the innerloop.
 (aes_encrypt): Don't unroll the loop for the final round. (aes_decrypt):
 Likewise, no loop unrolling.

Rev: src/nettle/aes.c:1.9
---
 aes.c | 329 +++++++++++++---------------------------------------
 1 file changed, 73 insertions(+), 256 deletions(-)

diff --git a/aes.c b/aes.c
index a32e6df4..f6db8714 100644
--- a/aes.c
+++ b/aes.c
@@ -47,19 +47,31 @@
 
 /* Column j are the shifts used when computing t[j].
  * Row i is says which byte is used */
-#if AES_SMALL
+
+/* FIXME: Figure out how the indexing should really be done. It looks
+ * like this code shifts the rows in the wrong direction, but it
+ * passes the testsuite. Perhaps the tables are rotated in the wrong
+ * direction, but I don't think so. */
+
+/* The row shift counts C1, C2 and C3 are (1, 2, 3) */
+
 static const unsigned idx[4][4] = {
   { 0, 1, 2, 3 },
   { 1, 2, 3, 0 },
   { 2, 3, 0, 1 },
   { 3, 0, 1, 2 } };
-
+#if 0
+static const unsigned idx4[4][4] = {
+  { 0, 4, 8, 12 },
+  { 4, 8, 12, 0 },
+  { 8, 12, 0, 4 },
+  { 12, 0, 4, 8 } };
+#endif
 static const unsigned iidx[4][4] = {
   { 0, 1, 2, 3 },
   { 3, 0, 1, 2 },
   { 2, 3, 0, 1 },
   { 1, 2, 3, 0 } };
-#endif /* AES_SMALL */
 
 void
 aes_encrypt(struct aes_ctx *ctx,
@@ -82,53 +94,23 @@ aes_encrypt(struct aes_ctx *ctx,
           uint32_t t[4];
           unsigned j;
 
-#if DEBUG
-          fprintf(stderr, "encrypt, round: %d\n wtxt: ", round);
-          for (j = 0; j<4; j++)
-            fprintf(stderr, "%08x, ", wtxt[j]);
-          fprintf(stderr, "\n key: ");
-          for (j = 0; j<4; j++)
-            fprintf(stderr, "%08x, ", ctx->keys[4*round + j]);
-          fprintf(stderr, "\n");
-#endif
-
           /* The row shift counts C1, C2 and C3 are (1, 2, 3) */
           /* What's the best way to order this loop? Ideally,
            * we'd want to keep both t and wtxt in registers. */
-#if AES_SMALL
           for (j=0; j<4; j++)
-            t[j] = dtable[0][ B0(wtxt[j]) ] ^
-              ROTRBYTE( dtable[0][ B1(wtxt[idx[1][j]]) ]^
-                        ROTRBYTE( dtable[0][ B2(wtxt[idx[2][j]]) ] ^
-                                  ROTRBYTE(dtable[0][ B3(wtxt[idx[3][j]]) ])));
+            {
+#if AES_SMALL
+              t[j] = dtable[0][ B0(wtxt[j]) ] ^
+                ROTRBYTE( dtable[0][ B1(wtxt[idx[1][j]]) ]^
+                          ROTRBYTE( dtable[0][ B2(wtxt[idx[2][j]]) ] ^
+                                    ROTRBYTE(dtable[0][ B3(wtxt[idx[3][j]]) ])));
 #else /* !AES_SMALL */
-
-            /* FIXME: Figure out how the indexing should really be done.
-             * It looks like this code shifts the rows in the wrong
-             * direction, but it passes the testsuite. */
-            t[0] = ( dtable[0][ B0(wtxt[0]) ]
-                     ^ dtable[1][ B1(wtxt[1]) ]
-                     ^ dtable[2][ B2(wtxt[2]) ]
-                     ^ dtable[3][ B3(wtxt[3]) ]);
-            t[1] = ( dtable[0][ B0(wtxt[1]) ]
-                     ^ dtable[1][ B1(wtxt[2]) ]
-                     ^ dtable[2][ B2(wtxt[3]) ]
-                     ^ dtable[3][ B3(wtxt[0]) ]);
-            t[2] = ( dtable[0][ B0(wtxt[2]) ]
-                     ^ dtable[1][ B1(wtxt[3]) ]
-                     ^ dtable[2][ B2(wtxt[0]) ]
-                     ^ dtable[3][ B3(wtxt[1]) ]);
-            t[3] = ( dtable[0][ B0(wtxt[3]) ]
-                     ^ dtable[1][ B1(wtxt[0]) ]
-                     ^ dtable[2][ B2(wtxt[1]) ]
-                     ^ dtable[3][ B3(wtxt[2]) ]);
+              t[j] = ( dtable[0][ B0(wtxt[idx[0][j]]) ]
+                       ^ dtable[1][ B1(wtxt[idx[1][j]]) ]
+                       ^ dtable[2][ B2(wtxt[idx[2][j]]) ]
+                       ^ dtable[3][ B3(wtxt[idx[3][j]]) ]);
 #endif /* !AES_SMALL */
-#if DEBUG
-          fprintf(stderr, "\n t: ");
-          for (j = 0; j<4; j++)
-            fprintf(stderr, "%08x, ", t[j]);
-          fprintf(stderr, "\n");
-#endif
+            }
 
       for (j = 0; j<4; j++)
         wtxt[j] = t[j] ^ ctx->keys[4*round + j];
@@ -136,63 +118,29 @@ aes_encrypt(struct aes_ctx *ctx,
 
       /* Final round */
       {
        uint32_t cipher;
-
-       /* FIXME: Figure out how the indexing should really be done.
-        * It looks like this code shifts the rows in the wrong
-        * direction, but it passes the testsuite. */
-
-       cipher = ( (uint32_t) sbox[ B0(wtxt[0]) ]
-                  | ((uint32_t) sbox[ B1(wtxt[1]) ] << 8)
-                  | ((uint32_t) sbox[ B2(wtxt[2]) ] << 16)
-                  | ((uint32_t) sbox[ B3(wtxt[3]) ] << 24));
+       unsigned j;
+       for (j = 0; j<4; j++)
+         {
+           /* FIXME: Figure out how the indexing should really be done.
+            * It looks like this code shifts the rows in the wrong
+            * direction, but it passes the testsuite. */
+
+           cipher = ( (uint32_t) sbox[ B0(wtxt[j]) ]
+                      | ((uint32_t) sbox[ B1(wtxt[idx[1][j]]) ] << 8)
+                      | ((uint32_t) sbox[ B2(wtxt[idx[2][j]]) ] << 16)
+                      | ((uint32_t) sbox[ B3(wtxt[idx[3][j]]) ] << 24));
 #if DEBUG
-       fprintf(stderr, " t[0]: %x, key: %x\n",
-               cipher, ctx->keys[4*round]);
+           fprintf(stderr, " t[%d]: %x, key: %x\n",
+                   j, cipher, ctx->keys[4*round + j]);
 #endif
-       cipher ^= ctx->keys[4*round];
+           cipher ^= ctx->keys[4*round + j];
 
-       LE_WRITE_UINT32(dst, cipher);
-
-       cipher = ( (uint32_t) sbox[ B0(wtxt[1]) ]
-                  | ((uint32_t) sbox[ B1(wtxt[2]) ] << 8)
-                  | ((uint32_t) sbox[ B2(wtxt[3]) ] << 16)
-                  | ((uint32_t) sbox[ B3(wtxt[0]) ] << 24));
-#if DEBUG
-       fprintf(stderr, " t[1]: %x, key: %x\n",
-               cipher, ctx->keys[4*round + 1]);
-#endif
-       cipher ^= ctx->keys[4*round + 1];
-
-       LE_WRITE_UINT32(dst + 4, cipher);
-
-       cipher = ( (uint32_t) sbox[ B0(wtxt[2]) ]
-                  | ((uint32_t) sbox[ B1(wtxt[3]) ] << 8)
-                  | ((uint32_t) sbox[ B2(wtxt[0]) ] << 16)
-                  | ((uint32_t) sbox[ B3(wtxt[1]) ] << 24));
-#if DEBUG
-       fprintf(stderr, " t[2]: %x, key: %x\n",
-               cipher, ctx->keys[4*round + 2]);
-#endif
-       cipher ^= ctx->keys[4*round + 2];
-
-       LE_WRITE_UINT32(dst + 8, cipher);
-
-       cipher = ( (uint32_t) sbox[ B0(wtxt[3]) ]
-                  | ((uint32_t) sbox[ B1(wtxt[0]) ] << 8)
-                  | ((uint32_t) sbox[ B2(wtxt[1]) ] << 16)
-                  | ((uint32_t) sbox[ B3(wtxt[2]) ] << 24));
-#if DEBUG
-       fprintf(stderr, " t[3]: %x, key: %x\n",
-               cipher, ctx->keys[4*round + 3]);
-#endif
-       cipher ^= ctx->keys[4*round + 3];
-
-       LE_WRITE_UINT32(dst + 12, cipher);
+           LE_WRITE_UINT32(dst + 4*j, cipher);
+         }
       }
     }
 }
-#if 1
 void
 aes_decrypt(struct aes_ctx *ctx,
             unsigned length, uint8_t *dst,
@@ -240,33 +188,24 @@ aes_decrypt(struct aes_ctx *ctx,
 
           /* What's the best way to order this loop? Ideally,
            * we'd want to keep both t and wtxt in registers. */
-#if AES_SMALL
           for (j=0; j<4; j++)
-            t[j] = itable[0][ B0(wtxt[j]) ] ^
-              ROTRBYTE( itable[0][ B1(wtxt[iidx[1][j]]) ]^
-                        ROTRBYTE( itable[0][ B2(wtxt[iidx[2][j]]) ] ^
-                                  ROTRBYTE(itable[0][ B3(wtxt[iidx[3][j]]) ])));
+            {
+#if AES_SMALL
+              t[j] = itable[0][ B0(wtxt[j]) ] ^
+                ROTRBYTE( itable[0][ B1(wtxt[iidx[1][j]]) ]^
+                          ROTRBYTE( itable[0][ B2(wtxt[iidx[2][j]]) ] ^
+                                    ROTRBYTE(itable[0][ B3(wtxt[iidx[3][j]]) ])));
 #else /* !AES_SMALL */
-            /* FIXME: Figure out how the indexing should really be done.
-             * It looks like this code shifts the rows in the wrong
-             * direction, but it passes the testsuite. */
-            t[0] = ( itable[0][ B0(wtxt[0]) ]   /* 0 1 2 3 */
-                     ^ itable[1][ B1(wtxt[3]) ]
-                     ^ itable[2][ B2(wtxt[2]) ]
-                     ^ itable[3][ B3(wtxt[1]) ]);
-            t[1] = ( itable[0][ B0(wtxt[1]) ]   /* 3 0 1 2 */
-                     ^ itable[1][ B1(wtxt[0]) ]
-                     ^ itable[2][ B2(wtxt[3]) ]
-                     ^ itable[3][ B3(wtxt[2]) ]);
-            t[2] = ( itable[0][ B0(wtxt[2]) ]   /* 2 3 0 1 */
-                     ^ itable[1][ B1(wtxt[1]) ]
-                     ^ itable[2][ B2(wtxt[0]) ]
-                     ^ itable[3][ B3(wtxt[3]) ]);
-            t[3] = ( itable[0][ B0(wtxt[3]) ]   /* 1 2 3 0 */
-                     ^ itable[1][ B1(wtxt[2]) ]
-                     ^ itable[2][ B2(wtxt[1]) ]
-                     ^ itable[3][ B3(wtxt[0]) ]);
+              /* FIXME: Figure out how the indexing should really be done.
+               * It looks like this code shifts the rows in the wrong
+               * direction, but it passes the testsuite. */
+              for (j=0; j<4; j++)
+                t[j] = ( itable[0][ B0(wtxt[iidx[0][j]]) ]
+                         ^ itable[1][ B1(wtxt[iidx[1][j]]) ]
+                         ^ itable[2][ B2(wtxt[iidx[2][j]]) ]
+                         ^ itable[3][ B3(wtxt[iidx[3][j]]) ]);
 #endif /* !AES_SMALL */
+            }
 #if DEBUG
           fprintf(stderr, " t: ");
           for (j = 0; j<4; j++)
@@ -279,148 +218,26 @@ aes_decrypt(struct aes_ctx *ctx,
 
       /* Final round */
       {
        uint32_t clear;
+       unsigned j;
+       for (j = 0; j<4; j++)
+         {
+           /* FIXME: Figure out how the indexing should really be done.
+            * It looks like this code shifts the rows in the wrong
+            * direction, but it passes the testsuite. */
+
+           clear = ( (uint32_t) isbox[ B0(wtxt[j]) ]
+                     | ((uint32_t) isbox[ B1(wtxt[iidx[1][j]]) ] << 8)
+                     | ((uint32_t) isbox[ B2(wtxt[iidx[2][j]]) ] << 16)
+                     | ((uint32_t) isbox[ B3(wtxt[iidx[3][j]]) ] << 24));
 
-       /* FIXME: Figure out how the indexing should really be done.
-        * It looks like this code shifts the rows in the wrong
-        * direction, but it passes the testsuite. */
-
-       clear = ( (uint32_t) isbox[ B0(wtxt[0]) ]
-                 | ((uint32_t) isbox[ B1(wtxt[3]) ] << 8)
-                 | ((uint32_t) isbox[ B2(wtxt[2]) ] << 16)
-                 | ((uint32_t) isbox[ B3(wtxt[1]) ] << 24));
-#if DEBUG
-       fprintf(stderr, " t[0]: %x, key: %x\n",
-               clear, ctx->ikeys[4*round]);
-#endif
-       clear ^= ctx->ikeys[4*round];
-
-       LE_WRITE_UINT32(dst, clear);
-
-       clear = ( (uint32_t) isbox[ B0(wtxt[1]) ]
-                 | ((uint32_t) isbox[ B1(wtxt[0]) ] << 8)
-                 | ((uint32_t) isbox[ B2(wtxt[3]) ] << 16)
-                 | ((uint32_t) isbox[ B3(wtxt[2]) ] << 24));
-#if DEBUG
-       fprintf(stderr, " t[1]: %x, key: %x\n",
-               clear, ctx->ikeys[4*round + 1]);
-#endif
-       clear ^= ctx->ikeys[4*round + 1];
-
-       LE_WRITE_UINT32(dst + 4, clear);
-
-       clear = ( (uint32_t) isbox[ B0(wtxt[2]) ]
-                 | ((uint32_t) isbox[ B1(wtxt[1]) ] << 8)
-                 | ((uint32_t) isbox[ B2(wtxt[0]) ] << 16)
-                 | ((uint32_t) isbox[ B3(wtxt[3]) ] << 24));
 #if DEBUG
-       fprintf(stderr, " t[2]: %x, key: %x\n",
-               clear, ctx->ikeys[4*round+2]);
+           fprintf(stderr, " t[%d]: %x, key: %x\n",
+                   j, clear, ctx->ikeys[4*round + j]);
 #endif
-       clear ^= ctx->ikeys[4*round + 2];
+           clear ^= ctx->ikeys[4*round + j];
 
-       LE_WRITE_UINT32(dst + 8, clear);
-
-       clear = ( (uint32_t) isbox[ B0(wtxt[3]) ]
-                 | ((uint32_t) isbox[ B1(wtxt[2]) ] << 8)
-                 | ((uint32_t) isbox[ B2(wtxt[1]) ] << 16)
-                 | ((uint32_t) isbox[ B3(wtxt[0]) ] << 24));
-#if DEBUG
-       fprintf(stderr, " t[3]: %x, key: %x\n",
-               clear, ctx->ikeys[4*round+3]);
-#endif
-       clear ^= ctx->ikeys[4*round + 3];
-
-       LE_WRITE_UINT32(dst + 12, clear);
+           LE_WRITE_UINT32(dst + 4*j, clear);
+         }
       }
     }
 }
-
-#else
-/* Key addition that also packs every byte in the key to a word rep. */
-static void
-key_addition_8to32(const uint8_t *txt, const uint32_t *keys, uint32_t *out)
-{
-  const uint8_t *ptr;
-  unsigned i, j;
-  uint32_t val;
-
-  ptr = txt;
-  for (i=0; i<4; i++)
-    {
-      /* FIXME: Use the READ_UINT32 or LE_READ_UINT32 macro. */
-      val = 0;
-      for (j=0; j<4; j++)
-        val |= (*ptr++ << 8*j);
-      out[i] = keys[i]^val;
-    }
-}
-
-static void
-key_addition32(const uint32_t *txt, const uint32_t *keys, uint32_t *out)
-{
-  unsigned i;
-
-  for (i=0; i<4; i++)
-    out[i] = keys[i] ^ txt[i];
-}
-
-static void
-key_addition32to8(const uint32_t *txt, const uint32_t *keys, uint8_t *out)
-{
-  uint8_t *ptr;
-  unsigned i, j;
-  uint32_t val;
-
-  ptr = out;
-  for (i=0; i<4; i++)
-    {
-      /* FIXME: Use WRITE_UINT32 or LE_WRITE_UINT32 */
-      val = txt[i] ^ keys[i];
-      for (j=0; j<4; j++)
-        *ptr++ = (val >> 8*j) & 0xff;
-    }
-}
-
-void
-aes_decrypt(struct aes_ctx *ctx,
-            unsigned length, uint8_t *dst,
-            const uint8_t *src)
-{
-  unsigned r, j;
-  uint32_t wtxt[4], t[4];   /* working ciphertext */
-  uint32_t e;
-
-  assert(!(length % AES_BLOCK_SIZE));
-
-  for (; length;
-       length -= AES_BLOCK_SIZE, src += AES_BLOCK_SIZE, dst += AES_BLOCK_SIZE)
-    {
-      key_addition_8to32(src, ctx->ikeys + 4*ctx->nrounds, wtxt);
-      for (r=ctx->nrounds-1; r> 0; r--)
-        {
-          for (j=0; j<4; j++)
-            {
-              t[j] = itbl[wtxt[j] & 0xff] ^
-                ROTRBYTE(itbl[(wtxt[iidx[1][j]] >> 8) & 0xff]^
-                         ROTRBYTE(itbl[(wtxt[iidx[2][j]] >> 16) & 0xff] ^
-                                  ROTRBYTE(itbl[(wtxt[iidx[3][j]] >> 24) & 0xff])));
-            }
-          key_addition32(t, ctx->ikeys + r*4, wtxt);
-        }
-      /* last round is special: there is no mixcolumn, so we can't use the big
-         tables. */
-      for (j=0; j<4; j++)
-        {
-          e = wtxt[j] & 0xff;
-          e |= (wtxt[iidx[1][j]]) & (0xff << 8);
-          e |= (wtxt[iidx[2][j]]) & (0xff << 16);
-          e |= (wtxt[iidx[3][j]]) & (0xff << 24);
-          t[j] = e;
-        }
-      for (j=0; j<4; j++)
-        t[j] = SUBBYTE(t[j], isbox);
-
-      key_addition32to8(t, ctx->ikeys, dst);
-    }
-}
-#endif
-- 
GitLab
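
Note, added for illustration and not part of the patch: every rolled-up loop above uses the same gather pattern. Byte i of output column j is taken from input column idx[i][j], where idx[i][j] = (i + j) % 4, and iidx is the corresponding inverse table used on the decrypt side. The self-contained sketch below reproduces just that ShiftRows-style gather on a dummy state and checks that idx and iidx undo each other. byte_of() and gather_rows() are hypothetical helper names, not nettle functions; the S-box and the dtable/itable lookups are deliberately left out, and whether this gather corresponds to the left or the right row rotation of the AES specification is exactly what the FIXME comments question.

#include <stdint.h>
#include <stdio.h>

/* Same tables as in the patch. */
static const unsigned idx[4][4] = {
  { 0, 1, 2, 3 },
  { 1, 2, 3, 0 },
  { 2, 3, 0, 1 },
  { 3, 0, 1, 2 } };

static const unsigned iidx[4][4] = {
  { 0, 1, 2, 3 },
  { 3, 0, 1, 2 },
  { 2, 3, 0, 1 },
  { 1, 2, 3, 0 } };

/* Byte i (0 = least significant) of a little-endian column word. */
static uint8_t
byte_of(uint32_t w, unsigned i)
{
  return (w >> (8*i)) & 0xff;
}

/* The gather step shared by the rolled-up loops: byte i of output
 * column j is taken from input column tab[i][j]. */
static void
gather_rows(const unsigned tab[4][4], const uint32_t in[4], uint32_t out[4])
{
  unsigned i, j;
  for (j = 0; j<4; j++)
    {
      out[j] = 0;
      for (i = 0; i<4; i++)
        out[j] |= (uint32_t) byte_of(in[tab[i][j]], i) << (8*i);
    }
}

int
main(void)
{
  uint32_t w[4], s[4], u[4];
  unsigned i, j;

  /* Dummy state: byte value 0xRC encodes row R, column C. */
  for (j = 0; j<4; j++)
    {
      w[j] = 0;
      for (i = 0; i<4; i++)
        w[j] |= (uint32_t) ((i << 4) | j) << (8*i);
    }

  gather_rows(idx, w, s);   /* forward gather, as in the encrypt loops */
  gather_rows(iidx, s, u);  /* inverse gather restores the original state */

  for (j = 0; j<4; j++)
    printf("w[%u]=%08x  shifted=%08x  restored=%08x\n", j, w[j], s[j], u[j]);

  return 0;
}

Worked by hand for j = 0: s[0] collects byte 0 of w[0], byte 1 of w[1], byte 2 of w[2] and byte 3 of w[3], which is the same selection the patch writes as B0(wtxt[j]), B1(wtxt[idx[1][j]]), B2(wtxt[idx[2][j]]), B3(wtxt[idx[3][j]]).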