diff --git a/aes.c b/aes.c index 47a054729dac9b54fed5c1c08b80e9a36b1a3690..a32e6df43fe76f7979d0ff67f7f8074dd9b35cec 100644 --- a/aes.c +++ b/aes.c @@ -32,7 +32,7 @@ #include <assert.h> #ifndef DEBUG -#define DEBUG 0 +# define DEBUG 0 #endif #if DEBUG @@ -45,12 +45,20 @@ #define B2(x) (((x) >> 16) & 0xff) #define B3(x) (((x) >> 24) & 0xff) +/* Column j are the shifts used when computing t[j]. + * Row i is says which byte is used */ #if AES_SMALL static const unsigned idx[4][4] = { { 0, 1, 2, 3 }, { 1, 2, 3, 0 }, { 2, 3, 0, 1 }, { 3, 0, 1, 2 } }; + +static const unsigned iidx[4][4] = { + { 0, 1, 2, 3 }, + { 3, 0, 1, 2 }, + { 2, 3, 0, 1 }, + { 1, 2, 3, 0 } }; #endif /* AES_SMALL */ void @@ -75,71 +83,53 @@ aes_encrypt(struct aes_ctx *ctx, unsigned j; #if DEBUG - fprintf(stderr, "round: %d\n wtxt: ", round); + fprintf(stderr, "encrypt, round: %d\n wtxt: ", round); for (j = 0; j<4; j++) fprintf(stderr, "%08x, ", wtxt[j]); fprintf(stderr, "\n key: "); for (j = 0; j<4; j++) fprintf(stderr, "%08x, ", ctx->keys[4*round + j]); fprintf(stderr, "\n"); - - fprintf(stderr, - " B0(wtxt[0]): %x\n" - " dtbl[0]: %x\n", - B0(wtxt[0]), dtbl[0][ B0(wtxt[0]) ]); - fprintf(stderr, - " B1(wtxt[1]): %x\n" - " dtbl[1]: %x\n", - B1(wtxt[1]), dtbl[1][ B1(wtxt[1]) ]); - fprintf(stderr, - " B2(wtxt[2]): %x\n" - " dtbl[2]: %x\n", - B2(wtxt[2]), dtbl[2][ B2(wtxt[2]) ]); - fprintf(stderr, - " B3(wtxt[3]): %x\n" - " dtbl[3]: %x\n", - B3(wtxt[3]), dtbl[3][ B3(wtxt[3]) ]); #endif - /* The row shift counts C1, C2 and C3 are (1, 2, 3) */ /* What's the best way to order this loop? Ideally, * we'd want to keep both t and wtxt in registers. */ #if AES_SMALL for (j=0; j<4; j++) - t[j] = dtbl[0][ B0(wtxt[j]) ] ^ - ROTRBYTE( dtbl[0][ B1(wtxt[idx[1][j]]) ]^ - ROTRBYTE( dtbl[0][ B2(wtxt[idx[2][j]]) ] ^ - ROTRBYTE(dtbl[0][ B3(wtxt[idx[3][j]]) ]))); + t[j] = dtable[0][ B0(wtxt[j]) ] ^ + ROTRBYTE( dtable[0][ B1(wtxt[idx[1][j]]) ]^ + ROTRBYTE( dtable[0][ B2(wtxt[idx[2][j]]) ] ^ + ROTRBYTE(dtable[0][ B3(wtxt[idx[3][j]]) ]))); #else /* !AES_SMALL */ /* FIXME: Figure out how the indexing should really be done. * It looks like this code shifts the rows in the wrong * direction, but it passes the testsuite. */ - t[0] = ( dtbl[0][ B0(wtxt[0]) ] - ^ dtbl[1][ B1(wtxt[1]) ] - ^ dtbl[2][ B2(wtxt[2]) ] - ^ dtbl[3][ B3(wtxt[3]) ]); - t[3] = ( dtbl[0][ B0(wtxt[3]) ] - ^ dtbl[1][ B1(wtxt[0]) ] - ^ dtbl[2][ B2(wtxt[1]) ] - ^ dtbl[3][ B3(wtxt[2]) ]); - t[2] = ( dtbl[0][ B0(wtxt[2]) ] - ^ dtbl[1][ B1(wtxt[3]) ] - ^ dtbl[2][ B2(wtxt[0]) ] - ^ dtbl[3][ B3(wtxt[1]) ]); - t[1] = ( dtbl[0][ B0(wtxt[1]) ] - ^ dtbl[1][ B1(wtxt[2]) ] - ^ dtbl[2][ B2(wtxt[3]) ] - ^ dtbl[3][ B3(wtxt[0]) ]); + t[0] = ( dtable[0][ B0(wtxt[0]) ] + ^ dtable[1][ B1(wtxt[1]) ] + ^ dtable[2][ B2(wtxt[2]) ] + ^ dtable[3][ B3(wtxt[3]) ]); + t[1] = ( dtable[0][ B0(wtxt[1]) ] + ^ dtable[1][ B1(wtxt[2]) ] + ^ dtable[2][ B2(wtxt[3]) ] + ^ dtable[3][ B3(wtxt[0]) ]); + t[2] = ( dtable[0][ B0(wtxt[2]) ] + ^ dtable[1][ B1(wtxt[3]) ] + ^ dtable[2][ B2(wtxt[0]) ] + ^ dtable[3][ B3(wtxt[1]) ]); + t[3] = ( dtable[0][ B0(wtxt[3]) ] + ^ dtable[1][ B1(wtxt[0]) ] + ^ dtable[2][ B2(wtxt[1]) ] + ^ dtable[3][ B3(wtxt[2]) ]); #endif /* !AES_SMALL */ - #if DEBUG fprintf(stderr, "\n t: "); for (j = 0; j<4; j++) fprintf(stderr, "%08x, ", t[j]); fprintf(stderr, "\n"); #endif + for (j = 0; j<4; j++) wtxt[j] = t[j] ^ ctx->keys[4*round + j]; } @@ -154,39 +144,198 @@ aes_encrypt(struct aes_ctx *ctx, cipher = ( (uint32_t) sbox[ B0(wtxt[0]) ] | ((uint32_t) sbox[ B1(wtxt[1]) ] << 8) | ((uint32_t) sbox[ B2(wtxt[2]) ] << 16) - | ((uint32_t) sbox[ B3(wtxt[3]) ] << 24)) - ^ ctx->keys[4*round]; + | ((uint32_t) sbox[ B3(wtxt[3]) ] << 24)); +#if DEBUG + fprintf(stderr, " t[0]: %x, key: %x\n", + cipher, ctx->keys[4*round]); +#endif + cipher ^= ctx->keys[4*round]; LE_WRITE_UINT32(dst, cipher); cipher = ( (uint32_t) sbox[ B0(wtxt[1]) ] | ((uint32_t) sbox[ B1(wtxt[2]) ] << 8) | ((uint32_t) sbox[ B2(wtxt[3]) ] << 16) - | ((uint32_t) sbox[ B3(wtxt[0]) ] << 24)) - ^ ctx->keys[4*round + 1]; + | ((uint32_t) sbox[ B3(wtxt[0]) ] << 24)); +#if DEBUG + fprintf(stderr, " t[1]: %x, key: %x\n", + cipher, ctx->keys[4*round + 1]); +#endif + cipher ^= ctx->keys[4*round + 1]; LE_WRITE_UINT32(dst + 4, cipher); cipher = ( (uint32_t) sbox[ B0(wtxt[2]) ] | ((uint32_t) sbox[ B1(wtxt[3]) ] << 8) | ((uint32_t) sbox[ B2(wtxt[0]) ] << 16) - | ((uint32_t) sbox[ B3(wtxt[1]) ] << 24)) - ^ ctx->keys[4*round + 2]; + | ((uint32_t) sbox[ B3(wtxt[1]) ] << 24)); +#if DEBUG + fprintf(stderr, " t[2]: %x, key: %x\n", + cipher, ctx->keys[4*round + 2]); +#endif + cipher ^= ctx->keys[4*round + 2]; LE_WRITE_UINT32(dst + 8, cipher); cipher = ( (uint32_t) sbox[ B0(wtxt[3]) ] | ((uint32_t) sbox[ B1(wtxt[0]) ] << 8) | ((uint32_t) sbox[ B2(wtxt[1]) ] << 16) - | ((uint32_t) sbox[ B3(wtxt[2]) ] << 24)) - ^ ctx->keys[4*round + 3]; + | ((uint32_t) sbox[ B3(wtxt[2]) ] << 24)); +#if DEBUG + fprintf(stderr, " t[3]: %x, key: %x\n", + cipher, ctx->keys[4*round + 3]); +#endif + cipher ^= ctx->keys[4*round + 3]; LE_WRITE_UINT32(dst + 12, cipher); } } } - - + +#if 1 +void +aes_decrypt(struct aes_ctx *ctx, + unsigned length, uint8_t *dst, + const uint8_t *src) +{ +#if DEBUG + { + unsigned i, j; + fprintf(stderr, "subkeys:\n"); + for (j = 0; j<=ctx->nrounds; j++) + { + printf(" %d: ", j); + for (i = 0; i<4; i++) + printf("%08x, ", ctx->ikeys[i + 4*j]); + printf("\n"); + } + } +#endif + FOR_BLOCKS(length, dst, src, AES_BLOCK_SIZE) + { + uint32_t wtxt[4]; /* working ciphertext */ + unsigned i; + unsigned round; + + /* Get cipher text, using little-endian byte order. + * Also XOR with the first subkey. */ + for (i = 0; i<4; i++) + wtxt[i] = LE_READ_UINT32(src + 4*i) ^ ctx->ikeys[i]; + + for (round = 1; round < ctx->nrounds; round++) + { + uint32_t t[4]; + unsigned j; + +#if DEBUG + fprintf(stderr, "decrypt, round: %d\n wtxt: ", round); + for (j = 0; j<4; j++) + fprintf(stderr, "%08x, ", wtxt[j]); + fprintf(stderr, "\n key: "); + for (j = 0; j<4; j++) + fprintf(stderr, "%08x, ", ctx->ikeys[4*round + j]); + fprintf(stderr, "\n"); +#endif + /* The row shift counts C1, C2 and C3 are (1, 2, 3) */ + /* What's the best way to order this loop? Ideally, + * we'd want to keep both t and wtxt in registers. */ + +#if AES_SMALL + for (j=0; j<4; j++) + t[j] = itable[0][ B0(wtxt[j]) ] ^ + ROTRBYTE( itable[0][ B1(wtxt[iidx[1][j]]) ]^ + ROTRBYTE( itable[0][ B2(wtxt[iidx[2][j]]) ] ^ + ROTRBYTE(itable[0][ B3(wtxt[iidx[3][j]]) ]))); +#else /* !AES_SMALL */ + /* FIXME: Figure out how the indexing should really be done. + * It looks like this code shifts the rows in the wrong + * direction, but it passes the testsuite. */ + t[0] = ( itable[0][ B0(wtxt[0]) ] /* 0 1 2 3 */ + ^ itable[1][ B1(wtxt[3]) ] + ^ itable[2][ B2(wtxt[2]) ] + ^ itable[3][ B3(wtxt[1]) ]); + t[1] = ( itable[0][ B0(wtxt[1]) ] /* 3 0 1 2 */ + ^ itable[1][ B1(wtxt[0]) ] + ^ itable[2][ B2(wtxt[3]) ] + ^ itable[3][ B3(wtxt[2]) ]); + t[2] = ( itable[0][ B0(wtxt[2]) ] /* 2 3 0 1 */ + ^ itable[1][ B1(wtxt[1]) ] + ^ itable[2][ B2(wtxt[0]) ] + ^ itable[3][ B3(wtxt[3]) ]); + t[3] = ( itable[0][ B0(wtxt[3]) ] /* 1 2 3 0 */ + ^ itable[1][ B1(wtxt[2]) ] + ^ itable[2][ B2(wtxt[1]) ] + ^ itable[3][ B3(wtxt[0]) ]); +#endif /* !AES_SMALL */ +#if DEBUG + fprintf(stderr, " t: "); + for (j = 0; j<4; j++) + fprintf(stderr, "%08x, ", t[j]); + fprintf(stderr, "\n"); +#endif + for (j = 0; j<4; j++) + wtxt[j] = t[j] ^ ctx->ikeys[4*round + j]; + } + /* Final round */ + { + uint32_t clear; + + /* FIXME: Figure out how the indexing should really be done. + * It looks like this code shifts the rows in the wrong + * direction, but it passes the testsuite. */ + + clear = ( (uint32_t) isbox[ B0(wtxt[0]) ] + | ((uint32_t) isbox[ B1(wtxt[3]) ] << 8) + | ((uint32_t) isbox[ B2(wtxt[2]) ] << 16) + | ((uint32_t) isbox[ B3(wtxt[1]) ] << 24)); +#if DEBUG + fprintf(stderr, " t[0]: %x, key: %x\n", + clear, ctx->ikeys[4*round]); +#endif + clear ^= ctx->ikeys[4*round]; + + LE_WRITE_UINT32(dst, clear); + + clear = ( (uint32_t) isbox[ B0(wtxt[1]) ] + | ((uint32_t) isbox[ B1(wtxt[0]) ] << 8) + | ((uint32_t) isbox[ B2(wtxt[3]) ] << 16) + | ((uint32_t) isbox[ B3(wtxt[2]) ] << 24)); +#if DEBUG + fprintf(stderr, " t[1]: %x, key: %x\n", + clear, ctx->ikeys[4*round + 1]); +#endif + clear ^= ctx->ikeys[4*round + 1]; + + LE_WRITE_UINT32(dst + 4, clear); + + clear = ( (uint32_t) isbox[ B0(wtxt[2]) ] + | ((uint32_t) isbox[ B1(wtxt[1]) ] << 8) + | ((uint32_t) isbox[ B2(wtxt[0]) ] << 16) + | ((uint32_t) isbox[ B3(wtxt[3]) ] << 24)); +#if DEBUG + fprintf(stderr, " t[2]: %x, key: %x\n", + clear, ctx->ikeys[4*round+2]); +#endif + clear ^= ctx->ikeys[4*round + 2]; + + LE_WRITE_UINT32(dst + 8, clear); + + clear = ( (uint32_t) isbox[ B0(wtxt[3]) ] + | ((uint32_t) isbox[ B1(wtxt[2]) ] << 8) + | ((uint32_t) isbox[ B2(wtxt[1]) ] << 16) + | ((uint32_t) isbox[ B3(wtxt[0]) ] << 24)); +#if DEBUG + fprintf(stderr, " t[3]: %x, key: %x\n", + clear, ctx->ikeys[4*round+3]); +#endif + clear ^= ctx->ikeys[4*round + 3]; + + LE_WRITE_UINT32(dst + 12, clear); + } + } +} + +#else /* Key addition that also packs every byte in the key to a word rep. */ static void key_addition_8to32(const uint8_t *txt, const uint32_t *keys, uint32_t *out) @@ -232,12 +381,6 @@ key_addition32to8(const uint32_t *txt, const uint32_t *keys, uint8_t *out) } } -static const unsigned iidx[4][4] = { - { 0, 1, 2, 3 }, - { 3, 0, 1, 2 }, - { 2, 3, 0, 1 }, - { 1, 2, 3, 0 } }; - void aes_decrypt(struct aes_ctx *ctx, unsigned length, uint8_t *dst, @@ -280,3 +423,4 @@ aes_decrypt(struct aes_ctx *ctx, key_addition32to8(t, ctx->ikeys, dst); } } +#endif