/* aes.c
 *
 * The aes/rijndael block cipher.
 */

/* nettle, low-level cryptographics library
 *
 * Copyright (C) 2000, 2001 Rafael R. Sevilla, Niels Möller
 *
 * The nettle library is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at your
 * option) any later version.
 *
 * The nettle library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 * License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with the nettle library; see the file COPYING.LIB.  If not, write to
 * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 * MA 02111-1307, USA.
 */

/* Originally written by Rafael R. Sevilla <dido@pacific.net.ph> */

#include "aes-internal.h"

#include "macros.h"

#include <assert.h>

#ifndef DEBUG
# define DEBUG 0
#endif

#if DEBUG
# include <stdio.h>
#endif

/* Get the byte with index 0, 1, 2 and 3 */
#define B0(x) ((x) & 0xff)
#define B1(x) (((x) >> 8) & 0xff)
#define B2(x) (((x) >> 16) & 0xff)
#define B3(x) (((x) >> 24) & 0xff)

/* Column j holds the word indices used when computing t[j].
 * Row i says which word byte i is taken from.
 *
 * idx[i][j] is (j + i) mod 4 and iidx[i][j] is (j - i) mod 4. */
#if AES_SMALL
static const unsigned idx[4][4] = {
  { 0, 1, 2, 3 },
  { 1, 2, 3, 0 },
  { 2, 3, 0, 1 },
  { 3, 0, 1, 2 } };

static const unsigned iidx[4][4] = {
  { 0, 1, 2, 3 },
  { 3, 0, 1, 2 },
  { 2, 3, 0, 1 },
  { 1, 2, 3, 0 } };
#endif /* AES_SMALL */

/* Encrypts LENGTH bytes from SRC into DST using the subkeys in
 * ctx->keys and ctx->nrounds rounds.
 *
 * Each block is read as four little-endian 32-bit words and XORed
 * with the first four subkeys. Rounds 1 .. nrounds-1 are table
 * lookups in dtable followed by a subkey XOR; the final round uses
 * sbox only and writes the result back in little-endian order.
 *
 * The per-block stepping of DST and SRC is done by the FOR_BLOCKS
 * macro from macros.h.
 * NOTE(review): FOR_BLOCKS presumably also rejects a LENGTH that is
 * not a multiple of AES_BLOCK_SIZE -- confirm against macros.h. */
void
aes_encrypt(struct aes_ctx *ctx,
	    unsigned length, uint8_t *dst,
	    const uint8_t *src)
{
  FOR_BLOCKS(length, dst, src, AES_BLOCK_SIZE)
    {
      uint32_t wtxt[4];		/* working ciphertext */
      unsigned i;
      unsigned round;

      /* Get clear text, using little-endian byte order.
       * Also XOR with the first subkey. */
      for (i = 0; i<4; i++)
	wtxt[i] = LE_READ_UINT32(src + 4*i) ^ ctx->keys[i];

      for (round = 1; round < ctx->nrounds; round++)
	{
	  uint32_t t[4];
	  unsigned j;

#if DEBUG
	  /* The casts make the arguments match the conversion
	   * specifiers whatever type uint32_t is defined as. */
	  fprintf(stderr, "encrypt, round: %u\n  wtxt: ", round);
	  for (j = 0; j<4; j++)
	    fprintf(stderr, "%08lx, ", (unsigned long) wtxt[j]);
	  fprintf(stderr, "\n  key: ");
	  for (j = 0; j<4; j++)
	    fprintf(stderr, "%08lx, ",
		    (unsigned long) ctx->keys[4*round + j]);
	  fprintf(stderr, "\n");
#endif
	  /* The row shift counts C1, C2 and C3 are (1, 2, 3) */
	  /* What's the best way to order this loop? Ideally,
	   * we'd want to keep both t and wtxt in registers. */

#if AES_SMALL
	  /* Single-table variant: the other three lookups are taken
	   * from dtable[0] and moved into place with ROTRBYTE. */
	  for (j=0; j<4; j++)
	    t[j] =   dtable[0][ B0(wtxt[j]) ] ^
	      ROTRBYTE( dtable[0][ B1(wtxt[idx[1][j]]) ]^
			ROTRBYTE( dtable[0][ B2(wtxt[idx[2][j]]) ] ^
				  ROTRBYTE(dtable[0][ B3(wtxt[idx[3][j]]) ])));
#else /* !AES_SMALL */

	  /* FIXME: Figure out how the indexing should really be done.
	   * It looks like this code shifts the rows in the wrong
	   * direction, but it passes the testsuite. */
	  /* NOTE(review): with the little-endian load above, byte i of
	   * wtxt[j] is row i of column j, so taking byte i from word
	   * (j + i) mod 4 appears to agree with the FIPS 197 ShiftRows
	   * definition -- confirm before dropping the FIXME. */
	  t[0] = (  dtable[0][ B0(wtxt[0]) ]
		  ^ dtable[1][ B1(wtxt[1]) ]
		  ^ dtable[2][ B2(wtxt[2]) ]
		  ^ dtable[3][ B3(wtxt[3]) ]);
	  t[1] = (  dtable[0][ B0(wtxt[1]) ]
		  ^ dtable[1][ B1(wtxt[2]) ]
		  ^ dtable[2][ B2(wtxt[3]) ]
		  ^ dtable[3][ B3(wtxt[0]) ]);
	  t[2] = (  dtable[0][ B0(wtxt[2]) ]
		  ^ dtable[1][ B1(wtxt[3]) ]
		  ^ dtable[2][ B2(wtxt[0]) ]
		  ^ dtable[3][ B3(wtxt[1]) ]);
	  t[3] = (  dtable[0][ B0(wtxt[3]) ]
		  ^ dtable[1][ B1(wtxt[0]) ]
		  ^ dtable[2][ B2(wtxt[1]) ]
		  ^ dtable[3][ B3(wtxt[2]) ]);
#endif /* !AES_SMALL */
#if DEBUG
	  fprintf(stderr, "\n  t: ");
	  for (j = 0; j<4; j++)
	    fprintf(stderr, "%08lx, ", (unsigned long) t[j]);
	  fprintf(stderr, "\n");
#endif

	  for (j = 0; j<4; j++)
	    wtxt[j] = t[j] ^ ctx->keys[4*round + j];
	}
      /* Final round. The loop above leaves round == ctx->nrounds, so
       * 4*round indexes the last group of four subkeys. */
      {
	uint32_t cipher;

	/* FIXME: Figure out how the indexing should really be done.
	 * It looks like this code shifts the rows in the wrong
	 * direction, but it passes the testsuite. */

	cipher = (   (uint32_t) sbox[ B0(wtxt[0]) ]
		  | ((uint32_t) sbox[ B1(wtxt[1]) ] << 8)
		  | ((uint32_t) sbox[ B2(wtxt[2]) ] << 16)
		  | ((uint32_t) sbox[ B3(wtxt[3]) ] << 24));
#if DEBUG
	fprintf(stderr, "  t[0]: %lx, key: %lx\n",
		(unsigned long) cipher,
		(unsigned long) ctx->keys[4*round]);
#endif
	cipher ^= ctx->keys[4*round];

	LE_WRITE_UINT32(dst, cipher);

	cipher = (   (uint32_t) sbox[ B0(wtxt[1]) ]
		  | ((uint32_t) sbox[ B1(wtxt[2]) ] << 8)
		  | ((uint32_t) sbox[ B2(wtxt[3]) ] << 16)
		  | ((uint32_t) sbox[ B3(wtxt[0]) ] << 24));
#if DEBUG
	fprintf(stderr, "  t[1]: %lx, key: %lx\n",
		(unsigned long) cipher,
		(unsigned long) ctx->keys[4*round + 1]);
#endif
	cipher ^= ctx->keys[4*round + 1];

	LE_WRITE_UINT32(dst + 4, cipher);

	cipher = (   (uint32_t) sbox[ B0(wtxt[2]) ]
		  | ((uint32_t) sbox[ B1(wtxt[3]) ] << 8)
		  | ((uint32_t) sbox[ B2(wtxt[0]) ] << 16)
		  | ((uint32_t) sbox[ B3(wtxt[1]) ] << 24));
#if DEBUG
	fprintf(stderr, "  t[2]: %lx, key: %lx\n",
		(unsigned long) cipher,
		(unsigned long) ctx->keys[4*round + 2]);
#endif
	cipher ^= ctx->keys[4*round + 2];

	LE_WRITE_UINT32(dst + 8, cipher);

	cipher = (   (uint32_t) sbox[ B0(wtxt[3]) ]
		  | ((uint32_t) sbox[ B1(wtxt[0]) ] << 8)
		  | ((uint32_t) sbox[ B2(wtxt[1]) ] << 16)
		  | ((uint32_t) sbox[ B3(wtxt[2]) ] << 24));
#if DEBUG
	fprintf(stderr, "  t[3]: %lx, key: %lx\n",
		(unsigned long) cipher,
		(unsigned long) ctx->keys[4*round + 3]);
#endif
	cipher ^= ctx->keys[4*round + 3];

	LE_WRITE_UINT32(dst + 12, cipher);
      }
    }
}

#if 1
/* Decrypts LENGTH bytes from SRC into DST using the subkeys in
 * ctx->ikeys and ctx->nrounds rounds.
 *
 * Same structure as aes_encrypt, but with the itable and isbox
 * tables and with byte i taken from word (j - i) mod 4. The subkeys
 * in ctx->ikeys are consumed in increasing index order.
 *
 * The per-block stepping of DST and SRC is done by the FOR_BLOCKS
 * macro from macros.h.
 * NOTE(review): FOR_BLOCKS presumably also rejects a LENGTH that is
 * not a multiple of AES_BLOCK_SIZE -- confirm against macros.h. */
void
aes_decrypt(struct aes_ctx *ctx,
	    unsigned length, uint8_t *dst,
	    const uint8_t *src)
{
#if DEBUG
  {
    /* Dump the whole inverse key schedule. Everything goes to
     * stderr, so the dump stays in one stream with the per-round
     * trace below. */
    unsigned i, j;
    fprintf(stderr, "subkeys:\n");
    for (j = 0; j<=ctx->nrounds; j++)
      {
	fprintf(stderr, " %u: ", j);
	for (i = 0; i<4; i++)
	  fprintf(stderr, "%08lx, ",
		  (unsigned long) ctx->ikeys[i + 4*j]);
	fprintf(stderr, "\n");
      }
  }
#endif
  FOR_BLOCKS(length, dst, src, AES_BLOCK_SIZE)
    {
      uint32_t wtxt[4];		/* working ciphertext */
      unsigned i;
      unsigned round;

      /* Get cipher text, using little-endian byte order.
       * Also XOR with the first subkey. */
      for (i = 0; i<4; i++)
	wtxt[i] = LE_READ_UINT32(src + 4*i) ^ ctx->ikeys[i];

      for (round = 1; round < ctx->nrounds; round++)
	{
	  uint32_t t[4];
	  unsigned j;

#if DEBUG
	  /* The casts make the arguments match the conversion
	   * specifiers whatever type uint32_t is defined as. */
	  fprintf(stderr, "decrypt, round: %u\n  wtxt: ", round);
	  for (j = 0; j<4; j++)
	    fprintf(stderr, "%08lx, ", (unsigned long) wtxt[j]);
	  fprintf(stderr, "\n  key: ");
	  for (j = 0; j<4; j++)
	    fprintf(stderr, "%08lx, ",
		    (unsigned long) ctx->ikeys[4*round + j]);
	  fprintf(stderr, "\n");
#endif
	  /* The row shift counts C1, C2 and C3 are (1, 2, 3) */
	  /* What's the best way to order this loop? Ideally,
	   * we'd want to keep both t and wtxt in registers. */

#if AES_SMALL
	  /* Single-table variant: the other three lookups are taken
	   * from itable[0] and moved into place with ROTRBYTE. */
	  for (j=0; j<4; j++)
	    t[j] =   itable[0][ B0(wtxt[j]) ] ^
	      ROTRBYTE( itable[0][ B1(wtxt[iidx[1][j]]) ]^
			ROTRBYTE( itable[0][ B2(wtxt[iidx[2][j]]) ] ^
				  ROTRBYTE(itable[0][ B3(wtxt[iidx[3][j]]) ])));
#else /* !AES_SMALL */
	  /* FIXME: Figure out how the indexing should really be done.
	   * It looks like this code shifts the rows in the wrong
	   * direction, but it passes the testsuite. */
	  /* NOTE(review): with the little-endian load above, byte i of
	   * wtxt[j] is row i of column j, so taking byte i from word
	   * (j - i) mod 4 appears to agree with the FIPS 197
	   * InvShiftRows definition -- confirm before dropping the
	   * FIXME. */
	  t[0] = (  itable[0][ B0(wtxt[0]) ] /* 0 1 2 3 */
		  ^ itable[1][ B1(wtxt[3]) ]
		  ^ itable[2][ B2(wtxt[2]) ]
		  ^ itable[3][ B3(wtxt[1]) ]);
	  t[1] = (  itable[0][ B0(wtxt[1]) ] /* 3 0 1 2 */
		  ^ itable[1][ B1(wtxt[0]) ]
		  ^ itable[2][ B2(wtxt[3]) ]
		  ^ itable[3][ B3(wtxt[2]) ]);
	  t[2] = (  itable[0][ B0(wtxt[2]) ] /* 2 3 0 1 */
		  ^ itable[1][ B1(wtxt[1]) ]
		  ^ itable[2][ B2(wtxt[0]) ]
		  ^ itable[3][ B3(wtxt[3]) ]);
	  t[3] = (  itable[0][ B0(wtxt[3]) ] /* 1 2 3 0 */
		  ^ itable[1][ B1(wtxt[2]) ]
		  ^ itable[2][ B2(wtxt[1]) ]
		  ^ itable[3][ B3(wtxt[0]) ]);
#endif /* !AES_SMALL */
#if DEBUG
	  fprintf(stderr, "  t: ");
	  for (j = 0; j<4; j++)
	    fprintf(stderr, "%08lx, ", (unsigned long) t[j]);
	  fprintf(stderr, "\n");
#endif

	  for (j = 0; j<4; j++)
	    wtxt[j] = t[j] ^ ctx->ikeys[4*round + j];
	}
      /* Final round. The loop above leaves round == ctx->nrounds, so
       * 4*round indexes the last group of four subkeys. */
      {
	uint32_t clear;

	/* FIXME: Figure out how the indexing should really be done.
	 * It looks like this code shifts the rows in the wrong
	 * direction, but it passes the testsuite. */

	clear = (   (uint32_t) isbox[ B0(wtxt[0]) ]
		 | ((uint32_t) isbox[ B1(wtxt[3]) ] << 8)
		 | ((uint32_t) isbox[ B2(wtxt[2]) ] << 16)
		 | ((uint32_t) isbox[ B3(wtxt[1]) ] << 24));
#if DEBUG
	fprintf(stderr, "  t[0]: %lx, key: %lx\n",
		(unsigned long) clear,
		(unsigned long) ctx->ikeys[4*round]);
#endif
	clear ^= ctx->ikeys[4*round];

	LE_WRITE_UINT32(dst, clear);

	clear = (   (uint32_t) isbox[ B0(wtxt[1]) ]
		 | ((uint32_t) isbox[ B1(wtxt[0]) ] << 8)
		 | ((uint32_t) isbox[ B2(wtxt[3]) ] << 16)
		 | ((uint32_t) isbox[ B3(wtxt[2]) ] << 24));
#if DEBUG
	fprintf(stderr, "  t[1]: %lx, key: %lx\n",
		(unsigned long) clear,
		(unsigned long) ctx->ikeys[4*round + 1]);
#endif
	clear ^= ctx->ikeys[4*round + 1];

	LE_WRITE_UINT32(dst + 4, clear);

	clear = (   (uint32_t) isbox[ B0(wtxt[2]) ]
		 | ((uint32_t) isbox[ B1(wtxt[1]) ] << 8)
		 | ((uint32_t) isbox[ B2(wtxt[0]) ] << 16)
		 | ((uint32_t) isbox[ B3(wtxt[3]) ] << 24));
#if DEBUG
	fprintf(stderr, "  t[2]: %lx, key: %lx\n",
		(unsigned long) clear,
		(unsigned long) ctx->ikeys[4*round+2]);
#endif
	clear ^= ctx->ikeys[4*round + 2];

	LE_WRITE_UINT32(dst + 8, clear);

	clear = (   (uint32_t) isbox[ B0(wtxt[3]) ]
		 | ((uint32_t) isbox[ B1(wtxt[2]) ] << 8)
		 | ((uint32_t) isbox[ B2(wtxt[1]) ] << 16)
		 | ((uint32_t) isbox[ B3(wtxt[0]) ] << 24));
#if DEBUG
	fprintf(stderr, "  t[3]: %lx, key: %lx\n",
		(unsigned long) clear,
		(unsigned long) ctx->ikeys[4*round+3]);
#endif
	clear ^= ctx->ikeys[4*round + 3];

	LE_WRITE_UINT32(dst + 12, clear);
      }
    }
}

#else
/* Disabled alternative implementation of aes_decrypt, kept for
 * reference. Unlike the version above it starts from the subkeys at
 * ctx->ikeys + 4*ctx->nrounds and walks down to ctx->ikeys, and it
 * always uses the iidx table, which is only defined when AES_SMALL
 * is set.
 * NOTE(review): itbl is not defined in this file -- check that
 * aes-internal.h still provides it before enabling this branch. */

/* Key addition that also packs every byte in the key to a word rep. */
static void
key_addition_8to32(const uint8_t *txt, const uint32_t *keys, uint32_t *out)
{
  const uint8_t *ptr;
  unsigned i, j;
  uint32_t val;

  ptr = txt;
  for (i=0; i<4; i++)
    {
      /* FIXME: Use the READ_UINT32 or LE_READ_UINT32 macro. */
      val = 0;
      /* Widen before shifting: a plain uint8_t is promoted to int,
       * and shifting a byte >= 0x80 left by 24 would overflow it. */
      for (j=0; j<4; j++)
	val |= ((uint32_t) *ptr++ << 8*j);
      out[i] = keys[i]^val;
    }
}

/* XORs the four words at TXT with the four subkeys at KEYS and
 * stores the result in OUT. */
static void
key_addition32(const uint32_t *txt, const uint32_t *keys, uint32_t *out)
{
  unsigned i;

  for (i=0; i<4; i++)
    out[i] = keys[i] ^ txt[i];
}

/* XORs the four words at TXT with the four subkeys at KEYS and
 * stores the result at OUT as 16 bytes, least significant byte of
 * each word first. */
static void
key_addition32to8(const uint32_t *txt, const uint32_t *keys, uint8_t *out)
{
  uint8_t *ptr;
  unsigned i, j;
  uint32_t val;

  ptr = out;
  for (i=0; i<4; i++)
    {
      /* FIXME: Use WRITE_UINT32 or LE_WRITE_UINT32 */
      val = txt[i] ^ keys[i];
      for (j=0; j<4; j++)
	*ptr++ = (val >> 8*j) & 0xff;
    }
}

void
aes_decrypt(struct aes_ctx *ctx,
	    unsigned length, uint8_t *dst,
	    const uint8_t *src)
{
  unsigned r, j;
  uint32_t wtxt[4], t[4];		/* working ciphertext */
  uint32_t e;

  assert(!(length % AES_BLOCK_SIZE));

  for (; length;
       length -= AES_BLOCK_SIZE, src += AES_BLOCK_SIZE, dst += AES_BLOCK_SIZE)
    {
      key_addition_8to32(src, ctx->ikeys + 4*ctx->nrounds, wtxt);
      for (r=ctx->nrounds-1; r> 0;  r--)
	{
	  for (j=0; j<4; j++)
	    {
	      t[j] = itbl[wtxt[j] & 0xff] ^
		ROTRBYTE(itbl[(wtxt[iidx[1][j]] >> 8) & 0xff]^
			 ROTRBYTE(itbl[(wtxt[iidx[2][j]] >> 16) & 0xff] ^
				  ROTRBYTE(itbl[(wtxt[iidx[3][j]] >> 24) & 0xff])));
	    }
	  key_addition32(t, ctx->ikeys + r*4, wtxt);
	}
      /* last round is special: there is no mixcolumn, so we can't use the big
	 tables. */
      for (j=0; j<4; j++)
	{
	  /* The masks are built in uint32_t: 0xff << 24 as a plain
	   * int would shift into the sign bit. */
	  e = wtxt[j] & 0xff;
	  e |= (wtxt[iidx[1][j]]) & ((uint32_t) 0xff << 8);
	  e |= (wtxt[iidx[2][j]]) & ((uint32_t) 0xff << 16);
	  e |= (wtxt[iidx[3][j]]) & ((uint32_t) 0xff << 24);
	  t[j] = e;
	}
      for (j=0; j<4; j++)
	t[j] = SUBBYTE(t[j], isbox);

      key_addition32to8(t, ctx->ikeys, dst);
    }
}
#endif