Commit ec5c42a8 authored by Niels Möller's avatar Niels Möller

* aes.c (aes_encrypt): Don't unroll the innerloop.

(aes_encrypt): Don't unroll the loop for the final round.
(aes_decrypt): Likewise, no loop unrolling.

Rev: src/nettle/aes.c:1.9
parent d6ff420b
......@@ -47,19 +47,31 @@
/* Column j gives the shifts used when computing t[j].
* Row i says which byte is used */
#if AES_SMALL
/* FIXME: Figure out how the indexing should really be done. It looks
* like this code shifts the rows in the wrong direction, but it
* passes the testsuite. Perhaps the tables are rotated in the wrong
* direction, but I don't think so. */
/* The row shift counts C1, C2 and C3 are (1, 2, 3) */
static const unsigned idx[4][4] = {
{ 0, 1, 2, 3 },
{ 1, 2, 3, 0 },
{ 2, 3, 0, 1 },
{ 3, 0, 1, 2 } };
#if 0
static const unsigned idx4[4][4] = {
{ 0, 4, 8, 12 },
{ 4, 8, 12, 0 },
{ 8, 12, 0, 4 },
{ 12, 0, 4, 8 } };
#endif
static const unsigned iidx[4][4] = {
{ 0, 1, 2, 3 },
{ 3, 0, 1, 2 },
{ 2, 3, 0, 1 },
{ 1, 2, 3, 0 } };
#endif /* AES_SMALL */
void
aes_encrypt(struct aes_ctx *ctx,
......@@ -82,53 +94,23 @@ aes_encrypt(struct aes_ctx *ctx,
uint32_t t[4];
unsigned j;
#if DEBUG
fprintf(stderr, "encrypt, round: %d\n wtxt: ", round);
for (j = 0; j<4; j++)
fprintf(stderr, "%08x, ", wtxt[j]);
fprintf(stderr, "\n key: ");
for (j = 0; j<4; j++)
fprintf(stderr, "%08x, ", ctx->keys[4*round + j]);
fprintf(stderr, "\n");
#endif
/* The row shift counts C1, C2 and C3 are (1, 2, 3) */
/* What's the best way to order this loop? Ideally,
* we'd want to keep both t and wtxt in registers. */
#if AES_SMALL
for (j=0; j<4; j++)
t[j] = dtable[0][ B0(wtxt[j]) ] ^
ROTRBYTE( dtable[0][ B1(wtxt[idx[1][j]]) ]^
ROTRBYTE( dtable[0][ B2(wtxt[idx[2][j]]) ] ^
ROTRBYTE(dtable[0][ B3(wtxt[idx[3][j]]) ])));
{
#if AES_SMALL
t[j] = dtable[0][ B0(wtxt[j]) ] ^
ROTRBYTE( dtable[0][ B1(wtxt[idx[1][j]]) ]^
ROTRBYTE( dtable[0][ B2(wtxt[idx[2][j]]) ] ^
ROTRBYTE(dtable[0][ B3(wtxt[idx[3][j]]) ])));
#else /* !AES_SMALL */
/* FIXME: Figure out how the indexing should really be done.
* It looks like this code shifts the rows in the wrong
* direction, but it passes the testsuite. */
t[0] = ( dtable[0][ B0(wtxt[0]) ]
^ dtable[1][ B1(wtxt[1]) ]
^ dtable[2][ B2(wtxt[2]) ]
^ dtable[3][ B3(wtxt[3]) ]);
t[1] = ( dtable[0][ B0(wtxt[1]) ]
^ dtable[1][ B1(wtxt[2]) ]
^ dtable[2][ B2(wtxt[3]) ]
^ dtable[3][ B3(wtxt[0]) ]);
t[2] = ( dtable[0][ B0(wtxt[2]) ]
^ dtable[1][ B1(wtxt[3]) ]
^ dtable[2][ B2(wtxt[0]) ]
^ dtable[3][ B3(wtxt[1]) ]);
t[3] = ( dtable[0][ B0(wtxt[3]) ]
^ dtable[1][ B1(wtxt[0]) ]
^ dtable[2][ B2(wtxt[1]) ]
^ dtable[3][ B3(wtxt[2]) ]);
t[j] = ( dtable[0][ B0(wtxt[idx[0][j]]) ]
^ dtable[1][ B1(wtxt[idx[1][j]]) ]
^ dtable[2][ B2(wtxt[idx[2][j]]) ]
^ dtable[3][ B3(wtxt[idx[3][j]]) ]);
#endif /* !AES_SMALL */
#if DEBUG
fprintf(stderr, "\n t: ");
for (j = 0; j<4; j++)
fprintf(stderr, "%08x, ", t[j]);
fprintf(stderr, "\n");
#endif
}
for (j = 0; j<4; j++)
wtxt[j] = t[j] ^ ctx->keys[4*round + j];
......@@ -136,63 +118,29 @@ aes_encrypt(struct aes_ctx *ctx,
/* Final round */
{
uint32_t cipher;
/* FIXME: Figure out how the indexing should really be done.
* It looks like this code shifts the rows in the wrong
* direction, but it passes the testsuite. */
cipher = ( (uint32_t) sbox[ B0(wtxt[0]) ]
| ((uint32_t) sbox[ B1(wtxt[1]) ] << 8)
| ((uint32_t) sbox[ B2(wtxt[2]) ] << 16)
| ((uint32_t) sbox[ B3(wtxt[3]) ] << 24));
unsigned j;
for (j = 0; j<4; j++)
{
/* FIXME: Figure out how the indexing should really be done.
* It looks like this code shifts the rows in the wrong
* direction, but it passes the testsuite. */
cipher = ( (uint32_t) sbox[ B0(wtxt[j]) ]
| ((uint32_t) sbox[ B1(wtxt[idx[1][j]]) ] << 8)
| ((uint32_t) sbox[ B2(wtxt[idx[2][j]]) ] << 16)
| ((uint32_t) sbox[ B3(wtxt[idx[3][j]]) ] << 24));
#if DEBUG
fprintf(stderr, " t[0]: %x, key: %x\n",
cipher, ctx->keys[4*round]);
fprintf(stderr, " t[%d]: %x, key: %x\n",
j, cipher, ctx->keys[4*round + j]);
#endif
cipher ^= ctx->keys[4*round];
cipher ^= ctx->keys[4*round + j];
LE_WRITE_UINT32(dst, cipher);
cipher = ( (uint32_t) sbox[ B0(wtxt[1]) ]
| ((uint32_t) sbox[ B1(wtxt[2]) ] << 8)
| ((uint32_t) sbox[ B2(wtxt[3]) ] << 16)
| ((uint32_t) sbox[ B3(wtxt[0]) ] << 24));
#if DEBUG
fprintf(stderr, " t[1]: %x, key: %x\n",
cipher, ctx->keys[4*round + 1]);
#endif
cipher ^= ctx->keys[4*round + 1];
LE_WRITE_UINT32(dst + 4, cipher);
cipher = ( (uint32_t) sbox[ B0(wtxt[2]) ]
| ((uint32_t) sbox[ B1(wtxt[3]) ] << 8)
| ((uint32_t) sbox[ B2(wtxt[0]) ] << 16)
| ((uint32_t) sbox[ B3(wtxt[1]) ] << 24));
#if DEBUG
fprintf(stderr, " t[2]: %x, key: %x\n",
cipher, ctx->keys[4*round + 2]);
#endif
cipher ^= ctx->keys[4*round + 2];
LE_WRITE_UINT32(dst + 8, cipher);
cipher = ( (uint32_t) sbox[ B0(wtxt[3]) ]
| ((uint32_t) sbox[ B1(wtxt[0]) ] << 8)
| ((uint32_t) sbox[ B2(wtxt[1]) ] << 16)
| ((uint32_t) sbox[ B3(wtxt[2]) ] << 24));
#if DEBUG
fprintf(stderr, " t[3]: %x, key: %x\n",
cipher, ctx->keys[4*round + 3]);
#endif
cipher ^= ctx->keys[4*round + 3];
LE_WRITE_UINT32(dst + 12, cipher);
LE_WRITE_UINT32(dst + 4*j, cipher);
}
}
}
}
#if 1
void
aes_decrypt(struct aes_ctx *ctx,
unsigned length, uint8_t *dst,
......@@ -240,33 +188,24 @@ aes_decrypt(struct aes_ctx *ctx,
/* What's the best way to order this loop? Ideally,
* we'd want to keep both t and wtxt in registers. */
#if AES_SMALL
for (j=0; j<4; j++)
t[j] = itable[0][ B0(wtxt[j]) ] ^
ROTRBYTE( itable[0][ B1(wtxt[iidx[1][j]]) ]^
ROTRBYTE( itable[0][ B2(wtxt[iidx[2][j]]) ] ^
ROTRBYTE(itable[0][ B3(wtxt[iidx[3][j]]) ])));
{
#if AES_SMALL
t[j] = itable[0][ B0(wtxt[j]) ] ^
ROTRBYTE( itable[0][ B1(wtxt[iidx[1][j]]) ]^
ROTRBYTE( itable[0][ B2(wtxt[iidx[2][j]]) ] ^
ROTRBYTE(itable[0][ B3(wtxt[iidx[3][j]]) ])));
#else /* !AES_SMALL */
/* FIXME: Figure out how the indexing should really be done.
* It looks like this code shifts the rows in the wrong
* direction, but it passes the testsuite. */
t[0] = ( itable[0][ B0(wtxt[0]) ] /* 0 1 2 3 */
^ itable[1][ B1(wtxt[3]) ]
^ itable[2][ B2(wtxt[2]) ]
^ itable[3][ B3(wtxt[1]) ]);
t[1] = ( itable[0][ B0(wtxt[1]) ] /* 3 0 1 2 */
^ itable[1][ B1(wtxt[0]) ]
^ itable[2][ B2(wtxt[3]) ]
^ itable[3][ B3(wtxt[2]) ]);
t[2] = ( itable[0][ B0(wtxt[2]) ] /* 2 3 0 1 */
^ itable[1][ B1(wtxt[1]) ]
^ itable[2][ B2(wtxt[0]) ]
^ itable[3][ B3(wtxt[3]) ]);
t[3] = ( itable[0][ B0(wtxt[3]) ] /* 1 2 3 0 */
^ itable[1][ B1(wtxt[2]) ]
^ itable[2][ B2(wtxt[1]) ]
^ itable[3][ B3(wtxt[0]) ]);
/* FIXME: Figure out how the indexing should really be done.
* It looks like this code shifts the rows in the wrong
* direction, but it passes the testsuite. */
for (j=0; j<4; j++)
t[j] = ( itable[0][ B0(wtxt[iidx[0][j]]) ]
^ itable[1][ B1(wtxt[iidx[1][j]]) ]
^ itable[2][ B2(wtxt[iidx[2][j]]) ]
^ itable[3][ B3(wtxt[iidx[3][j]]) ]);
#endif /* !AES_SMALL */
}
#if DEBUG
fprintf(stderr, " t: ");
for (j = 0; j<4; j++)
......@@ -279,148 +218,26 @@ aes_decrypt(struct aes_ctx *ctx,
/* Final round */
{
uint32_t clear;
unsigned j;
for (j = 0; j<4; j++)
{
/* FIXME: Figure out how the indexing should really be done.
* It looks like this code shifts the rows in the wrong
* direction, but it passes the testsuite. */
clear = ( (uint32_t) isbox[ B0(wtxt[j]) ]
| ((uint32_t) isbox[ B1(wtxt[iidx[1][j]]) ] << 8)
| ((uint32_t) isbox[ B2(wtxt[iidx[2][j]]) ] << 16)
| ((uint32_t) isbox[ B3(wtxt[iidx[3][j]]) ] << 24));
/* FIXME: Figure out how the indexing should really be done.
* It looks like this code shifts the rows in the wrong
* direction, but it passes the testsuite. */
clear = ( (uint32_t) isbox[ B0(wtxt[0]) ]
| ((uint32_t) isbox[ B1(wtxt[3]) ] << 8)
| ((uint32_t) isbox[ B2(wtxt[2]) ] << 16)
| ((uint32_t) isbox[ B3(wtxt[1]) ] << 24));
#if DEBUG
fprintf(stderr, " t[0]: %x, key: %x\n",
clear, ctx->ikeys[4*round]);
#endif
clear ^= ctx->ikeys[4*round];
LE_WRITE_UINT32(dst, clear);
clear = ( (uint32_t) isbox[ B0(wtxt[1]) ]
| ((uint32_t) isbox[ B1(wtxt[0]) ] << 8)
| ((uint32_t) isbox[ B2(wtxt[3]) ] << 16)
| ((uint32_t) isbox[ B3(wtxt[2]) ] << 24));
#if DEBUG
fprintf(stderr, " t[1]: %x, key: %x\n",
clear, ctx->ikeys[4*round + 1]);
#endif
clear ^= ctx->ikeys[4*round + 1];
LE_WRITE_UINT32(dst + 4, clear);
clear = ( (uint32_t) isbox[ B0(wtxt[2]) ]
| ((uint32_t) isbox[ B1(wtxt[1]) ] << 8)
| ((uint32_t) isbox[ B2(wtxt[0]) ] << 16)
| ((uint32_t) isbox[ B3(wtxt[3]) ] << 24));
#if DEBUG
fprintf(stderr, " t[2]: %x, key: %x\n",
clear, ctx->ikeys[4*round+2]);
fprintf(stderr, " t[%d]: %x, key: %x\n",
j, clear, ctx->ikeys[4*round + j]);
#endif
clear ^= ctx->ikeys[4*round + 2];
clear ^= ctx->ikeys[4*round + j];
LE_WRITE_UINT32(dst + 8, clear);
clear = ( (uint32_t) isbox[ B0(wtxt[3]) ]
| ((uint32_t) isbox[ B1(wtxt[2]) ] << 8)
| ((uint32_t) isbox[ B2(wtxt[1]) ] << 16)
| ((uint32_t) isbox[ B3(wtxt[0]) ] << 24));
#if DEBUG
fprintf(stderr, " t[3]: %x, key: %x\n",
clear, ctx->ikeys[4*round+3]);
#endif
clear ^= ctx->ikeys[4*round + 3];
LE_WRITE_UINT32(dst + 12, clear);
LE_WRITE_UINT32(dst + 4*j, clear);
}
}
}
}
#else
/* Key addition that also packs every byte in the key to a word rep.
 *
 * Reads 16 bytes from TXT, packs each group of 4 bytes into a 32-bit
 * word in little-endian order (first byte in the least significant
 * position), XORs each word with the corresponding subkey word from
 * KEYS, and stores the four results in OUT. */
static void
key_addition_8to32(const uint8_t *txt, const uint32_t *keys, uint32_t *out)
{
  const uint8_t *ptr;
  unsigned i, j;
  uint32_t val;

  /* FIXME: Use the READ_UINT32 or LE_READ_UINT32 macro. */
  ptr = txt;
  for (i=0; i<4; i++)
    {
      val = 0;
      for (j=0; j<4; j++)
	/* Cast before shifting: *ptr++ alone promotes to (signed) int,
	 * and left-shifting a byte >= 0x80 by 24 would overflow into
	 * the sign bit, which is undefined behavior. */
	val |= ((uint32_t) *ptr++ << 8*j);
      out[i] = keys[i]^val;
    }
}
/* XOR each of the four state words in TXT with the corresponding
 * subkey word from KEYS, writing the four results to OUT. */
static void
key_addition32(const uint32_t *txt, const uint32_t *keys, uint32_t *out)
{
  unsigned n;

  for (n = 0; n < 4; n++)
    out[n] = keys[n] ^ txt[n];
}
/* XOR each state word in TXT with the matching subkey word from KEYS,
 * then unpack every result word into four bytes of OUT, least
 * significant byte first (little-endian).
 *
 * FIXME: Use WRITE_UINT32 or LE_WRITE_UINT32 */
static void
key_addition32to8(const uint32_t *txt, const uint32_t *keys, uint8_t *out)
{
  unsigned word, byte;

  for (word = 0; word < 4; word++)
    {
      uint32_t masked = txt[word] ^ keys[word];

      for (byte = 0; byte < 4; byte++)
	out[4*word + byte] = (masked >> 8*byte) & 0xff;
    }
}
/* Decrypt LENGTH bytes (a whole number of AES blocks) from SRC into
 * DST, using the inverse key schedule in ctx->ikeys.  This is the
 * small-table variant: each round uses the single itbl table with
 * byte rotations instead of four pre-rotated tables. */
void
aes_decrypt(struct aes_ctx *ctx,
unsigned length, uint8_t *dst,
const uint8_t *src)
{
unsigned r, j;
uint32_t wtxt[4], t[4]; /* working ciphertext */
uint32_t e;
/* Only whole blocks are accepted; partial-block handling is the
 * caller's responsibility. */
assert(!(length % AES_BLOCK_SIZE));
/* One iteration per 16-byte block. */
for (; length;
length -= AES_BLOCK_SIZE, src += AES_BLOCK_SIZE, dst += AES_BLOCK_SIZE)
{
/* Initial key addition: pack the block into four little-endian
 * words XORed with the last round's subkeys. */
key_addition_8to32(src, ctx->ikeys + 4*ctx->nrounds, wtxt);
/* Middle rounds, consuming the round keys in reverse order. */
for (r=ctx->nrounds-1; r> 0; r--)
{
/* Combined inverse SubBytes/ShiftRows/MixColumns via the itbl
 * lookup table; iidx[i][j] selects which word supplies byte i
 * of column j (the inverse row shifts). */
for (j=0; j<4; j++)
{
t[j] = itbl[wtxt[j] & 0xff] ^
ROTRBYTE(itbl[(wtxt[iidx[1][j]] >> 8) & 0xff]^
ROTRBYTE(itbl[(wtxt[iidx[2][j]] >> 16) & 0xff] ^
ROTRBYTE(itbl[(wtxt[iidx[3][j]] >> 24) & 0xff])));
}
key_addition32(t, ctx->ikeys + r*4, wtxt);
}
/* last round is special: there is no mixcolumn, so we can't use the big
tables. */
/* Apply the inverse row shifts byte by byte. */
for (j=0; j<4; j++)
{
e = wtxt[j] & 0xff;
e |= (wtxt[iidx[1][j]]) & (0xff << 8);
e |= (wtxt[iidx[2][j]]) & (0xff << 16);
e |= (wtxt[iidx[3][j]]) & (0xff << 24);
t[j] = e;
}
/* Inverse S-box substitution on each word. */
for (j=0; j<4; j++)
t[j] = SUBBYTE(t[j], isbox);
/* Final key addition with the first round keys, unpacking the
 * result into the output buffer. */
key_addition32to8(t, ctx->ikeys, dst);
}
}
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment