Commit d0507f2f authored by Niels Möller's avatar Niels Möller

Deleted directories and files that have moved in the tree

Rev: src/nettle/aes.c-hacked:1.2(DEAD)
Rev: src/nettle/sparc/aes-decrypt-internal.asm:1.5(DEAD)
Rev: src/nettle/sparc/aes-encrypt-internal.asm:1.15(DEAD)
Rev: src/nettle/sparc/aes.m4:1.2(DEAD)
Rev: src/nettle/sparc/arcfour-crypt.asm:1.9(DEAD)
Rev: src/nettle/sparc/machine.m4:1.14(DEAD)
Rev: src/nettle/texinfo.tex:1.2(DEAD)
parent a0ff098f
/* aes.c
*
* The aes/rijndael block cipher.
*/
/* nettle, low-level cryptographics library
*
* Copyright (C) 2000, 2001 Rafael R. Sevilla, Niels Möller
*
* The nettle library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation; either version 2.1 of the License, or (at your
* option) any later version.
*
* The nettle library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
* License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with the nettle library; see the file COPYING.LIB. If not, write to
* the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
* MA 02111-1307, USA.
*/
/* Originally written by Rafael R. Sevilla <dido@pacific.net.ph> */
#if HAVE_CONFIG_H
# include "config.h"
#endif
#include <assert.h>
#include "aes-internal.h"
#include "macros.h"
#ifndef AES_DEBUG
# define AES_DEBUG 0
#endif
#if AES_DEBUG
# include <stdio.h>
/* Debug helper: writes four 32-bit words to stderr on one line, in the
 * form "aes, <r>, <name>: w0, w1, w2, w3, " followed by a newline.
 *
 *   name  label printed in front of the words
 *   r     round number printed in the prefix
 *   data  the words to print; data[0] through data[3] are read
 */
static void
d4(const char *name, unsigned r, const uint32_t *data)
{
  unsigned j;

  /* r is unsigned, so the matching conversion is %u rather than %d. */
  fprintf(stderr, "aes, %u, %s: ", r, name);

  for (j = 0; j<4; j++)
    {
      /* uint32_t is not necessarily unsigned int. Converting to
       * unsigned long makes the argument match the conversion on
       * every platform; the printed digits are unchanged. */
      fprintf(stderr, "%08lx, ", (unsigned long) data[j]);
    }

  fprintf(stderr, "\n");
}
/* Debug helper: writes two labelled 32-bit words to stderr on one line,
 * in the form "aes, <aname>: <a>, <bname>, <b>".
 *
 *   aname, a  label and value of the first word
 *   bname, b  label and value of the second word
 */
static void
d2(const char *aname, uint32_t a, const char *bname, uint32_t b)
{
  /* uint32_t is not necessarily unsigned int, so the values are
   * converted to unsigned long to match the %lx conversions. The
   * printed digits are unchanged. */
  fprintf(stderr, "aes, %s: %08lx, %s, %08lx\n",
	  aname, (unsigned long) a, bname, (unsigned long) b);
}
/* Debug helper: writes one labelled 32-bit word to stderr, in the form
 * "aes, <name>: <a>".
 *
 *   name  label printed in front of the value
 *   a     the word to print, shown as eight hexadecimal digits
 */
static void
d1(const char *name, uint32_t a)
{
  /* uint32_t is not necessarily unsigned int, so the value is
   * converted to unsigned long to match the %lx conversion. The
   * printed digits are unchanged. */
  fprintf(stderr, "aes, %s: %08lx\n",
	  name, (unsigned long) a);
}
/* Tracing wrappers used when AES_DEBUG is enabled. The argument is a
 * complete parenthesized argument list, so call sites use double
 * parentheses, e.g. D1(("t", t)), and the macro simply pastes that
 * list after the helper's name. */
# define D4(x) d4 x
# define D2(x) d2 x
/* D1 forwards to d1, which takes a label and a single word. Forwarding
 * to the four-argument d2 would make every D1((name, word)) call site
 * fail to compile. */
# define D1(x) d1 x
#else
/* Tracing disabled: each wrapper expands to nothing, so a call such as
 * D4(("wtxt", round, wtxt)); is reduced to an empty statement and its
 * arguments are never evaluated. */
# define D4(x)
# define D2(x)
# define D1(x)
#endif
/* Octet selectors: Bn(x) yields bits 8n .. 8n+7 of x as a value in the
 * range 0..255, with B0 being the least significant octet. */
#define OCTET_AT(x, n) (((x) >> (8 * (n))) & 0xff)
#define B0(x) OCTET_AT(x, 0)
#define B1(x) OCTET_AT(x, 1)
#define B2(x) OCTET_AT(x, 2)
#define B3(x) OCTET_AT(x, 3)
/* Word indices used when picking the input words for position j:
 * IDX0 is j itself, while IDX1, IDX2 and IDX3 are read from the three
 * rows of the idx array in the table struct. IDX1..IDX3 expect a
 * pointer named T to be in scope at the point of use. */
#define IDX0(j) (j)
#define IDX1(j) (T->idx[0][j])
#define IDX2(j) (T->idx[1][j])
#define IDX3(j) (T->idx[2][j])
/* NOTE: IDX2 can be done as j ^ 2, but that doesn't seem to make much
 * of a difference. */
/* Exchanges the values of two uint32_t pointer lvalues. Each argument
 * is evaluated twice, so neither may have side effects. */
#define SWAP(a, b)                      \
  do {                                  \
    uint32_t *swap_hold = (b);          \
    (b) = (a);                          \
    (a) = swap_hold;                    \
  } while(0)
/* Runs the AES block transformation over LENGTH bytes, reading from SRC
 * and writing to DST.
 *
 *   ctx     supplies the subkey words (ctx->keys) and the round count
 *           (ctx->nrounds)
 *   T       supplies the lookup tables (T->table), the word index
 *           arrays behind the IDX macros, and the byte substitution
 *           table (T->sbox) used in the last round
 *   length  number of bytes to process; the FOR_BLOCKS macro steps
 *           through it in units of AES_BLOCK_SIZE
 *   dst     output buffer
 *   src     input buffer
 */
void
_aes_crypt(const struct aes_ctx *ctx,
	   const struct aes_table *T,
	   unsigned length, uint8_t *dst,
	   const uint8_t *src)
{
  FOR_BLOCKS(length, dst, src, AES_BLOCK_SIZE)
    {
      /* Two four-word state buffers. Every round reads the current
       * state through wtxt and builds the next one through tmp; the
       * two pointers then trade places. */
      uint32_t state_a[4];
      uint32_t state_b[4];
      uint32_t *wtxt = state_a;	/* working ciphertext */
      uint32_t *tmp = state_b;
      unsigned round;
      unsigned i;
      unsigned j;

      /* Fetch the input block as four little-endian words, and XOR
       * each of them with the matching word of the first subkey. */
      for (i = 0; i < 4; i++)
	{
	  uint32_t word = LE_READ_UINT32(src + 4*i);
	  wtxt[i] = word ^ ctx->keys[i];
	}

      /* All rounds except the last one. */
      for (round = 1; round < ctx->nrounds; round++)
	{
	  D4(("wtxt", round, wtxt));
	  D4(("key", round, &ctx->keys[4*round]));

	  /* What's the best way to order this loop? Ideally, we'd
	   * want to keep both t and wtxt in registers. */
	  for (j = 0; j < 4; j++)
	    {
	      /* FIXME: Figure out how the indexing should really be
	       * done. With the current idx arrays, it looks like the
	       * code shifts the rows in the wrong direction. But it
	       * passes the testsuite. Perhaps the tables are rotated
	       * in the wrong direction, but I don't think so. */
	      uint32_t t;

#if AES_SMALL
	      /* One table only: start with the innermost term and
	       * rotate the running value before adding each of the
	       * remaining three. */
	      t = T->table[0][ B3(wtxt[IDX3(j)]) ];
	      t = T->table[0][ B2(wtxt[IDX2(j)]) ] ^ ROTRBYTE(t);
	      t = T->table[0][ B1(wtxt[IDX1(j)]) ] ^ ROTRBYTE(t);
	      t = T->table[0][ B0(wtxt[IDX0(j)]) ] ^ ROTRBYTE(t);
#else /* !AES_SMALL */
	      /* Four tables: one lookup per byte position. */
	      t  = T->table[0][ B0(wtxt[IDX0(j)]) ];
	      t ^= T->table[1][ B1(wtxt[IDX1(j)]) ];
	      t ^= T->table[2][ B2(wtxt[IDX2(j)]) ];
	      t ^= T->table[3][ B3(wtxt[IDX3(j)]) ];
#endif /* !AES_SMALL */

	      D1(("t", t));

	      tmp[j] = t ^ ctx->keys[4*round + j];
	    }

	  /* The freshly written buffer becomes the current state. */
	  {
	    uint32_t *filled = tmp;
	    tmp = wtxt;
	    wtxt = filled;
	  }
	}

      /* Final round. Here round equals the value it had when the loop
       * above stopped, which selects the last subkey. Each output
       * word is assembled from four sbox bytes, XORed with the
       * subkey word, and stored in little-endian order. */
      for (j = 0; j < 4; j++)
	{
	  /* FIXME: Figure out how the indexing should really be done.
	   * It looks like this code shifts the rows in the wrong
	   * direction, but it passes the testsuite. */
	  uint32_t out;

	  out  =  (uint32_t) T->sbox[ B0(wtxt[IDX0(j)]) ];
	  out |= ((uint32_t) T->sbox[ B1(wtxt[IDX1(j)]) ]) << 8;
	  out |= ((uint32_t) T->sbox[ B2(wtxt[IDX2(j)]) ]) << 16;
	  out |= ((uint32_t) T->sbox[ B3(wtxt[IDX3(j)]) ]) << 24;

	  D2(("t", out, "key", ctx->keys[4*round + j]));

	  out ^= ctx->keys[4*round + j];
	  LE_WRITE_UINT32(dst + 4*j, out);
	}
    }
}
C -*- mode: asm; asm-comment-char: ?C; -*-
C nettle, low-level cryptographics library
C
C Copyright (C) 2002, 2005 Niels Möller
C
C The nettle library is free software; you can redistribute it and/or modify
C it under the terms of the GNU Lesser General Public License as published by
C the Free Software Foundation; either version 2.1 of the License, or (at your
C option) any later version.
C
C The nettle library is distributed in the hope that it will be useful, but
C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
C License for more details.
C
C You should have received a copy of the GNU Lesser General Public License
C along with the nettle library; see the file COPYING.LIB. If not, write to
C the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
C MA 02111-1307, USA.
include_src(<sparc/aes.m4>)
C Arguments, received in the incoming registers %i0-%i4
define(<CTX>, <%i0>)
define(<T>, <%i1>)
define(<LENGTH>,<%i2>)
define(<DST>, <%i3>)
define(<SRC>, <%i4>)
C AES state, two copies for unrolling: a round reads one set of
C four words and writes the other
define(<W0>, <%l0>)
define(<W1>, <%l1>)
define(<W2>, <%l2>)
define(<W3>, <%l3>)
define(<X0>, <%l4>)
define(<X1>, <%l5>)
define(<X2>, <%l6>)
define(<X3>, <%l7>)
C %o0-%o3 are used for loop invariants T0-T3
C KEY points at the subkey for the current round and ROUND is the
C counter of the round loop
define(<KEY>, <%o4>)
define(<ROUND>, <%o5>)
C %g1, %g2, %g3 are TMP1, TMP2 and TMP3
C The sparc32 stack frame looks like
C
C %fp - 4: OS-dependent link field
C %fp - 8: OS-dependent link field
C %fp - 104: OS register save area.
C
C FRAME_SIZE is the amount the save instruction in the function
C prologue subtracts from the stack pointer.
define(<FRAME_SIZE>, 104)
.file "aes-decrypt-internal.asm"
C _aes_decrypt(struct aes_context *ctx,
C const struct aes_table *T,
C unsigned length, uint8_t *dst,
C uint8_t *src)
C
C Transforms LENGTH bytes from SRC into DST, one 16-byte block per
C pass of the block loop. The block loop only exits when LENGTH
C reaches exactly zero, so LENGTH is expected to be a multiple
C of 16.
.section ".text"
.align 16
.proc 020
PROLOGUE(_nettle_aes_decrypt)
save %sp, -FRAME_SIZE, %sp
C Return at once when there is no input
cmp LENGTH, 0
be .Lend
C Loop invariants
C The first add below fills the delay slot of the branch above and
C is executed whether or not the branch is taken.
add T, AES_TABLE0, T0
add T, AES_TABLE1, T1
add T, AES_TABLE2, T2
add T, AES_TABLE3, T3
.Lblock_loop:
C Read src, and add initial subkey
add CTX, AES_KEYS, KEY
AES_LOAD(0, SRC, KEY, W0)
AES_LOAD(1, SRC, KEY, W1)
AES_LOAD(2, SRC, KEY, W2)
AES_LOAD(3, SRC, KEY, W3)
C Must be even, and includes the final round
ld [AES_NROUNDS + CTX], ROUND
add SRC, 16, SRC
add KEY, 16, KEY
srl ROUND, 1, ROUND
C Last two rounds handled specially
C The round loop performs two rounds per pass, so it is entered
C with ROUND set to half the round count minus one.
sub ROUND, 1, ROUND
.Lround_loop:
C The AES_ROUND macro uses T0,... T3
C Output word i combines byte 0 of word i, byte 1 of word i-1,
C byte 2 of word i-2 and byte 3 of word i-3, indices taken mod 4.
C Transform W -> X
AES_ROUND(0, W0, W3, W2, W1, KEY, X0)
AES_ROUND(1, W1, W0, W3, W2, KEY, X1)
AES_ROUND(2, W2, W1, W0, W3, KEY, X2)
AES_ROUND(3, W3, W2, W1, W0, KEY, X3)
C Transform X -> W
C Indices 4-7 address the second of the two subkeys at KEY
AES_ROUND(4, X0, X3, X2, X1, KEY, W0)
AES_ROUND(5, X1, X0, X3, X2, KEY, W1)
AES_ROUND(6, X2, X1, X0, X3, KEY, W2)
AES_ROUND(7, X3, X2, X1, X0, KEY, W3)
subcc ROUND, 1, ROUND
bne .Lround_loop
C Delay slot: step KEY past the two subkeys just used
add KEY, 32, KEY
C Penultimate round
AES_ROUND(0, W0, W3, W2, W1, KEY, X0)
AES_ROUND(1, W1, W0, W3, W2, KEY, X1)
AES_ROUND(2, W2, W1, W0, W3, KEY, X2)
AES_ROUND(3, W3, W2, W1, W0, KEY, X3)
add KEY, 16, KEY
C Final round
C Each invocation stores the four bytes of one output word at DST
AES_FINAL_ROUND(0, T, X0, X3, X2, X1, KEY, DST)
AES_FINAL_ROUND(1, T, X1, X0, X3, X2, KEY, DST)
AES_FINAL_ROUND(2, T, X2, X1, X0, X3, KEY, DST)
AES_FINAL_ROUND(3, T, X3, X2, X1, X0, KEY, DST)
subcc LENGTH, 16, LENGTH
bne .Lblock_loop
C Delay slot: advance DST to the next output block
add DST, 16, DST
.Lend:
ret
C Delay slot of the return
restore
EPILOGUE(_nettle_aes_decrypt)
C -*- mode: asm; asm-comment-char: ?C; -*-
C nettle, low-level cryptographics library
C
C Copyright (C) 2002, 2005 Niels Möller
C
C The nettle library is free software; you can redistribute it and/or modify
C it under the terms of the GNU Lesser General Public License as published by
C the Free Software Foundation; either version 2.1 of the License, or (at your
C option) any later version.
C
C The nettle library is distributed in the hope that it will be useful, but
C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
C License for more details.
C
C You should have received a copy of the GNU Lesser General Public License
C along with the nettle library; see the file COPYING.LIB. If not, write to
C the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
C MA 02111-1307, USA.
include_src(<sparc/aes.m4>)
C Arguments, received in the incoming registers %i0-%i4
define(<CTX>, <%i0>)
define(<T>, <%i1>)
define(<LENGTH>,<%i2>)
define(<DST>, <%i3>)
define(<SRC>, <%i4>)
C AES state, two copies for unrolling: a round reads one set of
C four words and writes the other
define(<W0>, <%l0>)
define(<W1>, <%l1>)
define(<W2>, <%l2>)
define(<W3>, <%l3>)
define(<X0>, <%l4>)
define(<X1>, <%l5>)
define(<X2>, <%l6>)
define(<X3>, <%l7>)
C %o0-%o3 are used for loop invariants T0-T3
C KEY points at the subkey for the current round and ROUND is the
C counter of the round loop
define(<KEY>, <%o4>)
define(<ROUND>, <%o5>)
C %g1, %g2, %g3 are TMP1, TMP2 and TMP3
C I'm still slightly confused by the frame layout, specified in
C "SYSTEM V APPLICATION BINARY INTERFACE SPARC Processor Supplement".
C However, Sun's cc generates a 104 byte stack frame for a function
C with no local variables, so that should be good enough for us too.
C The sparc32 stack frame looks like
C
C %fp - 4: OS-dependent link field
C %fp - 8: OS-dependent link field
C %fp - 104: OS register save area
C
C FRAME_SIZE is the amount the save instruction in the function
C prologue subtracts from the stack pointer.
define(<FRAME_SIZE>, 104)
.file "aes-encrypt-internal.asm"
C _aes_encrypt(struct aes_context *ctx,
C const struct aes_table *T,
C unsigned length, uint8_t *dst,
C uint8_t *src)
C
C Transforms LENGTH bytes from SRC into DST, one 16-byte block per
C pass of the block loop. The block loop only exits when LENGTH
C reaches exactly zero, so LENGTH is expected to be a multiple
C of 16.
.section ".text"
.align 16
.proc 020
PROLOGUE(_nettle_aes_encrypt)
save %sp, -FRAME_SIZE, %sp
C Return at once when there is no input
cmp LENGTH, 0
be .Lend
C Loop invariants
C The first add below fills the delay slot of the branch above and
C is executed whether or not the branch is taken.
add T, AES_TABLE0, T0
add T, AES_TABLE1, T1
add T, AES_TABLE2, T2
add T, AES_TABLE3, T3
.Lblock_loop:
C Read src, and add initial subkey
add CTX, AES_KEYS, KEY
AES_LOAD(0, SRC, KEY, W0)
AES_LOAD(1, SRC, KEY, W1)
AES_LOAD(2, SRC, KEY, W2)
AES_LOAD(3, SRC, KEY, W3)
C Must be even, and includes the final round
ld [AES_NROUNDS + CTX], ROUND
add SRC, 16, SRC
add KEY, 16, KEY
srl ROUND, 1, ROUND
C Last two rounds handled specially
C The round loop performs two rounds per pass, so it is entered
C with ROUND set to half the round count minus one.
sub ROUND, 1, ROUND
.Lround_loop:
C The AES_ROUND macro uses T0,... T3
C Output word i combines byte 0 of word i, byte 1 of word i+1,
C byte 2 of word i+2 and byte 3 of word i+3, indices taken mod 4.
C Transform W -> X
AES_ROUND(0, W0, W1, W2, W3, KEY, X0)
AES_ROUND(1, W1, W2, W3, W0, KEY, X1)
AES_ROUND(2, W2, W3, W0, W1, KEY, X2)
AES_ROUND(3, W3, W0, W1, W2, KEY, X3)
C Transform X -> W
C Indices 4-7 address the second of the two subkeys at KEY
AES_ROUND(4, X0, X1, X2, X3, KEY, W0)
AES_ROUND(5, X1, X2, X3, X0, KEY, W1)
AES_ROUND(6, X2, X3, X0, X1, KEY, W2)
AES_ROUND(7, X3, X0, X1, X2, KEY, W3)
subcc ROUND, 1, ROUND
bne .Lround_loop
C Delay slot: step KEY past the two subkeys just used
add KEY, 32, KEY
C Penultimate round
AES_ROUND(0, W0, W1, W2, W3, KEY, X0)
AES_ROUND(1, W1, W2, W3, W0, KEY, X1)
AES_ROUND(2, W2, W3, W0, W1, KEY, X2)
AES_ROUND(3, W3, W0, W1, W2, KEY, X3)
add KEY, 16, KEY
C Final round
C Each invocation stores the four bytes of one output word at DST
AES_FINAL_ROUND(0, T, X0, X1, X2, X3, KEY, DST)
AES_FINAL_ROUND(1, T, X1, X2, X3, X0, KEY, DST)
AES_FINAL_ROUND(2, T, X2, X3, X0, X1, KEY, DST)
AES_FINAL_ROUND(3, T, X3, X0, X1, X2, KEY, DST)
subcc LENGTH, 16, LENGTH
bne .Lblock_loop
C Delay slot: advance DST to the next output block
add DST, 16, DST
.Lend:
ret
C Delay slot of the return
restore
EPILOGUE(_nettle_aes_encrypt)
C Some stats from adriana.lysator.liu.se (SS1000$, 85 MHz), for AES 128
C 1: nettle-1.13 C-code
C 2: nettle-1.13 assembler
C 3: New C-code
C 4: New assembler, first correct version
C 5: New assembler, with basic scheduling of AES_ROUND.
C 6: New assembler, with loop invariants T0-T3.
C 7: New assembler, with basic scheduling also of AES_FINAL_ROUND.
C MB/s cycles/block Code size (bytes)
C 1 1.2 1107 592
C 2 2.3 572 1032
C 3 2.1 627
C 4 1.8 722
C 5 2.6 496
C 6 3.0 437
C 7 3.1 415 1448
C Used as temporaries by the AES macros
C Every macro below overwrites some of these, so no value can be
C kept in them across a macro invocation.
define(<TMP1>, <%g1>)
define(<TMP2>, <%g2>)
define(<TMP3>, <%g3>)
C Loop invariants used by AES_ROUND
C AES_ROUND uses them as the base addresses of its four table
C lookups and never writes them; the including file has to set
C them up before the first AES_ROUND.
define(<T0>, <%o0>)
define(<T1>, <%o1>)
define(<T2>, <%o2>)
define(<T3>, <%o3>)
C AES_LOAD(i, src, key, res)
C Reads the four bytes of word i at src and combines them in
C little-endian order, the byte at the lowest address becoming the
C least significant one. The word is then XORed with word i at key
C and left in res. src is only accessed with byte loads, while key
C is read with a word load. TMP1 and TMP2 are clobbered.
define(<AES_LOAD>, <
ldub [$2 + 4*$1], $4
ldub [$2 + 4*$1 + 1], TMP1
ldub [$2 + 4*$1 + 2], TMP2
sll TMP1, 8, TMP1
or $4, TMP1, $4
ldub [$2 + 4*$1+3], TMP1
sll TMP2, 16, TMP2
or $4, TMP2, $4
sll TMP1, 24, TMP1
C Get subkey
ld [$3 + 4*$1], TMP2
or $4, TMP1, $4
xor $4, TMP2, $4>)dnl
C AES_ROUND(i, a, b, c, d, key, res)
C Computes one word of the AES round
C
C The result left in res is the XOR of five words: the T0 entry
C selected by byte 0 of a, the T1 entry selected by byte 1 of b,
C the T2 entry selected by byte 2 of c, the T3 entry selected by
C byte 3 of d, and word i at key.
C
C Each selector byte is shifted and masked so that it ends up
C multiplied by four, which makes it the byte offset of a 32-bit
C table entry; the mask 0x3fc is 0xff shifted left by two.
C TMP1 and TMP2 are clobbered.
C The number trailing each instruction presumably names the term it
C works on, 0-3 for the table lookups and 4 for the subkey, with
C E marking the last instruction of a term.
C FIXME: Could use registers pointing directly to the four tables
C NOTE(review): the loads below already use T0-T3 as table base
C registers, so the FIXME above looks stale; confirm and remove.
C FIXME: Needs better instruction scheduling, and perhaps more temporaries
C Alternatively, we can use a single table and some rotations
define(<AES_ROUND>, <
and $2, 0xff, TMP1 C 0
srl $3, 6, TMP2 C 1
sll TMP1, 2, TMP1 C 0
and TMP2, 0x3fc, TMP2 C 1
ld [T0 + TMP1], $7 C 0 E0
srl $4, 14, TMP1 C 2
ld [T1 + TMP2], TMP2 C 1
and TMP1, 0x3fc, TMP1 C 2
xor $7, TMP2, $7 C 1 E1
srl $5, 22, TMP2 C 3
ld [T2 + TMP1], TMP1 C 2
and TMP2, 0x3fc, TMP2 C 3
xor $7, TMP1, $7 C 2 E2
ld [$6 + 4*$1], TMP1 C 4
ld [T3 + TMP2], TMP2 C 3
xor $7, TMP1, $7 C 4 E4
xor $7, TMP2, $7 C 3 E3
>)dnl
C AES_FINAL_ROUND(i, T, a, b, c, d, key, dst)
C Compute one word in the final round function. Output is converted to
C octets and stored at dst. Relies on AES_SBOX being zero.
C
C Output byte n, for n from 0 to 3, is the byte found at T plus
C byte n of the corresponding word argument (a for byte 0, b for
C byte 1, c for byte 2, d for byte 3), XORed with byte n of word i
C at key. It is stored at dst + 4*i + n. The subkey word is held in
C TMP3 and shifted right by eight bits between bytes.
C TMP1, TMP2 and TMP3 are clobbered.
C NOTE(review): the body names T directly instead of using its
C second argument, so the macro only works when that argument is T.
define(<AES_FINAL_ROUND>, <
C Load subkey
ld [$7 + 4*$1], TMP3
and $3, 0xff, TMP1 C 0
srl $4, 8, TMP2 C 1
ldub [T + TMP1], TMP1 C 0
and TMP2, 0xff, TMP2 C 1
xor TMP3, TMP1, TMP1 C 0
ldub [T + TMP2], TMP2 C 1
stb TMP1, [$8 + 4*$1] C 0 E0
srl $5, 16, TMP1 C 2
srl TMP3, 8, TMP3 C 1
and TMP1, 0xff, TMP1 C 2
xor TMP3, TMP2, TMP2 C 1
ldub [T + TMP1], TMP1 C 2
stb TMP2, [$8 + 4*$1 + 1] C 1 E1
srl $6, 24, TMP2 C 3
srl TMP3, 8, TMP3 C 2
ldub [T + TMP2], TMP2 C 3
xor TMP3, TMP1, TMP1 C 2
srl TMP3, 8, TMP3 C 3
stb TMP1, [$8 + 4*$1 + 2] C 2 E2
xor TMP3, TMP2, TMP2 C 3
stb TMP2, [$8 + 4*$1 + 3] C 3 E3
>)
C -*- mode: asm; asm-comment-char: ?C; -*-
C nettle, low-level cryptographics library
C
C Copyright (C) 2002, 2005 Niels Möller
C
C The nettle library is free software; you can redistribute it and/or modify
C it under the terms of the GNU Lesser General Public License as published by
C the Free Software Foundation; either version 2.1 of the License, or (at your
C option) any later version.
C
C The nettle library is distributed in the hope that it will be useful, but
C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
C License for more details.
C
C You should have received a copy of the GNU Lesser General Public License
C along with the nettle library; see the file COPYING.LIB. If not, write to
C the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
C MA 02111-1307, USA.
C Define to YES, to enable the complex code to special case SRC
C and DST with compatible alignment.
define(<WITH_ALIGN>, <YES>)
C Registers
C The four arguments arrive in the incoming registers %i0-%i3
define(<CTX>, <%i0>)
define(<LENGTH>,<%i1>)
define(<DST>, <%i2>)
define(<SRC>, <%i3>)
C I1 and I2 presumably serve as the current and next first index
C passed to ARCFOUR_BYTE - TODO confirm against the main loop
define(<I1>, <%i4>)
define(<I2>, <%i5>)
C Used by ARCFOUR_BYTE: J is the running second index, SI and SJ
C hold the two state bytes being exchanged
define(<J>, <%g1>)
define(<SI>, <%g2>)
define(<SJ>, <%g3>)
C Remaining working registers
define(<TMP>, <%o0>)
define(<TMP2>, <%o1>)
define(<N>, <%o2>)
define(<DATA>, <%o3>)
C Computes the next byte of the key stream. As input, i must
C already point to the index for the current access, the index
C for the next access is stored in ni. The resulting key byte is
C stored in res.
C
C State bytes are addressed as CTX plus an index. J carries the
C second index from one invocation to the next and is updated here.
C SI and SJ are clobbered.
C ARCFOUR_BYTE(i, ni, res)
define(<ARCFOUR_BYTE>, <
C Fetch the state byte at i and start computing ni = i + 1
ldub [CTX + $1], SI
add $1, 1, $2
C J = (J + state byte at i) mod 256
add J, SI, J
and J, 0xff, J
ldub [CTX + J], SJ
C Wrap ni to the range 0..255
and $2, 0xff, $2
C Exchange the state bytes at i and J, and form the index of the
C output byte as their sum mod 256
stb SI, [CTX + J]
add SI, SJ, SI
and SI, 0xff, SI
stb SJ, [CTX + $1]
C The key stream byte is the state byte at that index
ldub [CTX + SI], $3
>)dnl
C FIXME: Consider using the callers window
define(<FRAME_SIZE>, 104)
.file "arcfour-crypt.asm"
C arcfour_crypt(struct arcfour_ctx *ctx,
C unsigned length, uint8_t *dst,
C const uint8_t *src)
.section ".text"
.align 16
.proc 020
PROLOGUE(nettle_arcfour_crypt)
save %sp, -FRAME_SIZE, %sp
cmp LENGTH, 0
be .Lend
nop
C Load both I and J
lduh [CTX + ARCFOUR_I], I1
and I1, 0xff, J
srl I1, 8, I1
C We want an even address for DST
andcc DST, 1, %g0
add I1, 1 ,I1
beq .Laligned2
and I1, 0xff, I1
mov I1, I2