From c4796d9c83961b0ca914ee17c69adfea1d3a0442 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se> Date: Thu, 5 Apr 2007 16:20:39 +0200 Subject: [PATCH] Moved in CVS tree. Also renamed directory sparc to sparc32. Rev: nettle/.bootstrap:1.1 Rev: nettle/.c-style:1.1 Rev: nettle/.cvsignore:1.1 Rev: nettle/AUTHORS:1.1 Rev: nettle/COPYING:1.1 Rev: nettle/COPYING.LIB:1.1 Rev: nettle/ChangeLog:1.1 Rev: nettle/Makefile.in:1.1 Rev: nettle/NEWS:1.1 Rev: nettle/README:1.1 Rev: nettle/TODO:1.1 Rev: nettle/aes-decrypt-internal.c:1.1 Rev: nettle/aes-decrypt.c:1.1 Rev: nettle/aes-encrypt-internal.c:1.1 Rev: nettle/aes-encrypt-table.c:1.1 Rev: nettle/aes-encrypt.c:1.1 Rev: nettle/aes-internal.h:1.1 Rev: nettle/aes-meta.c:1.1 Rev: nettle/aes-set-decrypt-key.c:1.1 Rev: nettle/aes-set-encrypt-key.c:1.1 Rev: nettle/aes.h:1.1 Rev: nettle/aesdata.c:1.1 Rev: nettle/arcfour-crypt.c:1.1 Rev: nettle/arcfour-meta.c:1.1 Rev: nettle/arcfour.c:1.1 Rev: nettle/arcfour.h:1.1 Rev: nettle/arctwo-meta.c:1.1 Rev: nettle/arctwo.c:1.1 Rev: nettle/arctwo.h:1.1 Rev: nettle/asm.m4:1.1 Rev: nettle/asn1.h:1.1 Rev: nettle/base16-decode.c:1.1 Rev: nettle/base16-encode.c:1.1 Rev: nettle/base16-meta.c:1.1 Rev: nettle/base16.h:1.1 Rev: nettle/base64-decode.c:1.1 Rev: nettle/base64-encode.c:1.1 Rev: nettle/base64-meta.c:1.1 Rev: nettle/base64.h:1.1 Rev: nettle/bignum-random.c:1.1 Rev: nettle/bignum.c:1.1 Rev: nettle/bignum.h:1.1 Rev: nettle/blowfish.c:1.1 Rev: nettle/blowfish.h:1.1 Rev: nettle/buffer-init.c:1.1 Rev: nettle/buffer.c:1.1 Rev: nettle/buffer.h:1.1 Rev: nettle/cast128-meta.c:1.1 Rev: nettle/cast128.c:1.1 Rev: nettle/cast128.h:1.1 Rev: nettle/cast128_sboxes.h:1.1 Rev: nettle/cbc.c:1.1 Rev: nettle/cbc.h:1.1 Rev: nettle/config.m4.in:1.1 Rev: nettle/config.make.in:1.1 Rev: nettle/configure.ac:1.1 Rev: nettle/ctr.c:1.1 Rev: nettle/ctr.h:1.1 Rev: nettle/der-iterator.c:1.1 Rev: nettle/der2rsa.c:1.1 Rev: nettle/des-compat.c:1.1 Rev: nettle/des-compat.h:1.1 Rev: 
nettle/des.c:1.1 Rev: nettle/des.h:1.1 Rev: nettle/des3.c:1.1 Rev: nettle/desCode.h:1.1 Rev: nettle/descore.README:1.1 Rev: nettle/desdata.c:1.1 Rev: nettle/desinfo.h:1.1 Rev: nettle/dsa-keygen.c:1.1 Rev: nettle/dsa-sign.c:1.1 Rev: nettle/dsa-verify.c:1.1 Rev: nettle/dsa.c:1.1 Rev: nettle/dsa.h:1.1 Rev: nettle/examples/.c-style:1.1 Rev: nettle/examples/.cvsignore:1.1 Rev: nettle/examples/Makefile.in:1.1 Rev: nettle/examples/io.c:1.1 Rev: nettle/examples/io.h:1.1 Rev: nettle/examples/nettle-benchmark.c:1.1 Rev: nettle/examples/nettle-openssl.c:1.1 Rev: nettle/examples/rsa-decrypt.c:1.1 Rev: nettle/examples/rsa-encrypt-test:1.1 Rev: nettle/examples/rsa-encrypt.c:1.1 Rev: nettle/examples/rsa-keygen.c:1.1 Rev: nettle/examples/rsa-session.h:1.1 Rev: nettle/examples/rsa-sign-test:1.1 Rev: nettle/examples/rsa-sign.c:1.1 Rev: nettle/examples/rsa-verify-test:1.1 Rev: nettle/examples/rsa-verify.c:1.1 Rev: nettle/examples/setup-env:1.1 Rev: nettle/examples/teardown-env:1.1 Rev: nettle/hmac-md5.c:1.1 Rev: nettle/hmac-sha1.c:1.1 Rev: nettle/hmac-sha256.c:1.1 Rev: nettle/hmac.c:1.1 Rev: nettle/hmac.h:1.1 Rev: nettle/index.html:1.1 Rev: nettle/knuth-lfib.c:1.1 Rev: nettle/knuth-lfib.h:1.1 Rev: nettle/list-obj-sizes.awk:1.1 Rev: nettle/macros.h:1.1 Rev: nettle/md2-meta.c:1.1 Rev: nettle/md2.c:1.1 Rev: nettle/md2.h:1.1 Rev: nettle/md4-meta.c:1.1 Rev: nettle/md4.c:1.1 Rev: nettle/md4.h:1.1 Rev: nettle/md5-compat.c:1.1 Rev: nettle/md5-compat.h:1.1 Rev: nettle/md5-compress.c:1.1 Rev: nettle/md5-meta.c:1.1 Rev: nettle/md5.c:1.1 Rev: nettle/md5.h:1.1 Rev: nettle/memxor.c:1.1 Rev: nettle/memxor.h:1.1 Rev: nettle/nettle-internal.c:1.1 Rev: nettle/nettle-internal.h:1.1 Rev: nettle/nettle-meta.h:1.1 Rev: nettle/nettle-types.h:1.1 Rev: nettle/nettle.texinfo:1.1 Rev: nettle/pgp-encode.c:1.1 Rev: nettle/pgp.h:1.1 Rev: nettle/pkcs1-rsa-md5.c:1.1 Rev: nettle/pkcs1-rsa-sha1.c:1.1 Rev: nettle/pkcs1-rsa-sha256.c:1.1 Rev: nettle/pkcs1.c:1.1 Rev: nettle/pkcs1.h:1.1 Rev: nettle/realloc.c:1.1 Rev: 
nettle/realloc.h:1.1 Rev: nettle/rsa-compat.c:1.1 Rev: nettle/rsa-compat.h:1.1 Rev: nettle/rsa-decrypt.c:1.1 Rev: nettle/rsa-encrypt.c:1.1 Rev: nettle/rsa-keygen.c:1.1 Rev: nettle/rsa-md5-sign.c:1.1 Rev: nettle/rsa-md5-verify.c:1.1 Rev: nettle/rsa-sha1-sign.c:1.1 Rev: nettle/rsa-sha1-verify.c:1.1 Rev: nettle/rsa-sha256-sign.c:1.1 Rev: nettle/rsa-sha256-verify.c:1.1 Rev: nettle/rsa-sign.c:1.1 Rev: nettle/rsa-verify.c:1.1 Rev: nettle/rsa.c:1.1 Rev: nettle/rsa.h:1.1 Rev: nettle/rsa2openpgp.c:1.1 Rev: nettle/rsa2sexp.c:1.1 Rev: nettle/serpent-meta.c:1.1 Rev: nettle/serpent.c:1.1 Rev: nettle/serpent.h:1.1 Rev: nettle/serpent_sboxes.h:1.1 Rev: nettle/sexp-format.c:1.1 Rev: nettle/sexp-transport-format.c:1.1 Rev: nettle/sexp-transport.c:1.1 Rev: nettle/sexp.c:1.1 Rev: nettle/sexp.h:1.1 Rev: nettle/sexp2bignum.c:1.1 Rev: nettle/sexp2dsa.c:1.1 Rev: nettle/sexp2rsa.c:1.1 Rev: nettle/sha-example.c:1.1 Rev: nettle/sha.h:1.1 Rev: nettle/sha1-compress.c:1.1 Rev: nettle/sha1-meta.c:1.1 Rev: nettle/sha1.c:1.1 Rev: nettle/sha256-meta.c:1.1 Rev: nettle/sha256.c:1.1 Rev: nettle/shadata.c:1.1 Rev: nettle/sparc32/aes-decrypt-internal.asm:1.1 Rev: nettle/sparc32/aes-encrypt-internal.asm:1.1 Rev: nettle/sparc32/aes.m4:1.1 Rev: nettle/sparc32/arcfour-crypt.asm:1.1 Rev: nettle/sparc32/machine.m4:1.1 Rev: nettle/sparc64/aes-decrypt-internal.asm:1.1 Rev: nettle/sparc64/aes-encrypt-internal.asm:1.1 Rev: nettle/sparc64/arcfour-crypt.asm:1.1 Rev: nettle/sparc64/machine.m4:1.1 Rev: nettle/testsuite/.c-style:1.1 Rev: nettle/testsuite/.cvsignore:1.1 Rev: nettle/testsuite/.test-rules.make:1.1 Rev: nettle/testsuite/Makefile.in:1.1 Rev: nettle/testsuite/aes-test.c:1.1 Rev: nettle/testsuite/aes-test.out:1.1 Rev: nettle/testsuite/arcfour-test.c:1.1 Rev: nettle/testsuite/arctwo-test.c:1.1 Rev: nettle/testsuite/base16-test.c:1.1 Rev: nettle/testsuite/base64-test.c:1.1 Rev: nettle/testsuite/bignum-test.c:1.1 Rev: nettle/testsuite/blowfish-test.c:1.1 Rev: nettle/testsuite/buffer-test.c:1.1 Rev: 
nettle/testsuite/cast128-test.c:1.1 Rev: nettle/testsuite/cbc-test.c:1.1 Rev: nettle/testsuite/ctr-test.c:1.1 Rev: nettle/testsuite/cxx-test.cxx:1.1 Rev: nettle/testsuite/des-compat-test.c:1.1 Rev: nettle/testsuite/des-test.c:1.1 Rev: nettle/testsuite/des3-test.c:1.1 Rev: nettle/testsuite/dsa-keygen-test.c:1.1 Rev: nettle/testsuite/dsa-test.c:1.1 Rev: nettle/testsuite/gold-bug.txt:1.1 Rev: nettle/testsuite/hmac-test.c:1.1 Rev: nettle/testsuite/knuth-lfib-test.c:1.1 Rev: nettle/testsuite/md2-test.c:1.1 Rev: nettle/testsuite/md4-test.c:1.1 Rev: nettle/testsuite/md5-compat-test.c:1.1 Rev: nettle/testsuite/md5-test.c:1.1 Rev: nettle/testsuite/pkcs1-conv-test:1.1 Rev: nettle/testsuite/pkcs1-test.c:1.1 Rev: nettle/testsuite/rsa-encrypt-test.c:1.1 Rev: nettle/testsuite/rsa-keygen-test.c:1.1 Rev: nettle/testsuite/rsa-test.c:1.1 Rev: nettle/testsuite/rsa2sexp-test.c:1.1 Rev: nettle/testsuite/serpent-test.c:1.1 Rev: nettle/testsuite/sexp-conv-test:1.1 Rev: nettle/testsuite/sexp-format-test.c:1.1 Rev: nettle/testsuite/sexp-test.c:1.1 Rev: nettle/testsuite/sexp2rsa-test.c:1.1 Rev: nettle/testsuite/sha1-huge-test.c:1.1 Rev: nettle/testsuite/sha1-test.c:1.1 Rev: nettle/testsuite/sha256-test.c:1.1 Rev: nettle/testsuite/symbols-test:1.1 Rev: nettle/testsuite/teardown-env:1.1 Rev: nettle/testsuite/testutils.c:1.1 Rev: nettle/testsuite/testutils.h:1.1 Rev: nettle/testsuite/twofish-test.c:1.1 Rev: nettle/testsuite/yarrow-test.c:1.1 Rev: nettle/testsuite/yarrow-test.out:1.1 Rev: nettle/tools/.c-style:1.1 Rev: nettle/tools/.cvsignore:1.1 Rev: nettle/tools/.gdbinit:1.1 Rev: nettle/tools/Makefile.in:1.1 Rev: nettle/tools/input.c:1.1 Rev: nettle/tools/input.h:1.1 Rev: nettle/tools/misc.c:1.1 Rev: nettle/tools/misc.h:1.1 Rev: nettle/tools/nettle-lfib-stream.c:1.1 Rev: nettle/tools/output.c:1.1 Rev: nettle/tools/output.h:1.1 Rev: nettle/tools/parse.c:1.1 Rev: nettle/tools/parse.h:1.1 Rev: nettle/tools/pkcs1-conv.c:1.1 Rev: nettle/tools/sexp-conv.c:1.1 Rev: nettle/twofish-meta.c:1.1 Rev: 
nettle/twofish.c:1.1 Rev: nettle/twofish.h:1.1 Rev: nettle/x86/aes-decrypt-internal.asm:1.1 Rev: nettle/x86/aes-encrypt-internal.asm:1.1 Rev: nettle/x86/aes.m4:1.1 Rev: nettle/x86/arcfour-crypt.asm:1.1 Rev: nettle/x86/machine.m4:1.1 Rev: nettle/x86/md5-compress.asm:1.1 Rev: nettle/x86/sha1-compress.asm:1.1 Rev: nettle/yarrow.h:1.1 Rev: nettle/yarrow256.c:1.1 Rev: nettle/yarrow_key_event.c:1.1 --- ChangeLog | 4 + Makefile.in | 4 +- configure.ac | 2 +- sparc32/aes-decrypt-internal.asm | 132 ++++++++++++++++++ sparc32/aes-encrypt-internal.asm | 156 +++++++++++++++++++++ sparc32/aes.m4 | 83 +++++++++++ sparc32/arcfour-crypt.asm | 230 +++++++++++++++++++++++++++++++ sparc32/machine.m4 | 0 8 files changed, 608 insertions(+), 3 deletions(-) create mode 100644 sparc32/aes-decrypt-internal.asm create mode 100644 sparc32/aes-encrypt-internal.asm create mode 100644 sparc32/aes.m4 create mode 100644 sparc32/arcfour-crypt.asm create mode 100644 sparc32/machine.m4 diff --git a/ChangeLog b/ChangeLog index 4a54c191..e9d8c942 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +2007-04-05 Niels M�ller <nisse@lysator.liu.se> + + * Moved in CVS tree. Also renamed directory sparc to sparc32. + 2007-02-24 Niels M�ller <nisse@lysator.liu.se> * Makefile.in (clean-here): Remove .lib directory. diff --git a/Makefile.in b/Makefile.in index 3d4deb2a..efff0601 100644 --- a/Makefile.in +++ b/Makefile.in @@ -38,7 +38,7 @@ clean distclean mostlyclean maintainer-clean tags: check-here: true -# These targets aren't supported, but they are expected by the +# FIXME: Remove. These targets aren't supported, but they are expected by the # automake generated Makefiles in the lsh build. 
dvi installcheck uninstallcheck: true @@ -302,7 +302,7 @@ distdir: $(DISTFILES) else cp "$(srcdir)/$$f" "$(distdir)" ; \ fi ; \ done - set -e; for d in sparc x86 ; do \ + set -e; for d in sparc32 sparc64 x86 ; do \ mkdir "$(distdir)/$$d" ; \ cp $(srcdir)/$$d/*.asm $(srcdir)/$$d/*.m4 "$(distdir)/$$d" ; \ done diff --git a/configure.ac b/configure.ac index badac4ed..8057d710 100644 --- a/configure.ac +++ b/configure.ac @@ -125,7 +125,7 @@ case "$host_cpu" in #error 64-bit sparc #endif ], [], [ - asm_path=sparc + asm_path=sparc32 ], [ asm_path=sparc64 ]) diff --git a/sparc32/aes-decrypt-internal.asm b/sparc32/aes-decrypt-internal.asm new file mode 100644 index 00000000..750e3d21 --- /dev/null +++ b/sparc32/aes-decrypt-internal.asm @@ -0,0 +1,132 @@ +C -*- mode: asm; asm-comment-char: ?C; -*- +C nettle, low-level cryptographics library +C +C Copyright (C) 2002, 2005 Niels M�ller +C +C The nettle library is free software; you can redistribute it and/or modify +C it under the terms of the GNU Lesser General Public License as published by +C the Free Software Foundation; either version 2.1 of the License, or (at your +C option) any later version. +C +C The nettle library is distributed in the hope that it will be useful, but +C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +C License for more details. +C +C You should have received a copy of the GNU Lesser General Public License +C along with the nettle library; see the file COPYING.LIB. If not, write to +C the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +C MA 02111-1307, USA. 
+
+include_src(<sparc32/aes.m4>)
+
+C Arguments
+define(<CTX>, <%i0>)	C struct aes_context *ctx
+define(<T>, <%i1>)	C const struct aes_table *T
+define(<LENGTH>,<%i2>)	C unsigned length, in bytes
+define(<DST>, <%i3>)	C uint8_t *dst
+define(<SRC>, <%i4>)	C uint8_t *src
+
+C AES state, two copies for unrolling
+
+define(<W0>, <%l0>)
+define(<W1>, <%l1>)
+define(<W2>, <%l2>)
+define(<W3>, <%l3>)
+
+define(<X0>, <%l4>)
+define(<X1>, <%l5>)
+define(<X2>, <%l6>)
+define(<X3>, <%l7>)
+
+C %o0-%o3 are used for loop invariants T0-T3
+define(<KEY>, <%o4>)	C pointer to the subkey words for the current round
+define(<ROUND>, <%o5>)	C remaining iterations of the round loop
+
+C %g1, %g2, %g3 are TMP1, TMP2 and TMP3
+
+C The sparc32 stack frame looks like
+C
+C %fp - 4: OS-dependent link field
+C %fp - 8: OS-dependent link field
+C %fp - 104: OS register save area.
+define(<FRAME_SIZE>, 104)
+
+	.file "aes-decrypt-internal.asm"
+
+	C _aes_decrypt(struct aes_context *ctx,
+	C	       const struct aes_table *T,
+	C	       unsigned length, uint8_t *dst,
+	C	       uint8_t *src)
+
+	.section	".text"
+	.align 16
+	.proc	020
+
+PROLOGUE(_nettle_aes_decrypt)
+
+	save	%sp, -FRAME_SIZE, %sp
+	cmp	LENGTH, 0
+	be	.Lend	C nothing to do for a zero length
+
+	C Loop invariants
+	add	T, AES_TABLE0, T0	C sits in the delay slot of the be above; harmless
+	add	T, AES_TABLE1, T1
+	add	T, AES_TABLE2, T2
+	add	T, AES_TABLE3, T3
+
+.Lblock_loop:	C one iteration per 16-byte block
+	C Read src, and add initial subkey
+	add	CTX, AES_KEYS, KEY	C KEY = first subkey stored in ctx
+	AES_LOAD(0, SRC, KEY, W0)
+	AES_LOAD(1, SRC, KEY, W1)
+	AES_LOAD(2, SRC, KEY, W2)
+	AES_LOAD(3, SRC, KEY, W3)
+
+	C Must be even, and includes the final round
+	ld	[AES_NROUNDS + CTX], ROUND
+	add	SRC, 16, SRC	C advance to the next input block
+	add	KEY, 16, KEY	C step past the subkey used by AES_LOAD
+
+	srl	ROUND, 1, ROUND	C the loop body performs two rounds per iteration
+	C Last two rounds handled specially
+	sub	ROUND, 1, ROUND
+.Lround_loop:
+	C The AES_ROUND macro uses T0,... T3
+	C	Transform W -> X
+	AES_ROUND(0, W0, W3, W2, W1, KEY, X0)
+	AES_ROUND(1, W1, W0, W3, W2, KEY, X1)
+	AES_ROUND(2, W2, W1, W0, W3, KEY, X2)
+	AES_ROUND(3, W3, W2, W1, W0, KEY, X3)
+
+	C	Transform X -> W
+	AES_ROUND(4, X0, X3, X2, X1, KEY, W0)
+	AES_ROUND(5, X1, X0, X3, X2, KEY, W1)
+	AES_ROUND(6, X2, X1, X0, X3, KEY, W2)
+	AES_ROUND(7, X3, X2, X1, X0, KEY, W3)
+
+	subcc	ROUND, 1, ROUND
+	bne	.Lround_loop
+	add	KEY, 32, KEY	C delay slot: two subkeys were consumed
+
+	C Penultimate round
+	AES_ROUND(0, W0, W3, W2, W1, KEY, X0)
+	AES_ROUND(1, W1, W0, W3, W2, KEY, X1)
+	AES_ROUND(2, W2, W1, W0, W3, KEY, X2)
+	AES_ROUND(3, W3, W2, W1, W0, KEY, X3)
+
+	add	KEY, 16, KEY
+	C Final round
+	AES_FINAL_ROUND(0, T, X0, X3, X2, X1, KEY, DST)
+	AES_FINAL_ROUND(1, T, X1, X0, X3, X2, KEY, DST)
+	AES_FINAL_ROUND(2, T, X2, X1, X0, X3, KEY, DST)
+	AES_FINAL_ROUND(3, T, X3, X2, X1, X0, KEY, DST)
+
+	subcc	LENGTH, 16, LENGTH
+	bne	.Lblock_loop
+	add	DST, 16, DST	C delay slot: advance to the next output block
+
+.Lend:
+	ret
+	restore	C delay slot: pop the register window opened by save
+EPILOGUE(_nettle_aes_decrypt)
diff --git a/sparc32/aes-encrypt-internal.asm b/sparc32/aes-encrypt-internal.asm
new file mode 100644
index 00000000..92d6fc0e
--- /dev/null
+++ b/sparc32/aes-encrypt-internal.asm
@@ -0,0 +1,156 @@
+C -*- mode: asm; asm-comment-char: ?C; -*-
+C nettle, low-level cryptographics library
+C
+C Copyright (C) 2002, 2005 Niels Möller
+C
+C The nettle library is free software; you can redistribute it and/or modify
+C it under the terms of the GNU Lesser General Public License as published by
+C the Free Software Foundation; either version 2.1 of the License, or (at your
+C option) any later version.
+C
+C The nettle library is distributed in the hope that it will be useful, but
+C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+C License for more details.
+C
+C You should have received a copy of the GNU Lesser General Public License
+C along with the nettle library; see the file COPYING.LIB. 
If not, write to
+C the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+C MA 02111-1307, USA.
+
+include_src(<sparc32/aes.m4>)
+
+C Arguments
+define(<CTX>, <%i0>)	C struct aes_context *ctx
+define(<T>, <%i1>)	C const struct aes_table *T
+define(<LENGTH>,<%i2>)	C unsigned length, in bytes
+define(<DST>, <%i3>)	C uint8_t *dst
+define(<SRC>, <%i4>)	C uint8_t *src
+
+C AES state, two copies for unrolling
+
+define(<W0>, <%l0>)
+define(<W1>, <%l1>)
+define(<W2>, <%l2>)
+define(<W3>, <%l3>)
+
+define(<X0>, <%l4>)
+define(<X1>, <%l5>)
+define(<X2>, <%l6>)
+define(<X3>, <%l7>)
+
+C %o0-%o3 are used for loop invariants T0-T3
+define(<KEY>, <%o4>)	C pointer to the subkey words for the current round
+define(<ROUND>, <%o5>)	C remaining iterations of the round loop
+
+C %g1, %g2, %g3 are TMP1, TMP2 and TMP3
+
+C I'm still slightly confused by the frame layout, specified in
+C "SYSTEM V APPLICATION BINARY INTERFACE SPARC Processor Supplement".
+C However, Sun's cc generates a 104 byte stack frame for a function
+C with no local variables, so that should be good enough for us too.
+
+C The sparc32 stack frame looks like
+C
+C %fp - 4: OS-dependent link field
+C %fp - 8: OS-dependent link field
+C %fp - 104: OS register save area
+define(<FRAME_SIZE>, 104)
+
+	.file "aes-encrypt-internal.asm"
+
+	C _aes_encrypt(struct aes_context *ctx,
+	C	       const struct aes_table *T,
+	C	       unsigned length, uint8_t *dst,
+	C	       uint8_t *src)
+
+	.section	".text"
+	.align 16
+	.proc	020
+
+PROLOGUE(_nettle_aes_encrypt)
+
+	save	%sp, -FRAME_SIZE, %sp
+	cmp	LENGTH, 0
+	be	.Lend	C nothing to do for a zero length
+
+	C Loop invariants
+	add	T, AES_TABLE0, T0	C sits in the delay slot of the be above; harmless
+	add	T, AES_TABLE1, T1
+	add	T, AES_TABLE2, T2
+	add	T, AES_TABLE3, T3
+
+.Lblock_loop:	C one iteration per 16-byte block
+	C Read src, and add initial subkey
+	add	CTX, AES_KEYS, KEY	C KEY = first subkey stored in ctx
+	AES_LOAD(0, SRC, KEY, W0)
+	AES_LOAD(1, SRC, KEY, W1)
+	AES_LOAD(2, SRC, KEY, W2)
+	AES_LOAD(3, SRC, KEY, W3)
+
+	C Must be even, and includes the final round
+	ld	[AES_NROUNDS + CTX], ROUND
+	add	SRC, 16, SRC	C advance to the next input block
+	add	KEY, 16, KEY	C step past the subkey used by AES_LOAD
+
+	srl	ROUND, 1, ROUND	C the loop body performs two rounds per iteration
+	C Last two rounds handled specially
+	sub	ROUND, 1, ROUND
+.Lround_loop:
+	C The AES_ROUND macro uses T0,... T3
+	C	Transform W -> X
+	AES_ROUND(0, W0, W1, W2, W3, KEY, X0)
+	AES_ROUND(1, W1, W2, W3, W0, KEY, X1)
+	AES_ROUND(2, W2, W3, W0, W1, KEY, X2)
+	AES_ROUND(3, W3, W0, W1, W2, KEY, X3)
+
+	C	Transform X -> W
+	AES_ROUND(4, X0, X1, X2, X3, KEY, W0)
+	AES_ROUND(5, X1, X2, X3, X0, KEY, W1)
+	AES_ROUND(6, X2, X3, X0, X1, KEY, W2)
+	AES_ROUND(7, X3, X0, X1, X2, KEY, W3)
+
+	subcc	ROUND, 1, ROUND
+	bne	.Lround_loop
+	add	KEY, 32, KEY	C delay slot: two subkeys were consumed
+
+	C Penultimate round
+	AES_ROUND(0, W0, W1, W2, W3, KEY, X0)
+	AES_ROUND(1, W1, W2, W3, W0, KEY, X1)
+	AES_ROUND(2, W2, W3, W0, W1, KEY, X2)
+	AES_ROUND(3, W3, W0, W1, W2, KEY, X3)
+
+	add	KEY, 16, KEY
+	C Final round
+	AES_FINAL_ROUND(0, T, X0, X1, X2, X3, KEY, DST)
+	AES_FINAL_ROUND(1, T, X1, X2, X3, X0, KEY, DST)
+	AES_FINAL_ROUND(2, T, X2, X3, X0, X1, KEY, DST)
+	AES_FINAL_ROUND(3, T, X3, X0, X1, X2, KEY, DST)
+
+	subcc	LENGTH, 16, LENGTH
+	bne	.Lblock_loop
+	add	DST, 16, DST	C delay slot: advance to the next output block
+
+.Lend:
+	ret
+	restore	C delay slot: pop the register window opened by save
+EPILOGUE(_nettle_aes_encrypt)
+
+C Some stats from adriana.lysator.liu.se (SS1000E, 85 MHz), for AES 128
+
+C 1: nettle-1.13 C-code
+C 2: nettle-1.13 assembler
+C 3: New C-code
+C 4: New assembler, first correct version
+C 5: New assembler, with basic scheduling of AES_ROUND.
+C 6: New assembler, with loop invariants T0-T3.
+C 7: New assembler, with basic scheduling also of AES_FINAL_ROUND.
+
+C	MB/s	cycles/block	Code size (bytes)
+C 1	1.2	1107		592
+C 2	2.3	572		1032
+C 3	2.1	627
+C 4	1.8	722
+C 5	2.6	496
+C 6	3.0	437
+C 7	3.1	415		1448
diff --git a/sparc32/aes.m4 b/sparc32/aes.m4
new file mode 100644
index 00000000..05f465e0
--- /dev/null
+++ b/sparc32/aes.m4
@@ -0,0 +1,83 @@
+C Used as temporaries by the AES macros
+define(<TMP1>, <%g1>)
+define(<TMP2>, <%g2>)
+define(<TMP3>, <%g3>)
+
+C Loop invariants used by AES_ROUND
+define(<T0>, <%o0>)
+define(<T1>, <%o1>)
+define(<T2>, <%o2>)
+define(<T3>, <%o3>)
+
+C AES_LOAD(i, src, key, res)
+define(<AES_LOAD>, <
+	ldub	[$2 + 4*$1], $4	C byte 0 becomes bits 0-7 of res
+	ldub	[$2 + 4*$1 + 1], TMP1
+	ldub	[$2 + 4*$1 + 2], TMP2
+	sll	TMP1, 8, TMP1	C byte 1 becomes bits 8-15
+
+	or	$4, TMP1, $4
+	ldub	[$2 + 4*$1+3], TMP1
+	sll	TMP2, 16, TMP2	C byte 2 becomes bits 16-23
+	or	$4, TMP2, $4
+
+	sll	TMP1, 24, TMP1	C byte 3 becomes bits 24-31
+	C	Get subkey
+	ld	[$3 + 4*$1], TMP2
+	or	$4, TMP1, $4
+	xor	$4, TMP2, $4>)dnl
+
+C AES_ROUND(i, a, b, c, d, key, res)
+C Computes one word of the AES round
+C Indexes the four tables through the loop-invariant registers T0-T3
+C FIXME: Needs better instruction scheduling, and perhaps more temporaries
+C Alternatively, we can use a single table and some rotations
+define(<AES_ROUND>, <
+	and	$2, 0xff, TMP1		C  0
+	srl	$3, 6, TMP2		C  1
+	sll	TMP1, 2, TMP1		C  0
+	and	TMP2, 0x3fc, TMP2	C  1
+	ld	[T0 + TMP1], $7		C  0	E0
+	srl	$4, 14, TMP1		C  2
+	ld	[T1 + TMP2], TMP2	C  1
+	and	TMP1, 0x3fc, TMP1	C  2
+	xor	$7, TMP2, $7		C  1	E1
+	srl	$5, 22, TMP2		C  3
+	ld	[T2 + TMP1], TMP1	C  2
+	and	TMP2, 0x3fc, TMP2	C  3
+	xor	$7, TMP1, $7		C  2	E2
+	ld	[$6 + 4*$1], TMP1	C  4
+	ld	[T3 + TMP2], TMP2	C  3
+	xor	$7, TMP1, $7		C  4	E4
+	xor	$7, TMP2, $7		C  3	E3
+>)dnl
+
+C AES_FINAL_ROUND(i, T, a, b, c, d, key, dst)
+C Compute one word in the final round function. Output is converted to
+C octets and stored at dst. Relies on AES_SBOX being zero.
+define(<AES_FINAL_ROUND>, <
+	C	Load subkey
+	ld	[$7 + 4*$1], TMP3
+
+	and	$3, 0xff, TMP1		C  0
+	srl	$4, 8, TMP2		C  1
+	ldub	[$2 + TMP1], TMP1	C  0
+	and	TMP2, 0xff, TMP2	C  1
+	xor	TMP3, TMP1, TMP1	C  0
+	ldub	[$2 + TMP2], TMP2	C  1
+	stb	TMP1, [$8 + 4*$1]	C  0	E0
+	srl	$5, 16, TMP1		C  2
+	srl	TMP3, 8, TMP3		C  1
+	and	TMP1, 0xff, TMP1	C  2
+	xor	TMP3, TMP2, TMP2	C  1
+	ldub	[$2 + TMP1], TMP1	C  2
+	stb	TMP2, [$8 + 4*$1 + 1]	C  1	E1
+	srl	$6, 24, TMP2		C  3
+	srl	TMP3, 8, TMP3		C  2
+	ldub	[$2 + TMP2], TMP2	C  3
+	xor	TMP3, TMP1, TMP1	C  2
+	srl	TMP3, 8, TMP3		C  3
+	stb	TMP1, [$8 + 4*$1 + 2]	C  2	E2
+	xor	TMP3, TMP2, TMP2	C  3
+	stb	TMP2, [$8 + 4*$1 + 3]	C  3	E3
+>)
diff --git a/sparc32/arcfour-crypt.asm b/sparc32/arcfour-crypt.asm
new file mode 100644
index 00000000..4d8dac94
--- /dev/null
+++ b/sparc32/arcfour-crypt.asm
@@ -0,0 +1,230 @@
+C -*- mode: asm; asm-comment-char: ?C; -*-
+C nettle, low-level cryptographics library
+C
+C Copyright (C) 2002, 2005 Niels Möller
+C
+C The nettle library is free software; you can redistribute it and/or modify
+C it under the terms of the GNU Lesser General Public License as published by
+C the Free Software Foundation; either version 2.1 of the License, or (at your
+C option) any later version.
+C
+C The nettle library is distributed in the hope that it will be useful, but
+C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+C License for more details.
+C
+C You should have received a copy of the GNU Lesser General Public License
+C along with the nettle library; see the file COPYING.LIB. If not, write to
+C the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+C MA 02111-1307, USA.
+
+C Define to YES, to enable the complex code to special case SRC
+C and DST with compatible alignment.
+
+define(<WITH_ALIGN>, <YES>)	C NOTE(review): not referenced anywhere else in this file
+
+C Registers
+
+define(<CTX>, <%i0>)	C struct arcfour_ctx *ctx
+define(<LENGTH>,<%i1>)	C bytes still to process
+define(<DST>, <%i2>)	C uint8_t *dst
+define(<SRC>, <%i3>)	C const uint8_t *src
+
+define(<I1>, <%i4>)	C I1 and I2 alternate as current and next index
+define(<I2>, <%i5>)
+define(<J>, <%g1>)
+define(<SI>, <%g2>)	C byte loaded from CTX at the current index
+define(<SJ>, <%g3>)	C byte loaded from CTX at index J
+define(<TMP>, <%o0>)	C receives the key stream byte
+define(<TMP2>, <%o1>)	C holds the next source byte
+define(<N>, <%o2>)	C iteration count of the 4-byte main loop
+define(<DATA>, <%o3>)	C output bytes being assembled
+
+C Computes the next byte of the key stream. As input, i must
+C already point to the index for the current access, the index
+C for the next access is stored in ni. The resulting key byte is
+C stored in res.
+C ARCFOUR_BYTE(i, ni, res)
+define(<ARCFOUR_BYTE>, <
+	ldub	[CTX + $1], SI	C load the state byte at index i
+	add	$1, 1, $2	C ni = i + 1, reduced mod 256 below
+	add	J, SI, J
+	and	J, 0xff, J	C keep J in the range 0..255
+	ldub	[CTX + J], SJ
+	and	$2, 0xff, $2
+	stb	SI, [CTX + J]	C swap the two state bytes, first half
+	add	SI, SJ, SI
+	and	SI, 0xff, SI	C index of the key stream byte
+	stb	SJ, [CTX + $1]	C swap, second half
+	ldub	[CTX + SI], $3
+>)dnl
+
+C FIXME: Consider using the callers window
+define(<FRAME_SIZE>, 104)
+
+	.file "arcfour-crypt.asm"
+
+	C arcfour_crypt(struct arcfour_ctx *ctx,
+	C               unsigned length, uint8_t *dst,
+	C               const uint8_t *src)
+
+	.section	".text"
+	.align 16
+	.proc	020
+
+PROLOGUE(nettle_arcfour_crypt)
+
+	save	%sp, -FRAME_SIZE, %sp
+	cmp	LENGTH, 0
+	be	.Lend	C zero length: return without touching the context
+	nop
+
+	C Load both I and J
+	lduh	[CTX + ARCFOUR_I], I1	C one 16-bit load fetches both
+	and	I1, 0xff, J	C low byte is J
+	srl	I1, 8, I1	C high byte is I
+
+	C We want an even address for DST
+	andcc	DST, 1, %g0
+	add	I1, 1 ,I1	C I1 = index of the first access
+	beq	.Laligned2	C DST is already even
+	and	I1, 0xff, I1	C delay slot: wrap the index to 0..255
+
+	mov	I1, I2
+	ldub	[SRC], DATA
+	ARCFOUR_BYTE(I2, I1, TMP)
+	subcc	LENGTH, 1, LENGTH
+	add	SRC, 1, SRC
+	xor	DATA, TMP, DATA
+	stb	DATA, [DST]
+	beq	.Ldone	C tests the subcc above: that was the only byte
+	add	DST, 1, DST
+
+.Laligned2:
+
+	cmp	LENGTH, 2
+	blu	.Lfinal1	C a single byte is left
+	C Harmless delay slot instruction
+	andcc	DST, 2, %g0
+	beq	.Laligned4	C DST is already 4-byte aligned
+	nop
+
+	ldub	[SRC], DATA
+	ARCFOUR_BYTE(I1, I2, TMP)
+	ldub	[SRC + 1], TMP2
+	add	SRC, 2, SRC
+	xor	DATA, TMP, DATA
+	sll	DATA, 8, DATA	C first byte goes in the upper half of the halfword
+
+	ARCFOUR_BYTE(I2, I1, TMP)
+	xor	TMP2, TMP, TMP
+	subcc	LENGTH, 2, LENGTH
+	or	DATA, TMP, DATA
+
+	sth	DATA, [DST]
+	beq	.Ldone	C tests the subcc above: nothing left
+	add	DST, 2, DST
+
+.Laligned4:
+	cmp	LENGTH, 4
+	blu	.Lfinal2	C fewer than four bytes left
+	C Harmless delay slot instruction
+	srl	LENGTH, 2, N	C N = number of whole 4-byte groups
+
+.Loop:
+	C Main loop, with aligned writes
+
+	C FIXME: Could check if SRC is aligned, and
+	C use 32-bit reads in that case.
+
+	ldub	[SRC], DATA
+	ARCFOUR_BYTE(I1, I2, TMP)
+	ldub	[SRC + 1], TMP2
+	xor	TMP, DATA, DATA
+	sll	DATA, 8, DATA
+
+	ARCFOUR_BYTE(I2, I1, TMP)
+	xor	TMP2, TMP, TMP
+	ldub	[SRC + 2], TMP2
+	or	TMP, DATA, DATA
+	sll	DATA, 8, DATA
+
+	ARCFOUR_BYTE(I1, I2, TMP)
+	xor	TMP2, TMP, TMP
+	ldub	[SRC + 3], TMP2
+	or	TMP, DATA, DATA
+	sll	DATA, 8, DATA
+
+	ARCFOUR_BYTE(I2, I1, TMP)
+	xor	TMP2, TMP, TMP
+	or	TMP, DATA, DATA
+	subcc	N, 1, N
+	add	SRC, 4, SRC
+	st	DATA, [DST]	C one 32-bit store, first byte in the top bits
+	bne	.Loop
+	add	DST, 4, DST
+
+	andcc	LENGTH, 3, LENGTH	C bytes left over after the word loop
+	beq	.Ldone
+	nop
+
+.Lfinal2:
+	C DST address must be 2-aligned
+	cmp	LENGTH, 2
+	blu	.Lfinal1	C a single byte is left
+	nop
+
+	ldub	[SRC], DATA
+	ARCFOUR_BYTE(I1, I2, TMP)
+	ldub	[SRC + 1], TMP2
+	add	SRC, 2, SRC
+	xor	DATA, TMP, DATA
+	sll	DATA, 8, DATA
+
+	ARCFOUR_BYTE(I2, I1, TMP)
+	xor	TMP2, TMP, TMP
+	or	DATA, TMP, DATA
+
+	sth	DATA, [DST]
+	beq	.Ldone	C flags are still those of cmp LENGTH, 2: exactly two bytes were left
+	add	DST, 2, DST
+
+.Lfinal1:
+	mov	I1, I2
+	ldub	[SRC], DATA
+	ARCFOUR_BYTE(I2, I1, TMP)
+	xor	DATA, TMP, DATA
+	stb	DATA, [DST]
+
+.Ldone:
+	C Save back I and J
+	sll	I2, 8, I2	C I2 is the index used by the last ARCFOUR_BYTE
+	or	I2, J, I2
+	stuh	I2, [CTX + ARCFOUR_I]	C one 16-bit store writes both back
+
+.Lend:
+	ret
+	restore	C delay slot: pop the register window opened by save
+
+EPILOGUE(nettle_arcfour_crypt)
+
+C Some stats from adriana.lysator.liu.se (SS1000E, 85 MHz), for arcfour
+
+C 1: nettle-1.13 C-code
+C 2: First working version of the assembler code
+C 3: Moved load of source byte
+C 4: Better instruction scheduling
+C 5: Special case SRC and DST with compatible alignment
+C 6: After bugfix (reorder of ld [CTX+SI+SJ] and st [CTX + SI])
+C 7: Unrolled only twice, with byte-accesses
+C 8: Unrolled, using 8-bit reads and aligned 32-bit writes.
+
+C	MB/s	cycles/byte	Code size (bytes)
+C 1:	6.6	12.4		132
+C 2:	5.6	14.5		116
+C 3:	6.0	13.5		116
+C 4:	6.5	12.4		116
+C 5:	7.9	10.4		496
+C 6:	8.3	9.7		496
+C 7:	6.7	12.1		268
+C 8:	8.3	9.8		768
diff --git a/sparc32/machine.m4 b/sparc32/machine.m4
new file mode 100644
index 00000000..e69de29b
-- 
GitLab