From 7de9a19ad465cf016b4442f6643195d2ebc211ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se> Date: Mon, 27 Jan 2014 16:49:08 +0100 Subject: [PATCH] x86_64: Added chacha assembly. --- ChangeLog | 3 + configure.ac | 1 + x86_64/chacha-core-internal.asm | 103 ++++++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+) create mode 100644 x86_64/chacha-core-internal.asm diff --git a/ChangeLog b/ChangeLog index 6c675955..d96a59db 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,8 @@ 2014-01-27 Niels Möller <nisse@lysator.liu.se> + * configure.ac (asm_replace_list): Added chacha-core-internal.asm. + * x86_64/chacha-core-internal.asm: New file. + * examples/nettle-benchmark.c (main): Add benchmarking of chacha. * nettle-internal.c (nettle_chacha): New const struct, for the benchmark. diff --git a/configure.ac b/configure.ac index 6c4b1d28..02606899 100644 --- a/configure.ac +++ b/configure.ac @@ -263,6 +263,7 @@ fi asm_replace_list="aes-encrypt-internal.asm aes-decrypt-internal.asm \ arcfour-crypt.asm camellia-crypt-internal.asm \ md5-compress.asm memxor.asm poly1305-internal.asm \ + chacha-core-internal.asm \ salsa20-crypt.asm salsa20-core-internal.asm \ serpent-encrypt.asm serpent-decrypt.asm \ sha1-compress.asm sha256-compress.asm sha512-compress.asm \ diff --git a/x86_64/chacha-core-internal.asm b/x86_64/chacha-core-internal.asm new file mode 100644 index 00000000..744e0ddf --- /dev/null +++ b/x86_64/chacha-core-internal.asm @@ -0,0 +1,103 @@ +C nettle, low-level cryptographics library +C +C Copyright (C) 2012, 2014 Niels Möller +C +C The nettle library is free software; you can redistribute it and/or modify +C it under the terms of the GNU Lesser General Public License as published by +C the Free Software Foundation; either version 2.1 of the License, or (at your +C option) any later version. 
+C
+C The nettle library is distributed in the hope that it will be useful, but
+C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+C or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+C License for more details.
+C
+C You should have received a copy of the GNU Lesser General Public License
+C along with the nettle library; see the file COPYING.LIB.  If not, write to
+C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+C MA 02111-1301, USA.
+
+C Function arguments, in the order of the C prototype further down.
+define(<DST>, <%rdi>)
+define(<SRC>, <%rsi>)
+define(<COUNT>, <%rdx>)
+
+C Working state: 64 bytes held as four rows of four 32-bit words, one
+C row per SSE register (X0 = bytes 0..15 of the input, X3 = bytes 48..63).
+define(<X0>, <%xmm0>)
+define(<X1>, <%xmm1>)
+define(<X2>, <%xmm2>)
+define(<X3>, <%xmm3>)
+
+C Scratch registers. T0 is clobbered by every QROUND expansion; T1 is
+C only used for the final addition of the input block.
+define(<T0>, <%xmm4>)
+define(<T1>, <%xmm5>)
+
+C QROUND(x0, x1, x2, x3)
+C
+C Four quarter rounds in parallel, one per 32-bit lane. With the rows
+C named a, b, c, d in argument order, the expansion computes
+C
+C   a += b; d ^= a; d <<<= 16;
+C   c += d; b ^= c; b <<<= 12;
+C   a += b; d ^= a; d <<<= 8;
+C   c += d; b ^= c; b <<<= 7;
+C
+C where <<< is a 32-bit left rotation, built from a left shift, a right
+C shift by (32 - n) and an OR. Clobbers T0.
+C
+C The body refers to its arguments, so the registers named at the call
+C site are the ones that get used. It deliberately contains no comments,
+C since m4 would process them as part of the macro body.
+define(<QROUND>, <
+	paddd	$2, $1
+	pxor	$1, $4
+	movaps	$4, T0
+	pslld	<$>16, $4
+	psrld	<$>16, T0
+	por	T0, $4
+
+	paddd	$4, $3
+	pxor	$3, $2
+	movaps	$2, T0
+	pslld	<$>12, $2
+	psrld	<$>20, T0
+	por	T0, $2
+
+	paddd	$2, $1
+	pxor	$1, $4
+	movaps	$4, T0
+	pslld	<$>8, $4
+	psrld	<$>24, T0
+	por	T0, $4
+
+	paddd	$4, $3
+	pxor	$3, $2
+	movaps	$2, T0
+	pslld	<$>7, $2
+	psrld	<$>25, T0
+	por	T0, $2
+>)
+
+	C _chacha_core(uint32_t *dst, const uint32_t *src, unsigned rounds)
+	C
+	C Reads sixteen 32-bit words from src, applies rounds/2 double
+	C rounds (a column round followed by a diagonal round), adds the
+	C original input words to the result and writes sixteen words to
+	C dst. All memory accesses use movups, so neither pointer needs
+	C to be 16-byte aligned.
+	C
+	C rounds is halved and the counter is decremented before it is
+	C tested, so rounds must be at least 2; an odd value is rounded
+	C down. A value below 2 makes the 32-bit counter wrap around.
+	C
+	C Uses %xmm0-%xmm5 and modifies the rounds register and the flags.
+	C NOTE(review): W64_ENTRY/W64_EXIT and XREG are defined outside
+	C this file; presumably W64_ENTRY(3, 6) adapts the three arguments
+	C and six xmm registers for the Windows ABI, and XREG selects the
+	C 32-bit name of a register -- confirm against the machine m4 file.
+	.text
+	ALIGN(16)
+PROLOGUE(_nettle_chacha_core)
+	W64_ENTRY(3, 6)
+
+	C Load the input block, one row per register.
+	movups	(SRC), X0
+	movups	16(SRC), X1
+	movups	32(SRC), X2
+	movups	48(SRC), X3
+
+	C Each iteration below performs two rounds.
+	shrl	$1, XREG(COUNT)
+
+	ALIGN(16)
+.Loop:
+	C Column round.
+	QROUND(X0, X1, X2, X3)
+
+	C Rotate row i by i words, so that the diagonals of the state
+	C line up as columns. With 0x39, word j of the result is word
+	C j+1 (mod 4) of the source; 0x4e gives j+2 and 0x93 gives j+3.
+	pshufd	$0x39, X1, X1
+	pshufd	$0x4e, X2, X2
+	pshufd	$0x93, X3, X3
+
+	C Diagonal round, then the inverse rotations to restore the
+	C row layout.
+	QROUND(X0, X1, X2, X3)
+	pshufd	$0x93, X1, X1
+	pshufd	$0x4e, X2, X2
+	pshufd	$0x39, X3, X3
+
+	decl	XREG(COUNT)
+	jnz	.Loop
+
+	C Add the original input to the permuted state. The input is
+	C re-read from memory rather than kept in registers, and each
+	C 32-byte half of src is loaded before the matching half of dst
+	C is stored.
+	movups	(SRC), T0
+	movups	16(SRC), T1
+	paddd	T0, X0
+	paddd	T1, X1
+	movups	X0,(DST)
+	movups	X1,16(DST)
+	movups	32(SRC), T0
+	movups	48(SRC), T1
+	paddd	T0, X2
+	paddd	T1, X3
+	movups	X2,32(DST)
+	movups	X3,48(DST)
+	W64_EXIT(3, 6)
+	ret
+EPILOGUE(_nettle_chacha_core)
-- 
GitLab