Commit 67bae1b9 authored by Yuriy M. Kaminskiy, committed by Niels Möller

Add --enable-fat support for arm neon chacha20

On BCM2837B0 (Cortex-A53) @1.4GHz (Raspberry Pi 3B+),
Before:
`gnutls-cli --benchmark-ciphers`
       CHACHA20-POLY1305 (16384) 51.54 MB/sec
`gnutls-cli --benchmark-tls-ciphers`:
       ECDHE_RSA_CHACHA20_POLY1305 (payload 1400)  21.31 MB/sec
       ECDHE_RSA_CHACHA20_POLY1305 (payload 15360)  24.60 MB/sec
`nettle-benchmark`
		 chacha     encrypt   71.90
		 chacha     decrypt   71.89
	chacha_poly1305     encrypt   48.17
	chacha_poly1305     decrypt   48.17
	chacha_poly1305      update  146.03

After:
`gnutls-cli --benchmark-ciphers`
       CHACHA20-POLY1305 (16384) 68.44 MB/sec
`gnutls-cli --benchmark-tls-ciphers`:
       ECDHE_RSA_CHACHA20_POLY1305 (payload 1400) 27.25 MB/sec
       ECDHE_RSA_CHACHA20_POLY1305 (payload 15360) 32.41 MB/sec
`nettle-benchmark`
		 chacha     encrypt  106.00
		 chacha     decrypt  105.94
	chacha_poly1305     encrypt   65.94
	chacha_poly1305     decrypt   65.96
	chacha_poly1305      update  175.24
parent f9f14f18
C arm/fat/chacha-core-internal-2.asm
ifelse(<
Copyright (C) 2015 Niels Möller
This file is part of GNU Nettle.
GNU Nettle is free software: you can redistribute it and/or
modify it under the terms of either:
* the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your
option) any later version.
or
* the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your
option) any later version.
or both in parallel, as here.
GNU Nettle is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
>)
dnl PROLOGUE(_nettle_chacha_core) picked up by configure
dnl Define fat_transform so that its argument gets a _neon suffix appended.
dnl NOTE(review): presumably the included source applies fat_transform to
dnl its symbol name so the function is emitted as _nettle_chacha_core_neon
dnl - confirm against the PROLOGUE macro.
define(<fat_transform>, <$1_neon>)
dnl Reuse the plain NEON implementation as the body of this file.
include_src(<arm/neon/chacha-core-internal.asm>)
......@@ -51,6 +51,13 @@
#include "macros.h"
/* For fat builds: when a native (CPU specific) chacha core is configured,
   declare the C implementation under the name _nettle_chacha_core_c and
   redirect every later use of _nettle_chacha_core in this file to that
   name via the macro below.
   NOTE(review): presumably this keeps the C version from clashing with
   the native symbol while remaining selectable at run time - confirm. */
#if HAVE_NATIVE_chacha_core
void
_nettle_chacha_core_c(uint32_t *dst, const uint32_t *src, unsigned rounds);
#define _nettle_chacha_core _nettle_chacha_core_c
#endif
/* Debug switch: defaults to 0 unless the build has already defined it. */
#ifndef CHACHA_DEBUG
# define CHACHA_DEBUG 0
#endif
......
......@@ -472,6 +472,7 @@ asm_replace_list="aes-encrypt-internal.asm aes-decrypt-internal.asm \
# Assembler files which generate additional object files if they are used.
# The value is a whitespace-separated list of file names.
# NOTE(review): the -2 entries look like second implementations of a
# routine, as used by fat builds -- confirm against the asm directories.
asm_nettle_optional_list="gcm-hash8.asm cpuid.asm \
aes-encrypt-internal-2.asm aes-decrypt-internal-2.asm memxor-2.asm \
chacha-core-internal-2.asm \
salsa20-core-internal-2.asm sha1-compress-2.asm sha256-compress-2.asm \
sha3-permute-2.asm sha512-compress-2.asm \
umac-nh-n-2.asm umac-nh-2.asm"
......@@ -573,6 +574,7 @@ AC_SUBST([IF_ASM])
AH_VERBATIM([HAVE_NATIVE],
[/* Define to 1 each of the following for which a native (ie. CPU specific)
implementation of the corresponding routine exists. */
#undef HAVE_NATIVE_chacha_core
#undef HAVE_NATIVE_ecc_192_modp
#undef HAVE_NATIVE_ecc_192_redc
#undef HAVE_NATIVE_ecc_224_modp
......
......@@ -171,6 +171,10 @@ DECLARE_FAT_FUNC(_nettle_umac_nh_n, umac_nh_n_func)
DECLARE_FAT_FUNC_VAR(umac_nh_n, umac_nh_n_func, c);
DECLARE_FAT_FUNC_VAR(umac_nh_n, umac_nh_n_func, neon);
/* ChaCha core: declare the fat entry point and its two candidate
   implementations (portable C and ARM NEON).

   DECLARE_FAT_FUNC_VAR takes the name WITHOUT the _nettle_ prefix: the
   umac_nh_n declarations pass "umac_nh_n" and the resulting symbol used in
   fat_init is _nettle_umac_nh_n_c.  Passing "_chacha_core" would therefore
   yield _nettle__chacha_core_c / _nettle__chacha_core_neon, which do not
   match the _nettle_chacha_core_c / _nettle_chacha_core_neon symbols that
   fat_init assigns to _nettle_chacha_core_vec. */
DECLARE_FAT_FUNC(_nettle_chacha_core, chacha_core_func)
DECLARE_FAT_FUNC_VAR(chacha_core, chacha_core_func, c);
DECLARE_FAT_FUNC_VAR(chacha_core, chacha_core_func, neon);
static void CONSTRUCTOR
fat_init (void)
{
......@@ -212,6 +216,7 @@ fat_init (void)
nettle_sha3_permute_vec = _nettle_sha3_permute_neon;
_nettle_umac_nh_vec = _nettle_umac_nh_neon;
_nettle_umac_nh_n_vec = _nettle_umac_nh_n_neon;
_nettle_chacha_core_vec = _nettle_chacha_core_neon;
}
else
{
......@@ -222,6 +227,7 @@ fat_init (void)
nettle_sha3_permute_vec = _nettle_sha3_permute_c;
_nettle_umac_nh_vec = _nettle_umac_nh_c;
_nettle_umac_nh_n_vec = _nettle_umac_nh_n_c;
_nettle_chacha_core_vec = _nettle_chacha_core_c;
}
}
......@@ -267,3 +273,7 @@ DEFINE_FAT_FUNC(_nettle_umac_nh_n, void,
unsigned length, const uint8_t *msg),
(out, n, key, length, msg))
/* Define the _nettle_chacha_core entry point with return type void, the
   given prototype, and the given argument list.
   NOTE(review): presumably the generated function forwards its arguments
   through _nettle_chacha_core_vec, which fat_init sets to the _neon or _c
   variant - confirm against the DEFINE_FAT_FUNC macro. */
DEFINE_FAT_FUNC(_nettle_chacha_core, void,
(uint32_t *dst, const uint32_t *src, unsigned rounds),
(dst, src, rounds))
......@@ -174,3 +174,5 @@ typedef void sha512_compress_func (uint64_t *state, const uint8_t *input, const
/* Function type for umac_nh implementations: takes a key, a length and a
   message pointer, and returns a 64-bit value. */
typedef uint64_t umac_nh_func (const uint32_t *key, unsigned length, const uint8_t *msg);
/* Function type for umac_nh_n implementations.
   NOTE(review): presumably writes n 64-bit results to out - confirm. */
typedef void umac_nh_n_func (uint64_t *out, unsigned n, const uint32_t *key,
unsigned length, const uint8_t *msg);
/* Function type shared by the ChaCha core implementations: takes a
   destination word pointer, a read-only source word pointer and a round
   count, and returns nothing. */
typedef void chacha_core_func (uint32_t *dst,
			       const uint32_t *src,
			       unsigned rounds);
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment