diff --git a/ChangeLog b/ChangeLog index 6009ace971d6b42d484804ff1c07eab401e23435..98d7b4c8ccc561e5c43b1317f68fe2ad98f1071a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,9 @@ 2015-01-11 Niels Möller <nisse@lysator.liu.se> + * x86_64/aesni/aes-decrypt-internal.asm: New file. + * x86_64/aesni/aes-encrypt-internal.asm: New file. + * configure.ac: New configure flag --enable-x86-aesni. + * aclocal.m4 (LSH_RPATH_INIT): Handle freebsd, in the same way as gnu/linux, with -Wl,-rpath,. diff --git a/configure.ac b/configure.ac index 76beb3160853abde4dfaafcb90fcc35a558c6d2f..63f59c654db0cc9d4263689981eb5141844ab4e9 100644 --- a/configure.ac +++ b/configure.ac @@ -72,6 +72,10 @@ AC_ARG_ENABLE(arm-neon, AC_HELP_STRING([--enable-arm-neon], [Enable ARM Neon assembly. (default=auto)]),, [enable_arm_neon=auto]) +AC_ARG_ENABLE(x86-aesni, + AC_HELP_STRING([--enable-x86-aesni], [Enable x86_64 aes instructions. (default=no)]),, + [enable_x86_aesni=no]) + AC_ARG_ENABLE(mini-gmp, AC_HELP_STRING([--enable-mini-gmp], [Enable mini-gmp, used instead of libgmp.]),, [enable_mini_gmp=no]) @@ -239,6 +243,9 @@ if test "x$enable_assembler" = xyes ; then [x86_64 | amd64]) if test "$ABI" = 64 ; then asm_path=x86_64 + if test "x$enable_x86_aesni" = xyes ; then + asm_path="x86_64/aesni $asm_path" + fi else asm_path=x86 fi diff --git a/x86_64/aesni/aes-decrypt-internal.asm b/x86_64/aesni/aes-decrypt-internal.asm new file mode 100644 index 0000000000000000000000000000000000000000..1dfbcb4cb24e1f40d0aa17b9ab56d7bf06108bfd --- /dev/null +++ b/x86_64/aesni/aes-decrypt-internal.asm @@ -0,0 +1,96 @@ +C x86_64/aesni/aes-decrypt-internal.asm + + +ifelse(< + Copyright (C) 2015 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. 
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program. If
+   not, see http://www.gnu.org/licenses/.
+>)
+
+C Arguments, in System V AMD64 register order
+define(<ROUNDS>, <%rdi>)
+define(<KEYS>, <%rsi>)
+C define(<TABLE>, <%rdx>)	C Table argument is unused, %rdx is reused below
+define(<LENGTH>, <%rcx>)
+define(<DST>, <%r8>)
+define(<SRC>, <%r9>)
+
+C Pointer to the subkey currently in use
+define(<SUBKEY>, <%rax>)
+C Rounds left for the current block
+define(<COUNT>, <%rdx>)
+
+	.arch bdver2
+	.file "aes-decrypt-internal.asm"
+
+	C _aes_decrypt(unsigned rounds, const uint32_t *keys,
+	C	       const struct aes_table *T,
+	C	       size_t length, uint8_t *dst, uint8_t *src)
+	C Processes length / 16 whole 16-byte blocks, any remainder is ignored.
+	.text
+	ALIGN(16)
+PROLOGUE(_nettle_aes_decrypt)
+	W64_ENTRY(6, 2)
+	shr	$4, LENGTH
+	test	LENGTH, LENGTH
+	jz	.Ldone
+	C The loop counter covers every round but the last (aesdeclast below).
+	subl	$1, XREG(ROUNDS)
+
+.Lnext_block:
+	mov	KEYS, SUBKEY
+	mov	ROUNDS, COUNT
+	C FIXME: With better aligned subkeys, movaps could be used here.
+	movups	(SUBKEY), %xmm1
+	movups	(SRC), %xmm0
+	pxor	%xmm1, %xmm0
+
+	C FIXME: Some unrolling could help. All subkeys also fit in
+	C registers, so they could be loaded only once (W64 would
+	C then need to save and restore some xmm registers, though).
+
+.Lnext_round:
+	lea	16(SUBKEY), SUBKEY
+	C One inner round, using the subkey just stepped to.
+	movups	(SUBKEY), %xmm1
+	aesdec	%xmm1, %xmm0
+	subl	$1, XREG(COUNT)
+	jnz	.Lnext_round
+	C Final round, using the subkey following the last one used above.
+	movups	16(SUBKEY), %xmm1
+	aesdeclast	%xmm1, %xmm0
+
+	movups	%xmm0, (DST)
+	lea	16(SRC), SRC
+	lea	16(DST), DST
+	sub	$1, LENGTH
+	jnz	.Lnext_block
+
+.Ldone:
+	W64_EXIT(6, 2)
+	ret
+EPILOGUE(_nettle_aes_decrypt)
diff --git a/x86_64/aesni/aes-encrypt-internal.asm b/x86_64/aesni/aes-encrypt-internal.asm
new file mode 100644
index 0000000000000000000000000000000000000000..ee925fe5898ebbcfb03f0cb461ea1a5f7b43bf7c
--- /dev/null
+++ b/x86_64/aesni/aes-encrypt-internal.asm
@@ -0,0 +1,96 @@
+C x86_64/aesni/aes-encrypt-internal.asm
+
+
+ifelse(<
+   Copyright (C) 2015 Niels Möller
+
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program. If
+   not, see http://www.gnu.org/licenses/.
+>)
+
+C Arguments, in System V AMD64 register order
+define(<ROUNDS>, <%rdi>)
+define(<KEYS>, <%rsi>)
+C define(<TABLE>, <%rdx>)	C Table argument is unused, %rdx is reused below
+define(<LENGTH>, <%rcx>)
+define(<DST>, <%r8>)
+define(<SRC>, <%r9>)
+
+C Pointer to the subkey currently in use
+define(<SUBKEY>, <%rax>)
+C Rounds left for the current block
+define(<COUNT>, <%rdx>)
+
+	.arch bdver2
+	.file "aes-encrypt-internal.asm"
+
+	C _aes_encrypt(unsigned rounds, const uint32_t *keys,
+	C	       const struct aes_table *T,
+	C	       size_t length, uint8_t *dst, uint8_t *src)
+	C Processes length / 16 whole 16-byte blocks, any remainder is ignored.
+	.text
+	ALIGN(16)
+PROLOGUE(_nettle_aes_encrypt)
+	W64_ENTRY(6, 2)
+	shr	$4, LENGTH
+	test	LENGTH, LENGTH
+	jz	.Ldone
+	C The loop counter covers every round but the last (aesenclast below).
+	subl	$1, XREG(ROUNDS)
+
+.Lnext_block:
+	mov	KEYS, SUBKEY
+	mov	ROUNDS, COUNT
+	C FIXME: With better aligned subkeys, movaps could be used here.
+	movups	(SUBKEY), %xmm1
+	movups	(SRC), %xmm0
+	pxor	%xmm1, %xmm0
+
+	C FIXME: Some unrolling could help. All subkeys also fit in
+	C registers, so they could be loaded only once (W64 would
+	C then need to save and restore some xmm registers, though).
+
+.Lnext_round:
+	lea	16(SUBKEY), SUBKEY
+	C One inner round, using the subkey just stepped to.
+	movups	(SUBKEY), %xmm1
+	aesenc	%xmm1, %xmm0
+	subl	$1, XREG(COUNT)
+	jnz	.Lnext_round
+	C Final round, using the subkey following the last one used above.
+	movups	16(SUBKEY), %xmm1
+	aesenclast	%xmm1, %xmm0
+
+	movups	%xmm0, (DST)
+	lea	16(SRC), SRC
+	lea	16(DST), DST
+	sub	$1, LENGTH
+	jnz	.Lnext_block
+
+.Ldone:
+	W64_EXIT(6, 2)
+	ret
+EPILOGUE(_nettle_aes_encrypt)