From aa7ad29333fd8fb3c15fd9022dcd27254f85e8a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se>
Date: Tue, 13 Jan 2015 10:52:35 +0100
Subject: [PATCH] Initial support for fat x86_64 build.

---
 ChangeLog                             |  21 +++
 asm.m4                                |  11 +-
 configure.ac                          |   9 +-
 x86_64/fat/aes-decrypt-internal-2.asm |  35 ++++
 x86_64/fat/aes-decrypt-internal.asm   |  35 ++++
 x86_64/fat/aes-encrypt-internal-2.asm |  35 ++++
 x86_64/fat/aes-encrypt-internal.asm   |  35 ++++
 x86_64/fat/cpuid.asm                  |  58 +++++++
 x86_64/fat/fat.c                      | 229 ++++++++++++++++++++++++++
 9 files changed, 463 insertions(+), 5 deletions(-)
 create mode 100644 x86_64/fat/aes-decrypt-internal-2.asm
 create mode 100644 x86_64/fat/aes-decrypt-internal.asm
 create mode 100644 x86_64/fat/aes-encrypt-internal-2.asm
 create mode 100644 x86_64/fat/aes-encrypt-internal.asm
 create mode 100644 x86_64/fat/cpuid.asm
 create mode 100644 x86_64/fat/fat.c

diff --git a/ChangeLog b/ChangeLog
index 524fa2bc..3ebb0693 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,26 @@
 2015-01-13  Niels Möller  <nisse@lysator.liu.se>
 
+	* x86_64/fat/fat.c: New file, initialization for x86_64 fat
+	library.
+
+	* x86_64/fat/cpuid.asm (_nettle_cpuid): New file and function.
+
+	* x86_64/fat/aes-encrypt-internal.asm: New file, including
+	x86_64/aes-encrypt-internal.asm, after setting fat_suffix to
+	_x86_64.
+	* x86_64/fat/aes-decrypt-internal.asm: New file, analogous setup.
+	* x86_64/fat/aes-encrypt-internal-2.asm: New file, including
+	x86_64/aesni/aes-encrypt-internal.asm, after setting fat_suffix to
+	_aesni.
+	* x86_64/fat/aes-decrypt-internal-2.asm: New file, analogous
+	setup.
+
+	* configure.ac: New command line option --enable-fat.
+	(asm_nettle_optional_list): Added cpuid.asm, fat.c,
+	aes-encrypt-internal-2.asm, and aes-decrypt-internal-2.asm.
+
+	* asm.m4 (fat_suffix): New suffix added to symbol names.
+
 	* x86_64/aesni/aes-encrypt-internal.asm: Use explicit .byte
 	sequences for aes instructions, don't rely on assembler support.
 	* x86_64/aesni/aes-decrypt-internal.asm: Likewise.
diff --git a/asm.m4 b/asm.m4
index 68ec6f59..dc59ab68 100644
--- a/asm.m4
+++ b/asm.m4
@@ -12,6 +12,9 @@ changecom()dnl
 dnl Including files from the srcdir
 define(<include_src>, <include(srcdir/$1)>)dnl
 
+dnl default definition, changed in fat builds
+define(<fat_suffix>, <>)
+
 dnl Pseudo ops
 define(<DECLARE_FUNC>,
 <ifelse(ELF_STYLE,yes,
@@ -24,13 +27,13 @@ COFF_STYLE, yes,
 <>)>)
 
 define(<PROLOGUE>,
-<.globl C_NAME($1)
-DECLARE_FUNC(C_NAME($1))
-C_NAME($1):>)
+<.globl C_NAME($1)<>fat_suffix
+DECLARE_FUNC(C_NAME($1)<>fat_suffix)
+C_NAME($1)<>fat_suffix:>)
 
 define(<EPILOGUE>,
 <ifelse(ELF_STYLE,yes,
-<.size C_NAME($1), . - C_NAME($1)>,<>)>)
+<.size C_NAME($1)<>fat_suffix, . - C_NAME($1)<>fat_suffix>,<>)>)
 
 define(<m4_log2>, <m4_log2_internal($1,1,0)>)
 define(<m4_log2_internal>,
diff --git a/configure.ac b/configure.ac
index 49c52f6d..93f4f901 100644
--- a/configure.ac
+++ b/configure.ac
@@ -68,6 +68,9 @@ AC_ARG_ENABLE(documentation,
   AC_HELP_STRING([--disable-documentation], [Omit building and installing the documentation. (default=auto)]),,
   [enable_documentation=auto])
 
+AC_ARG_ENABLE(fat, AC_HELP_STRING([--enable-fat], [Enable fat library build (default=no)]),,
+  [enable_fat=no])
+
 AC_ARG_ENABLE(arm-neon,
   AC_HELP_STRING([--enable-arm-neon], [Enable ARM Neon assembly. (default=auto)]),,
   [enable_arm_neon=auto])
@@ -248,6 +251,9 @@ if test "x$enable_assembler" = xyes ; then
 	if test "x$enable_x86_aesni" = xyes ; then
 	  asm_path="x86_64/aesni $asm_path"
 	fi
+	if test "x$enable_fat" = xyes ; then
+	  asm_path="x86_64/fat $asm_path"
+	fi
       else
 	asm_path=x86
       fi
@@ -290,7 +296,8 @@ asm_replace_list="aes-encrypt-internal.asm aes-decrypt-internal.asm \
 		sha3-permute.asm umac-nh.asm umac-nh-n.asm machine.m4"
 
 # Assembler files which generate additional object files if they are used.
-asm_nettle_optional_list="gcm-hash8.asm"
+asm_nettle_optional_list="gcm-hash8.asm cpuid.asm fat.c \
+  aes-encrypt-internal-2.asm aes-decrypt-internal-2.asm"
 asm_hogweed_optional_list=""
 if test "x$enable_public_key" = "xyes" ; then
   asm_hogweed_optional_list="ecc-192-modp.asm ecc-224-modp.asm \
diff --git a/x86_64/fat/aes-decrypt-internal-2.asm b/x86_64/fat/aes-decrypt-internal-2.asm
new file mode 100644
index 00000000..16fb5598
--- /dev/null
+++ b/x86_64/fat/aes-decrypt-internal-2.asm
@@ -0,0 +1,35 @@
+C x86_64/fat/aes-decrypt-internal-2.asm
+
+
+ifelse(<
+   Copyright (C) 2015 Niels Möller
+
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+>)
+
+define(<fat_suffix>, <_aesni>)
+include_src(<x86_64/aesni/aes-decrypt-internal.asm>)
diff --git a/x86_64/fat/aes-decrypt-internal.asm b/x86_64/fat/aes-decrypt-internal.asm
new file mode 100644
index 00000000..b05f6a1e
--- /dev/null
+++ b/x86_64/fat/aes-decrypt-internal.asm
@@ -0,0 +1,35 @@
+C x86_64/fat/aes-decrypt-internal.asm
+
+
+ifelse(<
+   Copyright (C) 2015 Niels Möller
+
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+>)
+
+define(<fat_suffix>, <_x86_64>)
+include_src(<x86_64/aes-decrypt-internal.asm>)
diff --git a/x86_64/fat/aes-encrypt-internal-2.asm b/x86_64/fat/aes-encrypt-internal-2.asm
new file mode 100644
index 00000000..c173d0a5
--- /dev/null
+++ b/x86_64/fat/aes-encrypt-internal-2.asm
@@ -0,0 +1,35 @@
+C x86_64/fat/aes-encrypt-internal-2.asm
+
+
+ifelse(<
+   Copyright (C) 2015 Niels Möller
+
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+>)
+
+define(<fat_suffix>, <_aesni>)
+include_src(<x86_64/aesni/aes-encrypt-internal.asm>)
diff --git a/x86_64/fat/aes-encrypt-internal.asm b/x86_64/fat/aes-encrypt-internal.asm
new file mode 100644
index 00000000..50a721ab
--- /dev/null
+++ b/x86_64/fat/aes-encrypt-internal.asm
@@ -0,0 +1,35 @@
+C x86_64/fat/aes-encrypt-internal.asm
+
+
+ifelse(<
+   Copyright (C) 2015 Niels Möller
+
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+>)
+
+define(<fat_suffix>, <_x86_64>)
+include_src(<x86_64/aes-encrypt-internal.asm>)
diff --git a/x86_64/fat/cpuid.asm b/x86_64/fat/cpuid.asm
new file mode 100644
index 00000000..16a66d57
--- /dev/null
+++ b/x86_64/fat/cpuid.asm
@@ -0,0 +1,58 @@
+C x86_64/fat/cpuid.asm
+
+ifelse(<
+   Copyright (C) 2015 Niels Möller
+
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+>)
+
+C Input arguments
+C cpuid input: %edi
+C output pointer: %rsi
+
+	.file "cpuid.asm"
+
+	C void _nettle_cpuid(uint32_t in, uint32_t *out)
+
+	.text
+	ALIGN(16)
+PROLOGUE(_nettle_cpuid)
+	W64_ENTRY(2)
+	push	%rbx		C cpuid overwrites %ebx, so keep the caller's %rbx
+	
+	movl	%edi, %eax	C first argument is the cpuid input value
+	cpuid
+	mov	%eax, (%rsi)	C store the four result registers in out[0..3]
+	mov	%ebx, 4(%rsi)
+	mov	%ecx, 8(%rsi)
+	mov	%edx, 12(%rsi)
+
+	pop	%rbx		C restore the register saved on entry
+	W64_EXIT(2)
+	ret
+EPILOGUE(_nettle_cpuid)
+
diff --git a/x86_64/fat/fat.c b/x86_64/fat/fat.c
new file mode 100644
index 00000000..3585cf52
--- /dev/null
+++ b/x86_64/fat/fat.c
@@ -0,0 +1,229 @@
+/* fat.c
+
+   Copyright (C) 2015 Niels Möller
+
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+*/
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "nettle-types.h"
+
+#include "aes-internal.h"
+
+/* Fat library initialization works as follows. The main function is
+   fat_init. It tries to do initialization only once, but since it is
+   idempotent and pointer updates are atomic on x86_64, there's no
+   harm if it is in some cases called multiple times from several
+   threads.
+
+   The fat_init function checks the cpuid flags, and sets function
+   pointers, e.g., _aes_encrypt_vec, to point to the appropriate
+   implementation.
+
+   To get everything hooked in, we use a belt-and-suspenders approach.
+
+   When compiling with gcc, we try to register a constructor function
+   which calls fat_init as soon as the library is loaded. If this is
+   unavailable or non-working, we instead arrange fat_init to be
+   called on demand.
+
+   For the actual indirection, there are two cases. 
+
+   If ifunc support is available, function pointers are statically
+   initialized to NULL, and we register resolver functions, e.g.,
+   _aes_encrypt_resolve, which calls fat_init, and then returns the
+   function pointer, e.g., the value of _aes_encrypt_vec.
+
+   If ifunc is not available, we have to define a wrapper function to
+   jump via the function pointer. (FIXME: For internal calls, we could
+   do this as a macro instead). We statically initialize each function
+   pointer to point to a special initialization function, e.g.,
+   _aes_encrypt_init, which calls fat_init, and then invokes the right
+   function. This way, all pointers are set up correctly at the first
+   call to any fat function.
+*/
+
+#if HAVE_LINK_IFUNC /* IFUNC (name): ifunc attribute naming the resolver, or nothing */
+# define IFUNC(resolve) __attribute__ ((ifunc (resolve)))
+#else /* !HAVE_LINK_IFUNC */
+# define IFUNC(resolve)
+#endif /* HAVE_LINK_IFUNC */
+/* Defined in cpuid.asm: runs cpuid on INPUT, stores eax, ebx, ecx, edx in REGS[0..3]. */
+void _nettle_cpuid (uint32_t input, uint32_t regs[4]);
+/* Generic function type; the ifunc resolvers return a pointer to it. */
+typedef void void_func (void);
+/* Common signature of the AES entry points and of their per-CPU variants. */
+typedef void aes_crypt_internal_func (unsigned rounds, const uint32_t *keys,
+				      const struct aes_table *T,
+				      size_t length, uint8_t *dst,
+				      const uint8_t *src);
+aes_crypt_internal_func _aes_encrypt IFUNC ("_aes_encrypt_resolve");
+aes_crypt_internal_func _nettle_aes_encrypt_x86_64;
+aes_crypt_internal_func _nettle_aes_encrypt_aesni;
+
+aes_crypt_internal_func _aes_decrypt IFUNC ("_aes_decrypt_resolve");
+aes_crypt_internal_func _nettle_aes_decrypt_x86_64;
+aes_crypt_internal_func _nettle_aes_decrypt_aesni;
+/* Initial pointer value: NULL with ifunc, otherwise the matching *_init function. */
+#if HAVE_LINK_IFUNC
+#define _aes_encrypt_init NULL
+#define _aes_decrypt_init NULL
+#else /* !HAVE_LINK_IFUNC */
+static aes_crypt_internal_func _aes_encrypt_init;
+static aes_crypt_internal_func _aes_decrypt_init;
+#endif /* HAVE_LINK_IFUNC */
+/* Dispatch pointers; fat_init overwrites them with the selected implementation. */
+static aes_crypt_internal_func *_aes_encrypt_vec = _aes_encrypt_init;
+static aes_crypt_internal_func *_aes_decrypt_vec = _aes_decrypt_init;
+
+/* Select the AES implementations from the cpuid flags and store them in
+   _aes_encrypt_vec and _aes_decrypt_vec. Usually called only once, at
+   startup; it is idempotent, and on x86 pointer updates are atomic, so
+   simultaneous calls from multiple threads are harmless. */
+static void
+fat_init (void)
+{
+  static volatile int initialized = 0; /* set last, after both pointers are stored */
+  uint32_t cpuid_data[4];
+  int verbose;
+  if (initialized)
+    return;
+
+  /* FIXME: Replace all getenv calls by getenv_secure? */
+  verbose = getenv ("NETTLE_FAT_VERBOSE") != NULL; /* any value enables tracing on stderr */
+  if (verbose)
+    fprintf (stderr, "libnettle: fat library initialization.\n");
+
+  _nettle_cpuid (1, cpuid_data); /* cpuid_data = { eax, ebx, ecx, edx } for input 1 */
+  if (verbose)
+    fprintf (stderr, "libnettle: cpuid 1: %08x, %08x, %08x, %08x\n",
+	     cpuid_data[0], cpuid_data[1], cpuid_data[2], cpuid_data[3]);
+  /* Bit 25 of the %ecx word (0x02000000) reports the aes instructions. */
+  if (cpuid_data[2] & 0x02000000)
+    {
+      if (verbose)
+	fprintf (stderr, "libnettle: aes instructions available.\n");
+      _aes_encrypt_vec = _nettle_aes_encrypt_aesni;
+      _aes_decrypt_vec = _nettle_aes_decrypt_aesni;
+    }
+  else
+    {
+      if (verbose)
+	fprintf (stderr, "libnettle: aes instructions not available.\n");
+      _aes_encrypt_vec = _nettle_aes_encrypt_x86_64;
+      _aes_decrypt_vec = _nettle_aes_decrypt_x86_64;
+    }
+  /* FIXME: We ought to use some thread-aware memory barrier before
+     setting the initialized flag. For now, just do another cpuid
+     call to get some synchronization. */
+  _nettle_cpuid (1, cpuid_data);
+  initialized = 1;
+}
+
+#if __GNUC__ /* the constructor attribute is only requested from GNU-compatible compilers */
+static void __attribute__ ((constructor))
+fat_constructor (void)
+{
+  fat_init (); /* choose the implementations up front instead of on first use */
+}
+#endif /* __GNUC__ */
+
+#if HAVE_LINK_IFUNC
+
+static void_func * /* resolver named by the IFUNC attribute on _aes_encrypt */
+_aes_encrypt_resolve (void)
+{ /* NOTE(review): getenv/fprintf run inside an ifunc resolver; confirm that is safe. */
+  if (getenv ("NETTLE_FAT_VERBOSE"))
+    fprintf (stderr, "libnettle: _aes_encrypt_resolve\n");
+  fat_init (); /* fills in _aes_encrypt_vec from the cpuid flags */
+  return (void_func *) _aes_encrypt_vec; /* the implementation fat_init selected */
+}
+
+static void_func * /* resolver named by the IFUNC attribute on _aes_decrypt */
+_aes_decrypt_resolve (void)
+{ /* NOTE(review): getenv/fprintf run inside an ifunc resolver; confirm that is safe. */
+  if (getenv ("NETTLE_FAT_VERBOSE"))
+    fprintf (stderr, "libnettle: _aes_decrypt_resolve\n");
+  fat_init (); /* fills in _aes_decrypt_vec from the cpuid flags */
+  return (void_func *) _aes_decrypt_vec; /* the implementation fat_init selected */
+}
+
+#else /* !HAVE_LINK_IFUNC */
+
+/* Without ifunc, _aes_encrypt is a real function forwarding via _aes_encrypt_vec. */
+void
+_aes_encrypt (unsigned rounds, const uint32_t *keys,
+	      const struct aes_table *T,
+	      size_t length, uint8_t *dst,
+	      const uint8_t *src)
+{
+  _aes_encrypt_vec (rounds, keys, T, length, dst, src); /* _aes_encrypt_init until fat_init has run */
+}
+
+static void /* initial target of _aes_encrypt_vec, used until fat_init has run */
+_aes_encrypt_init (unsigned rounds, const uint32_t *keys,
+		   const struct aes_table *T,
+		   size_t length, uint8_t *dst,
+		   const uint8_t *src)
+{
+  if (getenv ("NETTLE_FAT_VERBOSE"))
+    fprintf (stderr, "libnettle: _aes_encrypt_init\n");
+  fat_init (); /* replaces _aes_encrypt_vec with a real implementation */
+  assert (_aes_encrypt_vec != _aes_encrypt_init); /* otherwise the call below would recurse */
+  _aes_encrypt (rounds, keys, T, length, dst, src); /* redo the call through the new pointer */
+}
+
+void /* non-ifunc entry point: forwards the call through _aes_decrypt_vec */
+_aes_decrypt (unsigned rounds, const uint32_t *keys,
+	      const struct aes_table *T,
+	      size_t length, uint8_t *dst,
+	      const uint8_t *src)
+{
+  _aes_decrypt_vec (rounds, keys, T, length, dst, src); /* _aes_decrypt_init until fat_init has run */
+}
+
+static void /* initial target of _aes_decrypt_vec, used until fat_init has run */
+_aes_decrypt_init (unsigned rounds, const uint32_t *keys,
+		   const struct aes_table *T,
+		   size_t length, uint8_t *dst,
+		   const uint8_t *src)
+{
+  if (getenv ("NETTLE_FAT_VERBOSE"))
+    fprintf (stderr, "libnettle: _aes_decrypt_init\n");
+  fat_init (); /* replaces _aes_decrypt_vec with a real implementation */
+  assert (_aes_decrypt_vec != _aes_decrypt_init); /* otherwise the call below would recurse */
+  _aes_decrypt (rounds, keys, T, length, dst, src); /* redo the call through the new pointer */
+}
+
+#endif /* !HAVE_LINK_IFUNC */
-- 
GitLab