From ade7779c98a5426c7d86c8a01bbd7ad65980c9b9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se>
Date: Thu, 18 Apr 2013 14:07:20 +0200
Subject: [PATCH] Reorganization of ARM assembly.

Renamed directory armv7 to arm. New subdirectory arm/neon, for files
using neon instructions. configure.ac hacked to make use of neon
configurable.
---
 ChangeLog                                     | 18 +++++++
 Makefile.in                                   |  2 +-
 aclocal.m4                                    | 31 ++++++++++++
 {armv7 => arm}/README                         |  0
 {armv7 => arm}/aes-decrypt-internal.asm       |  2 +-
 {armv7 => arm}/aes-encrypt-internal.asm       |  2 +-
 {armv7 => arm}/aes.m4                         |  0
 {armv7 => arm}/ecc-192-modp.asm               |  0
 {armv7 => arm}/ecc-224-modp.asm               |  0
 {armv7 => arm}/ecc-256-redc.asm               |  0
 {armv7 => arm}/ecc-384-modp.asm               |  0
 {armv7 => arm}/ecc-521-modp.asm               |  0
 {armv7 => arm}/machine.m4                     |  0
 {armv7 => arm}/memxor.asm                     |  0
 {armv7 => arm/neon}/salsa20-core-internal.asm |  0
 {armv7 => arm/neon}/sha3-permute.asm          |  0
 {armv7 => arm/neon}/sha512-compress.asm       |  0
 {armv7 => arm/neon}/umac-nh-n.asm             |  0
 {armv7 => arm/neon}/umac-nh.asm               |  0
 {armv7 => arm}/sha1-compress.asm              |  0
 {armv7 => arm}/sha256-compress.asm            |  0
 configure.ac                                  | 47 ++++++++++++-------
 22 files changed, 83 insertions(+), 19 deletions(-)
 rename {armv7 => arm}/README (100%)
 rename {armv7 => arm}/aes-decrypt-internal.asm (98%)
 rename {armv7 => arm}/aes-encrypt-internal.asm (98%)
 rename {armv7 => arm}/aes.m4 (100%)
 rename {armv7 => arm}/ecc-192-modp.asm (100%)
 rename {armv7 => arm}/ecc-224-modp.asm (100%)
 rename {armv7 => arm}/ecc-256-redc.asm (100%)
 rename {armv7 => arm}/ecc-384-modp.asm (100%)
 rename {armv7 => arm}/ecc-521-modp.asm (100%)
 rename {armv7 => arm}/machine.m4 (100%)
 rename {armv7 => arm}/memxor.asm (100%)
 rename {armv7 => arm/neon}/salsa20-core-internal.asm (100%)
 rename {armv7 => arm/neon}/sha3-permute.asm (100%)
 rename {armv7 => arm/neon}/sha512-compress.asm (100%)
 rename {armv7 => arm/neon}/umac-nh-n.asm (100%)
 rename {armv7 => arm/neon}/umac-nh.asm (100%)
 rename {armv7 => arm}/sha1-compress.asm (100%)
 rename {armv7 => arm}/sha256-compress.asm (100%)

diff --git a/ChangeLog b/ChangeLog
index 278fe27d..fea26283 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,23 @@
 2013-04-18  Niels Möller  <nisse@lysator.liu.se>
 
+	* arm/aes-encrypt-internal.asm: Updated include of aes.m4.
+	* arm/aes-decrypt-internal.asm: Likewise.
+
+	* Makefile.in (distdir): Updated for ARM reorganization.
+
+	* configure.ac (asm_path): Generalized, can now be a list of
+	directories. On ARM, check for neon instructions, and add arm/neon
+	if appropriate. New command line options
+	--enable-arm-neon/--disable-arm-neon, for overriding the default.
+
+	arm/neon: New subdirectory, for assembly files making use of neon
+	instructions.
+
+	arm: Renamed directory, from...
+	armv7: ...old name.
+
+	* aclocal.m4 (NETTLE_CHECK_ARM_NEON): New macro.
+
 	* nettle.texinfo (Keyed hash functions): Document UMAC.
 
 	* umac.h (UMAC32_DIGEST_SIZE, UMAC64_DIGEST_SIZE)
diff --git a/Makefile.in b/Makefile.in
index 13e74c62..60abae96 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -516,7 +516,7 @@ distdir: $(DISTFILES)
 	  else cp "$(srcdir)/$$f" "$(distdir)" ; \
 	  fi ; \
 	done
-	set -e; for d in sparc32 sparc64 x86 x86_64 armv7 ; do \
+	set -e; for d in sparc32 sparc64 x86 x86_64 arm arm/neon ; do \
 	  mkdir "$(distdir)/$$d" ; \
 	  cp $(srcdir)/$$d/*.asm $(srcdir)/$$d/*.m4 "$(distdir)/$$d" ; \
 	done
diff --git a/aclocal.m4 b/aclocal.m4
index ae6b204a..0d24fc2e 100644
--- a/aclocal.m4
+++ b/aclocal.m4
@@ -608,6 +608,37 @@ fi
 AC_SUBST(EXEEXT_FOR_BUILD,$gmp_cv_prog_exeext_for_build)
 ])
 
+dnl NETTLE_CHECK_ARM_NEON
+dnl ---------------------
+dnl Check if ARM Neon instructions should be used.
+dnl Obeys enable_arm_neon, which should be set earlier.
+AC_DEFUN([NETTLE_CHECK_ARM_NEON],
+[if test "$enable_arm_neon" = auto ; then
+  if test "$cross_compiling" = yes ; then
+    dnl Check if compiler/assembler accepts it,
+    dnl without an explicit .fpu neon directive.
+    AC_CACHE_CHECK([if assembler accepts Neon instructions],
+      nettle_cv_asm_arm_neon,
+      [GMP_TRY_ASSEMBLE([
+.text
+foo:
+	vmlal.u32	q1, d0, d1
+],
+      [nettle_cv_asm_arm_neon=yes],
+      [nettle_cv_asm_arm_neon=no])])
+    enable_arm_neon="$nettle_cv_asm_arm_neon"
+  else
+    AC_MSG_CHECKING([if /proc/cpuinfo claims neon support])
+    if grep '^Features.*:.* neon' /proc/cpuinfo >/dev/null ; then
+      enable_arm_neon=yes
+    else
+      enable_arm_neon=no
+    fi
+    AC_MSG_RESULT($enable_arm_neon)
+  fi
+fi
+])
+
 dnl @synopsis AX_CREATE_STDINT_H [( HEADER-TO-GENERATE [, HEADERS-TO-CHECK])]
 dnl
 dnl the "ISO C9X: 7.18 Integer types <stdint.h>" section requires the
diff --git a/armv7/README b/arm/README
similarity index 100%
rename from armv7/README
rename to arm/README
diff --git a/armv7/aes-decrypt-internal.asm b/arm/aes-decrypt-internal.asm
similarity index 98%
rename from armv7/aes-decrypt-internal.asm
rename to arm/aes-decrypt-internal.asm
index 9ceb7a62..1cd92fb2 100644
--- a/armv7/aes-decrypt-internal.asm
+++ b/arm/aes-decrypt-internal.asm
@@ -17,7 +17,7 @@ C along with the nettle library; see the file COPYING.LIB.  If not, write to
 C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 C MA 02111-1301, USA.
 
-include_src(<armv7/aes.m4>)
+include_src(<arm/aes.m4>)
 
 C	Benchmarked at at 785, 914, 1051 cycles/block on cortex A9,
 C	for 128, 192 and 256 bit key sizes. Unclear why it is slower
diff --git a/armv7/aes-encrypt-internal.asm b/arm/aes-encrypt-internal.asm
similarity index 98%
rename from armv7/aes-encrypt-internal.asm
rename to arm/aes-encrypt-internal.asm
index 6bd1e989..b3309351 100644
--- a/armv7/aes-encrypt-internal.asm
+++ b/arm/aes-encrypt-internal.asm
@@ -17,7 +17,7 @@ C along with the nettle library; see the file COPYING.LIB.  If not, write to
 C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 C MA 02111-1301, USA.
 
-include_src(<armv7/aes.m4>)
+include_src(<arm/aes.m4>)
 
 C	Benchmarked at at 693, 824, 950 cycles/block on cortex A9,
 C	for 128, 192 and 256 bit key sizes.
diff --git a/armv7/aes.m4 b/arm/aes.m4
similarity index 100%
rename from armv7/aes.m4
rename to arm/aes.m4
diff --git a/armv7/ecc-192-modp.asm b/arm/ecc-192-modp.asm
similarity index 100%
rename from armv7/ecc-192-modp.asm
rename to arm/ecc-192-modp.asm
diff --git a/armv7/ecc-224-modp.asm b/arm/ecc-224-modp.asm
similarity index 100%
rename from armv7/ecc-224-modp.asm
rename to arm/ecc-224-modp.asm
diff --git a/armv7/ecc-256-redc.asm b/arm/ecc-256-redc.asm
similarity index 100%
rename from armv7/ecc-256-redc.asm
rename to arm/ecc-256-redc.asm
diff --git a/armv7/ecc-384-modp.asm b/arm/ecc-384-modp.asm
similarity index 100%
rename from armv7/ecc-384-modp.asm
rename to arm/ecc-384-modp.asm
diff --git a/armv7/ecc-521-modp.asm b/arm/ecc-521-modp.asm
similarity index 100%
rename from armv7/ecc-521-modp.asm
rename to arm/ecc-521-modp.asm
diff --git a/armv7/machine.m4 b/arm/machine.m4
similarity index 100%
rename from armv7/machine.m4
rename to arm/machine.m4
diff --git a/armv7/memxor.asm b/arm/memxor.asm
similarity index 100%
rename from armv7/memxor.asm
rename to arm/memxor.asm
diff --git a/armv7/salsa20-core-internal.asm b/arm/neon/salsa20-core-internal.asm
similarity index 100%
rename from armv7/salsa20-core-internal.asm
rename to arm/neon/salsa20-core-internal.asm
diff --git a/armv7/sha3-permute.asm b/arm/neon/sha3-permute.asm
similarity index 100%
rename from armv7/sha3-permute.asm
rename to arm/neon/sha3-permute.asm
diff --git a/armv7/sha512-compress.asm b/arm/neon/sha512-compress.asm
similarity index 100%
rename from armv7/sha512-compress.asm
rename to arm/neon/sha512-compress.asm
diff --git a/armv7/umac-nh-n.asm b/arm/neon/umac-nh-n.asm
similarity index 100%
rename from armv7/umac-nh-n.asm
rename to arm/neon/umac-nh-n.asm
diff --git a/armv7/umac-nh.asm b/arm/neon/umac-nh.asm
similarity index 100%
rename from armv7/umac-nh.asm
rename to arm/neon/umac-nh.asm
diff --git a/armv7/sha1-compress.asm b/arm/sha1-compress.asm
similarity index 100%
rename from armv7/sha1-compress.asm
rename to arm/sha1-compress.asm
diff --git a/armv7/sha256-compress.asm b/arm/sha256-compress.asm
similarity index 100%
rename from armv7/sha256-compress.asm
rename to arm/sha256-compress.asm
diff --git a/configure.ac b/configure.ac
index 7a0ca826..e13e554d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -68,6 +68,10 @@ AC_ARG_ENABLE(documentation,
   AC_HELP_STRING([--disable-documentation], [Omit building and installing the documentation. (default=auto)]),,
   [enable_documentation=auto])
 
+AC_ARG_ENABLE(arm-neon,
+  AC_HELP_STRING([--enable-arm-neon], [Enable ARM Neon assembly. (default=auto)]),,
+  [enable_arm_neon=auto])
+
 LSH_RPATH_INIT([`echo $with_lib_path | sed 's/:/ /g'` \
     `echo $exec_prefix | sed "s@^NONE@$prefix/lib@g" | sed "s@^NONE@$ac_default_prefix/lib@g"` \
     /usr/local/lib /sw/local/lib /sw/lib \
@@ -234,7 +238,12 @@ if test "x$enable_assembler" = xyes ; then
       fi
       ;;
     armv7l*)
-      asm_path=armv7
+      NETTLE_CHECK_ARM_NEON
+
+      asm_path=arm
+      if test "x$enable_arm_neon" = xyes ; then
+	asm_path="arm/neon $asm_path"
+      fi
       ;;
     *)
       enable_assembler=no
@@ -266,27 +275,33 @@ asm_file_list=""
 
 if test "x$enable_assembler" = xyes ; then
   if test -n "$asm_path"; then
-    AC_MSG_NOTICE([Looking for assembler files in $asm_path/.])
+    AC_MSG_NOTICE([Looking for assembler files in $asm_path.])
     for tmp_f in $asm_replace_list ; do
-      if test -f "$srcdir/$asm_path/$tmp_f"; then
-	asm_file_list="$asm_file_list $tmp_f"
-        AC_CONFIG_LINKS($tmp_f:$asm_path/$tmp_f)
-      fi
+      for asm_dir in $asm_path ; do
+        if test -f "$srcdir/$asm_dir/$tmp_f"; then
+	  asm_file_list="$asm_file_list $tmp_f"
+          AC_CONFIG_LINKS($tmp_f:$asm_dir/$tmp_f)
+	  break
+        fi
+      done
     done
     dnl Workaround for AC_CONFIG_LINKS, which complains if we use the
     dnl same destination argument $tmp_f multiple times.
     for tmp_o in $asm_optional_list ; do
-      if test -f "$srcdir/$asm_path/$tmp_o"; then
-	asm_file_list="$asm_file_list $tmp_o"
-        AC_CONFIG_LINKS($tmp_o:$asm_path/$tmp_o)
-	while read tmp_func ; do
-	  AC_DEFINE_UNQUOTED(HAVE_NATIVE_$tmp_func)
-	  eval HAVE_NATIVE_$tmp_func=yes
-	done <<EOF
-[`sed -n 's/[^ 	]*PROLOGUE(_*\(nettle_\)*\([^)]*\)).*$/\2/p' < "$srcdir/$asm_path/$tmp_o"`]
+      for asm_dir in $asm_path ; do
+	if test -f "$srcdir/$asm_dir/$tmp_o"; then
+	  asm_file_list="$asm_file_list $tmp_o"
+	  AC_CONFIG_LINKS($tmp_o:$asm_dir/$tmp_o)
+	  while read tmp_func ; do
+	    AC_DEFINE_UNQUOTED(HAVE_NATIVE_$tmp_func)
+	    eval HAVE_NATIVE_$tmp_func=yes
+	  done <<EOF
+[`sed -n 's/[^ 	]*PROLOGUE(_*\(nettle_\)*\([^)]*\)).*$/\2/p' < "$srcdir/$asm_dir/$tmp_o"`]
 EOF
-	OPT_ASM_SOURCES="$OPT_ASM_SOURCES $tmp_o"
-      fi
+	  OPT_ASM_SOURCES="$OPT_ASM_SOURCES $tmp_o"
+	  break
+	fi
+      done
     done	
     if test -z "$asm_file_list"; then
       enable_assembler=no
-- 
GitLab