diff --git a/ChangeLog b/ChangeLog
index 6009ace971d6b42d484804ff1c07eab401e23435..98d7b4c8ccc561e5c43b1317f68fe2ad98f1071a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,9 @@
 2015-01-11  Niels Möller  <nisse@lysator.liu.se>
 
+	* x86_64/aesni/aes-decrypt-internal.asm: New file.
+	* x86_64/aesni/aes-encrypt-internal.asm: New file.
+	* configure.ac: New configure flag --enable-x86-aesni.
+
 	* aclocal.m4 (LSH_RPATH_INIT): Handle freebsd, in the same way as
 	gnu/linux, with -Wl,-rpath,.
 
diff --git a/configure.ac b/configure.ac
index 76beb3160853abde4dfaafcb90fcc35a558c6d2f..63f59c654db0cc9d4263689981eb5141844ab4e9 100644
--- a/configure.ac
+++ b/configure.ac
@@ -72,6 +72,10 @@ AC_ARG_ENABLE(arm-neon,
   AC_HELP_STRING([--enable-arm-neon], [Enable ARM Neon assembly. (default=auto)]),,
   [enable_arm_neon=auto])
 
+AC_ARG_ENABLE(x86-aesni,
+  AC_HELP_STRING([--enable-x86-aesni], [Enable x86_64 aes instructions. (default=no)]),,
+  [enable_x86_aesni=no])
+
 AC_ARG_ENABLE(mini-gmp,
   AC_HELP_STRING([--enable-mini-gmp], [Enable mini-gmp, used instead of libgmp.]),,
   [enable_mini_gmp=no])
@@ -239,6 +243,9 @@ if test "x$enable_assembler" = xyes ; then
     [x86_64 | amd64])
       if test "$ABI" = 64 ; then
 	asm_path=x86_64
+	if test "x$enable_x86_aesni" = xyes ; then
+	  asm_path="x86_64/aesni $asm_path"
+	fi
       else
 	asm_path=x86
       fi
diff --git a/x86_64/aesni/aes-decrypt-internal.asm b/x86_64/aesni/aes-decrypt-internal.asm
new file mode 100644
index 0000000000000000000000000000000000000000..1dfbcb4cb24e1f40d0aa17b9ab56d7bf06108bfd
--- /dev/null
+++ b/x86_64/aesni/aes-decrypt-internal.asm
@@ -0,0 +1,96 @@
+C x86_64/aesni/aes-decrypt-internal.asm
+
+
+ifelse(<
+   Copyright (C) 2015 Niels Möller
+
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+>)
+
+C Input arguments, in the order of the C prototype
+define(<ROUNDS>, <%rdi>)
+define(<KEYS>,	<%rsi>)
+C define(<TABLE>,	<%rdx>) C Third argument, unused here
+define(<LENGTH>,<%rcx>)
+define(<DST>,	<%r8>)
+define(<SRC>,	<%r9>)
+
+C Per-block round counter; takes over %rdx from the unused TABLE
+define(<CNT>, <%rdx>)
+C Subkey pointer, advanced 16 bytes for each round
+define(<KEY>, <%rax>)
+
+	.arch bdver2
+	.file "aes-decrypt-internal.asm"
+
+	C _aes_decrypt(unsigned rounds, const uint32_t *keys,
+	C	       const struct aes_table *T,
+	C	       size_t length, uint8_t *dst,
+	C	       uint8_t *src)
+	.text
+	ALIGN(16)
+PROLOGUE(_nettle_aes_decrypt)
+	W64_ENTRY(6, 2)
+	C Whole 16-byte blocks; shr sets ZF itself, so no test is needed.
+	shr	$4, LENGTH
+	jz	.Lend
+	C The last round is done outside the round loop, by aesdeclast.
+	decl	XREG(ROUNDS)
+
+.Lblock_loop:
+	mov	ROUNDS, CNT
+	mov	KEYS, KEY
+	movups	(SRC), %xmm0
+	C FIXME: Better alignment of subkeys, so we can use movaps.
+	movups	(KEY), %xmm1
+	pxor	%xmm1, %xmm0
+
+	C FIXME: Could use some unrolling. Also all subkeys fit in
+	C registers, so they could be loaded once (on W64 we would
+	C need to save and restore some xmm registers, though).
+
+.Lround_loop:
+	add	$16, KEY
+	movups	(KEY), %xmm1
+	aesdec	%xmm1, %xmm0
+	decl	XREG(CNT)
+	jnz	.Lround_loop
+
+	C KEY still points at the subkey of the last aesdec round.
+	movups	16(KEY), %xmm1
+	aesdeclast	%xmm1, %xmm0
+
+	movups	%xmm0, (DST)
+	add	$16, SRC
+	add	$16, DST
+	dec	LENGTH
+	jnz	.Lblock_loop
+
+.Lend:
+	W64_EXIT(6, 2)
+	ret
+EPILOGUE(_nettle_aes_decrypt)
diff --git a/x86_64/aesni/aes-encrypt-internal.asm b/x86_64/aesni/aes-encrypt-internal.asm
new file mode 100644
index 0000000000000000000000000000000000000000..ee925fe5898ebbcfb03f0cb461ea1a5f7b43bf7c
--- /dev/null
+++ b/x86_64/aesni/aes-encrypt-internal.asm
@@ -0,0 +1,96 @@
+C x86_64/aesni/aes-encrypt-internal.asm
+
+
+ifelse(<
+   Copyright (C) 2015 Niels Möller
+
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+>)
+
+C Input arguments, in the order of the C prototype
+define(<ROUNDS>, <%rdi>)
+define(<KEYS>,	<%rsi>)
+C define(<TABLE>,	<%rdx>) C Third argument, unused here
+define(<LENGTH>,<%rcx>)
+define(<DST>,	<%r8>)
+define(<SRC>,	<%r9>)
+
+C Per-block round counter; takes over %rdx from the unused TABLE
+define(<CNT>, <%rdx>)
+C Subkey pointer, advanced 16 bytes for each round
+define(<KEY>, <%rax>)
+
+	.arch bdver2
+	.file "aes-encrypt-internal.asm"
+
+	C _aes_encrypt(unsigned rounds, const uint32_t *keys,
+	C	       const struct aes_table *T,
+	C	       size_t length, uint8_t *dst,
+	C	       uint8_t *src)
+	.text
+	ALIGN(16)
+PROLOGUE(_nettle_aes_encrypt)
+	W64_ENTRY(6, 2)
+	C Whole 16-byte blocks; shr sets ZF itself, so no test is needed.
+	shr	$4, LENGTH
+	jz	.Lend
+	C The last round is done outside the round loop, by aesenclast.
+	decl	XREG(ROUNDS)
+
+.Lblock_loop:
+	mov	ROUNDS, CNT
+	mov	KEYS, KEY
+	movups	(SRC), %xmm0
+	C FIXME: Better alignment of subkeys, so we can use movaps.
+	movups	(KEY), %xmm1
+	pxor	%xmm1, %xmm0
+
+	C FIXME: Could use some unrolling. Also all subkeys fit in
+	C registers, so they could be loaded once (on W64 we would
+	C need to save and restore some xmm registers, though).
+
+.Lround_loop:
+	add	$16, KEY
+	movups	(KEY), %xmm1
+	aesenc	%xmm1, %xmm0
+	decl	XREG(CNT)
+	jnz	.Lround_loop
+
+	C KEY still points at the subkey of the last aesenc round.
+	movups	16(KEY), %xmm1
+	aesenclast	%xmm1, %xmm0
+
+	movups	%xmm0, (DST)
+	add	$16, SRC
+	add	$16, DST
+	dec	LENGTH
+	jnz	.Lblock_loop
+
+.Lend:
+	W64_EXIT(6, 2)
+	ret
+EPILOGUE(_nettle_aes_encrypt)