diff --git a/ChangeLog b/ChangeLog
index 581d86ca38f71c35b197b35fcbe51feec7462db0..3b2a590ac02d86da85bb031bb5013df3e69450e2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,6 @@
 2013-03-06  Niels Möller  <nisse@lysator.liu.se>
 
+	* x86_64/ecc-256-redc.asm: New file, 2.5 times speedup.
 	* x86_64/ecc-224-modp.asm: New file, 5 time speedup over C
 	version.
 
diff --git a/x86_64/ecc-256-redc.asm b/x86_64/ecc-256-redc.asm
new file mode 100644
index 0000000000000000000000000000000000000000..dc7ea340ab2b2d95952bd0e14984c298feb950fb
--- /dev/null
+++ b/x86_64/ecc-256-redc.asm
@@ -0,0 +1,116 @@
+C nettle, low-level cryptographic library
+C
+C Copyright (C) 2013 Niels Möller
+C
+C The nettle library is free software; you can redistribute it and/or modify
+C it under the terms of the GNU Lesser General Public License as published by
+C the Free Software Foundation; either version 2.1 of the License, or (at your
+C option) any later version.
+C
+C The nettle library is distributed in the hope that it will be useful, but
+C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+C or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+C License for more details.
+C
+C You should have received a copy of the GNU Lesser General Public License
+C along with the nettle library; see the file COPYING.LIB.  If not, write to
+C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+C MA 02110-1301, USA.
+
+	.file "ecc-256-redc.asm"
+
+define(<RP>, <%rsi>)
+define(<U0>, <%rdi>) C Overlaps unused ecc input
+define(<U1>, <%rcx>)
+define(<U2>, <%rax>)
+define(<U3>, <%rdx>)
+define(<U4>, <%r8>)
+define(<U5>, <%r9>)
+define(<U6>, <%r10>)
+define(<F0>, <%r11>)
+define(<F1>, <%r12>)
+define(<F2>, <%rbx>)
+define(<F3>, <%rbp>)
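+C U0-U6 hold limbs of the value being reduced; F0-F3 are scratch
+C (FOLD output, and masks for the final conditional add).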
+
+C FOLD(x), sets (F3,F2,F1,F0)  <--  (x << 160) - (x << 128) - (x << 32)
+define(<FOLD>, <
+	mov	$1, F2
+	mov	$1, F3
+	shl	<$>32, F2	C F2 = low limb of x*2^32
+	shr	<$>32, F3	C F3 = high limb of x*2^32
+	xor	F0, F0
+	xor	F1, F1
+	sub	F2, F0		C (F1,F0) = -(x*2^32) mod 2^128
+	sbb	F3, F1
+	sbb	$1, F2		C (F3,F2) = x*2^32 - x - borrow from low half
+	sbb	<$>0, F3
+>)
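+
+C A sketch of why FOLD is the right quantity: p = 2^256 - 2^224 + 2^192 + 2^96 - 1,
+C so p is congruent to -1 mod 2^64 and the quotient limb of a 64-bit REDC step is
+C just the low limb x itself.  Adding x*p cancels that limb, and after the shift
+C right by 64 bits the step amounts to adding x*(2^192 - 2^160 + 2^128 + 2^32).
+C FOLD(x) is the negated low three terms, x*(2^160 - 2^128 - 2^32), which is
+C subtracted below; the remaining x*2^192 is the word added in later.
+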
+PROLOGUE(nettle_ecc_256_redc)
+	W64_ENTRY(2, 0)
+	C Save the callee-saved registers we use (%rbx, %rbp, %r12)
+	push	%rbx
+	push	%rbp
+	push	%r12
+
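+	C Four REDC steps, one per low limb.  Each step subtracts FOLD of the
+	C current low limb from the next three limbs; the word belonging at the
+	C 2^192 position of the shifted value is kept in a register (U0, U1,
+	C U2, U3 in turn) and added to the high limbs at the end.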
+	mov	(RP), U0
+	FOLD(U0)
+	mov	8(RP), U1
+	mov	16(RP), U2
+	mov	24(RP), U3
+	sub	F0, U1
+	sbb	F1, U2
+	sbb	F2, U3
+	sbb	F3, U0		C Add in later
+
+	FOLD(U1)
+	mov	32(RP), U4
+	sub	F0, U2
+	sbb	F1, U3
+	sbb	F2, U4
+	sbb	F3, U1
+
+	FOLD(U2)
+	mov	40(RP), U5
+	sub	F0, U3
+	sbb	F1, U4
+	sbb	F2, U5
+	sbb	F3, U2
+
+	FOLD(U3)
+	mov	48(RP), U6
+	sub	F0, U4
+	sbb	F1, U5
+	sbb	F2, U6
+	sbb	F3, U3
+
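+	C Add the four deferred words to the (partially folded) high limbs
+	C u4, u5, u6 and the untouched top limb u7.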
+	add	U4, U0
+	adc	U5, U1
+	adc	U6, U2
+	adc	56(RP), U3
+
+	C If carry, we need to add in
+	C 2^256 - p = <0xfffffffe, 0xff..ff, 0xffffffff00000000, 1>
+	C (limbs listed most significant first)
+	sbb	F2, F2			C F2 = all ones if carry, else 0
+	mov	F2, F0
+	mov	F2, F1
+	mov	XREG(F2), XREG(F3)	C 32-bit move clears high half of F3
+	neg	F0			C F0 = 1 or 0
+	shl	$32, F1			C F1 = 0xffffffff00000000 or 0
+	and	$-2, XREG(F3)		C F3 = 0xfffffffe or 0
+
+	add	F0, U0
+	mov	U0, (RP)
+	adc	F1, U1
+	mov	U1, 8(RP)
+	adc	F2, U2
+	mov	U2, 16(RP)
+	adc	F3, U3
+
+	mov	U3, 24(RP)
+
+	pop	%r12
+	pop	%rbp
+	pop	%rbx
+	W64_EXIT(2, 0)
+	ret
+EPILOGUE(nettle_ecc_256_redc)