diff --git a/ChangeLog b/ChangeLog
index 7a163fe7591a9caafdfe1e6555feee16df2e6c41..a4dd7b2b7aefc0b577b459f0213c5a565b444c20 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,7 @@
 2013-04-12  Niels Möller  <nisse@lysator.liu.se>
 
-	* armv7/umac-nh.asm: New file. 2.1 time speedup.
+	* armv7/umac-nh.asm: New file. 2.4 times speedup.
+
 	* armv7/machine.m4 (D0REG, D1REG): New macros.
 
 	* configure.ac (asm_replace_list): Added umac-nh.asm and
diff --git a/armv7/umac-nh.asm b/armv7/umac-nh.asm
index 7c0a0290bcd2b4e2fbcaf4b01ea84f4a93f8d888..87cb86d0b1128e9be06ff6faa2a0dcda3fb550c0 100644
--- a/armv7/umac-nh.asm
+++ b/armv7/umac-nh.asm
@@ -30,7 +30,7 @@ define(<QB>, <q1>)
 define(<DM>, <d16>)
 define(<QLEFT>, <q9>)
 define(<QRIGHT>, <q10>)
-define(<QACC>, <q11>)
+define(<QY>, <q11>)
 define(<QT0>, <q12>)
 define(<QT1>, <q13>)
 define(<QK0>, <q14>)
@@ -59,7 +59,7 @@ PROLOGUE(_nettle_umac_nh)
 	vmov.i32 D0REG(QLEFT)[0], SHIFT
 	vmov.32	 D1REG(QLEFT), D0REG(QLEFT)
 
-	vmov.i64 QACC, #0
+	vmov.i64 QY, #0
 
 	vshl.u64 DM, DM, D0REG(QRIGHT)
 .Loop:
@@ -78,14 +78,12 @@ PROLOGUE(_nettle_umac_nh)
 	vld1.i32 {QK0, QK1}, [KEY]!
 	vadd.i32 QA, QA, QK0
 	vadd.i32 QB, QB, QK1
-	vmull.u32 QT0, D0REG(QA), D0REG(QB)
-	vmull.u32 QT1, D1REG(QA), D1REG(QB)
 	subs	LENGTH, LENGTH, #32
-	vadd.i64 QACC, QACC, QT0
-	vadd.i64 QACC, QACC, QT1
+	vmlal.u32 QY, D0REG(QA), D0REG(QB)
+	vmlal.u32 QY, D1REG(QA), D1REG(QB)
 	bhi	.Loop
 
-	vadd.i64 D0REG(QACC), D0REG(QACC), D1REG(QACC)
-	vmov	r0, r1, D0REG(QACC)
+	vadd.i64 D0REG(QY), D0REG(QY), D1REG(QY)
+	vmov	r0, r1, D0REG(QY)
 	bx	lr
 EPILOGUE(_nettle_umac_nh)