diff --git a/ChangeLog b/ChangeLog
index bf86b0bbcf0d1fe242c7c3329bb7baa945a60c7b..456920a1471891547be062569a973247e66f02ae 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2012-07-05  Niels Möller  <nisse@lysator.liu.se>
+
+	* x86_64/salsa20-crypt.asm (salsa20_crypt): Write the 64-bit movq
+	instructions as "movd", since that makes the osx assembler
+	happier. Assembles to the same machine code on gnu/linux.
+
 2012-07-03  Niels Möller  <nisse@lysator.liu.se>
 
 	* aclocal.m4 (LSH_FUNC_ALLOCA): In the config.h boilerplate,
diff --git a/x86_64/salsa20-crypt.asm b/x86_64/salsa20-crypt.asm
index 799d574446c5011b8d254699c6ef187d3089c363..b6212247af4fe53779205fc4e9ee5450f27a18b5 100644
--- a/x86_64/salsa20-crypt.asm
+++ b/x86_64/salsa20-crypt.asm
@@ -237,13 +237,19 @@ PROLOGUE(nettle_salsa20_crypt)
 	and	$-16, POS
 	test	$8, LENGTH
 	jz	.Llt8
-	movq	T0, T64
+	C This "movd" instruction should assemble to
+	C 66 49 0f 7e e0          movq   %xmm4,%r8
+	C Apparently, assemblers treat movd and movq identically for
+	C the operands we use here; the exception is the OS X
+	C assembler, which rejects movq.
+	movd	T0, T64
 	xor	(SRC, POS), T64
 	mov	T64, (DST, POS)
 	lea	8(POS), POS
 	pshufd	$0xee, T0, T0		C 10 11 10 11
 .Llt8:
-	movq	T0, T64
+	C And this is also really a movq.
+	movd	T0, T64
 	test	$4, LENGTH
 	jz	.Llt4
 	mov	XREG(T64), XREG(COUNT)