diff --git a/ChangeLog b/ChangeLog
index bf86b0bbcf0d1fe242c7c3329bb7baa945a60c7b..456920a1471891547be062569a973247e66f02ae 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2012-07-05 Niels Möller <nisse@lysator.liu.se>
+
+	* x86_64/salsa20-crypt.asm (salsa20_crypt): Write the 64-bit movq
+	instructions as "movd", since that makes the osx assembler
+	happier. Assembles to the same machine code on gnu/linux.
+
 2012-07-03 Niels Möller <nisse@lysator.liu.se>
 
 	* aclocal.m4 (LSH_FUNC_ALLOCA): In the config.h boilerplate,
diff --git a/x86_64/salsa20-crypt.asm b/x86_64/salsa20-crypt.asm
index 799d574446c5011b8d254699c6ef187d3089c363..b6212247af4fe53779205fc4e9ee5450f27a18b5 100644
--- a/x86_64/salsa20-crypt.asm
+++ b/x86_64/salsa20-crypt.asm
@@ -237,13 +237,19 @@ PROLOGUE(nettle_salsa20_crypt)
 	and $-16, POS
 	test $8, LENGTH
 	jz .Llt8
-	movq T0, T64
+	C This "movd" instruction should assemble to
+	C 66 49 0f 7e e0 movq %xmm4,%r8
+	C Apparently, assemblers treat movd and movq (with the
+	C arguments we use) in the same way, except for osx, which
+	C barfs at movq.
+	movd T0, T64
 	xor (SRC, POS), T64
 	mov T64, (DST, POS)
 	lea 8(POS), POS
 	pshufd $0xee, T0, T0 C 10 11 10 11
 .Llt8:
-	movq T0, T64
+	C And this is also really a movq.
+	movd T0, T64
 	test $4, LENGTH
 	jz .Llt4
 	mov XREG(T64), XREG(COUNT)