diff --git a/ChangeLog b/ChangeLog
index d96a59db247cf07e29f4545f0f49b6ee5e6e1847..7298cf3053be5b92ca35f973f978812463d428bf 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,8 @@
 2014-01-27  Niels Möller  <nisse@lysator.liu.se>
 
+	* x86_64/chacha-core-internal.asm: Use pshufhw + pshuflw for the
+	16-bit rotate.
+
 	* configure.ac (asm_replace_list): Added chacha-core-internal.asm.
 
 	* x86_64/chacha-core-internal.asm: New file.
diff --git a/x86_64/chacha-core-internal.asm b/x86_64/chacha-core-internal.asm
index 744e0ddfd513d0f014d59a66c392e68cd8efeb9f..15e457183dfd98c493edcbaaa096515b1fc6b96c 100644
--- a/x86_64/chacha-core-internal.asm
+++ b/x86_64/chacha-core-internal.asm
@@ -26,15 +26,27 @@ define(<X2>, <%xmm2>)
 define(<X3>, <%xmm3>)
 define(<T0>, <%xmm4>)
 define(<T1>, <%xmm5>)
-
+
+define(<USE_PSHUFW>, <yes>)
+
+C ROTL_BY_16(REG, TMP): rotate each 32-bit lane of REG left by 16 bits. The shift-based variant expects TMP to already hold a copy of REG and clobbers it; the pshufhw/pshuflw variant swaps adjacent 16-bit words in place and never touches TMP.
+ifelse(USE_PSHUFW, <yes>, <
+define(<ROTL_BY_16>, <
+	pshufhw	<$>0xb1, $1, $1
+	pshuflw	<$>0xb1, $1, $1
+>)>, <
+define(<ROTL_BY_16>, <
+	pslld	<$>16, $1
+	psrld	<$>16, $2
+	por	$2, $1
+>)
+>)
 C QROUND
 define(<QROUND>, <
 	paddd	X1, X0
 	pxor	X0, X3
 	movaps	X3, T0
-	pslld	<$>16, X3
-	psrld	<$>16, T0
-	por	T0, X3
+	ROTL_BY_16(X3, T0)
 
 	paddd	X3, X2
 	pxor	X2, X1