From f51537c4e7c6e594adcb3d8ea817cbcd3db64bd1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se>
Date: Mon, 27 Jan 2014 19:14:11 +0100
Subject: [PATCH] Use pshufhw + pshuflw for 16-bit rotate.

---
Notes (review):
    This patch adds an m4 switch, USE_PSHUFW (defined here as <yes>), and a
    macro ROTL_BY_16(REG, TMP) whose body is chosen by that switch when the
    file is run through m4. QROUND now invokes the macro instead of spelling
    out the shift/or sequence.

    * USE_PSHUFW = yes: the macro expands to pshufhw followed by pshuflw,
      both with immediate 0xb1 and with REG as source and destination.
      0xb1 is binary 10 11 00 01; in the PSHUFHW/PSHUFLW immediate encoding
      that selects words 1,0,3,2, i.e. it exchanges the two 16-bit halves
      of every 32-bit element, which is the 16-bit rotate named in the
      subject line. TMP is not referenced by this expansion.

    * any other value: the macro expands to pslld $16 on REG, psrld $16 on
      TMP, and por of TMP into REG. The macro does not copy REG into TMP
      itself, so the result is only a rotate when the caller has already
      done so. QROUND does this with the "movaps X3, T0" line directly
      above the invocation.

    NOTE(review): with USE_PSHUFW = yes, the "movaps X3, T0" kept in QROUND
    fills a register that the expansion never reads. It looks removable on
    that path unless code below this hunk (not visible here) uses T0;
    confirm against the full file before touching it.

    NOTE(review): the switch is called USE_PSHUFW, while the instructions
    actually emitted are pshufhw and pshuflw.

 ChangeLog | 3 +++
 x86_64/chacha-core-internal.asm | 20 ++++++++++++++++----
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index d96a59db..7298cf30 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,8 @@
 2014-01-27 Niels Möller <nisse@lysator.liu.se>
 
+	* x86_64/chacha-core-internal.asm: Use pshufhw + pshuflw for the
+	16-bit rotate.
+
 	* configure.ac (asm_replace_list): Added chacha-core-internal.asm.
 
 	* x86_64/chacha-core-internal.asm: New file.
diff --git a/x86_64/chacha-core-internal.asm b/x86_64/chacha-core-internal.asm
index 744e0ddf..15e45718 100644
--- a/x86_64/chacha-core-internal.asm
+++ b/x86_64/chacha-core-internal.asm
@@ -26,15 +26,27 @@ define(<X2>, <%xmm2>)
 define(<X3>, <%xmm3>)
 define(<T0>, <%xmm4>)
 define(<T1>, <%xmm5>)
-
+
+define(<USE_PSHUFW>, <yes>)
+
+C ROTL_BY_16(REG, TMP)
+ifelse(USE_PSHUFW, <yes>, <
+define(<ROTL_BY_16>, <
+	pshufhw <$>0xb1, $1, $1
+	pshuflw <$>0xb1, $1, $1
+>)>, <
+define(<ROTL_BY_16>, <
+	pslld <$>16, $1
+	psrld <$>16, $2
+	por $2, $1
+>)
+>)
 C QROUND
 define(<QROUND>, <
 	paddd X1, X0
 	pxor X0, X3
 	movaps X3, T0
-	pslld <$>16, X3
-	psrld <$>16, T0
-	por T0, X3
+	ROTL_BY_16(X3, T0)
 
 	paddd X3, X2
 	pxor X2, X1
-- 
GitLab