Commit 34224fa0 authored by Niels Möller

Merge branch 'x86_64-sha_ni-sha256'

parents be5b2eda c02493e5
2018-03-14 Niels Möller <nisse@lysator.liu.se>
Merge sha256 code using the x86_64 sha_ni instructions, starting
2018-02-21.
2018-03-11 Niels Möller <nisse@lysator.liu.se>
* x86_64/fat/sha256-compress.asm: New file.
* x86_64/fat/sha256-compress-2.asm: New file.
* fat-x86_64.c (fat_init): Select plain x86_64 assembly version or
sha_ni version for sha256_compress.
2018-02-21 Niels Möller <nisse@lysator.liu.se>
* x86_64/sha_ni/sha256-compress.asm: New implementation using sha_ni
instructions.
2018-02-20 Niels Möller <nisse@lysator.liu.se>
* testsuite/cmac-test.c (test_cmac_hash): Deallocate ctx properly.
......
......@@ -127,6 +127,10 @@ DECLARE_FAT_FUNC(_nettle_sha1_compress, sha1_compress_func)
DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, x86_64)
DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, sha_ni)
/* sha256 compression: the fat function itself, followed by its two
   candidate implementations (x86_64 and sha_ni). fat_init stores one
   of the two candidates in _nettle_sha256_compress_vec. */
DECLARE_FAT_FUNC(_nettle_sha256_compress, sha256_compress_func)
DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, x86_64)
DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, sha_ni)
/* This function should usually be called only once, at startup. But
it is idempotent, and on x86, pointer updates are atomic, so
there's no danger if it is called simultaneously from multiple
......@@ -172,12 +176,14 @@ fat_init (void)
if (verbose)
fprintf (stderr, "libnettle: using sha_ni instructions.\n");
_nettle_sha1_compress_vec = _nettle_sha1_compress_sha_ni;
_nettle_sha256_compress_vec = _nettle_sha256_compress_sha_ni;
}
else
{
if (verbose)
fprintf (stderr, "libnettle: not using sha_ni instructions.\n");
_nettle_sha1_compress_vec = _nettle_sha1_compress_x86_64;
_nettle_sha256_compress_vec = _nettle_sha256_compress_x86_64;
}
if (features.vendor == X86_INTEL)
{
......@@ -214,3 +220,7 @@ DEFINE_FAT_FUNC(nettle_memxor, void *,
DEFINE_FAT_FUNC(_nettle_sha1_compress, void,
(uint32_t *state, const uint8_t *input),
(state, input))
/* Fat entry point for _nettle_sha256_compress, taking the state, one
   input block and the table of round constants.
   NOTE(review): DEFINE_FAT_FUNC is not visible here; presumably it
   forwards (state, input, k) through _nettle_sha256_compress_vec, the
   pointer assigned in fat_init -- confirm. */
DEFINE_FAT_FUNC(_nettle_sha256_compress, void,
(uint32_t *state, const uint8_t *input, const uint32_t *k),
(state, input, k))
C x86_64/fat/sha256-compress-2.asm
ifelse(<
Copyright (C) 2018 Niels Möller
This file is part of GNU Nettle.
GNU Nettle is free software: you can redistribute it and/or
modify it under the terms of either:
* the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your
option) any later version.
or
* the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your
option) any later version.
or both in parallel, as here.
GNU Nettle is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
>)
C Fat-build wrapper for the sha_ni implementation. fat_transform turns
C a name into name_sha_ni, and the actual code comes from the included
C file.
C NOTE(review): the place where fat_transform is applied to the symbol
C name is not visible in this file -- presumably the PROLOGUE and
C EPILOGUE macros; confirm.
define(<fat_transform>, <$1_sha_ni>)
include_src(<x86_64/sha_ni/sha256-compress.asm>)
C x86_64/fat/sha256-compress.asm
ifelse(<
Copyright (C) 2018 Niels Möller
This file is part of GNU Nettle.
GNU Nettle is free software: you can redistribute it and/or
modify it under the terms of either:
* the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your
option) any later version.
or
* the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your
option) any later version.
or both in parallel, as here.
GNU Nettle is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
>)
C Fat-build wrapper for the plain x86_64 implementation. fat_transform
C turns a name into name_x86_64, and the actual code comes from the
C included file.
C NOTE(review): the place where fat_transform is applied to the symbol
C name is not visible in this file -- presumably the PROLOGUE and
C EPILOGUE macros; confirm.
define(<fat_transform>, <$1_x86_64>)
include_src(<x86_64/sha256-compress.asm>)
C x86_64/sha_ni/sha256-compress.asm
ifelse(<
Copyright (C) 2018 Niels Möller
This file is part of GNU Nettle.
GNU Nettle is free software: you can redistribute it and/or
modify it under the terms of either:
* the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your
option) any later version.
or
* the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your
option) any later version.
or both in parallel, as here.
GNU Nettle is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
>)
.file "sha256-compress.asm"
C Pointer arguments: state, input and k, in that order.
define(<STATE>, <%rdi>)
define(<INPUT>, <%rsi>)
define(<K>, <%rdx>)

C Message words with the round constants added. This register is the
C implicit operand of sha256rnds2.
define(<MSGK>, <%xmm0>)

C The four vectors of the message schedule.
define(<MSG0>, <%xmm1>)
define(<MSG1>, <%xmm2>)
define(<MSG2>, <%xmm3>)
define(<MSG3>, <%xmm4>)

C Working state, and the copy of the initial state that is added back
C at the end.
define(<ABEF>, <%xmm5>)
define(<CDGH>, <%xmm6>)
define(<ABEF_ORIG>, <%xmm7>)
define(<CDGH_ORIG>, <%xmm8>)

C TMP overlaps SWAP_MASK: both names refer to the same register.
define(<SWAP_MASK>, <%xmm9>)
define(<TMP>, <%xmm9>)
C QROUND(M0, M1, M2, M3, R)
C
C Four rounds starting at round R, combined with one step of the
C message schedule.
C   M0  message words for these rounds; only read
C   M1  gets TMP added, then is updated by sha256msg2 with M0 as source
C   M2  not referenced by the macro body
C   M3  low operand of the palignr, then updated by sha256msg1 with M0
C       as source
C   R   round number; constants are loaded from byte offset 4*R of K
C       with movdqa
C Overwrites MSGK and TMP. ABEF and CDGH are updated by the two
C sha256rnds2 instructions.
define(<QROUND>, <
movdqa eval($5*4)(K), MSGK
paddd $1, MSGK
sha256rnds2 ABEF, CDGH
pshufd <$>0xe, MSGK, MSGK
sha256rnds2 CDGH, ABEF
movdqa $1, TMP
palignr <$>4, $4, TMP
paddd TMP, $2
sha256msg2 $1, $2
sha256msg1 $1, $4
>)
C FIXME: Do something more clever, taking the pshufd into account.
C TRANSPOSE(ABCD, EFGH, scratch) --> untouched, ABEF, CDGH
C
C The first argument is only read. The second argument ends up with
C the upper 64-bit halves of the second and first arguments, and the
C third argument with the lower 64-bit halves of the second and first.
C In both results the half taken from the first argument is placed in
C the upper 64 bits. The previous contents of the third argument are
C discarded.
define(<TRANSPOSE>, <
movdqa $2, $3
punpckhqdq $1, $2
punpcklqdq $1, $3
>)
C void
C _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k)
C
C Runs 64 rounds over one 64-byte block read from input, using the
C round constants at k, and adds the outcome to the eight words at
C state.
C
C state and input are accessed with movups, so they need no particular
C alignment. k is read with movdqa and therefore has to be 16-byte
C aligned.
C
C Uses xmm0 through xmm9.
C NOTE(review): W64_ENTRY and W64_EXIT are defined elsewhere; presumably
C they adapt the windows calling convention for 3 arguments and 10 xmm
C registers -- confirm.
.text
ALIGN(16)
C pshufb mask that reverses the byte order within each 32-bit word.
.Lswap_mask:
.byte 3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12
PROLOGUE(_nettle_sha256_compress)
W64_ENTRY(3, 10)
C Load the eight state words and rearrange them into the ABEF and CDGH
C registers. TMP can be used here because SWAP_MASK, which shares its
C register, is not loaded until after the TRANSPOSE.
movups (STATE), TMP
movups 16(STATE), ABEF
pshufd $0x1b, TMP, TMP
pshufd $0x1b, ABEF, ABEF
TRANSPOSE(TMP, ABEF, CDGH)
movdqa .Lswap_mask(%rip), SWAP_MASK
C Keep the initial state for the addition after the last round.
movdqa ABEF, ABEF_ORIG
movdqa CDGH, CDGH_ORIG
C Each sha256rnds2 leaves the new ABEF words in its destination, so the
C two state registers trade contents after the first instruction of a
C pair and are back in place after the second. The pshufd in between
C moves the upper two words of MSGK down for the second instruction.
movups (INPUT), MSG0
pshufb SWAP_MASK, MSG0
movdqa (K), MSGK
paddd MSG0, MSGK
sha256rnds2 ABEF, CDGH C Round 0-1
pshufd $0xe, MSGK, MSGK
sha256rnds2 CDGH, ABEF C Round 2-3
movups 16(INPUT), MSG1
pshufb SWAP_MASK, MSG1
movdqa 16(K), MSGK
paddd MSG1, MSGK
sha256rnds2 ABEF, CDGH C Round 4-5
pshufd $0xe, MSGK, MSGK
sha256rnds2 CDGH, ABEF C Round 6-7
sha256msg1 MSG1, MSG0
movups 32(INPUT), MSG2
pshufb SWAP_MASK, MSG2
movdqa 32(K), MSGK
paddd MSG2, MSGK
sha256rnds2 ABEF, CDGH C Round 8-9
pshufd $0xe, MSGK, MSGK
sha256rnds2 CDGH, ABEF C Round 10-11
sha256msg1 MSG2, MSG1
movups 48(INPUT), MSG3
C Last use of SWAP_MASK. From here on its register is used as TMP by
C QROUND and by the code for rounds 52-59.
pshufb SWAP_MASK, MSG3
QROUND(MSG3, MSG0, MSG1, MSG2, 12) C Round 12-15
QROUND(MSG0, MSG1, MSG2, MSG3, 16) C Round 16-19
QROUND(MSG1, MSG2, MSG3, MSG0, 20) C Round 20-23
QROUND(MSG2, MSG3, MSG0, MSG1, 24) C Round 24-27
QROUND(MSG3, MSG0, MSG1, MSG2, 28) C Round 28-31
QROUND(MSG0, MSG1, MSG2, MSG3, 32) C Round 32-35
QROUND(MSG1, MSG2, MSG3, MSG0, 36) C Round 36-39
QROUND(MSG2, MSG3, MSG0, MSG1, 40) C Round 40-43
QROUND(MSG3, MSG0, MSG1, MSG2, 44) C Round 44-47
QROUND(MSG0, MSG1, MSG2, MSG3, 48) C Round 48-51
C Rounds 52-59 follow the QROUND pattern but without the sha256msg1
C step.
movdqa 208(K), MSGK
paddd MSG1, MSGK
sha256rnds2 ABEF, CDGH C Round 52-53
pshufd $0xe, MSGK, MSGK
sha256rnds2 CDGH, ABEF C Round 54-55
movdqa MSG1, TMP
palignr $4, MSG0, TMP
paddd TMP, MSG2
sha256msg2 MSG1, MSG2
movdqa 224(K), MSGK
paddd MSG2, MSGK
sha256rnds2 ABEF, CDGH C Round 56-57
pshufd $0xe, MSGK, MSGK
sha256rnds2 CDGH, ABEF C Round 58-59
movdqa MSG2, TMP
palignr $4, MSG1, TMP
paddd TMP, MSG3
sha256msg2 MSG2, MSG3
C Final four rounds, with no message schedule update.
movdqa 240(K), MSGK
paddd MSG3, MSGK
sha256rnds2 ABEF, CDGH C Round 60-61
pshufd $0xe, MSGK, MSGK
sha256rnds2 CDGH, ABEF C Round 62-63
C Add the saved initial state, undo the register layout set up on
C entry, and store the eight words back to state.
paddd ABEF_ORIG, ABEF
paddd CDGH_ORIG, CDGH
TRANSPOSE(ABEF, CDGH, TMP)
pshufd $0x1b, CDGH, CDGH
pshufd $0x1b, TMP, TMP
movups CDGH, 0(STATE)
movups TMP, 16(STATE)
W64_EXIT(3, 10)
ret
EPILOGUE(_nettle_sha256_compress)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment