Commit c329ff85 authored by Martin Storsjö, committed by Niels Möller
Browse files

Do the movd/movq workaround for the osx assembler, for sha3-permute

This is the same workaround as done in f58d1c28 for salsa20-crypt.
parent 8a4ae7c1
2013-03-25 Niels Möller <nisse@lysator.liu.se> 2013-03-25 Niels Möller <nisse@lysator.liu.se>
From Martin Storsjö:
* x86_64/sha3-permute.asm: Workaround for Apple's assembler; write
movq instructions as movd.
* Makefile.in (hogweed_PURE_OBJS): Don't include OPT_ASM_SOURCES * Makefile.in (hogweed_PURE_OBJS): Don't include OPT_ASM_SOURCES
twice. twice.
......
...@@ -72,6 +72,10 @@ define(<STATE>, <OFFSET($1)(CTX)>) ...@@ -72,6 +72,10 @@ define(<STATE>, <OFFSET($1)(CTX)>)
define(<SWAP64>, <pshufd <$>0x4e,>) define(<SWAP64>, <pshufd <$>0x4e,>)
C MOVQ stands in for movq instructions that are equivalent to the
C corresponding movd; Apple's assembler requires them to be written as movd.
define(<MOVQ>, <movd>)
C ROTL64(rot, register, temp) C ROTL64(rot, register, temp)
C Caller needs to or together the result. C Caller needs to or together the result.
define(<ROTL64>, < define(<ROTL64>, <
...@@ -147,12 +151,12 @@ PROLOGUE(nettle_sha3_permute) ...@@ -147,12 +151,12 @@ PROLOGUE(nettle_sha3_permute)
SWAP64 C34, C34 C Holds C4, C3 SWAP64 C34, C34 C Holds C4, C3
movdqa C12, D34 movdqa C12, D34
movq C0, D12 MOVQ C0, D12
punpcklqdq C12, D12 C Holds C0, C1 punpcklqdq C12, D12 C Holds C0, C1
punpckhqdq C34, D34 C Holds C2, C3 punpckhqdq C34, D34 C Holds C2, C3
punpcklqdq D12, C34 C Holds C4, C0 punpcklqdq D12, C34 C Holds C4, C0
movq C34, D0 MOVQ C34, D0
movq C12, T0 MOVQ C12, T0
rolq $1, T0 rolq $1, T0
xorq T0, D0 xorq T0, D0
...@@ -236,8 +240,8 @@ PROLOGUE(nettle_sha3_permute) ...@@ -236,8 +240,8 @@ PROLOGUE(nettle_sha3_permute)
C `-_________-^`-^ C `-_________-^`-^
rolq $36, A05 rolq $36, A05
movq A05, W0 MOVQ A05, W0
movq A0607, A05 MOVQ A0607, A05
rolq $44, A05 C Done A05 rolq $44, A05 C Done A05
ROTL64(6, A0607, W1) ROTL64(6, A0607, W1)
por A0607, W1 por A0607, W1
...@@ -260,8 +264,8 @@ PROLOGUE(nettle_sha3_permute) ...@@ -260,8 +264,8 @@ PROLOGUE(nettle_sha3_permute)
rolq $42, A10 C 42 + 25 = 3 (mod 64) rolq $42, A10 C 42 + 25 = 3 (mod 64)
SWAP64 A1112, W0 SWAP64 A1112, W0
movq A10, A1112 MOVQ A10, A1112
movq W0, A10 MOVQ W0, A10
rolq $43, A10 C Done A10 rolq $43, A10 C Done A10
punpcklqdq A1314, A1112 punpcklqdq A1314, A1112
...@@ -285,8 +289,8 @@ PROLOGUE(nettle_sha3_permute) ...@@ -285,8 +289,8 @@ PROLOGUE(nettle_sha3_permute)
SWAP64 A1819, W0 SWAP64 A1819, W0
rolq $41, A15 rolq $41, A15
movq A15, W1 MOVQ A15, W1
movq A1819, A15 MOVQ A1819, A15
rolq $21, A15 C Done A15 rolq $21, A15 C Done A15
SWAP64 A1617, A1819 SWAP64 A1617, A1819
ROTL64(45, A1617, W2) ROTL64(45, A1617, W2)
...@@ -308,7 +312,7 @@ PROLOGUE(nettle_sha3_permute) ...@@ -308,7 +312,7 @@ PROLOGUE(nettle_sha3_permute)
C \_______/ C \_______/
rolq $18, A20 rolq $18, A20
movq A20, W0 MOVQ A20, W0
SWAP64 A2324, W1 SWAP64 A2324, W1
movd W1, A20 movd W1, A20
rolq $14, A20 C Done A20 rolq $14, A20 C Done A20
...@@ -386,21 +390,21 @@ PROLOGUE(nettle_sha3_permute) ...@@ -386,21 +390,21 @@ PROLOGUE(nettle_sha3_permute)
C Swap (A05, A10) <-> A0102, and (A15, A20) <-> A0304, C Swap (A05, A10) <-> A0102, and (A15, A20) <-> A0304,
C and also copy to C12 and C34 while at it. C and also copy to C12 and C34 while at it.
movq A05, C12 MOVQ A05, C12
movq A15, C34 MOVQ A15, C34
movq A10, W0 MOVQ A10, W0
movq A20, W1 MOVQ A20, W1
movq A00, C0 movq A00, C0
punpcklqdq W0, C12 punpcklqdq W0, C12
punpcklqdq W1, C34 punpcklqdq W1, C34
movq A0102, A05 MOVQ A0102, A05
movq A0304, A15 MOVQ A0304, A15
psrldq $8, A0102 psrldq $8, A0102
psrldq $8, A0304 psrldq $8, A0304
xorq A05, C0 xorq A05, C0
xorq A15, C0 xorq A15, C0
movq A0102, A10 MOVQ A0102, A10
movq A0304, A20 MOVQ A0304, A20
movdqa C12, A0102 movdqa C12, A0102
movdqa C34, A0304 movdqa C34, A0304
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment