Commit c329ff85 authored by Martin Storsjö, committed by Niels Möller

Do the movd/movq workaround for the osx assembler, for sha3-permute

This is the same workaround as done in f58d1c28 for salsa20-crypt.
parent 8a4ae7c1
2013-03-25 Niels Möller <nisse@lysator.liu.se>
From Martin Storsjö:
* x86_64/sha3-permute.asm: Workaround for Apple's assembler; write
movq instructions as movd.
* Makefile.in (hogweed_PURE_OBJS): Don't include OPT_ASM_SOURCES
twice.
......
......@@ -72,6 +72,10 @@ define(<STATE>, <OFFSET($1)(CTX)>)
define(<SWAP64>, <pshufd <$>0x4e,>)
C movq calls that are equal to the corresponding movd,
C where the Apple assembler requires them to be written as movd.
define(<MOVQ>, <movd>)
C ROTL64(rot, register, temp)
C Caller needs to or together the result.
define(<ROTL64>, <
......@@ -147,12 +151,12 @@ PROLOGUE(nettle_sha3_permute)
SWAP64 C34, C34 C Holds C4, C3
movdqa C12, D34
movq C0, D12
MOVQ C0, D12
punpcklqdq C12, D12 C Holds C0, C1
punpckhqdq C34, D34 C Holds C2, C3
punpcklqdq D12, C34 C Holds C4, C0
movq C34, D0
movq C12, T0
MOVQ C34, D0
MOVQ C12, T0
rolq $1, T0
xorq T0, D0
......@@ -236,8 +240,8 @@ PROLOGUE(nettle_sha3_permute)
C `-_________-^`-^
rolq $36, A05
movq A05, W0
movq A0607, A05
MOVQ A05, W0
MOVQ A0607, A05
rolq $44, A05 C Done A05
ROTL64(6, A0607, W1)
por A0607, W1
......@@ -260,8 +264,8 @@ PROLOGUE(nettle_sha3_permute)
rolq $42, A10 C 42 + 25 = 3 (mod 64)
SWAP64 A1112, W0
movq A10, A1112
movq W0, A10
MOVQ A10, A1112
MOVQ W0, A10
rolq $43, A10 C Done A10
punpcklqdq A1314, A1112
......@@ -285,8 +289,8 @@ PROLOGUE(nettle_sha3_permute)
SWAP64 A1819, W0
rolq $41, A15
movq A15, W1
movq A1819, A15
MOVQ A15, W1
MOVQ A1819, A15
rolq $21, A15 C Done A15
SWAP64 A1617, A1819
ROTL64(45, A1617, W2)
......@@ -308,7 +312,7 @@ PROLOGUE(nettle_sha3_permute)
C \_______/
rolq $18, A20
movq A20, W0
MOVQ A20, W0
SWAP64 A2324, W1
movd W1, A20
rolq $14, A20 C Done A20
......@@ -386,21 +390,21 @@ PROLOGUE(nettle_sha3_permute)
C Swap (A05, A10) <-> A0102, and (A15, A20) <-> A0304,
C and also copy to C12 and C34 while at it.
movq A05, C12
movq A15, C34
movq A10, W0
movq A20, W1
MOVQ A05, C12
MOVQ A15, C34
MOVQ A10, W0
MOVQ A20, W1
movq A00, C0
punpcklqdq W0, C12
punpcklqdq W1, C34
movq A0102, A05
movq A0304, A15
MOVQ A0102, A05
MOVQ A0304, A15
psrldq $8, A0102
psrldq $8, A0304
xorq A05, C0
xorq A15, C0
movq A0102, A10
movq A0304, A20
MOVQ A0102, A10
MOVQ A0304, A20
movdqa C12, A0102
movdqa C34, A0304
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment