Commit edf2b37f authored by Niels Möller

Fixes for w64 ABI.

parent 1851417e
2014-06-01 Niels Möller <nisse@lysator.liu.se>
* x86_64/gcm-hash8.asm: Pass correct argument count to W64_EXIT.
* x86_64/camellia-crypt-internal.asm: Pass correct argument count
to W64_ENTRY and W64_EXIT.
* x86_64/machine.m4 [W64_ABI]: Fix for the case of 6 function
arguments. Also push %rdi unconditionally, and use aligned
accesses for save and restore %xmm registers (movdqa).
2014-05-31 Niels Möller <nisse@lysator.liu.se>
* configure.ac: Check for COFF type directives.
@@ -4,6 +4,8 @@ Up to 6 integer and pointer arguments are passed in registers. Nine
 registers, %rax, %rcx, %rdx, %rsi, %rdi and %r8-%r11 can be used
 freely. Integers and pointers are returned in %rax.
+At entry, it is required that %rsp == 8 (mod 16).
 	Registers	May be		Argument
 			clobbered	number
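The %rsp == 8 (mod 16) requirement at entry is a consequence of the call
instruction pushing an 8-byte return address onto a 16-byte aligned stack.
It is also what the reworked W64_ENTRY below relies on: one additional
8-byte push restores 16-byte alignment, which makes aligned movdqa saves
of the %xmm registers valid. A minimal sketch, illustration only:

	C At entry: %rsp == 8 (mod 16), since the caller's stack was
	C 16-byte aligned before call pushed the return address.
	push	%rdi		C %rsp == 0 (mod 16)
	sub	$32, %rsp	C still 16-byte aligned
	movdqa	%xmm6, (%rsp)	C aligned 16-byte store is safe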
@@ -51,14 +53,19 @@ Additional arguments are passed on the stack. "backing store" on the
 stack for the four register arguments is also required. %xmm6 to
 %xmm15 are callee-saved. The "long" type is just 32 bits.

-If we have five arguments, and push the additional callee-save
+If we have six arguments, and push the additional callee-save
 registers %rdi and %rsi on the stack, we get a stack frame like

+	64(%rsp): Sixth argument
 	56(%rsp): Fifth argument
 	48(%rsp): Space for fourth argument
 	40(%rsp): Space for third argument
 	32(%rsp): Space for second argument
 	24(%rsp): Space for first argument
 	16(%rsp): Return address
-	 8(%rsp): Saved %rsi
-	  (%rsp): Saved %rdi
+	 8(%rsp): Saved %rdi
+	  (%rsp): Saved %rsi
+
+If, in addition, we use more than 6 %xmm registers, we push them
+*after* %rdi (but before %rsi), so that they are stored at 16-byte
+aligned addresses.
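As a worked example (an illustration assuming six arguments and
xmm_used = 8, i.e. %xmm6 and %xmm7 saved), the frame set up by the new
W64_ENTRY looks like

	96(%rsp): Sixth argument
	88(%rsp): Fifth argument
	80(%rsp): Space for fourth argument
	72(%rsp): Space for third argument
	64(%rsp): Space for second argument
	56(%rsp): Space for first argument
	48(%rsp): Return address
	40(%rsp): Saved %rdi
	24(%rsp): Saved %xmm7
	 8(%rsp): Saved %xmm6
	  (%rsp): Saved %rsi

The %xmm slots end up at offsets 8 and 24 relative to the final %rsp only
because %rsi is pushed after the movdqa stores; their absolute addresses
stay 16-byte aligned. The offsets 88 and 96 are what the expressions
16*(xmm_used - 6) + 56 and + 64 in machine.m4 evaluate to.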
...@@ -138,7 +138,7 @@ C xorl XREG(TMP), XREG($1) ...@@ -138,7 +138,7 @@ C xorl XREG(TMP), XREG($1)
ALIGN(16) ALIGN(16)
PROLOGUE(_nettle_camellia_crypt) PROLOGUE(_nettle_camellia_crypt)
W64_ENTRY(5, 0) W64_ENTRY(6, 0)
test LENGTH, LENGTH test LENGTH, LENGTH
jz .Lend jz .Lend
...@@ -197,6 +197,6 @@ PROLOGUE(_nettle_camellia_crypt) ...@@ -197,6 +197,6 @@ PROLOGUE(_nettle_camellia_crypt)
pop %rbp pop %rbp
pop %rbx pop %rbx
.Lend: .Lend:
W64_EXIT(5, 0) W64_EXIT(6, 0)
ret ret
EPILOGUE(_nettle_camellia_crypt) EPILOGUE(_nettle_camellia_crypt)
...@@ -162,7 +162,7 @@ ALIGN(16) ...@@ -162,7 +162,7 @@ ALIGN(16)
pop %r12 pop %r12
pop %rbp pop %rbp
pop %rbx pop %rbx
W64_EXIT(2, 0) W64_EXIT(4, 0)
ret ret
.Lpartial: .Lpartial:
@@ -67,44 +67,48 @@ define(<XREG>,<ifelse(
 dnl W64_ENTRY(nargs, xmm_used)
 define(<W64_ENTRY>, <
   changequote([,])dnl
-  ifelse(<<<<<<<<<<<<<<<< ignored; only for balancing)
+  ifelse(<<<<<<<<<<<<<<<<<< ignored; only for balancing)
   ifelse(W64_ABI,yes,[
+    dnl unconditionally push %rdi, making %rsp 16-byte aligned
+    push	%rdi
+    dnl Save %xmm6, ..., if needed
     ifelse(eval($2 > 6), 1, [
-      sub	[$]eval(8 + 16*($2 - 6)), %rsp
-      movdqu	%xmm6, 0(%rsp)
+      sub	[$]eval(16*($2 - 6)), %rsp
+      movdqa	%xmm6, 0(%rsp)
     ])
     ifelse(eval($2 > 7), 1, [
-      movdqu	%xmm7, 16(%rsp)
+      movdqa	%xmm7, 16(%rsp)
     ])
     ifelse(eval($2 > 8), 1, [
-      movdqu	%xmm8, 32(%rsp)
+      movdqa	%xmm8, 32(%rsp)
     ])
     ifelse(eval($2 > 9), 1, [
-      movdqu	%xmm9, 48(%rsp)
+      movdqa	%xmm9, 48(%rsp)
     ])
     ifelse(eval($2 > 10), 1, [
-      movdqu	%xmm10, 64(%rsp)
+      movdqa	%xmm10, 64(%rsp)
     ])
     ifelse(eval($2 > 11), 1, [
-      movdqu	%xmm11, 80(%rsp)
+      movdqa	%xmm11, 80(%rsp)
     ])
     ifelse(eval($2 > 12), 1, [
-      movdqu	%xmm12, 96(%rsp)
+      movdqa	%xmm12, 96(%rsp)
     ])
     ifelse(eval($2 > 13), 1, [
-      movdqu	%xmm13, 112(%rsp)
+      movdqa	%xmm13, 112(%rsp)
     ])
     ifelse(eval($2 > 14), 1, [
-      movdqu	%xmm14, 128(%rsp)
+      movdqa	%xmm14, 128(%rsp)
     ])
     ifelse(eval($2 > 15), 1, [
-      movdqu	%xmm15, 144(%rsp)
+      movdqa	%xmm15, 144(%rsp)
     ])
+    dnl Move around arguments
     ifelse(eval($1 >= 1), 1, [
-      push	%rdi
       mov	%rcx, %rdi
     ])
     ifelse(eval($1 >= 2), 1, [
+      dnl NOTE: Breaks 16-byte %rsp alignment
       push	%rsi
       mov	%rdx, %rsi
     ])

@@ -115,11 +119,10 @@ define(<W64_ENTRY>, <
       mov	%r9, %rcx
     ])
     ifelse(eval($1 >= 5), 1, [
-      ifelse(eval($2 > 6), 1, [
-        mov	eval(8 + 16*($2 - 6) + 56)(%rsp), %r8
-      ], [
-        mov	56(%rsp), %r8
-      ])
+      mov	ifelse(eval($2 > 6), 1, eval(16*($2-6)+56),56)(%rsp), %r8
+    ])
+    ifelse(eval($1 >= 6), 1, [
+      mov	ifelse(eval($2 > 6), 1, eval(16*($2-6)+64),64)(%rsp), %r9
     ])
   ])
   changequote(<,>)dnl
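Concretely, the rewritten macro now expands roughly as follows for
W64_ENTRY(6, 8). This is a sketch assembled from the cases above; the
nargs >= 3 case is outside the quoted hunks, so the mov %r8, %rdx line
is an assumption based on the usual W64-to-ELF argument mapping:

	push	%rdi			C unconditional; %rsp now 16-byte aligned
	sub	$32, %rsp		C room for %xmm6 and %xmm7
	movdqa	%xmm6, 0(%rsp)		C aligned saves
	movdqa	%xmm7, 16(%rsp)
	mov	%rcx, %rdi		C argument 1
	push	%rsi			C breaks 16-byte alignment again
	mov	%rdx, %rsi		C argument 2
	mov	%r8, %rdx		C argument 3 (assumed, see above)
	mov	%r9, %rcx		C argument 4
	mov	88(%rsp), %r8		C argument 5: 16*(8-6) + 56
	mov	96(%rsp), %r9		C argument 6: 16*(8-6) + 64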
@@ -128,45 +131,43 @@ define(<W64_ENTRY>, <
 dnl W64_EXIT(nargs, xmm_used)
 define(<W64_EXIT>, <
   changequote([,])dnl
-  ifelse(<<<<<<<<<<<< ignored; only for balancing)
+  ifelse(<<<<<<<<<<< ignored; only for balancing)
   ifelse(W64_ABI,yes,[
     ifelse(eval($1 >= 2), 1, [
       pop	%rsi
     ])
-    ifelse(eval($1 >= 1), 1, [
-      pop	%rdi
-    ])
     ifelse(eval($2 > 15), 1, [
-      movdqu	144(%rsp), %xmm15
+      movdqa	144(%rsp), %xmm15
     ])
     ifelse(eval($2 > 14), 1, [
-      movdqu	128(%rsp), %xmm14
+      movdqa	128(%rsp), %xmm14
     ])
     ifelse(eval($2 > 13), 1, [
-      movdqu	112(%rsp), %xmm13
+      movdqa	112(%rsp), %xmm13
     ])
     ifelse(eval($2 > 12), 1, [
-      movdqu	96(%rsp), %xmm12
+      movdqa	96(%rsp), %xmm12
     ])
     ifelse(eval($2 > 11), 1, [
-      movdqu	80(%rsp), %xmm11
+      movdqa	80(%rsp), %xmm11
     ])
     ifelse(eval($2 > 10), 1, [
-      movdqu	64(%rsp), %xmm10
+      movdqa	64(%rsp), %xmm10
     ])
     ifelse(eval($2 > 9), 1, [
-      movdqu	48(%rsp), %xmm9
+      movdqa	48(%rsp), %xmm9
     ])
     ifelse(eval($2 > 8), 1, [
-      movdqu	32(%rsp), %xmm8
+      movdqa	32(%rsp), %xmm8
     ])
     ifelse(eval($2 > 7), 1, [
-      movdqu	16(%rsp), %xmm7
+      movdqa	16(%rsp), %xmm7
     ])
     ifelse(eval($2 > 6), 1, [
-      movdqu	0(%rsp), %xmm6
-      add	[$]eval(8 + 16*($2 - 6)), %rsp
+      movdqa	(%rsp), %xmm6
+      add	[$]eval(16*($2 - 6)), %rsp
     ])
+    pop	%rdi
   ])
   changequote(<,>)dnl
 >)
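For the same parameters, W64_EXIT(6, 8) now undoes the entry sequence in
reverse order, roughly (illustration only):

	pop	%rsi			C nargs >= 2
	movdqa	16(%rsp), %xmm7		C aligned restores, high to low
	movdqa	(%rsp), %xmm6
	add	$32, %rsp		C drop the %xmm save area
	pop	%rdi			C unconditional, matching W64_ENTRY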