From 05964538d40285e632d848b5f7d1f5a1e481a9d4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Tue, 23 Apr 2013 18:35:53 +0200
Subject: [PATCH] Use movdqu instead of movdqa for saving xmm registers

The stack is not guaranteed to be 16-byte aligned on win64. movdqa
requires a 16-byte aligned memory operand and faults on an unaligned
one, while movdqu performs the same 128-bit move with no alignment
requirement, so use movdqu for the xmm register save and restore
sequences in W64_ENTRY and W64_EXIT.
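
As an illustration (example parameter values, not part of the diff
below): for a function using W64_ENTRY(3, 8), i.e. 3 arguments and 8
xmm registers, the entry sequence now expands to roughly

	sub	$40, %rsp		# 8 + 16*(8 - 6) bytes of spill space
	movdqu	%xmm6, 0(%rsp)		# unaligned store, safe at any %rsp
	movdqu	%xmm7, 16(%rsp)
	push	%rdi
	...

and the matching W64_EXIT(3, 8) undoes it in reverse:

	...
	pop	%rdi
	movdqu	16(%rsp), %xmm7
	movdqu	0(%rsp), %xmm6
	add	$40, %rsp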
---
 ChangeLog         |  3 +++
 x86_64/machine.m4 | 40 ++++++++++++++++++++--------------------
 2 files changed, 23 insertions(+), 20 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 9392a754..0a30fbd4 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,9 @@
 2013-04-23  Niels Möller  <nisse@lysator.liu.se>
 
 	From Martin Storsjö:
+	* x86_64/machine.m4 (W64_ENTRY, W64_EXIT): Use movdqu instead of
+	movdqa for saving and restoring xmm registers, since the stack is
+	not guaranteed to be 16-byte aligned on win64.
 	* Makefile.in: Consistently use EXEEXT_FOR_BUILD.
 
 2013-04-21  Niels Möller  <nisse@lysator.liu.se>
diff --git a/x86_64/machine.m4 b/x86_64/machine.m4
index dc23dde8..d5d5b37c 100644
--- a/x86_64/machine.m4
+++ b/x86_64/machine.m4
@@ -71,34 +71,34 @@ define(<W64_ENTRY>, <
   ifelse(W64_ABI,yes,[
     ifelse(eval($2 > 6), 1, [
       sub	[$]eval(8 + 16*($2 - 6)), %rsp
-      movdqa	%xmm6, 0(%rsp)
+      movdqu	%xmm6, 0(%rsp)
     ])
     ifelse(eval($2 > 7), 1, [
-      movdqa	%xmm7, 16(%rsp)
+      movdqu	%xmm7, 16(%rsp)
     ])
     ifelse(eval($2 > 8), 1, [
-      movdqa	%xmm8, 32(%rsp)
+      movdqu	%xmm8, 32(%rsp)
     ])
     ifelse(eval($2 > 9), 1, [
-      movdqa	%xmm9, 48(%rsp)
+      movdqu	%xmm9, 48(%rsp)
     ])
     ifelse(eval($2 > 10), 1, [
-      movdqa	%xmm10, 64(%rsp)
+      movdqu	%xmm10, 64(%rsp)
     ])
     ifelse(eval($2 > 11), 1, [
-      movdqa	%xmm11, 80(%rsp)
+      movdqu	%xmm11, 80(%rsp)
     ])
     ifelse(eval($2 > 12), 1, [
-      movdqa	%xmm12, 96(%rsp)
+      movdqu	%xmm12, 96(%rsp)
     ])
     ifelse(eval($2 > 13), 1, [
-      movdqa	%xmm13, 112(%rsp)
+      movdqu	%xmm13, 112(%rsp)
     ])
     ifelse(eval($2 > 14), 1, [
-      movdqa	%xmm14, 128(%rsp)
+      movdqu	%xmm14, 128(%rsp)
     ])
     ifelse(eval($2 > 15), 1, [
-      movdqa	%xmm15, 144(%rsp)
+      movdqu	%xmm15, 144(%rsp)
     ])
     ifelse(eval($1 >= 1), 1, [
       push	%rdi
@@ -133,34 +133,34 @@ define(<W64_EXIT>, <
       pop	%rdi
     ])
     ifelse(eval($2 > 15), 1, [
-      movdqa	144(%rsp), %xmm15
+      movdqu	144(%rsp), %xmm15
     ])
     ifelse(eval($2 > 14), 1, [
-      movdqa	128(%rsp), %xmm14
+      movdqu	128(%rsp), %xmm14
     ])
     ifelse(eval($2 > 13), 1, [
-      movdqa	112(%rsp), %xmm13
+      movdqu	112(%rsp), %xmm13
     ])
     ifelse(eval($2 > 12), 1, [
-      movdqa	96(%rsp), %xmm12
+      movdqu	96(%rsp), %xmm12
     ])
     ifelse(eval($2 > 11), 1, [
-      movdqa	80(%rsp), %xmm11
+      movdqu	80(%rsp), %xmm11
     ])
     ifelse(eval($2 > 10), 1, [
-      movdqa	64(%rsp), %xmm10
+      movdqu	64(%rsp), %xmm10
     ])
     ifelse(eval($2 > 9), 1, [
-      movdqa	48(%rsp), %xmm9
+      movdqu	48(%rsp), %xmm9
     ])
     ifelse(eval($2 > 8), 1, [
-      movdqa	32(%rsp), %xmm8
+      movdqu	32(%rsp), %xmm8
     ])
     ifelse(eval($2 > 7), 1, [
-      movdqa	16(%rsp), %xmm7
+      movdqu	16(%rsp), %xmm7
     ])
     ifelse(eval($2 > 6), 1, [
-      movdqa	0(%rsp), %xmm6
+      movdqu	0(%rsp), %xmm6
       add	[$]eval(8 + 16*($2 - 6)), %rsp
     ])
   ])
-- 
GitLab