From b2aa4017760c9bf9d267470ac235650e24b8adc5 Mon Sep 17 00:00:00 2001
From: Yunqing Wang <yunqingwang@google.com>
Date: Fri, 11 Mar 2011 08:24:23 -0500
Subject: [PATCH] Align SAD output array to be 16-byte aligned

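Declare sad_array8 in vp8_full_search_sadx8() with
DECLARE_ALIGNED_ARRAY(16, ...) so that it is 16-byte aligned, and
switch the result stores in the SSE4 SAD x8 functions from movdqu
(unaligned) to movdqa (aligned). movdqa requires a 16-byte aligned
memory operand, so the two changes must go in together.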

Change-Id: Icab4c0c53da811d0c52bb7e8134927f249ba2499
---
 vp8/encoder/mcomp.c          |  2 +-
 vp8/encoder/x86/sad_sse4.asm | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
index 33aaa2ca93..c210c1de2b 100644
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -1415,7 +1415,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
     int col_min = ref_col - distance;
     int col_max = ref_col + distance;
 
-    unsigned short sad_array8[8];
+    DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8);
     unsigned int sad_array[3];
 
     // Work out the mid point for the search
diff --git a/vp8/encoder/x86/sad_sse4.asm b/vp8/encoder/x86/sad_sse4.asm
index 21e2e50072..03ecec4b34 100644
--- a/vp8/encoder/x86/sad_sse4.asm
+++ b/vp8/encoder/x86/sad_sse4.asm
@@ -186,7 +186,7 @@ sym(vp8_sad16x16x8_sse4):
         PROCESS_16X2X8 0
 
         mov             rdi,        arg(4)           ;Results
-        movdqu          XMMWORD PTR [rdi],    xmm1
+        movdqa          XMMWORD PTR [rdi],    xmm1
 
     ; begin epilog
     pop         rdi
@@ -224,7 +224,7 @@ sym(vp8_sad16x8x8_sse4):
         PROCESS_16X2X8 0
 
         mov             rdi,        arg(4)           ;Results
-        movdqu          XMMWORD PTR [rdi],    xmm1
+        movdqa          XMMWORD PTR [rdi],    xmm1
 
     ; begin epilog
     pop         rdi
@@ -262,7 +262,7 @@ sym(vp8_sad8x8x8_sse4):
         PROCESS_8X2X8 0
 
         mov             rdi,        arg(4)           ;Results
-        movdqu          XMMWORD PTR [rdi],    xmm1
+        movdqa          XMMWORD PTR [rdi],    xmm1
 
     ; begin epilog
     pop         rdi
@@ -303,7 +303,7 @@ sym(vp8_sad8x16x8_sse4):
         PROCESS_8X2X8 0
         PROCESS_8X2X8 0
         mov             rdi,        arg(4)           ;Results
-        movdqu          XMMWORD PTR [rdi],    xmm1
+        movdqa          XMMWORD PTR [rdi],    xmm1
 
     ; begin epilog
     pop         rdi
@@ -339,7 +339,7 @@ sym(vp8_sad4x4x8_sse4):
         PROCESS_4X2X8 0
 
         mov             rdi,        arg(4)           ;Results
-        movdqu          XMMWORD PTR [rdi],    xmm1
+        movdqa          XMMWORD PTR [rdi],    xmm1
 
     ; begin epilog
     pop         rdi
-- 
GitLab