Commit 714aa9f3 authored by Jim Bankoski's avatar Jim Bankoski
Browse files

This commit converts all SAD result pointers to uint32_t

The sse4_1 code used uint16_t for returning SADs, but that
won't work for 32x32 or 64x64 blocks. This commit fixes the
assembly accordingly and also re-enables sse4_1 on Linux.

Change-Id: I5ce7288d581db870a148e5f7c5092826f59edd81
parent b715e371
...@@ -997,17 +997,6 @@ process_common_toolchain() { ...@@ -997,17 +997,6 @@ process_common_toolchain() {
#error "not x32" #error "not x32"
#endif #endif
EOF EOF
soft_enable runtime_cpu_detect
soft_enable mmx
soft_enable sse
soft_enable sse2
soft_enable sse3
soft_enable ssse3
if enabled gcc && ! disabled sse4_1 && ! check_cflags -msse4; then
RTCD_OPTIONS="${RTCD_OPTIONS}--disable-sse4_1 "
else
soft_enable sse4_1
fi
case ${tgt_os} in case ${tgt_os} in
win*) win*)
...@@ -1061,6 +1050,18 @@ EOF ...@@ -1061,6 +1050,18 @@ EOF
;; ;;
esac esac
soft_enable runtime_cpu_detect
soft_enable mmx
soft_enable sse
soft_enable sse2
soft_enable sse3
soft_enable ssse3
if enabled gcc && ! disabled sse4_1 && ! check_cflags -msse4; then
RTCD_OPTIONS="${RTCD_OPTIONS}--disable-sse4_1 "
else
soft_enable sse4_1
fi
case "${AS}" in case "${AS}" in
auto|"") auto|"")
which nasm >/dev/null 2>&1 && AS=nasm which nasm >/dev/null 2>&1 && AS=nasm
......
...@@ -449,25 +449,25 @@ specialize vp9_sad8x8x3 sse3 ...@@ -449,25 +449,25 @@ specialize vp9_sad8x8x3 sse3
prototype void vp9_sad4x4x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array" prototype void vp9_sad4x4x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"
specialize vp9_sad4x4x3 sse3 specialize vp9_sad4x4x3 sse3
prototype void vp9_sad64x64x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint16_t *sad_array" prototype void vp9_sad64x64x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
specialize vp9_sad64x64x8 specialize vp9_sad64x64x8
prototype void vp9_sad32x32x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint16_t *sad_array" prototype void vp9_sad32x32x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
specialize vp9_sad32x32x8 specialize vp9_sad32x32x8
prototype void vp9_sad16x16x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint16_t *sad_array" prototype void vp9_sad16x16x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
specialize vp9_sad16x16x8 sse4 specialize vp9_sad16x16x8 sse4
prototype void vp9_sad16x8x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint16_t *sad_array" prototype void vp9_sad16x8x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
specialize vp9_sad16x8x8 sse4 specialize vp9_sad16x8x8 sse4
prototype void vp9_sad8x16x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint16_t *sad_array" prototype void vp9_sad8x16x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
specialize vp9_sad8x16x8 sse4 specialize vp9_sad8x16x8 sse4
prototype void vp9_sad8x8x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint16_t *sad_array" prototype void vp9_sad8x8x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
specialize vp9_sad8x8x8 sse4 specialize vp9_sad8x8x8 sse4
prototype void vp9_sad4x4x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint16_t *sad_array" prototype void vp9_sad4x4x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
specialize vp9_sad4x4x8 sse4 specialize vp9_sad4x4x8 sse4
prototype void vp9_sad64x64x4d "const uint8_t *src_ptr, int src_stride, const uint8_t **ref_ptr, int ref_stride, unsigned int *sad_array" prototype void vp9_sad64x64x4d "const uint8_t *src_ptr, int src_stride, const uint8_t **ref_ptr, int ref_stride, unsigned int *sad_array"
...@@ -490,7 +490,6 @@ specialize vp9_sad8x8x4d sse2 ...@@ -490,7 +490,6 @@ specialize vp9_sad8x8x4d sse2
prototype void vp9_sad4x4x4d "const uint8_t *src_ptr, int src_stride, const uint8_t **ref_ptr, int ref_stride, unsigned int *sad_array" prototype void vp9_sad4x4x4d "const uint8_t *src_ptr, int src_stride, const uint8_t **ref_ptr, int ref_stride, unsigned int *sad_array"
specialize vp9_sad4x4x4d sse specialize vp9_sad4x4x4d sse
prototype unsigned int vp9_sub_pixel_mse16x16 "const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse" prototype unsigned int vp9_sub_pixel_mse16x16 "const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse"
specialize vp9_sub_pixel_mse16x16 sse2 mmx specialize vp9_sub_pixel_mse16x16 sse2 mmx
......
...@@ -1782,7 +1782,7 @@ int vp9_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, ...@@ -1782,7 +1782,7 @@ int vp9_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
int col_min = ref_col - distance; int col_min = ref_col - distance;
int col_max = ref_col + distance; int col_max = ref_col + distance;
DECLARE_ALIGNED_ARRAY(16, uint16_t, sad_array8, 8); DECLARE_ALIGNED_ARRAY(16, uint32_t, sad_array8, 8);
unsigned int sad_array[3]; unsigned int sad_array[3];
int_mv fcenter_mv; int_mv fcenter_mv;
......
This diff is collapsed.
...@@ -29,7 +29,7 @@ typedef void (*vp9_sad_multi1_fn_t)(const uint8_t *src_ptr, ...@@ -29,7 +29,7 @@ typedef void (*vp9_sad_multi1_fn_t)(const uint8_t *src_ptr,
int source_stride, int source_stride,
const uint8_t *ref_ptr, const uint8_t *ref_ptr,
int ref_stride, int ref_stride,
unsigned short *sad_array); unsigned int *sad_array);
typedef void (*vp9_sad_multi_d_fn_t)(const uint8_t *src_ptr, typedef void (*vp9_sad_multi_d_fn_t)(const uint8_t *src_ptr,
int source_stride, int source_stride,
......
...@@ -154,6 +154,16 @@ ...@@ -154,6 +154,16 @@
paddw xmm1, xmm5 paddw xmm1, xmm5
%endmacro %endmacro
; WRITE_AS_INTS: zero-extend the eight 16-bit words in xmm1 to 32 bits each
; and store them as eight dwords (32 bytes) at the pointer loaded from arg(4).
; NOTE(review): xmm1 presumably holds the eight SAD sums accumulated by the
; PROCESS_* macros -- confirm against those macros.
; Clobbers: rdi, xmm0, xmm1, xmm2.
; Both stores use movdqa, so the destination must be 16-byte aligned.
%macro WRITE_AS_INTS 0
mov rdi, arg(4) ;Results (output array pointer)
pxor xmm0, xmm0 ; xmm0 = 0, supplies the zero upper half of each dword
movdqa xmm2, xmm1 ; copy first: the next instruction overwrites xmm1
punpcklwd xmm1, xmm0 ; low four words interleaved with zero -> four dwords
punpckhwd xmm2, xmm0 ; high four words interleaved with zero -> four dwords
movdqa [rdi], xmm1 ; store entries 0..3
movdqa [rdi + 16], xmm2 ; store entries 4..7
%endmacro
;void vp9_sad16x16x8_sse4( ;void vp9_sad16x16x8_sse4(
; const unsigned char *src_ptr, ; const unsigned char *src_ptr,
...@@ -170,23 +180,22 @@ sym(vp9_sad16x16x8_sse4): ...@@ -170,23 +180,22 @@ sym(vp9_sad16x16x8_sse4):
push rdi push rdi
; end prolog ; end prolog
mov rsi, arg(0) ;src_ptr mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;ref_ptr mov rdi, arg(2) ;ref_ptr
movsxd rax, dword ptr arg(1) ;src_stride movsxd rax, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;ref_stride movsxd rdx, dword ptr arg(3) ;ref_stride
PROCESS_16X2X8 1 PROCESS_16X2X8 1
PROCESS_16X2X8 0 PROCESS_16X2X8 0
PROCESS_16X2X8 0 PROCESS_16X2X8 0
PROCESS_16X2X8 0 PROCESS_16X2X8 0
PROCESS_16X2X8 0 PROCESS_16X2X8 0
PROCESS_16X2X8 0 PROCESS_16X2X8 0
PROCESS_16X2X8 0 PROCESS_16X2X8 0
PROCESS_16X2X8 0 PROCESS_16X2X8 0
mov rdi, arg(4) ;Results WRITE_AS_INTS
movdqa XMMWORD PTR [rdi], xmm1
; begin epilog ; begin epilog
pop rdi pop rdi
...@@ -212,19 +221,18 @@ sym(vp9_sad16x8x8_sse4): ...@@ -212,19 +221,18 @@ sym(vp9_sad16x8x8_sse4):
push rdi push rdi
; end prolog ; end prolog
mov rsi, arg(0) ;src_ptr mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;ref_ptr mov rdi, arg(2) ;ref_ptr
movsxd rax, dword ptr arg(1) ;src_stride movsxd rax, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;ref_stride movsxd rdx, dword ptr arg(3) ;ref_stride
PROCESS_16X2X8 1 PROCESS_16X2X8 1
PROCESS_16X2X8 0 PROCESS_16X2X8 0
PROCESS_16X2X8 0 PROCESS_16X2X8 0
PROCESS_16X2X8 0 PROCESS_16X2X8 0
mov rdi, arg(4) ;Results WRITE_AS_INTS
movdqa XMMWORD PTR [rdi], xmm1
; begin epilog ; begin epilog
pop rdi pop rdi
...@@ -250,19 +258,18 @@ sym(vp9_sad8x8x8_sse4): ...@@ -250,19 +258,18 @@ sym(vp9_sad8x8x8_sse4):
push rdi push rdi
; end prolog ; end prolog
mov rsi, arg(0) ;src_ptr mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;ref_ptr mov rdi, arg(2) ;ref_ptr
movsxd rax, dword ptr arg(1) ;src_stride movsxd rax, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;ref_stride movsxd rdx, dword ptr arg(3) ;ref_stride
PROCESS_8X2X8 1 PROCESS_8X2X8 1
PROCESS_8X2X8 0 PROCESS_8X2X8 0
PROCESS_8X2X8 0 PROCESS_8X2X8 0
PROCESS_8X2X8 0 PROCESS_8X2X8 0
mov rdi, arg(4) ;Results WRITE_AS_INTS
movdqa XMMWORD PTR [rdi], xmm1
; begin epilog ; begin epilog
pop rdi pop rdi
...@@ -288,22 +295,22 @@ sym(vp9_sad8x16x8_sse4): ...@@ -288,22 +295,22 @@ sym(vp9_sad8x16x8_sse4):
push rdi push rdi
; end prolog ; end prolog
mov rsi, arg(0) ;src_ptr mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;ref_ptr mov rdi, arg(2) ;ref_ptr
movsxd rax, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;ref_stride
movsxd rax, dword ptr arg(1) ;src_stride PROCESS_8X2X8 1
movsxd rdx, dword ptr arg(3) ;ref_stride PROCESS_8X2X8 0
PROCESS_8X2X8 0
PROCESS_8X2X8 0
PROCESS_8X2X8 0
PROCESS_8X2X8 0
PROCESS_8X2X8 0
PROCESS_8X2X8 0
PROCESS_8X2X8 1 WRITE_AS_INTS
PROCESS_8X2X8 0
PROCESS_8X2X8 0
PROCESS_8X2X8 0
PROCESS_8X2X8 0
PROCESS_8X2X8 0
PROCESS_8X2X8 0
PROCESS_8X2X8 0
mov rdi, arg(4) ;Results
movdqa XMMWORD PTR [rdi], xmm1
; begin epilog ; begin epilog
pop rdi pop rdi
...@@ -329,17 +336,16 @@ sym(vp9_sad4x4x8_sse4): ...@@ -329,17 +336,16 @@ sym(vp9_sad4x4x8_sse4):
push rdi push rdi
; end prolog ; end prolog
mov rsi, arg(0) ;src_ptr mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;ref_ptr mov rdi, arg(2) ;ref_ptr
movsxd rax, dword ptr arg(1) ;src_stride movsxd rax, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;ref_stride movsxd rdx, dword ptr arg(3) ;ref_stride
PROCESS_4X2X8 1 PROCESS_4X2X8 1
PROCESS_4X2X8 0 PROCESS_4X2X8 0
mov rdi, arg(4) ;Results WRITE_AS_INTS
movdqa XMMWORD PTR [rdi], xmm1
; begin epilog ; begin epilog
pop rdi pop rdi
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment