Commit b158d9a6 authored by Jian Zhou

Code clean of sad4xN(_avg)_sse

Replace MMX with SSE2 and reduce the number of psadbw ops, which may help Silvermont.

Change-Id: Ic7aec15245c9e5b2f3903dc7631f38e60be7c93d
parent eace551c
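The psadbw saving described in the commit message comes from the wider registers: four 4-byte rows are packed into a single 128-bit XMM register (punpckldq, then movlhps), so one psadbw per four rows replaces the two needed by the old 8-byte MMX-register path, and a final movhlps/paddd folds the two 64-bit partial sums. The sketch below restates that scheme with SSE2 intrinsics; it is illustrative only, assuming plain 8-bit input and a height that is a multiple of 4 (the function and helper names are not libvpx API).

```cpp
#include <emmintrin.h>  // SSE2 intrinsics
#include <stdint.h>
#include <string.h>

// Illustrative helper: load one 4-byte row into the low dword of an XMM reg.
static inline __m128i load_row4(const uint8_t *p) {
  int32_t v;
  memcpy(&v, p, sizeof(v));
  return _mm_cvtsi32_si128(v);
}

// Sketch of a 4xN SAD in the style of this commit: four rows per iteration,
// one psadbw per iteration (not the libvpx implementation).
static unsigned int sad4xn_sse2_sketch(const uint8_t *src, int src_stride,
                                       const uint8_t *ref, int ref_stride,
                                       int height) {
  __m128i sum = _mm_setzero_si128();
  for (int i = 0; i < height; i += 4) {
    // punpckldq: pair up rows 0+1 and rows 2+3 (8 bytes each).
    __m128i s01 = _mm_unpacklo_epi32(load_row4(src),
                                     load_row4(src + src_stride));
    __m128i s23 = _mm_unpacklo_epi32(load_row4(src + 2 * src_stride),
                                     load_row4(src + 3 * src_stride));
    __m128i r01 = _mm_unpacklo_epi32(load_row4(ref),
                                     load_row4(ref + ref_stride));
    __m128i r23 = _mm_unpacklo_epi32(load_row4(ref + 2 * ref_stride),
                                     load_row4(ref + 3 * ref_stride));
    // movlhps: all four rows of each block now sit in one 16-byte register.
    __m128i s = _mm_unpacklo_epi64(s01, s23);
    __m128i r = _mm_unpacklo_epi64(r01, r23);
    // A single psadbw covers the four rows, leaving two 64-bit partial sums.
    sum = _mm_add_epi32(sum, _mm_sad_epu8(s, r));
    src += 4 * src_stride;
    ref += 4 * ref_stride;
  }
  // Fold the high partial sum into the low one (movhlps + paddd in the asm).
  sum = _mm_add_epi32(sum, _mm_srli_si128(sum, 8));
  return (unsigned int)_mm_cvtsi128_si32(sum);
}
```

The _avg variants follow the same pattern, except that the packed reference rows are averaged against 16 bytes of the second predictor with one pavgb instead of two 8-byte pavgb ops, which is why the new code advances second_predq by a single XMM width per iteration.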
@@ -702,18 +702,6 @@ INSTANTIATE_TEST_CASE_P(MMX, SADTest, ::testing::ValuesIn(mmx_tests));
#if HAVE_SSE
#if CONFIG_USE_X86INC
const SadMxNParam sse_tests[] = {
make_tuple(4, 8, &vpx_sad4x8_sse, -1),
make_tuple(4, 4, &vpx_sad4x4_sse, -1),
};
INSTANTIATE_TEST_CASE_P(SSE, SADTest, ::testing::ValuesIn(sse_tests));
const SadMxNAvgParam avg_sse_tests[] = {
make_tuple(4, 8, &vpx_sad4x8_avg_sse, -1),
make_tuple(4, 4, &vpx_sad4x4_avg_sse, -1),
};
INSTANTIATE_TEST_CASE_P(SSE, SADavgTest, ::testing::ValuesIn(avg_sse_tests));
const SadMxNx4Param x4d_sse_tests[] = {
make_tuple(4, 8, &vpx_sad4x8x4d_sse, -1),
make_tuple(4, 4, &vpx_sad4x4x4d_sse, -1),
@@ -736,6 +724,8 @@ const SadMxNParam sse2_tests[] = {
make_tuple(8, 16, &vpx_sad8x16_sse2, -1),
make_tuple(8, 8, &vpx_sad8x8_sse2, -1),
make_tuple(8, 4, &vpx_sad8x4_sse2, -1),
make_tuple(4, 8, &vpx_sad4x8_sse2, -1),
make_tuple(4, 4, &vpx_sad4x4_sse2, -1),
#if CONFIG_VP9_HIGHBITDEPTH
make_tuple(64, 64, &vpx_highbd_sad64x64_sse2, 8),
make_tuple(64, 32, &vpx_highbd_sad64x32_sse2, 8),
@@ -786,6 +776,8 @@ const SadMxNAvgParam avg_sse2_tests[] = {
make_tuple(8, 16, &vpx_sad8x16_avg_sse2, -1),
make_tuple(8, 8, &vpx_sad8x8_avg_sse2, -1),
make_tuple(8, 4, &vpx_sad8x4_avg_sse2, -1),
make_tuple(4, 8, &vpx_sad4x8_avg_sse2, -1),
make_tuple(4, 4, &vpx_sad4x4_avg_sse2, -1),
#if CONFIG_VP9_HIGHBITDEPTH
make_tuple(64, 64, &vpx_highbd_sad64x64_avg_sse2, 8),
make_tuple(64, 32, &vpx_highbd_sad64x32_avg_sse2, 8),
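For context, these arrays feed GoogleTest value-parameterized suites; each make_tuple entry appears to be (width, height, SAD function, bit depth), with -1 selecting the plain 8-bit path and 8/10/12 used by the CONFIG_VP9_HIGHBITDEPTH entries. The change therefore keeps the 4x8/4x4 coverage, just under the _sse2 names in the sse2 arrays rather than in the removed sse arrays. A rough sketch of the parameter shape (approximate, not the exact sad_test.cc declarations, which use gtest's tuple type):

```cpp
#include <stdint.h>
#include <tuple>

// Approximate shape of the test parameters used above.
typedef unsigned int (*SadMxNFunc)(const uint8_t *src_ptr, int src_stride,
                                   const uint8_t *ref_ptr, int ref_stride);
// (width, height, SAD function, bit depth; -1 = 8-bit)
typedef std::tuple<int, int, SadMxNFunc, int> SadMxNParam;
```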
@@ -990,10 +990,10 @@ add_proto qw/unsigned int vpx_sad8x4/, "const uint8_t *src_ptr, int src_stride,
specialize qw/vpx_sad8x4 msa/, "$sse2_x86inc";
add_proto qw/unsigned int vpx_sad4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
specialize qw/vpx_sad4x8 msa/, "$sse_x86inc";
specialize qw/vpx_sad4x8 msa/, "$sse2_x86inc";
add_proto qw/unsigned int vpx_sad4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
specialize qw/vpx_sad4x4 mmx neon msa/, "$sse_x86inc";
specialize qw/vpx_sad4x4 mmx neon msa/, "$sse2_x86inc";
#
# Avg
@@ -1061,10 +1061,10 @@ add_proto qw/unsigned int vpx_sad8x4_avg/, "const uint8_t *src_ptr, int src_stri
specialize qw/vpx_sad8x4_avg msa/, "$sse2_x86inc";
add_proto qw/unsigned int vpx_sad4x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vpx_sad4x8_avg msa/, "$sse_x86inc";
specialize qw/vpx_sad4x8_avg msa/, "$sse2_x86inc";
add_proto qw/unsigned int vpx_sad4x4_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vpx_sad4x4_avg msa/, "$sse_x86inc";
specialize qw/vpx_sad4x4_avg msa/, "$sse2_x86inc";
#
# Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
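These `specialize` lines drive libvpx's run-time CPU dispatch generator; swapping "$sse_x86inc" for "$sse2_x86inc" makes the generated vpx_dsp_rtcd header bind the 4x8/4x4 SAD entry points to the new _sse2 functions on x86 builds that use the assembly. A minimal sketch of what that amounts to at the C level, assuming the usual function-pointer dispatch (the stand-in bodies and setup_rtcd_sketch name are illustrative, not the generated code):

```cpp
#include <stdint.h>

// Stand-ins for the real libvpx kernels, only so the sketch is self-contained.
static unsigned int vpx_sad4x8_c(const uint8_t *, int, const uint8_t *, int) {
  return 0;
}
static unsigned int vpx_sad4x8_sse2(const uint8_t *, int, const uint8_t *, int) {
  return 0;
}

// The codec calls SAD through a pointer that defaults to the portable C version.
static unsigned int (*vpx_sad4x8)(const uint8_t *src_ptr, int src_stride,
                                  const uint8_t *ref_ptr, int ref_stride) =
    vpx_sad4x8_c;

// Illustrative runtime setup: after this commit the x86 specialization
// selected here is the SSE2 kernel rather than the old _sse one.
static void setup_rtcd_sketch(int cpu_has_sse2) {
  if (cpu_has_sse2) vpx_sad4x8 = vpx_sad4x8_sse2;
}
```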
@@ -222,8 +222,8 @@ SAD8XN 16, 1 ; sad8x16_avg_sse2
SAD8XN 8, 1 ; sad8x8_avg_sse2
SAD8XN 4, 1 ; sad8x4_avg_sse2
; unsigned int vpx_sad4x{4, 8}_sse(uint8_t *src, int src_stride,
; uint8_t *ref, int ref_stride);
; unsigned int vpx_sad4x{4, 8}_sse2(uint8_t *src, int src_stride,
; uint8_t *ref, int ref_stride);
%macro SAD4XN 1-2 0
SAD_FN 4, %1, 7, %2
mov n_rowsd, %1/4
@@ -236,10 +236,10 @@ SAD8XN 4, 1 ; sad8x4_avg_sse2
movd m4, [refq+ref_stride3q]
punpckldq m1, m2
punpckldq m3, m4
movlhps m1, m3
%if %2 == 1
pavgb m1, [second_predq+mmsize*0]
pavgb m3, [second_predq+mmsize*1]
lea second_predq, [second_predq+mmsize*2]
lea second_predq, [second_predq+mmsize*1]
%endif
movd m2, [srcq]
movd m5, [srcq+src_strideq]
@@ -247,20 +247,21 @@ SAD8XN 4, 1 ; sad8x4_avg_sse2
movd m6, [srcq+src_stride3q]
punpckldq m2, m5
punpckldq m4, m6
movlhps m2, m4
psadbw m1, m2
psadbw m3, m4
lea refq, [refq+ref_strideq*4]
paddd m0, m1
lea srcq, [srcq+src_strideq*4]
paddd m0, m3
dec n_rowsd
jg .loop
movhlps m1, m0
paddd m0, m1
movd eax, m0
RET
%endmacro
INIT_MMX sse
INIT_XMM sse2
SAD4XN 8 ; sad4x8_sse
SAD4XN 4 ; sad4x4_sse
SAD4XN 8, 1 ; sad4x8_avg_sse