OSUOSL/Nero are experiencing Internet connectivity problems. This affects us as we're hosted with OSUOSL. We apologize for the inconvenience.

Commit 26a6ce4c authored by Jian Zhou's avatar Jian Zhou

Code clean of highbd_tm_predictor_32x32

Remove the ARCH_X86_64 constraint. No performance hit on both
big core and small core.

Change-Id: I39860b62b7a0ae4acaafdca7d68f3e5820133a81
parent 355bfa21
......@@ -132,7 +132,6 @@ using std::tr1::make_tuple;
#if HAVE_SSE2
#if CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_USE_X86INC
#if ARCH_X86_64
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
::testing::Values(
make_tuple(&vpx_highbd_dc_predictor_32x32_sse2,
......@@ -159,34 +158,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
&vpx_highbd_tm_predictor_4x4_c, 4, 8),
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
&vpx_highbd_tm_predictor_8x8_c, 8, 8)));
#else
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
::testing::Values(
make_tuple(&vpx_highbd_dc_predictor_32x32_sse2,
&vpx_highbd_dc_predictor_32x32_c, 32, 8),
make_tuple(&vpx_highbd_tm_predictor_16x16_sse2,
&vpx_highbd_tm_predictor_16x16_c, 16, 8),
make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
&vpx_highbd_dc_predictor_4x4_c, 4, 8),
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
&vpx_highbd_dc_predictor_8x8_c, 8, 8),
make_tuple(&vpx_highbd_dc_predictor_16x16_sse2,
&vpx_highbd_dc_predictor_16x16_c, 16, 8),
make_tuple(&vpx_highbd_v_predictor_4x4_sse2,
&vpx_highbd_v_predictor_4x4_c, 4, 8),
make_tuple(&vpx_highbd_v_predictor_8x8_sse2,
&vpx_highbd_v_predictor_8x8_c, 8, 8),
make_tuple(&vpx_highbd_v_predictor_16x16_sse2,
&vpx_highbd_v_predictor_16x16_c, 16, 8),
make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
&vpx_highbd_v_predictor_32x32_c, 32, 8),
make_tuple(&vpx_highbd_tm_predictor_4x4_sse2,
&vpx_highbd_tm_predictor_4x4_c, 4, 8),
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
&vpx_highbd_tm_predictor_8x8_c, 8, 8)));
#endif // !ARCH_X86_64
#if ARCH_X86_64
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
::testing::Values(
make_tuple(&vpx_highbd_dc_predictor_32x32_sse2,
......@@ -219,37 +191,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
&vpx_highbd_tm_predictor_4x4_c, 4, 10),
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
&vpx_highbd_tm_predictor_8x8_c, 8, 10)));
#else
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
::testing::Values(
make_tuple(&vpx_highbd_dc_predictor_32x32_sse2,
&vpx_highbd_dc_predictor_32x32_c, 32,
10),
make_tuple(&vpx_highbd_tm_predictor_16x16_sse2,
&vpx_highbd_tm_predictor_16x16_c, 16,
10),
make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
&vpx_highbd_dc_predictor_4x4_c, 4, 10),
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
&vpx_highbd_dc_predictor_8x8_c, 8, 10),
make_tuple(&vpx_highbd_dc_predictor_16x16_sse2,
&vpx_highbd_dc_predictor_16x16_c, 16,
10),
make_tuple(&vpx_highbd_v_predictor_4x4_sse2,
&vpx_highbd_v_predictor_4x4_c, 4, 10),
make_tuple(&vpx_highbd_v_predictor_8x8_sse2,
&vpx_highbd_v_predictor_8x8_c, 8, 10),
make_tuple(&vpx_highbd_v_predictor_16x16_sse2,
&vpx_highbd_v_predictor_16x16_c, 16, 10),
make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
&vpx_highbd_v_predictor_32x32_c, 32, 10),
make_tuple(&vpx_highbd_tm_predictor_4x4_sse2,
&vpx_highbd_tm_predictor_4x4_c, 4, 10),
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
&vpx_highbd_tm_predictor_8x8_c, 8, 10)));
#endif // !ARCH_X86_64
#if ARCH_X86_64
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
::testing::Values(
make_tuple(&vpx_highbd_dc_predictor_32x32_sse2,
......@@ -282,35 +224,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
&vpx_highbd_tm_predictor_4x4_c, 4, 12),
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
&vpx_highbd_tm_predictor_8x8_c, 8, 12)));
#else
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
::testing::Values(
make_tuple(&vpx_highbd_dc_predictor_32x32_sse2,
&vpx_highbd_dc_predictor_32x32_c, 32,
12),
make_tuple(&vpx_highbd_tm_predictor_16x16_sse2,
&vpx_highbd_tm_predictor_16x16_c, 16,
12),
make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
&vpx_highbd_dc_predictor_4x4_c, 4, 12),
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
&vpx_highbd_dc_predictor_8x8_c, 8, 12),
make_tuple(&vpx_highbd_dc_predictor_16x16_sse2,
&vpx_highbd_dc_predictor_16x16_c, 16,
12),
make_tuple(&vpx_highbd_v_predictor_4x4_sse2,
&vpx_highbd_v_predictor_4x4_c, 4, 12),
make_tuple(&vpx_highbd_v_predictor_8x8_sse2,
&vpx_highbd_v_predictor_8x8_c, 8, 12),
make_tuple(&vpx_highbd_v_predictor_16x16_sse2,
&vpx_highbd_v_predictor_16x16_c, 16, 12),
make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
&vpx_highbd_v_predictor_32x32_c, 32, 12),
make_tuple(&vpx_highbd_tm_predictor_4x4_sse2,
&vpx_highbd_tm_predictor_4x4_c, 4, 12),
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
&vpx_highbd_tm_predictor_8x8_c, 8, 12)));
#endif // !ARCH_X86_64
#endif // CONFIG_USE_X86INC
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_SSE2
......
......@@ -435,7 +435,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_highbd_v_predictor_32x32/, "$sse2_x86inc";
add_proto qw/void vpx_highbd_tm_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_tm_predictor_32x32/, "$sse2_x86_64_x86inc";
specialize qw/vpx_highbd_tm_predictor_32x32/, "$sse2_x86inc";
add_proto qw/void vpx_highbd_dc_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_dc_predictor_32x32/, "$sse2_x86inc";
......
......@@ -385,9 +385,8 @@ cglobal highbd_tm_predictor_16x16, 5, 5, 8, dst, stride, above, left, bps
jnz .loop
REP_RET
%if ARCH_X86_64
INIT_XMM sse2
cglobal highbd_tm_predictor_32x32, 5, 6, 12, dst, stride, above, left, bps, one
cglobal highbd_tm_predictor_32x32, 5, 5, 8, dst, stride, above, left, bps
movd m0, [aboveq-2]
mova m1, [aboveq]
mova m2, [aboveq+16]
......@@ -395,70 +394,60 @@ cglobal highbd_tm_predictor_32x32, 5, 6, 12, dst, stride, above, left, bps, one
mova m4, [aboveq+48]
pshuflw m0, m0, 0x0
; Get the values to compute the maximum value at this bit depth
mov oned, 1
pxor m10, m10
pxor m11, m11
pinsrw m10, oned, 0
pinsrw m11, bpsd, 0
pshuflw m10, m10, 0x0
pcmpeqw m5, m5
movd m6, bpsd
psllw m5, m6
pcmpeqw m7, m7
pxor m6, m6 ; min possible value
pxor m5, m7 ; max possible value
punpcklqdq m0, m0
DEFINE_ARGS dst, stride, line, left
punpcklqdq m10, m10
mov lineq, -16
mova m5, m10
punpcklqdq m0, m0
psllw m10, m11
add leftq, 64
psubw m10, m5 ; max possible value
pxor m11, m11 ; min possible value
psubw m1, m0
psubw m2, m0
psubw m3, m0
psubw m4, m0
.loop:
movd m5, [leftq+lineq*4]
movd m6, [leftq+lineq*4+2]
pshuflw m5, m5, 0x0
pshuflw m6, m6, 0x0
punpcklqdq m5, m5
punpcklqdq m6, m6
paddw m7, m5, m1
paddw m8, m5, m2
paddw m9, m5, m3
paddw m5, m4
;Clamp these values to the bit-depth
pminsw m7, m10
pminsw m8, m10
pminsw m9, m10
pminsw m5, m10
pmaxsw m7, m11
pmaxsw m8, m11
pmaxsw m9, m11
pmaxsw m5, m11
;Store these values
mova [dstq ], m7
mova [dstq +16], m8
mova [dstq +32], m9
mova [dstq +48], m5
paddw m7, m6, m1
paddw m8, m6, m2
paddw m9, m6, m3
paddw m6, m4
;Clamp these values to the bit-depth
pminsw m7, m10
pminsw m8, m10
pminsw m9, m10
pminsw m6, m10
pmaxsw m7, m11
pmaxsw m8, m11
pmaxsw m9, m11
pmaxsw m6, m11
;Store these values
mova [dstq+strideq*2 ], m7
mova [dstq+strideq*2+16], m8
mova [dstq+strideq*2+32], m9
mova [dstq+strideq*2+48], m6
movd m7, [leftq]
pshuflw m7, m7, 0x0
punpcklqdq m7, m7 ; l1 l1 l1 l1 l1 l1 l1 l1
paddw m0, m7, m1
pminsw m0, m5
pmaxsw m0, m6
mova [dstq ], m0
paddw m0, m7, m2
pminsw m0, m5
pmaxsw m0, m6
mova [dstq +16], m0
paddw m0, m7, m3
pminsw m0, m5
pmaxsw m0, m6
mova [dstq +32], m0
paddw m0, m7, m4
pminsw m0, m5
pmaxsw m0, m6
mova [dstq +48], m0
movd m7, [leftq+2]
pshuflw m7, m7, 0x0
punpcklqdq m7, m7 ; l2 l2 l2 l2 l2 l2 l2 l2
paddw m0, m7, m1
pminsw m0, m5
pmaxsw m0, m6
mova [dstq+strideq*2 ], m0
paddw m0, m7, m2
pminsw m0, m5
pmaxsw m0, m6
mova [dstq+strideq*2+16], m0
paddw m0, m7, m3
pminsw m0, m5
pmaxsw m0, m6
mova [dstq+strideq*2+32], m0
paddw m0, m7, m4
pminsw m0, m5
pmaxsw m0, m6
mova [dstq+strideq*2+48], m0
lea dstq, [dstq+strideq*4]
lea leftq, [leftq+4]
inc lineq
jnz .loop
REP_RET
%endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment