Commit 29a17db9 authored by Parag Salasakar, committed by Gerrit Code Review

Merge "mips msa vpx_dsp sad sad4d avgsad optimization"

parents 440995ca bc3ec8ef
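
For context: the kernels this change vectorizes all reduce to sums of absolute differences between a source block and one or more reference blocks. A scalar sketch of the single-block case, modeled on the generic C path in vpx_dsp (the function name here is illustrative):

#include <stdint.h>
#include <stdlib.h>

/* Scalar reference SAD over a width x height block (illustrative sketch). */
static unsigned int sad_c(const uint8_t *src, int src_stride,
                          const uint8_t *ref, int ref_stride,
                          int width, int height) {
  int x, y;
  unsigned int sad = 0;

  for (y = 0; y < height; ++y) {
    for (x = 0; x < width; ++x) sad += abs(src[x] - ref[x]);
    src += src_stride;
    ref += ref_stride;
  }
  return sad;
}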
@@ -1114,4 +1114,98 @@ const SadMxNx4Param x4d_avx2_tests[] = {
INSTANTIATE_TEST_CASE_P(AVX2, SADx4Test, ::testing::ValuesIn(x4d_avx2_tests));
#endif // HAVE_AVX2
//------------------------------------------------------------------------------
// MIPS functions
#if HAVE_MSA
const SadMxNFunc sad64x64_msa = vpx_sad64x64_msa;
const SadMxNFunc sad64x32_msa = vpx_sad64x32_msa;
const SadMxNFunc sad32x64_msa = vpx_sad32x64_msa;
const SadMxNFunc sad32x32_msa = vpx_sad32x32_msa;
const SadMxNFunc sad32x16_msa = vpx_sad32x16_msa;
const SadMxNFunc sad16x32_msa = vpx_sad16x32_msa;
const SadMxNFunc sad16x16_msa = vpx_sad16x16_msa;
const SadMxNFunc sad16x8_msa = vpx_sad16x8_msa;
const SadMxNFunc sad8x16_msa = vpx_sad8x16_msa;
const SadMxNFunc sad8x8_msa = vpx_sad8x8_msa;
const SadMxNFunc sad8x4_msa = vpx_sad8x4_msa;
const SadMxNFunc sad4x8_msa = vpx_sad4x8_msa;
const SadMxNFunc sad4x4_msa = vpx_sad4x4_msa;
const SadMxNParam msa_tests[] = {
make_tuple(64, 64, sad64x64_msa, -1),
make_tuple(64, 32, sad64x32_msa, -1),
make_tuple(32, 64, sad32x64_msa, -1),
make_tuple(32, 32, sad32x32_msa, -1),
make_tuple(32, 16, sad32x16_msa, -1),
make_tuple(16, 32, sad16x32_msa, -1),
make_tuple(16, 16, sad16x16_msa, -1),
make_tuple(16, 8, sad16x8_msa, -1),
make_tuple(8, 16, sad8x16_msa, -1),
make_tuple(8, 8, sad8x8_msa, -1),
make_tuple(8, 4, sad8x4_msa, -1),
make_tuple(4, 8, sad4x8_msa, -1),
make_tuple(4, 4, sad4x4_msa, -1),
};
INSTANTIATE_TEST_CASE_P(MSA, SADTest, ::testing::ValuesIn(msa_tests));
const SadMxNAvgFunc sad64x64_avg_msa = vpx_sad64x64_avg_msa;
const SadMxNAvgFunc sad64x32_avg_msa = vpx_sad64x32_avg_msa;
const SadMxNAvgFunc sad32x64_avg_msa = vpx_sad32x64_avg_msa;
const SadMxNAvgFunc sad32x32_avg_msa = vpx_sad32x32_avg_msa;
const SadMxNAvgFunc sad32x16_avg_msa = vpx_sad32x16_avg_msa;
const SadMxNAvgFunc sad16x32_avg_msa = vpx_sad16x32_avg_msa;
const SadMxNAvgFunc sad16x16_avg_msa = vpx_sad16x16_avg_msa;
const SadMxNAvgFunc sad16x8_avg_msa = vpx_sad16x8_avg_msa;
const SadMxNAvgFunc sad8x16_avg_msa = vpx_sad8x16_avg_msa;
const SadMxNAvgFunc sad8x8_avg_msa = vpx_sad8x8_avg_msa;
const SadMxNAvgFunc sad8x4_avg_msa = vpx_sad8x4_avg_msa;
const SadMxNAvgFunc sad4x8_avg_msa = vpx_sad4x8_avg_msa;
const SadMxNAvgFunc sad4x4_avg_msa = vpx_sad4x4_avg_msa;
const SadMxNAvgParam avg_msa_tests[] = {
make_tuple(64, 64, sad64x64_avg_msa, -1),
make_tuple(64, 32, sad64x32_avg_msa, -1),
make_tuple(32, 64, sad32x64_avg_msa, -1),
make_tuple(32, 32, sad32x32_avg_msa, -1),
make_tuple(32, 16, sad32x16_avg_msa, -1),
make_tuple(16, 32, sad16x32_avg_msa, -1),
make_tuple(16, 16, sad16x16_avg_msa, -1),
make_tuple(16, 8, sad16x8_avg_msa, -1),
make_tuple(8, 16, sad8x16_avg_msa, -1),
make_tuple(8, 8, sad8x8_avg_msa, -1),
make_tuple(8, 4, sad8x4_avg_msa, -1),
make_tuple(4, 8, sad4x8_avg_msa, -1),
make_tuple(4, 4, sad4x4_avg_msa, -1),
};
INSTANTIATE_TEST_CASE_P(MSA, SADavgTest, ::testing::ValuesIn(avg_msa_tests));
const SadMxNx4Func sad64x64x4d_msa = vpx_sad64x64x4d_msa;
const SadMxNx4Func sad64x32x4d_msa = vpx_sad64x32x4d_msa;
const SadMxNx4Func sad32x64x4d_msa = vpx_sad32x64x4d_msa;
const SadMxNx4Func sad32x32x4d_msa = vpx_sad32x32x4d_msa;
const SadMxNx4Func sad32x16x4d_msa = vpx_sad32x16x4d_msa;
const SadMxNx4Func sad16x32x4d_msa = vpx_sad16x32x4d_msa;
const SadMxNx4Func sad16x16x4d_msa = vpx_sad16x16x4d_msa;
const SadMxNx4Func sad16x8x4d_msa = vpx_sad16x8x4d_msa;
const SadMxNx4Func sad8x16x4d_msa = vpx_sad8x16x4d_msa;
const SadMxNx4Func sad8x8x4d_msa = vpx_sad8x8x4d_msa;
const SadMxNx4Func sad8x4x4d_msa = vpx_sad8x4x4d_msa;
const SadMxNx4Func sad4x8x4d_msa = vpx_sad4x8x4d_msa;
const SadMxNx4Func sad4x4x4d_msa = vpx_sad4x4x4d_msa;
const SadMxNx4Param x4d_msa_tests[] = {
make_tuple(64, 64, sad64x64x4d_msa, -1),
make_tuple(64, 32, sad64x32x4d_msa, -1),
make_tuple(32, 64, sad32x64x4d_msa, -1),
make_tuple(32, 32, sad32x32x4d_msa, -1),
make_tuple(32, 16, sad32x16x4d_msa, -1),
make_tuple(16, 32, sad16x32x4d_msa, -1),
make_tuple(16, 16, sad16x16x4d_msa, -1),
make_tuple(16, 8, sad16x8x4d_msa, -1),
make_tuple(8, 16, sad8x16x4d_msa, -1),
make_tuple(8, 8, sad8x8x4d_msa, -1),
make_tuple(8, 4, sad8x4x4d_msa, -1),
make_tuple(4, 8, sad4x8x4d_msa, -1),
make_tuple(4, 4, sad4x4x4d_msa, -1),
};
INSTANTIATE_TEST_CASE_P(MSA, SADx4Test, ::testing::ValuesIn(x4d_msa_tests));
#endif // HAVE_MSA
} // namespace
@@ -82,12 +82,24 @@
}
#define LD_UB2(...) LD_B2(v16u8, __VA_ARGS__)
#define LD_B3(RTYPE, psrc, stride, out0, out1, out2) { \
LD_B2(RTYPE, (psrc), stride, out0, out1); \
out2 = LD_B(RTYPE, (psrc) + 2 * stride); \
}
#define LD_UB3(...) LD_B3(v16u8, __VA_ARGS__)
#define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) { \
LD_B2(RTYPE, (psrc), stride, out0, out1); \
LD_B2(RTYPE, (psrc) + 2 * stride, stride, out2, out3); \
}
#define LD_UB4(...) LD_B4(v16u8, __VA_ARGS__)
#define LD_B5(RTYPE, psrc, stride, out0, out1, out2, out3, out4) { \
LD_B4(RTYPE, (psrc), stride, out0, out1, out2, out3); \
out4 = LD_B(RTYPE, (psrc) + 4 * stride); \
}
#define LD_UB5(...) LD_B5(v16u8, __VA_ARGS__)
/* Description : Load vectors with 8 halfword elements with stride
   Arguments   : Inputs  - psrc, stride
                 Outputs - out0, out1
@@ -105,6 +117,40 @@
}
#define LD_SH4(...) LD_H4(v8i16, __VA_ARGS__)
/* Description : Average with rounding  (in0 + in1 + 1) / 2
   Arguments   : Inputs  - in0, in1, in2, in3
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Each unsigned byte element from 'in0' is added to the
                 corresponding element from 'in1'; the rounded average is
                 written to 'out0'. 'in2' and 'in3' are averaged into 'out1'
                 in the same way.
*/
#define AVER_UB2(RTYPE, in0, in1, in2, in3, out0, out1) { \
out0 = (RTYPE)__msa_aver_u_b((v16u8)in0, (v16u8)in1); \
out1 = (RTYPE)__msa_aver_u_b((v16u8)in2, (v16u8)in3); \
}
#define AVER_UB2_UB(...) AVER_UB2(v16u8, __VA_ARGS__)
#define AVER_UB4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
out0, out1, out2, out3) { \
AVER_UB2(RTYPE, in0, in1, in2, in3, out0, out1) \
AVER_UB2(RTYPE, in4, in5, in6, in7, out2, out3) \
}
#define AVER_UB4_UB(...) AVER_UB4(v16u8, __VA_ARGS__)
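/* Illustrative usage (a hedged sketch, not code from this commit - the new
   sad_msa.c itself is collapsed further down): the avg-SAD paths can pair
   the averaging macros above with SAD_UB2_UH and HADD_UH_U32, defined below
   in this header, to score a compound prediction for a 16-wide block, four
   rows at a time. Function and variable names are assumed. */
static uint32_t avgsad_16x4_sketch(const uint8_t *src, int32_t src_stride,
                                   const uint8_t *ref, int32_t ref_stride,
                                   const uint8_t *sec_pred) {
  v16u8 src0, src1, src2, src3, ref0, ref1, ref2, ref3;
  v16u8 pred0, pred1, pred2, pred3, comp0, comp1, comp2, comp3;
  v8u16 sad = { 0 };

  LD_UB4(src, src_stride, src0, src1, src2, src3);
  LD_UB4(ref, ref_stride, ref0, ref1, ref2, ref3);
  LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3);  /* packed 16-byte rows */
  /* comp = rounded average of the reference rows and the second predictor */
  AVER_UB4_UB(pred0, ref0, pred1, ref1, pred2, ref2, pred3, ref3,
              comp0, comp1, comp2, comp3);
  sad += SAD_UB2_UH(src0, src1, comp0, comp1);
  sad += SAD_UB2_UH(src2, src3, comp2, comp3);
  return HADD_UH_U32(sad);
}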
/* Description : Immediate number of elements to slide
   Arguments   : Inputs  - in0_0, in0_1, in1_0, in1_1, slide_val
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Byte elements from the 'in0_0' vector are slid into 'in1_0'
                 by the number of positions given in 'slide_val'
*/
#define SLDI_B2(RTYPE, in0_0, in0_1, in1_0, in1_1, out0, out1, slide_val) { \
out0 = (RTYPE)__msa_sldi_b((v16i8)in0_0, (v16i8)in1_0, slide_val); \
out1 = (RTYPE)__msa_sldi_b((v16i8)in0_1, (v16i8)in1_1, slide_val); \
}
#define SLDI_B2_UB(...) SLDI_B2(v16u8, __VA_ARGS__)
/* Description : Dot product & addition of halfword vector elements
   Arguments   : Inputs  - mult0, mult1, cnst0, cnst1
                 Outputs - out0, out1
@@ -155,6 +201,26 @@
sum_m; \
})
/* Description : Horizontal addition of 8 unsigned halfword elements
   Arguments   : Input  - in (unsigned halfword vector)
                 Output - sum_m (u32 sum)
                 Return Type - unsigned word
   Details     : The 8 unsigned halfword elements of the input vector are
                 added together and the resulting 32-bit sum is returned
*/
#define HADD_UH_U32(in) ({ \
v4u32 res_m; \
v2u64 res0_m, res1_m; \
uint32_t sum_m; \
\
res_m = __msa_hadd_u_w((v8u16)in, (v8u16)in); \
res0_m = __msa_hadd_u_d(res_m, res_m); \
res1_m = (v2u64)__msa_splati_d((v2i64)res0_m, 1); \
res0_m = res0_m + res1_m; \
sum_m = __msa_copy_u_w((v4i32)res0_m, 0); \
sum_m; \
})
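/* In scalar terms, HADD_UH_U32 sums the eight 16-bit lanes of its input
   into a single 32-bit value, i.e. roughly (illustrative sketch): */
static uint32_t hadd_uh_u32_scalar(const uint16_t lanes[8]) {
  uint32_t sum = 0;
  int i;

  for (i = 0; i < 8; ++i) sum += lanes[i];
  return sum;
}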
/* Description : Horizontal subtraction of unsigned byte vector elements
   Arguments   : Inputs  - in0, in1
                 Outputs - out0, out1
@@ -169,6 +235,27 @@
}
#define HSUB_UB2_SH(...) HSUB_UB2(v8i16, __VA_ARGS__)
/* Description : SAD (Sum of Absolute Differences)
   Arguments   : Inputs - in0, in1, ref0, ref1
                 Output - sad_m (halfword vector)
                 Return Type - unsigned halfword
   Details     : The absolute differences of the byte elements of 'in0' and
                 'ref0' (and of 'in1' and 'ref1') are computed into 'diff0_m'
                 and 'diff1_m'; adjacent pairs are then added horizontally to
                 accumulate 8 halfword sums in 'sad_m'.
*/
#define SAD_UB2_UH(in0, in1, ref0, ref1) ({ \
v16u8 diff0_m, diff1_m; \
v8u16 sad_m = { 0 }; \
\
diff0_m = __msa_asub_u_b((v16u8)in0, (v16u8)ref0); \
diff1_m = __msa_asub_u_b((v16u8)in1, (v16u8)ref1); \
\
sad_m += __msa_hadd_u_h((v16u8)diff0_m, (v16u8)diff0_m); \
sad_m += __msa_hadd_u_h((v16u8)diff1_m, (v16u8)diff1_m); \
\
sad_m; \
})
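/* Illustrative usage (a hedged sketch with assumed names, since the new
   sad_msa.c is collapsed further down): a 16-wide SAD loop built from
   LD_UB2, SAD_UB2_UH and HADD_UH_U32, two rows per iteration. The v8u16
   accumulator is wide enough for the 16-wide block heights used here. */
static uint32_t sad_16width_sketch(const uint8_t *src, int32_t src_stride,
                                   const uint8_t *ref, int32_t ref_stride,
                                   int32_t height) {
  int32_t cnt;
  v16u8 src0, src1, ref0, ref1;
  v8u16 sad = { 0 };

  for (cnt = height >> 1; cnt--;) {
    LD_UB2(src, src_stride, src0, src1);
    src += 2 * src_stride;
    LD_UB2(ref, ref_stride, ref0, ref1);
    ref += 2 * ref_stride;
    sad += SAD_UB2_UH(src0, src1, ref0, ref1);
  }
  return HADD_UH_U32(sad);
}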
/* Description : Set element n of the input vector to a GPR value
   Arguments   : Inputs - in0, in1, in2, in3
                 Output - out
(Diff collapsed: the new MSA SAD implementation file, vpx_dsp/mips/sad_msa.c, is not shown.)
@@ -28,6 +28,7 @@ DSP_SRCS-$(HAVE_AVX2) += x86/sad4d_avx2.c
DSP_SRCS-$(HAVE_AVX2) += x86/sad_avx2.c
DSP_SRCS-$(HAVE_MSA) += mips/macros_msa.h
+DSP_SRCS-$(HAVE_MSA) += mips/sad_msa.c
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_sad4d_sse2.asm
@@ -40,85 +40,85 @@ if (vpx_config("CONFIG_ENCODERS") eq "yes") {
# Single block SAD
#
add_proto qw/unsigned int vpx_sad64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad64x64 avx2 neon/, "$sse2_x86inc";
+specialize qw/vpx_sad64x64 avx2 neon msa/, "$sse2_x86inc";

add_proto qw/unsigned int vpx_sad64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad64x32 avx2/, "$sse2_x86inc";
+specialize qw/vpx_sad64x32 avx2 msa/, "$sse2_x86inc";

add_proto qw/unsigned int vpx_sad32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad32x64 avx2/, "$sse2_x86inc";
+specialize qw/vpx_sad32x64 avx2 msa/, "$sse2_x86inc";

add_proto qw/unsigned int vpx_sad32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad32x32 avx2 neon/, "$sse2_x86inc";
+specialize qw/vpx_sad32x32 avx2 neon msa/, "$sse2_x86inc";

add_proto qw/unsigned int vpx_sad32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad32x16 avx2/, "$sse2_x86inc";
+specialize qw/vpx_sad32x16 avx2 msa/, "$sse2_x86inc";

add_proto qw/unsigned int vpx_sad16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad16x32/, "$sse2_x86inc";
+specialize qw/vpx_sad16x32 msa/, "$sse2_x86inc";

add_proto qw/unsigned int vpx_sad16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad16x16 mmx media neon/, "$sse2_x86inc";
+specialize qw/vpx_sad16x16 mmx media neon msa/, "$sse2_x86inc";

add_proto qw/unsigned int vpx_sad16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad16x8 mmx neon/, "$sse2_x86inc";
+specialize qw/vpx_sad16x8 mmx neon msa/, "$sse2_x86inc";

add_proto qw/unsigned int vpx_sad8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad8x16 mmx neon/, "$sse2_x86inc";
+specialize qw/vpx_sad8x16 mmx neon msa/, "$sse2_x86inc";

add_proto qw/unsigned int vpx_sad8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad8x8 mmx neon/, "$sse2_x86inc";
+specialize qw/vpx_sad8x8 mmx neon msa/, "$sse2_x86inc";

add_proto qw/unsigned int vpx_sad8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad8x4/, "$sse2_x86inc";
+specialize qw/vpx_sad8x4 msa/, "$sse2_x86inc";

add_proto qw/unsigned int vpx_sad4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad4x8/, "$sse_x86inc";
+specialize qw/vpx_sad4x8 msa/, "$sse_x86inc";

add_proto qw/unsigned int vpx_sad4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad4x4 mmx neon/, "$sse_x86inc";
+specialize qw/vpx_sad4x4 mmx neon msa/, "$sse_x86inc";
#
# Avg
#
add_proto qw/unsigned int vpx_sad64x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad64x64_avg avx2/, "$sse2_x86inc";
+specialize qw/vpx_sad64x64_avg avx2 msa/, "$sse2_x86inc";

add_proto qw/unsigned int vpx_sad64x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad64x32_avg avx2/, "$sse2_x86inc";
+specialize qw/vpx_sad64x32_avg avx2 msa/, "$sse2_x86inc";

add_proto qw/unsigned int vpx_sad32x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad32x64_avg avx2/, "$sse2_x86inc";
+specialize qw/vpx_sad32x64_avg avx2 msa/, "$sse2_x86inc";

add_proto qw/unsigned int vpx_sad32x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad32x32_avg avx2/, "$sse2_x86inc";
+specialize qw/vpx_sad32x32_avg avx2 msa/, "$sse2_x86inc";

add_proto qw/unsigned int vpx_sad32x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad32x16_avg avx2/, "$sse2_x86inc";
+specialize qw/vpx_sad32x16_avg avx2 msa/, "$sse2_x86inc";

add_proto qw/unsigned int vpx_sad16x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad16x32_avg/, "$sse2_x86inc";
+specialize qw/vpx_sad16x32_avg msa/, "$sse2_x86inc";

add_proto qw/unsigned int vpx_sad16x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad16x16_avg/, "$sse2_x86inc";
+specialize qw/vpx_sad16x16_avg msa/, "$sse2_x86inc";

add_proto qw/unsigned int vpx_sad16x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad16x8_avg/, "$sse2_x86inc";
+specialize qw/vpx_sad16x8_avg msa/, "$sse2_x86inc";

add_proto qw/unsigned int vpx_sad8x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad8x16_avg/, "$sse2_x86inc";
+specialize qw/vpx_sad8x16_avg msa/, "$sse2_x86inc";

add_proto qw/unsigned int vpx_sad8x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad8x8_avg/, "$sse2_x86inc";
+specialize qw/vpx_sad8x8_avg msa/, "$sse2_x86inc";

add_proto qw/unsigned int vpx_sad8x4_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad8x4_avg/, "$sse2_x86inc";
+specialize qw/vpx_sad8x4_avg msa/, "$sse2_x86inc";

add_proto qw/unsigned int vpx_sad4x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad4x8_avg/, "$sse_x86inc";
+specialize qw/vpx_sad4x8_avg msa/, "$sse_x86inc";

add_proto qw/unsigned int vpx_sad4x4_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad4x4_avg/, "$sse_x86inc";
+specialize qw/vpx_sad4x4_avg msa/, "$sse_x86inc";
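
The _avg variants score a compound prediction: the reference block is first averaged with second_pred (with rounding), and the SAD is then taken against that average. In scalar form this is roughly (an illustrative sketch, not libvpx source):

/* Scalar sketch of the sadMxN_avg semantics. */
static unsigned int sad_avg_c(const uint8_t *src, int src_stride,
                              const uint8_t *ref, int ref_stride,
                              const uint8_t *second_pred,
                              int width, int height) {
  int x, y;
  unsigned int sad = 0;

  for (y = 0; y < height; ++y) {
    for (x = 0; x < width; ++x) {
      const int avg = (ref[x] + second_pred[x] + 1) >> 1;  /* rounded mean */
      sad += abs(src[x] - avg);
    }
    src += src_stride;
    ref += ref_stride;
    second_pred += width;  /* predictor rows are packed: stride == width */
  }
  return sad;
}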
#
# Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
@@ -171,43 +171,43 @@ specialize qw/vpx_sad4x4x8 sse4_1/;
# Multi-block SAD, comparing a reference to N independent blocks
#
add_proto qw/void vpx_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad64x64x4d avx2 neon/, "$sse2_x86inc";
+specialize qw/vpx_sad64x64x4d avx2 neon msa/, "$sse2_x86inc";

add_proto qw/void vpx_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad64x32x4d/, "$sse2_x86inc";
+specialize qw/vpx_sad64x32x4d msa/, "$sse2_x86inc";

add_proto qw/void vpx_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad32x64x4d/, "$sse2_x86inc";
+specialize qw/vpx_sad32x64x4d msa/, "$sse2_x86inc";

add_proto qw/void vpx_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad32x32x4d avx2 neon/, "$sse2_x86inc";
+specialize qw/vpx_sad32x32x4d avx2 neon msa/, "$sse2_x86inc";

add_proto qw/void vpx_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad32x16x4d/, "$sse2_x86inc";
+specialize qw/vpx_sad32x16x4d msa/, "$sse2_x86inc";

add_proto qw/void vpx_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad16x32x4d/, "$sse2_x86inc";
+specialize qw/vpx_sad16x32x4d msa/, "$sse2_x86inc";

add_proto qw/void vpx_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad16x16x4d neon/, "$sse2_x86inc";
+specialize qw/vpx_sad16x16x4d neon msa/, "$sse2_x86inc";

add_proto qw/void vpx_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad16x8x4d/, "$sse2_x86inc";
+specialize qw/vpx_sad16x8x4d msa/, "$sse2_x86inc";

add_proto qw/void vpx_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad8x16x4d/, "$sse2_x86inc";
+specialize qw/vpx_sad8x16x4d msa/, "$sse2_x86inc";

add_proto qw/void vpx_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad8x8x4d/, "$sse2_x86inc";
+specialize qw/vpx_sad8x8x4d msa/, "$sse2_x86inc";

add_proto qw/void vpx_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad8x4x4d/, "$sse2_x86inc";
+specialize qw/vpx_sad8x4x4d msa/, "$sse2_x86inc";

add_proto qw/void vpx_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad4x8x4d/, "$sse_x86inc";
+specialize qw/vpx_sad4x8x4d msa/, "$sse_x86inc";

add_proto qw/void vpx_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad4x4x4d/, "$sse_x86inc";
+specialize qw/vpx_sad4x4x4d msa/, "$sse_x86inc";
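
The x4d variants evaluate one source block against four independent reference pointers in a single call, which lets the vector code share the source loads across all four candidates. Scalar semantics, roughly (reusing the sad_c sketch from the top of this page):

/* Scalar sketch of the sadMxNx4d semantics. */
static void sad_x4d_c(const uint8_t *src, int src_stride,
                      const uint8_t *const ref_ptr[4], int ref_stride,
                      int width, int height, uint32_t *sad_array) {
  int i;

  for (i = 0; i < 4; ++i)
    sad_array[i] = sad_c(src, src_stride, ref_ptr[i], ref_stride,
                         width, height);
}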
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
#