Commit 1601c138 authored by Parag Salasakar
Browse files

mips msa vp9 idct 32x32 optimization

average improvement ~4x-6x

Change-Id: Idaba7e49fbd7f388caee0d73773ccf6e4807ef17
parent d1cdda88
......@@ -382,4 +382,12 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&vp9_fdct32x32_rd_avx2,
&vp9_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
#endif // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
// Instantiates Trans32x32Test under the "MSA" prefix, pairing
// vp9_fdct32x32_c with vp9_idct32x32_1024_add_msa and VPX_BITS_8.
// Compiled only when HAVE_MSA is set and both CONFIG_VP9_HIGHBITDEPTH and
// CONFIG_EMULATE_HARDWARE are off.
// NOTE(review): the meaning of the third tuple element (0 here, 1 in the
// AVX2 instantiation above) is not visible in this hunk -- confirm against
// the fixture's parameter definition.
INSTANTIATE_TEST_CASE_P(
MSA, Trans32x32Test,
::testing::Values(
make_tuple(&vp9_fdct32x32_c,
&vp9_idct32x32_1024_add_msa, 0, VPX_BITS_8)));
#endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
} // namespace
......@@ -309,14 +309,18 @@ INSTANTIATE_TEST_CASE_P(
INSTANTIATE_TEST_CASE_P(
MSA, PartialIDctTest,
::testing::Values(
make_tuple(&vp9_fdct32x32_c,
&vp9_idct32x32_1024_add_c,
&vp9_idct32x32_34_add_msa,
TX_32X32, 34),
make_tuple(&vp9_fdct32x32_c,
&vp9_idct32x32_1024_add_c,
&vp9_idct32x32_1_add_msa,
TX_32X32, 1),
make_tuple(&vp9_fdct16x16_c,
&vp9_idct16x16_256_add_c,
&vp9_idct16x16_10_add_msa,
TX_16X16, 10),
make_tuple(&vp9_fdct16x16_c,
&vp9_idct16x16_256_add_msa,
&vp9_idct16x16_10_add_c,
TX_16X16, 10),
make_tuple(&vp9_fdct16x16_c,
&vp9_idct16x16_256_add_c,
&vp9_idct16x16_1_add_msa,
......
This diff is collapsed.
......@@ -358,6 +358,14 @@
src = (v16u8)__msa_insert_d((v2i64)(src), 1, (src1)); \
}
/* Stores four vectors through STORE_SH at consecutive strided positions:
 * in0 at (ptr), in1 at (ptr) + (stride), in2 at (ptr) + 2 * (stride) and
 * in3 at (ptr) + 3 * (stride). The offsets are ordinary pointer arithmetic
 * on ptr, so stride is counted in elements of ptr's pointee type.
 *
 * stride is parenthesized so that an expression argument such as `w + 1`
 * is scaled as a whole rather than being split by operator precedence
 * (`2 * w + 1` instead of `2 * (w + 1)`).
 *
 * ptr and stride are each expanded four times; do not pass arguments with
 * side effects. */
#define STORE_4VECS_SH(ptr, stride,           \
                       in0, in1, in2, in3) {  \
  STORE_SH(in0, ((ptr) + 0 * (stride)));      \
  STORE_SH(in1, ((ptr) + 1 * (stride)));      \
  STORE_SH(in2, ((ptr) + 2 * (stride)));      \
  STORE_SH(in3, ((ptr) + 3 * (stride)));      \
}
#define STORE_8VECS_SH(ptr, stride, \
in0, in1, in2, in3, \
in4, in5, in6, in7) { \
......
......@@ -443,15 +443,15 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vp9_idct16x16_10_add sse2 neon dspr2 msa/;
add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct32x32_1024_add sse2 neon dspr2/;
specialize qw/vp9_idct32x32_1024_add sse2 neon dspr2 msa/;
add_proto qw/void vp9_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct32x32_34_add sse2 neon_asm dspr2/;
specialize qw/vp9_idct32x32_34_add sse2 neon_asm dspr2 msa/;
#is this a typo?
$vp9_idct32x32_34_add_neon_asm=vp9_idct32x32_1024_add_neon;
add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct32x32_1_add sse2 neon dspr2/;
specialize qw/vp9_idct32x32_1_add sse2 neon dspr2 msa/;
add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/vp9_iht4x4_16_add sse2 neon dspr2/;
......
......@@ -138,6 +138,7 @@ VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve_avg_msa.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve_copy_msa.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve_msa.h
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct16x16_msa.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct32x32_msa.c
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.h
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment