Commit ae676953 authored by Yi Luo

Lowbd D207E/D63E/D45E intrapred x86 optimization

D207E
Predictor  SSE2 vs C
4x4        ~2.6X
4x8        ~2.5X
8x4        ~8.0X
8x8        ~9.1X
8x16       ~11.7X
16x8       ~16.9X
16x16      ~17.3X
16x32      ~17.2X
32x16      ~30.2X
32x32      ~35.5X

D63E
Predictor  SSE2 vs C
4x4        ~4.7X
4x8        ~4.9X
8x4        ~7.8X
8x8        ~8.9X
8x16       ~9.3X
16x8       ~15.7X
16x16      ~14.7X
16x32      ~17.3X
32x16      ~18.0X
32x32      ~15.7X

D45E
Predictor  SSSE3 vs C
4x4        ~1.8X
4x8        ~2.9X
8x4        ~6.7X
8x8        ~6.5X
8x16       ~7.4X
16x8       ~24.4X
16x16      ~21.5X
16x32      ~24.2X
32x16      ~25.4X
32x32      ~25.2X

Change-Id: I8215de190e2b6314272749761600e389d1ca0fdf
parent 08ee5c86
...@@ -163,7 +163,6 @@ specialize qw/aom_smooth_predictor_16x32 ssse3/; ...@@ -163,7 +163,6 @@ specialize qw/aom_smooth_predictor_16x32 ssse3/;
specialize qw/aom_smooth_predictor_32x16 ssse3/; specialize qw/aom_smooth_predictor_32x16 ssse3/;
specialize qw/aom_smooth_predictor_32x32 ssse3/; specialize qw/aom_smooth_predictor_32x32 ssse3/;
specialize qw/aom_d63e_predictor_4x4 ssse3/;
specialize qw/aom_d135_predictor_4x4 neon/; specialize qw/aom_d135_predictor_4x4 neon/;
specialize qw/aom_d153_predictor_4x4 ssse3/; specialize qw/aom_d153_predictor_4x4 ssse3/;
specialize qw/aom_dc_predictor_4x4 dspr2 msa neon sse2/; specialize qw/aom_dc_predictor_4x4 dspr2 msa neon sse2/;
...@@ -180,6 +179,38 @@ specialize qw/aom_d153_predictor_32x32 ssse3/; ...@@ -180,6 +179,38 @@ specialize qw/aom_d153_predictor_32x32 ssse3/;
specialize qw/aom_dc_predictor_32x16 sse2 avx2/; specialize qw/aom_dc_predictor_32x16 sse2 avx2/;
specialize qw/aom_dc_predictor_32x32 msa neon sse2 avx2/; specialize qw/aom_dc_predictor_32x32 msa neon sse2 avx2/;
specialize qw/aom_d207e_predictor_4x4 sse2/;
specialize qw/aom_d207e_predictor_4x8 sse2/;
specialize qw/aom_d207e_predictor_8x4 sse2/;
specialize qw/aom_d207e_predictor_8x8 sse2/;
specialize qw/aom_d207e_predictor_8x16 sse2/;
specialize qw/aom_d207e_predictor_16x8 sse2/;
specialize qw/aom_d207e_predictor_16x16 sse2/;
specialize qw/aom_d207e_predictor_16x32 sse2/;
specialize qw/aom_d207e_predictor_32x16 sse2/;
specialize qw/aom_d207e_predictor_32x32 sse2/;
specialize qw/aom_d63e_predictor_4x4 sse2 ssse3/;
specialize qw/aom_d63e_predictor_4x8 sse2/;
specialize qw/aom_d63e_predictor_8x4 sse2/;
specialize qw/aom_d63e_predictor_8x8 sse2/;
specialize qw/aom_d63e_predictor_8x16 sse2/;
specialize qw/aom_d63e_predictor_16x8 sse2/;
specialize qw/aom_d63e_predictor_16x16 sse2/;
specialize qw/aom_d63e_predictor_16x32 sse2/;
specialize qw/aom_d63e_predictor_32x16 sse2/;
specialize qw/aom_d63e_predictor_32x32 sse2/;
specialize qw/aom_d45e_predictor_4x4 ssse3/;
specialize qw/aom_d45e_predictor_4x8 ssse3/;
specialize qw/aom_d45e_predictor_8x4 ssse3/;
specialize qw/aom_d45e_predictor_8x8 ssse3/;
specialize qw/aom_d45e_predictor_8x16 ssse3/;
specialize qw/aom_d45e_predictor_16x8 ssse3/;
specialize qw/aom_d45e_predictor_16x16 ssse3/;
specialize qw/aom_d45e_predictor_16x32 ssse3/;
specialize qw/aom_d45e_predictor_32x16 ssse3/;
specialize qw/aom_d45e_predictor_32x32 ssse3/;
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") { if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
specialize qw/aom_highbd_v_predictor_4x4 sse2/; specialize qw/aom_highbd_v_predictor_4x4 sse2/;
......
This diff is collapsed.
This diff is collapsed.
...@@ -331,6 +331,7 @@ INSTANTIATE_TEST_CASE_P(AVX2_TO_C_12, HighbdIntraPredTest, ...@@ -331,6 +331,7 @@ INSTANTIATE_TEST_CASE_P(AVX2_TO_C_12, HighbdIntraPredTest,
#if HAVE_SSE2 #if HAVE_SSE2
const IntraPredFunc<IntraPred> LowbdIntraPredTestVector[] = { const IntraPredFunc<IntraPred> LowbdIntraPredTestVector[] = {
lowbd_intrapred(d63e, sse2), lowbd_intrapred(d207e, sse2),
lowbd_intrapred(dc, sse2), lowbd_intrapred(dc_top, sse2), lowbd_intrapred(dc, sse2), lowbd_intrapred(dc_top, sse2),
lowbd_intrapred(dc_left, sse2), lowbd_intrapred(dc_128, sse2), lowbd_intrapred(dc_left, sse2), lowbd_intrapred(dc_128, sse2),
lowbd_intrapred(v, sse2), lowbd_intrapred(h, sse2), lowbd_intrapred(v, sse2), lowbd_intrapred(h, sse2),
...@@ -341,6 +342,17 @@ INSTANTIATE_TEST_CASE_P(SSE2, LowbdIntraPredTest, ...@@ -341,6 +342,17 @@ INSTANTIATE_TEST_CASE_P(SSE2, LowbdIntraPredTest,
#endif // HAVE_SSE2 #endif // HAVE_SSE2
#if HAVE_SSSE3
const IntraPredFunc<IntraPred> LowbdIntraPredTestVectorSsse3[] = {
lowbd_entry(d63e, 4, 4, ssse3), lowbd_intrapred(d45e, ssse3),
lowbd_intrapred(paeth, ssse3), lowbd_intrapred(smooth, ssse3),
};
INSTANTIATE_TEST_CASE_P(SSSE3, LowbdIntraPredTest,
::testing::ValuesIn(LowbdIntraPredTestVectorSsse3));
#endif // HAVE_SSSE3
#if HAVE_AVX2 #if HAVE_AVX2
const IntraPredFunc<IntraPred> LowbdIntraPredTestVectorAvx2[] = { const IntraPredFunc<IntraPred> LowbdIntraPredTestVectorAvx2[] = {
lowbd_entry(dc, 32, 32, avx2), lowbd_entry(dc_top, 32, 32, avx2), lowbd_entry(dc, 32, 32, avx2), lowbd_entry(dc_top, 32, 32, avx2),
...@@ -356,16 +368,6 @@ const IntraPredFunc<IntraPred> LowbdIntraPredTestVectorAvx2[] = { ...@@ -356,16 +368,6 @@ const IntraPredFunc<IntraPred> LowbdIntraPredTestVectorAvx2[] = {
INSTANTIATE_TEST_CASE_P(AVX2, LowbdIntraPredTest, INSTANTIATE_TEST_CASE_P(AVX2, LowbdIntraPredTest,
::testing::ValuesIn(LowbdIntraPredTestVectorAvx2)); ::testing::ValuesIn(LowbdIntraPredTestVectorAvx2));
#endif // HAVE_SSE2 #endif // HAVE_AVX2
#if HAVE_SSSE3
const IntraPredFunc<IntraPred> LowbdIntraPredTestVectorSsse3[] = {
lowbd_intrapred(paeth, ssse3), lowbd_intrapred(smooth, ssse3),
};
INSTANTIATE_TEST_CASE_P(SSSE3, LowbdIntraPredTest,
::testing::ValuesIn(LowbdIntraPredTestVectorSsse3));
#endif // HAVE_SSSE3
} // namespace } // namespace
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment