Commit cbce09ce authored by James Zern's avatar James Zern Committed by Gerrit Code Review
Browse files

Merge changes I6abc0657,I8224fba2,I04f64a45,I5d49d119,I76b4d171,I88c11ac3

* changes:
  vp9_sub_pixel_*variance*: disable avx2 variants
  vp9_sad*x4d: disable avx2 variants
  vp9_f(dct|ht): disable avx2 variants
  convolve: disable avx2 variants
  fdct8x8_test: add missing avx2 functions
  dct4x4_test: add missing avx2 functions
parents 3659fbd3 520cb3f3
......@@ -645,6 +645,26 @@ INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
#endif
#if HAVE_AVX2
// TODO(jzern): these prototypes can be removed after the avx2 versions are
// reenabled in vp9_rtcd_defs.pl.
extern "C" {
void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h);
void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h);
void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h);
}
const ConvolveFunctions convolve8_avx2(
vp9_convolve8_horiz_avx2, vp9_convolve8_avg_horiz_ssse3,
vp9_convolve8_vert_avx2, vp9_convolve8_avg_vert_ssse3,
......@@ -655,8 +675,10 @@ INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values(
make_tuple(8, 4, &convolve8_avx2),
make_tuple(4, 8, &convolve8_avx2),
make_tuple(8, 8, &convolve8_avx2),
make_tuple(8, 16, &convolve8_avx2)));
INSTANTIATE_TEST_CASE_P(DISABLED_AVX2, ConvolveTest, ::testing::Values(
make_tuple(16, 8, &convolve8_avx2),
make_tuple(8, 16, &convolve8_avx2),
make_tuple(16, 16, &convolve8_avx2),
make_tuple(32, 16, &convolve8_avx2),
make_tuple(16, 32, &convolve8_avx2),
......
......@@ -606,4 +606,29 @@ INSTANTIATE_TEST_CASE_P(
::testing::Values(
make_tuple(&vp9_fdct16x16_c, &vp9_idct16x16_256_add_ssse3, 0)));
#endif
#if HAVE_AVX2
// TODO(jzern): these prototypes can be removed after the avx2 versions are
// reenabled in vp9_rtcd_defs.pl.
extern "C" {
void vp9_fdct16x16_avx2(const int16_t *input, int16_t *output, int stride);
void vp9_fht16x16_avx2(const int16_t *input, int16_t *output, int stride,
int tx_type);
}
INSTANTIATE_TEST_CASE_P(
DISABLED_AVX2, Trans16x16DCT,
::testing::Values(
make_tuple(&vp9_fdct16x16_avx2,
&vp9_idct16x16_256_add_c, 0)));
INSTANTIATE_TEST_CASE_P(
AVX2, Trans16x16HT,
::testing::Values(
make_tuple(&vp9_fht16x16_avx2, &vp9_iht16x16_256_add_c, 3)));
INSTANTIATE_TEST_CASE_P(
DISABLED_AVX2, Trans16x16HT,
::testing::Values(
make_tuple(&vp9_fht16x16_avx2, &vp9_iht16x16_256_add_c, 0),
make_tuple(&vp9_fht16x16_avx2, &vp9_iht16x16_256_add_c, 1),
make_tuple(&vp9_fht16x16_avx2, &vp9_iht16x16_256_add_c, 2)));
#endif
} // namespace
......@@ -376,4 +376,19 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 3)));
#endif
#if HAVE_AVX2
INSTANTIATE_TEST_CASE_P(
AVX2, Trans4x4DCT,
::testing::Values(
make_tuple(&vp9_fdct4x4_avx2,
&vp9_idct4x4_16_add_c, 0)));
INSTANTIATE_TEST_CASE_P(
AVX2, Trans4x4HT,
::testing::Values(
make_tuple(&vp9_fht4x4_avx2, &vp9_iht4x4_16_add_c, 0),
make_tuple(&vp9_fht4x4_avx2, &vp9_iht4x4_16_add_c, 1),
make_tuple(&vp9_fht4x4_avx2, &vp9_iht4x4_16_add_c, 2),
make_tuple(&vp9_fht4x4_avx2, &vp9_iht4x4_16_add_c, 3)));
#endif
} // namespace
......@@ -367,4 +367,18 @@ INSTANTIATE_TEST_CASE_P(
::testing::Values(
make_tuple(&vp9_fdct8x8_ssse3, &vp9_idct8x8_64_add_ssse3, 0)));
#endif
#if HAVE_AVX2
INSTANTIATE_TEST_CASE_P(
AVX2, FwdTrans8x8DCT,
::testing::Values(
make_tuple(&vp9_fdct8x8_avx2, &vp9_idct8x8_64_add_c, 0)));
INSTANTIATE_TEST_CASE_P(
AVX2, FwdTrans8x8HT,
::testing::Values(
make_tuple(&vp9_fht8x8_avx2, &vp9_iht8x8_64_add_c, 0),
make_tuple(&vp9_fht8x8_avx2, &vp9_iht8x8_64_add_c, 1),
make_tuple(&vp9_fht8x8_avx2, &vp9_iht8x8_64_add_c, 2),
make_tuple(&vp9_fht8x8_avx2, &vp9_iht8x8_64_add_c, 3)));
#endif
} // namespace
......@@ -627,4 +627,24 @@ INSTANTIATE_TEST_CASE_P(SSE3, SADTest, ::testing::Values(
#endif // CONFIG_USE_X86INC
#endif // HAVE_SSSE3
#if HAVE_AVX2
#if CONFIG_VP9_ENCODER
// TODO(jzern): these prototypes can be removed after the avx2 versions are
// reenabled in vp9_rtcd_defs.pl.
extern "C" {
void vp9_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_ptr[], int ref_stride,
unsigned int *sad_array);
void vp9_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_ptr[], int ref_stride,
unsigned int *sad_array);
}
const sad_n_by_n_by_4_fn_t sad_64x64x4d_avx2 = vp9_sad64x64x4d_avx2;
const sad_n_by_n_by_4_fn_t sad_32x32x4d_avx2 = vp9_sad32x32x4d_avx2;
INSTANTIATE_TEST_CASE_P(DISABLED_AVX2, SADx4Test, ::testing::Values(
make_tuple(32, 32, sad_32x32x4d_avx2),
make_tuple(64, 64, sad_64x64x4d_avx2)));
#endif // CONFIG_VP9_ENCODER
#endif // HAVE_AVX2
} // namespace
......@@ -702,6 +702,57 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(6, 6, subpel_avg_variance64x64_ssse3)));
#endif
#endif
#if HAVE_AVX2
// TODO(jzern): these prototypes can be removed after the avx2 versions are
// reenabled in vp9_rtcd_defs.pl.
extern "C" {
unsigned int vp9_sub_pixel_variance32x32_avx2(
const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
unsigned int vp9_sub_pixel_variance64x64_avx2(
const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
unsigned int vp9_sub_pixel_avg_variance32x32_avx2(
const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
const uint8_t *ref_ptr, int ref_stride, unsigned int *sse,
const uint8_t *second_pred);
unsigned int vp9_sub_pixel_avg_variance64x64_avx2(
const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
const uint8_t *ref_ptr, int ref_stride, unsigned int *sse,
const uint8_t *second_pred);
}
const vp9_variance_fn_t variance16x16_avx2 = vp9_variance16x16_avx2;
const vp9_variance_fn_t variance32x16_avx2 = vp9_variance32x16_avx2;
const vp9_variance_fn_t variance32x32_avx2 = vp9_variance32x32_avx2;
const vp9_variance_fn_t variance64x32_avx2 = vp9_variance64x32_avx2;
const vp9_variance_fn_t variance64x64_avx2 = vp9_variance64x64_avx2;
INSTANTIATE_TEST_CASE_P(
AVX2, VP9VarianceTest,
::testing::Values(make_tuple(4, 4, variance16x16_avx2),
make_tuple(5, 4, variance32x16_avx2),
make_tuple(5, 5, variance32x32_avx2),
make_tuple(6, 5, variance64x32_avx2),
make_tuple(6, 6, variance64x64_avx2)));
const vp9_subpixvariance_fn_t subpel_variance32x32_avx2 =
vp9_sub_pixel_variance32x32_avx2;
const vp9_subpixvariance_fn_t subpel_variance64x64_avx2 =
vp9_sub_pixel_variance64x64_avx2;
INSTANTIATE_TEST_CASE_P(
DISABLED_AVX2, VP9SubpelVarianceTest,
::testing::Values(make_tuple(5, 5, subpel_variance32x32_avx2),
make_tuple(6, 6, subpel_variance64x64_avx2)));
const vp9_subp_avg_variance_fn_t subpel_avg_variance32x32_avx2 =
vp9_sub_pixel_avg_variance32x32_avx2;
const vp9_subp_avg_variance_fn_t subpel_avg_variance64x64_avx2 =
vp9_sub_pixel_avg_variance64x64_avx2;
INSTANTIATE_TEST_CASE_P(
DISABLED_AVX2, VP9SubpelAvgVarianceTest,
::testing::Values(make_tuple(5, 5, subpel_avg_variance32x32_avx2),
make_tuple(6, 6, subpel_avg_variance64x64_avx2)));
#endif // HAVE_AVX2
#endif // CONFIG_VP9_ENCODER
} // namespace vp9
......
......@@ -305,15 +305,15 @@ specialize qw/vp9_convolve_avg neon_asm dspr2/, "$sse2_x86inc";
$vp9_convolve_avg_neon_asm=vp9_convolve_avg_neon;
add_proto qw/void vp9_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vp9_convolve8 sse2 ssse3 avx2 neon_asm dspr2/;
specialize qw/vp9_convolve8 sse2 ssse3 neon_asm dspr2/;
$vp9_convolve8_neon_asm=vp9_convolve8_neon;
add_proto qw/void vp9_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vp9_convolve8_horiz sse2 ssse3 avx2 neon_asm dspr2/;
specialize qw/vp9_convolve8_horiz sse2 ssse3 neon_asm dspr2/;
$vp9_convolve8_horiz_neon_asm=vp9_convolve8_horiz_neon;
add_proto qw/void vp9_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vp9_convolve8_vert sse2 ssse3 avx2 neon_asm dspr2/;
specialize qw/vp9_convolve8_vert sse2 ssse3 neon_asm dspr2/;
$vp9_convolve8_vert_neon_asm=vp9_convolve8_vert_neon;
add_proto qw/void vp9_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
......@@ -447,10 +447,10 @@ add_proto qw/unsigned int vp9_variance4x4/, "const uint8_t *src_ptr, int source_
specialize qw/vp9_variance4x4 mmx/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
specialize qw/vp9_sub_pixel_variance64x64/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
specialize qw/vp9_sub_pixel_avg_variance64x64/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc";
......@@ -477,10 +477,10 @@ add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x32/, "const uint8_t *src_
specialize qw/vp9_sub_pixel_avg_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
specialize qw/vp9_sub_pixel_variance32x32/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
specialize qw/vp9_sub_pixel_avg_variance32x32/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance16x16/, "$sse2_x86inc", "$ssse3_x86inc";
......@@ -653,7 +653,7 @@ add_proto qw/void vp9_sad4x4x8/, "const uint8_t *src_ptr, int src_stride, const
specialize qw/vp9_sad4x4x8 sse4/;
add_proto qw/void vp9_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad64x64x4d sse2 avx2/;
specialize qw/vp9_sad64x64x4d sse2/;
add_proto qw/void vp9_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad32x64x4d sse2/;
......@@ -668,7 +668,7 @@ add_proto qw/void vp9_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, co
specialize qw/vp9_sad16x32x4d sse2/;
add_proto qw/void vp9_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad32x32x4d sse2 avx2/;
specialize qw/vp9_sad32x32x4d sse2/;
add_proto qw/void vp9_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad16x16x4d sse2/;
......@@ -739,7 +739,7 @@ add_proto qw/void vp9_fht8x8/, "const int16_t *input, int16_t *output, int strid
specialize qw/vp9_fht8x8 sse2 avx2/;
add_proto qw/void vp9_fht16x16/, "const int16_t *input, int16_t *output, int stride, int tx_type";
specialize qw/vp9_fht16x16 sse2 avx2/;
specialize qw/vp9_fht16x16 sse2/;
add_proto qw/void vp9_fwht4x4/, "const int16_t *input, int16_t *output, int stride";
specialize qw/vp9_fwht4x4/, "$mmx_x86inc";
......@@ -751,7 +751,7 @@ add_proto qw/void vp9_fdct8x8/, "const int16_t *input, int16_t *output, int stri
specialize qw/vp9_fdct8x8 sse2 avx2/, "$ssse3_x86_64";
add_proto qw/void vp9_fdct16x16/, "const int16_t *input, int16_t *output, int stride";
specialize qw/vp9_fdct16x16 sse2 avx2/;
specialize qw/vp9_fdct16x16 sse2/;
add_proto qw/void vp9_fdct32x32/, "const int16_t *input, int16_t *output, int stride";
specialize qw/vp9_fdct32x32 sse2 avx2/;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment