Commit fa96eeb8 authored by Ronald S. Bultje
Browse files

Implement SSE version for sad4x8x4d and SSE2 version for sad8x4x4d.

Encoding time of crew (CIF, first 50 frames) @ 1500kbps goes from 4min56
to 4min42.

Change-Id: I92c0c8b32980d2ae7c6dafc8b883a2c7fcd14a9f
parent 1fa04e1a
......@@ -332,15 +332,31 @@ INSTANTIATE_TEST_CASE_P(C, SADTest, ::testing::ValuesIn(c_tests));
#if CONFIG_VP9_ENCODER
// Typed aliases for the `_c` variants of the vp9_sad<N>x<M>x4d functions, one
// per block size. Each alias is passed to the SADx4Test instantiation below.
const sad_n_by_n_by_4_fn_t sad_64x64x4d_c = vp9_sad64x64x4d_c;
const sad_n_by_n_by_4_fn_t sad_64x32x4d_c = vp9_sad64x32x4d_c;
const sad_n_by_n_by_4_fn_t sad_32x64x4d_c = vp9_sad32x64x4d_c;
const sad_n_by_n_by_4_fn_t sad_32x32x4d_c = vp9_sad32x32x4d_c;
const sad_n_by_n_by_4_fn_t sad_32x16x4d_c = vp9_sad32x16x4d_c;
const sad_n_by_n_by_4_fn_t sad_16x32x4d_c = vp9_sad16x32x4d_c;
const sad_n_by_n_by_4_fn_t sad_16x16x4d_c = vp9_sad16x16x4d_c;
const sad_n_by_n_by_4_fn_t sad_16x8x4d_c = vp9_sad16x8x4d_c;
const sad_n_by_n_by_4_fn_t sad_8x16x4d_c = vp9_sad8x16x4d_c;
const sad_n_by_n_by_4_fn_t sad_8x8x4d_c = vp9_sad8x8x4d_c;
const sad_n_by_n_by_4_fn_t sad_8x4x4d_c = vp9_sad8x4x4d_c;
const sad_n_by_n_by_4_fn_t sad_4x8x4d_c = vp9_sad4x8x4d_c;
const sad_n_by_n_by_4_fn_t sad_4x4x4d_c = vp9_sad4x4x4d_c;
// Instantiate SADx4Test under the "C" prefix with one tuple per block size.
// The two integers in each tuple match the <N>x<M> in the function name
// (presumably block width and height -- confirm against the SADx4Test fixture).
INSTANTIATE_TEST_CASE_P(C, SADx4Test, ::testing::Values(
make_tuple(64, 64, sad_64x64x4d_c),
make_tuple(64, 32, sad_64x32x4d_c),
make_tuple(32, 64, sad_32x64x4d_c),
make_tuple(32, 32, sad_32x32x4d_c),
make_tuple(32, 16, sad_32x16x4d_c),
make_tuple(16, 32, sad_16x32x4d_c),
make_tuple(16, 16, sad_16x16x4d_c),
make_tuple(16, 8, sad_16x8x4d_c),
make_tuple(8, 16, sad_8x16x4d_c),
make_tuple(8, 8, sad_8x8x4d_c),
make_tuple(8, 4, sad_8x4x4d_c),
make_tuple(4, 8, sad_4x8x4d_c),
make_tuple(4, 4, sad_4x4x4d_c)));
#endif
......@@ -407,8 +423,10 @@ const sad_m_by_n_fn_t sad_4x4_sse_vp9 = vp9_sad4x4_sse;
// Instantiate SADTest under the "SSE" prefix; only the 4x4 entry is listed.
INSTANTIATE_TEST_CASE_P(SSE, SADTest, ::testing::Values(
make_tuple(4, 4, sad_4x4_sse_vp9)));
// Typed aliases for the `_sse` variants of the x4d SAD functions (4x8, 4x4).
const sad_n_by_n_by_4_fn_t sad_4x8x4d_sse = vp9_sad4x8x4d_sse;
const sad_n_by_n_by_4_fn_t sad_4x4x4d_sse = vp9_sad4x4x4d_sse;
// Instantiate SADx4Test under the "SSE" prefix for the 4x8 and 4x4 entries.
// The two integers in each tuple match the <N>x<M> in the function name.
INSTANTIATE_TEST_CASE_P(SSE, SADx4Test, ::testing::Values(
make_tuple(4, 8, sad_4x8x4d_sse),
make_tuple(4, 4, sad_4x4x4d_sse)));
#endif
#endif
......@@ -450,18 +468,28 @@ INSTANTIATE_TEST_CASE_P(SSE2, SADTest, ::testing::ValuesIn(sse2_tests));
#if CONFIG_VP9_ENCODER
// Typed aliases for the `_sse2` variants of the vp9_sad<N>x<M>x4d functions,
// one per block size from 64x64 down to 8x4.
const sad_n_by_n_by_4_fn_t sad_64x64x4d_sse2 = vp9_sad64x64x4d_sse2;
const sad_n_by_n_by_4_fn_t sad_64x32x4d_sse2 = vp9_sad64x32x4d_sse2;
const sad_n_by_n_by_4_fn_t sad_32x64x4d_sse2 = vp9_sad32x64x4d_sse2;
const sad_n_by_n_by_4_fn_t sad_32x32x4d_sse2 = vp9_sad32x32x4d_sse2;
const sad_n_by_n_by_4_fn_t sad_32x16x4d_sse2 = vp9_sad32x16x4d_sse2;
const sad_n_by_n_by_4_fn_t sad_16x32x4d_sse2 = vp9_sad16x32x4d_sse2;
const sad_n_by_n_by_4_fn_t sad_16x16x4d_sse2 = vp9_sad16x16x4d_sse2;
const sad_n_by_n_by_4_fn_t sad_16x8x4d_sse2 = vp9_sad16x8x4d_sse2;
const sad_n_by_n_by_4_fn_t sad_8x16x4d_sse2 = vp9_sad8x16x4d_sse2;
const sad_n_by_n_by_4_fn_t sad_8x8x4d_sse2 = vp9_sad8x8x4d_sse2;
const sad_n_by_n_by_4_fn_t sad_8x4x4d_sse2 = vp9_sad8x4x4d_sse2;
// Instantiate SADx4Test under the "SSE2" prefix with one tuple per block size.
// The two integers in each tuple match the <N>x<M> in the function name.
INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::Values(
make_tuple(64, 64, sad_64x64x4d_sse2),
make_tuple(64, 32, sad_64x32x4d_sse2),
make_tuple(32, 64, sad_32x64x4d_sse2),
make_tuple(32, 32, sad_32x32x4d_sse2),
make_tuple(32, 16, sad_32x16x4d_sse2),
make_tuple(16, 32, sad_16x32x4d_sse2),
make_tuple(16, 16, sad_16x16x4d_sse2),
make_tuple(16, 8, sad_16x8x4d_sse2),
make_tuple(8, 16, sad_8x16x4d_sse2),
make_tuple(8, 8, sad_8x8x4d_sse2)));
// NOTE(review): the 8x8 entry appears twice -- once closing the list (line
// above) and once continuing it (line below). This looks like the removed and
// added lines of a diff shown without +/- markers; confirm that only the
// continuing form, followed by the 8x4 entry, is meant to remain.
make_tuple(8, 8, sad_8x8x4d_sse2),
make_tuple(8, 4, sad_8x4x4d_sse2)));
#endif
#endif
......
......@@ -499,13 +499,14 @@ specialize vp9_sad8x8x4d sse2
# TODO(jingning): need to convert these 4x8/8x4 functions into sse2 form
# NOTE(review): the entries below now list sse2 (8x4) and sse (4x8, 4x4)
# specializations, so the TODO above may be partly addressed -- confirm.
#
# Each entry is a `prototype` line (return type, function name, quoted argument
# list) followed by `specialize` line(s) naming that function and, optionally,
# instruction-set variants.
# NOTE(review): vp9_sad8x4x4d and vp9_sad4x8x4d each carry two `specialize`
# lines (a bare one, then one with sse2 / sse). The bare line looks like the
# removed side of a diff shown without +/- markers -- confirm which remains.
prototype void vp9_sad8x4x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
specialize vp9_sad8x4x4d
specialize vp9_sad8x4x4d sse2
prototype void vp9_sad4x8x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
specialize vp9_sad4x8x4d
specialize vp9_sad4x8x4d sse
prototype void vp9_sad4x4x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
specialize vp9_sad4x4x4d sse
prototype unsigned int vp9_sub_pixel_mse16x16 "const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse"
specialize vp9_sub_pixel_mse16x16 sse2 mmx
......
......@@ -224,6 +224,8 @@ SADNXN4D 16, 16
; Invoke the SADNXN4D macro (defined outside this view) once per size pair.
; NOTE(review): each pair presumably selects the <N>x<M> block size of the
; generated 4-reference SAD function -- confirm against the macro definition.
SADNXN4D 16, 8
SADNXN4D 8, 16
SADNXN4D 8, 8
SADNXN4D 8, 4
; NOTE(review): INIT_MMX sse presumably switches the instantiations that follow
; to MMX registers with an `sse` function suffix -- confirm against the
; project's x86 macro include.
INIT_MMX sse
SADNXN4D 4, 8
SADNXN4D 4, 4
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment