Commit fa96eeb8 authored by Ronald S. Bultje

Implement SSE version for sad4x8x4d and SSE2 version for sad8x4x4d.

Encoding time of crew (CIF, first 50 frames) @ 1500kbps goes from 4min56
to 4min42.

Change-Id: I92c0c8b32980d2ae7c6dafc8b883a2c7fcd14a9f
parent 1fa04e1a
...@@ -332,15 +332,31 @@ INSTANTIATE_TEST_CASE_P(C, SADTest, ::testing::ValuesIn(c_tests)); ...@@ -332,15 +332,31 @@ INSTANTIATE_TEST_CASE_P(C, SADTest, ::testing::ValuesIn(c_tests));
#if CONFIG_VP9_ENCODER #if CONFIG_VP9_ENCODER
const sad_n_by_n_by_4_fn_t sad_64x64x4d_c = vp9_sad64x64x4d_c; const sad_n_by_n_by_4_fn_t sad_64x64x4d_c = vp9_sad64x64x4d_c;
const sad_n_by_n_by_4_fn_t sad_64x32x4d_c = vp9_sad64x32x4d_c;
const sad_n_by_n_by_4_fn_t sad_32x64x4d_c = vp9_sad32x64x4d_c;
const sad_n_by_n_by_4_fn_t sad_32x32x4d_c = vp9_sad32x32x4d_c; const sad_n_by_n_by_4_fn_t sad_32x32x4d_c = vp9_sad32x32x4d_c;
const sad_n_by_n_by_4_fn_t sad_32x16x4d_c = vp9_sad32x16x4d_c;
const sad_n_by_n_by_4_fn_t sad_16x32x4d_c = vp9_sad16x32x4d_c;
const sad_n_by_n_by_4_fn_t sad_16x16x4d_c = vp9_sad16x16x4d_c; const sad_n_by_n_by_4_fn_t sad_16x16x4d_c = vp9_sad16x16x4d_c;
const sad_n_by_n_by_4_fn_t sad_16x8x4d_c = vp9_sad16x8x4d_c;
const sad_n_by_n_by_4_fn_t sad_8x16x4d_c = vp9_sad8x16x4d_c;
const sad_n_by_n_by_4_fn_t sad_8x8x4d_c = vp9_sad8x8x4d_c; const sad_n_by_n_by_4_fn_t sad_8x8x4d_c = vp9_sad8x8x4d_c;
const sad_n_by_n_by_4_fn_t sad_8x4x4d_c = vp9_sad8x4x4d_c;
const sad_n_by_n_by_4_fn_t sad_4x8x4d_c = vp9_sad4x8x4d_c;
const sad_n_by_n_by_4_fn_t sad_4x4x4d_c = vp9_sad4x4x4d_c; const sad_n_by_n_by_4_fn_t sad_4x4x4d_c = vp9_sad4x4x4d_c;
INSTANTIATE_TEST_CASE_P(C, SADx4Test, ::testing::Values( INSTANTIATE_TEST_CASE_P(C, SADx4Test, ::testing::Values(
make_tuple(64, 64, sad_64x64x4d_c), make_tuple(64, 64, sad_64x64x4d_c),
make_tuple(64, 32, sad_64x32x4d_c),
make_tuple(32, 64, sad_32x64x4d_c),
make_tuple(32, 32, sad_32x32x4d_c), make_tuple(32, 32, sad_32x32x4d_c),
make_tuple(32, 16, sad_32x16x4d_c),
make_tuple(16, 32, sad_16x32x4d_c),
make_tuple(16, 16, sad_16x16x4d_c), make_tuple(16, 16, sad_16x16x4d_c),
make_tuple(16, 8, sad_16x8x4d_c),
make_tuple(8, 16, sad_8x16x4d_c),
make_tuple(8, 8, sad_8x8x4d_c), make_tuple(8, 8, sad_8x8x4d_c),
make_tuple(8, 4, sad_8x4x4d_c),
make_tuple(4, 8, sad_4x8x4d_c),
make_tuple(4, 4, sad_4x4x4d_c))); make_tuple(4, 4, sad_4x4x4d_c)));
#endif #endif
...@@ -407,8 +423,10 @@ const sad_m_by_n_fn_t sad_4x4_sse_vp9 = vp9_sad4x4_sse; ...@@ -407,8 +423,10 @@ const sad_m_by_n_fn_t sad_4x4_sse_vp9 = vp9_sad4x4_sse;
INSTANTIATE_TEST_CASE_P(SSE, SADTest, ::testing::Values( INSTANTIATE_TEST_CASE_P(SSE, SADTest, ::testing::Values(
make_tuple(4, 4, sad_4x4_sse_vp9))); make_tuple(4, 4, sad_4x4_sse_vp9)));
const sad_n_by_n_by_4_fn_t sad_4x8x4d_sse = vp9_sad4x8x4d_sse;
const sad_n_by_n_by_4_fn_t sad_4x4x4d_sse = vp9_sad4x4x4d_sse; const sad_n_by_n_by_4_fn_t sad_4x4x4d_sse = vp9_sad4x4x4d_sse;
INSTANTIATE_TEST_CASE_P(SSE, SADx4Test, ::testing::Values( INSTANTIATE_TEST_CASE_P(SSE, SADx4Test, ::testing::Values(
make_tuple(4, 8, sad_4x8x4d_sse),
make_tuple(4, 4, sad_4x4x4d_sse))); make_tuple(4, 4, sad_4x4x4d_sse)));
#endif #endif
#endif #endif
...@@ -450,18 +468,28 @@ INSTANTIATE_TEST_CASE_P(SSE2, SADTest, ::testing::ValuesIn(sse2_tests)); ...@@ -450,18 +468,28 @@ INSTANTIATE_TEST_CASE_P(SSE2, SADTest, ::testing::ValuesIn(sse2_tests));
#if CONFIG_VP9_ENCODER #if CONFIG_VP9_ENCODER
const sad_n_by_n_by_4_fn_t sad_64x64x4d_sse2 = vp9_sad64x64x4d_sse2; const sad_n_by_n_by_4_fn_t sad_64x64x4d_sse2 = vp9_sad64x64x4d_sse2;
const sad_n_by_n_by_4_fn_t sad_64x32x4d_sse2 = vp9_sad64x32x4d_sse2;
const sad_n_by_n_by_4_fn_t sad_32x64x4d_sse2 = vp9_sad32x64x4d_sse2;
const sad_n_by_n_by_4_fn_t sad_32x32x4d_sse2 = vp9_sad32x32x4d_sse2; const sad_n_by_n_by_4_fn_t sad_32x32x4d_sse2 = vp9_sad32x32x4d_sse2;
const sad_n_by_n_by_4_fn_t sad_32x16x4d_sse2 = vp9_sad32x16x4d_sse2;
const sad_n_by_n_by_4_fn_t sad_16x32x4d_sse2 = vp9_sad16x32x4d_sse2;
const sad_n_by_n_by_4_fn_t sad_16x16x4d_sse2 = vp9_sad16x16x4d_sse2; const sad_n_by_n_by_4_fn_t sad_16x16x4d_sse2 = vp9_sad16x16x4d_sse2;
const sad_n_by_n_by_4_fn_t sad_16x8x4d_sse2 = vp9_sad16x8x4d_sse2; const sad_n_by_n_by_4_fn_t sad_16x8x4d_sse2 = vp9_sad16x8x4d_sse2;
const sad_n_by_n_by_4_fn_t sad_8x16x4d_sse2 = vp9_sad8x16x4d_sse2; const sad_n_by_n_by_4_fn_t sad_8x16x4d_sse2 = vp9_sad8x16x4d_sse2;
const sad_n_by_n_by_4_fn_t sad_8x8x4d_sse2 = vp9_sad8x8x4d_sse2; const sad_n_by_n_by_4_fn_t sad_8x8x4d_sse2 = vp9_sad8x8x4d_sse2;
const sad_n_by_n_by_4_fn_t sad_8x4x4d_sse2 = vp9_sad8x4x4d_sse2;
INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::Values( INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::Values(
make_tuple(64, 64, sad_64x64x4d_sse2), make_tuple(64, 64, sad_64x64x4d_sse2),
make_tuple(64, 32, sad_64x32x4d_sse2),
make_tuple(32, 64, sad_32x64x4d_sse2),
make_tuple(32, 32, sad_32x32x4d_sse2), make_tuple(32, 32, sad_32x32x4d_sse2),
make_tuple(32, 16, sad_32x16x4d_sse2),
make_tuple(16, 32, sad_16x32x4d_sse2),
make_tuple(16, 16, sad_16x16x4d_sse2), make_tuple(16, 16, sad_16x16x4d_sse2),
make_tuple(16, 8, sad_16x8x4d_sse2), make_tuple(16, 8, sad_16x8x4d_sse2),
make_tuple(8, 16, sad_8x16x4d_sse2), make_tuple(8, 16, sad_8x16x4d_sse2),
make_tuple(8, 8, sad_8x8x4d_sse2))); make_tuple(8, 8, sad_8x8x4d_sse2),
make_tuple(8, 4, sad_8x4x4d_sse2)));
#endif #endif
#endif #endif
......
...@@ -499,13 +499,14 @@ specialize vp9_sad8x8x4d sse2 ...@@ -499,13 +499,14 @@ specialize vp9_sad8x8x4d sse2
# TODO(jingning): need to convert these 4x8/8x4 functions into sse2 form # TODO(jingning): need to convert these 4x8/8x4 functions into sse2 form
prototype void vp9_sad8x4x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" prototype void vp9_sad8x4x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
specialize vp9_sad8x4x4d specialize vp9_sad8x4x4d sse2
prototype void vp9_sad4x8x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" prototype void vp9_sad4x8x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
specialize vp9_sad4x8x4d specialize vp9_sad4x8x4d sse
prototype void vp9_sad4x4x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" prototype void vp9_sad4x4x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
specialize vp9_sad4x4x4d sse specialize vp9_sad4x4x4d sse
prototype unsigned int vp9_sub_pixel_mse16x16 "const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse" prototype unsigned int vp9_sub_pixel_mse16x16 "const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse"
specialize vp9_sub_pixel_mse16x16 sse2 mmx specialize vp9_sub_pixel_mse16x16 sse2 mmx
......
...@@ -224,6 +224,8 @@ SADNXN4D 16, 16 ...@@ -224,6 +224,8 @@ SADNXN4D 16, 16
SADNXN4D 16, 8 SADNXN4D 16, 8
SADNXN4D 8, 16 SADNXN4D 8, 16
SADNXN4D 8, 8 SADNXN4D 8, 8
SADNXN4D 8, 4
INIT_MMX sse INIT_MMX sse
SADNXN4D 4, 8
SADNXN4D 4, 4 SADNXN4D 4, 4
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment