Commit 2003cd80, authored by Yi Luo and committed by Gerrit Code Review
Browse files

Merge "Change register loading to fix stack overflow issue" into nextgenv2

parents bf89ee71 08184e32
......@@ -1151,7 +1151,9 @@ INSTANTIATE_TEST_CASE_P(
INSTANTIATE_TEST_CASE_P(
SSE4_1, VpxSubpelVarianceTest,
::testing::Values(
make_tuple(2, 2, &vpx_highbd_8_sub_pixel_variance4x4_sse4_1, 8)));
make_tuple(2, 2, &vpx_highbd_8_sub_pixel_variance4x4_sse4_1, 8),
make_tuple(2, 2, &vpx_highbd_10_sub_pixel_variance4x4_sse4_1, 10),
make_tuple(2, 2, &vpx_highbd_12_sub_pixel_variance4x4_sse4_1, 12)));
INSTANTIATE_TEST_CASE_P(
SSE4_1, VpxSubpelAvgVarianceTest,
......
......@@ -1359,8 +1359,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
}
}
}
$vpx_highbd_10_sub_pixel_variance4x4_sse4_1='';
$vpx_highbd_12_sub_pixel_variance4x4_sse4_1='';
} # CONFIG_VP9_HIGHBITDEPTH
if (vpx_config("CONFIG_EXT_INTER") eq "yes") {
......
......@@ -29,15 +29,15 @@ static INLINE void variance4x4_64_sse4_1(const uint8_t *a8, int a_stride,
uint16_t *a = CONVERT_TO_SHORTPTR(a8);
uint16_t *b = CONVERT_TO_SHORTPTR(b8);
a0 = _mm_loadu_si128((__m128i const *) (a + 0 * a_stride));
a1 = _mm_loadu_si128((__m128i const *) (a + 1 * a_stride));
a2 = _mm_loadu_si128((__m128i const *) (a + 2 * a_stride));
a3 = _mm_loadu_si128((__m128i const *) (a + 3 * a_stride));
a0 = _mm_loadl_epi64((__m128i const *)(a + 0 * a_stride));
a1 = _mm_loadl_epi64((__m128i const *)(a + 1 * a_stride));
a2 = _mm_loadl_epi64((__m128i const *)(a + 2 * a_stride));
a3 = _mm_loadl_epi64((__m128i const *)(a + 3 * a_stride));
b0 = _mm_loadu_si128((__m128i const *) (b + 0 * b_stride));
b1 = _mm_loadu_si128((__m128i const *) (b + 1 * b_stride));
b2 = _mm_loadu_si128((__m128i const *) (b + 2 * b_stride));
b3 = _mm_loadu_si128((__m128i const *) (b + 3 * b_stride));
b0 = _mm_loadl_epi64((__m128i const *)(b + 0 * b_stride));
b1 = _mm_loadl_epi64((__m128i const *)(b + 1 * b_stride));
b2 = _mm_loadl_epi64((__m128i const *)(b + 2 * b_stride));
b3 = _mm_loadl_epi64((__m128i const *)(b + 3 * b_stride));
u0 = _mm_unpacklo_epi16(a0, a1);
u1 = _mm_unpacklo_epi16(a2, a3);
......@@ -130,6 +130,44 @@ uint32_t vpx_highbd_8_sub_pixel_variance4x4_sse4_1(
4, dst, dst_stride, sse);
}
// 10-bit high-bitdepth sub-pixel variance of a 4x4 block.
//
// src is run through vpx_highbd_var_filter_block2d_bil_first_pass() with the
// bilinear_filters_2t entry indexed by xoffset, then through
// vpx_highbd_var_filter_block2d_bil_second_pass() with the entry indexed by
// yoffset. The filtered 4x4 block (stride 4) is compared against dst by
// vpx_highbd_10_variance4x4(), whose return value is returned unchanged;
// sse is forwarded to that call as-is.
uint32_t vpx_highbd_10_sub_pixel_variance4x4_sse4_1(
    const uint8_t *src, int src_stride,
    int xoffset, int yoffset,
    const uint8_t *dst, int dst_stride,
    uint32_t *sse) {
  enum { kEdge = 4 };  // Block is kEdge x kEdge samples.

  // The first pass is asked for kEdge + 1 lines of kEdge samples; presumably
  // the extra line feeds the second (vertical) pass -- confirm against the
  // filter helpers.
  uint16_t first_pass_out[(kEdge + 1) * kEdge];
  uint16_t filtered[kEdge * kEdge];

  vpx_highbd_var_filter_block2d_bil_first_pass(
      src, first_pass_out, src_stride, 1, kEdge + 1, kEdge,
      bilinear_filters_2t[xoffset]);

  vpx_highbd_var_filter_block2d_bil_second_pass(
      first_pass_out, filtered, kEdge, kEdge, kEdge, kEdge,
      bilinear_filters_2t[yoffset]);

  // The variance routine takes a uint8_t-typed pointer, so the 16-bit buffer
  // is passed through CONVERT_TO_BYTEPTR.
  return vpx_highbd_10_variance4x4(CONVERT_TO_BYTEPTR(filtered), kEdge, dst,
                                   dst_stride, sse);
}
// 12-bit high-bitdepth sub-pixel variance of a 4x4 block.
//
// src is run through vpx_highbd_var_filter_block2d_bil_first_pass() with the
// bilinear_filters_2t entry indexed by xoffset, then through
// vpx_highbd_var_filter_block2d_bil_second_pass() with the entry indexed by
// yoffset. The filtered 4x4 block (stride 4) is compared against dst by
// vpx_highbd_12_variance4x4(), whose return value is returned unchanged;
// sse is forwarded to that call as-is.
uint32_t vpx_highbd_12_sub_pixel_variance4x4_sse4_1(
    const uint8_t *src, int src_stride,
    int xoffset, int yoffset,
    const uint8_t *dst, int dst_stride,
    uint32_t *sse) {
  enum { kEdge = 4 };  // Block is kEdge x kEdge samples.

  // The first pass is asked for kEdge + 1 lines of kEdge samples; presumably
  // the extra line feeds the second (vertical) pass -- confirm against the
  // filter helpers.
  uint16_t first_pass_out[(kEdge + 1) * kEdge];
  uint16_t filtered[kEdge * kEdge];

  vpx_highbd_var_filter_block2d_bil_first_pass(
      src, first_pass_out, src_stride, 1, kEdge + 1, kEdge,
      bilinear_filters_2t[xoffset]);

  vpx_highbd_var_filter_block2d_bil_second_pass(
      first_pass_out, filtered, kEdge, kEdge, kEdge, kEdge,
      bilinear_filters_2t[yoffset]);

  // The variance routine takes a uint8_t-typed pointer, so the 16-bit buffer
  // is passed through CONVERT_TO_BYTEPTR.
  return vpx_highbd_12_variance4x4(CONVERT_TO_BYTEPTR(filtered), kEdge, dst,
                                   dst_stride, sse);
}
// Sub-pixel average
uint32_t vpx_highbd_8_sub_pixel_avg_variance4x4_sse4_1(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment