Commit 2cfddec3 authored by Jingning Han
Browse files

Refactor column integral projection computation

Move the scaling (normalization) shift out of the column projection
function and apply it in the caller, where the factor is computed once.
This avoids recomputing the same scaling factor on every call. Profiling
shows that the share of overall cycles spent in vp9_int_pro_col_sse2
drops from 2.29% to 1.88%.

Change-Id: I5ac4e324ab2d7f33ba2de66dd2a12e04e04dfd66
parent 7cf383d1
......@@ -1805,6 +1805,7 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
MV *tmp_mv = &xd->mi[0].src_mi->mbmi.mv[0].as_mv;
unsigned int best_sad, tmp_sad, this_sad[4];
MV this_mv;
const int norm_factor = 3 + (bw >> 5);
#if CONFIG_VP9_HIGHBITDEPTH
tmp_mv->row = 0;
......@@ -1822,7 +1823,7 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
ref_buf = xd->plane[0].pre[0].buf - (bh >> 1) * ref_stride;
for (idx = 0; idx < search_height; ++idx) {
vbuf[idx] = vp9_int_pro_col(ref_buf, bw);
vbuf[idx] = vp9_int_pro_col(ref_buf, bw) >> norm_factor;
ref_buf += ref_stride;
}
......@@ -1834,7 +1835,7 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
src_buf = x->plane[0].src.buf;
for (idx = 0; idx < bh; ++idx) {
src_vbuf[idx] = vp9_int_pro_col(src_buf, bw);
src_vbuf[idx] = vp9_int_pro_col(src_buf, bw) >> norm_factor;
src_buf += src_stride;
}
......
......@@ -112,7 +112,6 @@ int16_t vp9_int_pro_col_sse2(uint8_t const *ref, const int width) {
__m128i s0 = _mm_sad_epu8(src_line, zero);
__m128i s1;
int i;
const int norm_factor = 3 + (width >> 5);
for (i = 16; i < width; i += 16) {
ref += 16;
......@@ -124,7 +123,7 @@ int16_t vp9_int_pro_col_sse2(uint8_t const *ref, const int width) {
s1 = _mm_srli_si128(s0, 8);
s0 = _mm_adds_epu16(s0, s1);
return _mm_extract_epi16(s0, 0) >> norm_factor;
return _mm_extract_epi16(s0, 0);
}
int vp9_vector_var_sse2(int16_t const *ref, int16_t const *src,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment