Commit 54eda13f authored by Jingning Han
Browse files

Apply fast motion search to golden reference frame

This commit enables the rtc coding mode to run integral-projection-based
motion search for the golden reference frame. It improves the
speed -6 compression performance by 1.1% on average, 3.46% for
jimred_vga, 6.46% for tacomascmvvga, and 0.5% for vidyo clips.
Encoding at speed -6 is about 6% slower.

Change-Id: I0fe402ad2edf0149d0349ad304ab9b2abdf0c804
parent 1ca4d51b
......@@ -784,15 +784,43 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
continue;
if (this_mode == NEWMV) {
if (ref_frame > LAST_FRAME)
continue;
if (cpi->sf.partition_search_type != VAR_BASED_PARTITION &&
best_rdc.rdcost < (int64_t)(1 << num_pels_log2_lookup[bsize]))
continue;
if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
&frame_mv[NEWMV][ref_frame],
&rate_mv, best_rdc.rdcost))
if (ref_frame > LAST_FRAME) {
int tmp_sad;
int dis, cost_list[5];
if (bsize < BLOCK_16X16)
continue;
tmp_sad = vp9_int_pro_motion_estimation(cpi, x, bsize);
if (tmp_sad > x->pred_mv_sad[LAST_FRAME])
continue;
frame_mv[NEWMV][ref_frame].as_int = mbmi->mv[0].as_int;
rate_mv = vp9_mv_bit_cost(&frame_mv[NEWMV][ref_frame].as_mv,
&mbmi->ref_mvs[ref_frame][0].as_mv,
x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
frame_mv[NEWMV][ref_frame].as_mv.row >>= 3;
frame_mv[NEWMV][ref_frame].as_mv.col >>= 3;
cpi->find_fractional_mv_step(x, &frame_mv[NEWMV][ref_frame].as_mv,
&mbmi->ref_mvs[ref_frame][0].as_mv,
cpi->common.allow_high_precision_mv,
x->errorperbit,
&cpi->fn_ptr[bsize],
cpi->sf.mv.subpel_force_stop,
cpi->sf.mv.subpel_iters_per_step,
cond_cost_list(cpi, cost_list),
x->nmvjointcost, x->mvcost, &dis,
&x->pred_sse[ref_frame], NULL, 0, 0);
} else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
&frame_mv[NEWMV][ref_frame],
&rate_mv, best_rdc.rdcost)) {
continue;
}
}
if (this_mode != NEARESTMV &&
......@@ -817,7 +845,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
if ((this_mode == NEWMV || filter_ref == SWITCHABLE) &&
pred_filter_search &&
pred_filter_search && (ref_frame == LAST_FRAME) &&
((mbmi->mv[0].as_mv.row & 0x07) != 0 ||
(mbmi->mv[0].as_mv.col & 0x07) != 0)) {
int pf_rate[3];
......
......@@ -61,7 +61,7 @@ void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref,
const int ref_stride, const int height) {
int idx;
__m128i zero = _mm_setzero_si128();
__m128i src_line = _mm_load_si128((const __m128i *)ref);
__m128i src_line = _mm_loadu_si128((const __m128i *)ref);
__m128i s0 = _mm_unpacklo_epi8(src_line, zero);
__m128i s1 = _mm_unpackhi_epi8(src_line, zero);
__m128i t0, t1;
......@@ -69,14 +69,14 @@ void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref,
ref += ref_stride;
for (idx = 1; idx < height_1; idx += 2) {
src_line = _mm_load_si128((const __m128i *)ref);
src_line = _mm_loadu_si128((const __m128i *)ref);
t0 = _mm_unpacklo_epi8(src_line, zero);
t1 = _mm_unpackhi_epi8(src_line, zero);
s0 = _mm_adds_epu16(s0, t0);
s1 = _mm_adds_epu16(s1, t1);
ref += ref_stride;
src_line = _mm_load_si128((const __m128i *)ref);
src_line = _mm_loadu_si128((const __m128i *)ref);
t0 = _mm_unpacklo_epi8(src_line, zero);
t1 = _mm_unpackhi_epi8(src_line, zero);
s0 = _mm_adds_epu16(s0, t0);
......@@ -84,7 +84,7 @@ void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref,
ref += ref_stride;
}
src_line = _mm_load_si128((const __m128i *)ref);
src_line = _mm_loadu_si128((const __m128i *)ref);
t0 = _mm_unpacklo_epi8(src_line, zero);
t1 = _mm_unpackhi_epi8(src_line, zero);
s0 = _mm_adds_epu16(s0, t0);
......@@ -101,9 +101,9 @@ void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref,
s1 = _mm_srai_epi16(s1, 3);
}
_mm_store_si128((__m128i *)hbuf, s0);
_mm_storeu_si128((__m128i *)hbuf, s0);
hbuf += 8;
_mm_store_si128((__m128i *)hbuf, s1);
_mm_storeu_si128((__m128i *)hbuf, s1);
}
int16_t vp9_int_pro_col_sse2(uint8_t const *ref, const int width) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment