Commit 46ea9ec7 authored by Jingning Han's avatar Jingning Han

Enable real-time version reference motion vector search

This commit enables a fast reference motion vector search scheme.
It checks the nearest top and left neighboring blocks to decide the
most probable predicted motion vector. If it finds the two have
the same motion vectors, it then skip finding exterior range for
the second most probable motion vector, and correspondingly skips
the check for NEARMV.

The runtime of speed -5 goes down
pedestrian at 1080p 29377 ms -> 27783 ms
vidyo at 720p       11830 ms -> 10990 ms
i.e., 6%-8% speed-up.

For rtc set, the compression performance
goes down by about -1.3% for both speed -5 and -6.

Change-Id: I2a7794fa99734f739f8b30519ad4dfd511ab91a5
parent e84e8685
......@@ -11,181 +11,6 @@
#include "vp9/common/vp9_mvref_common.h"
#define MVREF_NEIGHBOURS 8
typedef struct position {
int row;
int col;
} POSITION;
typedef enum {
BOTH_ZERO = 0,
ZERO_PLUS_PREDICTED = 1,
BOTH_PREDICTED = 2,
NEW_PLUS_NON_INTRA = 3,
BOTH_NEW = 4,
INTRA_PLUS_NON_INTRA = 5,
BOTH_INTRA = 6,
INVALID_CASE = 9
} motion_vector_context;
// This is used to figure out a context for the ref blocks. The code flattens
// an array that would have 3 possible counts (0, 1 & 2) for 3 choices by
// adding 9 for each intra block, 3 for each zero mv and 1 for each new
// motion vector. This single number is then converted into a context
// with a single lookup ( counter_to_context ).
static const int mode_2_counter[MB_MODE_COUNT] = {
9, // DC_PRED
9, // V_PRED
9, // H_PRED
9, // D45_PRED
9, // D135_PRED
9, // D117_PRED
9, // D153_PRED
9, // D207_PRED
9, // D63_PRED
9, // TM_PRED
0, // NEARESTMV
0, // NEARMV
3, // ZEROMV
1, // NEWMV
};
// There are 3^3 different combinations of 3 counts that can be either 0,1 or
// 2. However the actual count can never be greater than 2 so the highest
// counter we need is 18. 9 is an invalid counter that's never used.
static const int counter_to_context[19] = {
BOTH_PREDICTED, // 0
NEW_PLUS_NON_INTRA, // 1
BOTH_NEW, // 2
ZERO_PLUS_PREDICTED, // 3
NEW_PLUS_NON_INTRA, // 4
INVALID_CASE, // 5
BOTH_ZERO, // 6
INVALID_CASE, // 7
INVALID_CASE, // 8
INTRA_PLUS_NON_INTRA, // 9
INTRA_PLUS_NON_INTRA, // 10
INVALID_CASE, // 11
INTRA_PLUS_NON_INTRA, // 12
INVALID_CASE, // 13
INVALID_CASE, // 14
INVALID_CASE, // 15
INVALID_CASE, // 16
INVALID_CASE, // 17
BOTH_INTRA // 18
};
static const POSITION mv_ref_blocks[BLOCK_SIZES][MVREF_NEIGHBOURS] = {
// 4X4
{{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
// 4X8
{{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
// 8X4
{{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
// 8X8
{{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
// 8X16
{{0, -1}, {-1, 0}, {1, -1}, {-1, -1}, {0, -2}, {-2, 0}, {-2, -1}, {-1, -2}},
// 16X8
{{-1, 0}, {0, -1}, {-1, 1}, {-1, -1}, {-2, 0}, {0, -2}, {-1, -2}, {-2, -1}},
// 16X16
{{-1, 0}, {0, -1}, {-1, 1}, {1, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-3, -3}},
// 16X32
{{0, -1}, {-1, 0}, {2, -1}, {-1, -1}, {-1, 1}, {0, -3}, {-3, 0}, {-3, -3}},
// 32X16
{{-1, 0}, {0, -1}, {-1, 2}, {-1, -1}, {1, -1}, {-3, 0}, {0, -3}, {-3, -3}},
// 32X32
{{-1, 1}, {1, -1}, {-1, 2}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-3, -3}},
// 32X64
{{0, -1}, {-1, 0}, {4, -1}, {-1, 2}, {-1, -1}, {0, -3}, {-3, 0}, {2, -1}},
// 64X32
{{-1, 0}, {0, -1}, {-1, 4}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-1, 2}},
// 64X64
{{-1, 3}, {3, -1}, {-1, 4}, {4, -1}, {-1, -1}, {-1, 0}, {0, -1}, {-1, 6}}
};
static const int idx_n_column_to_subblock[4][2] = {
{1, 2},
{1, 3},
{3, 2},
{3, 3}
};
// clamp_mv_ref
#define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units
static void clamp_mv_ref(MV *mv, const MACROBLOCKD *xd) {
clamp_mv(mv, xd->mb_to_left_edge - MV_BORDER,
xd->mb_to_right_edge + MV_BORDER,
xd->mb_to_top_edge - MV_BORDER,
xd->mb_to_bottom_edge + MV_BORDER);
}
// This function returns either the appropriate sub block or block's mv
// on whether the block_size < 8x8 and we have check_sub_blocks set.
static INLINE int_mv get_sub_block_mv(const MODE_INFO *candidate, int which_mv,
int search_col, int block_idx) {
return block_idx >= 0 && candidate->mbmi.sb_type < BLOCK_8X8
? candidate->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]]
.as_mv[which_mv]
: candidate->mbmi.mv[which_mv];
}
// Performs mv sign inversion if indicated by the reference frame combination.
static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref,
const MV_REFERENCE_FRAME this_ref_frame,
const int *ref_sign_bias) {
int_mv mv = mbmi->mv[ref];
if (ref_sign_bias[mbmi->ref_frame[ref]] != ref_sign_bias[this_ref_frame]) {
mv.as_mv.row *= -1;
mv.as_mv.col *= -1;
}
return mv;
}
// This macro is used to add a motion vector mv_ref list if it isn't
// already in the list. If it's the second motion vector it will also
// skip all additional processing and jump to done!
#define ADD_MV_REF_LIST(mv) \
do { \
if (refmv_count) { \
if ((mv).as_int != mv_ref_list[0].as_int) { \
mv_ref_list[refmv_count] = (mv); \
goto Done; \
} \
} else { \
mv_ref_list[refmv_count++] = (mv); \
} \
} while (0)
// If either reference frame is different, not INTRA, and they
// are different from each other scale and add the mv to our list.
#define IF_DIFF_REF_FRAME_ADD_MV(mbmi) \
do { \
if (is_inter_block(mbmi)) { \
if ((mbmi)->ref_frame[0] != ref_frame) \
ADD_MV_REF_LIST(scale_mv((mbmi), 0, ref_frame, ref_sign_bias)); \
if (has_second_ref(mbmi) && \
(mbmi)->ref_frame[1] != ref_frame && \
(mbmi)->mv[1].as_int != (mbmi)->mv[0].as_int) \
ADD_MV_REF_LIST(scale_mv((mbmi), 1, ref_frame, ref_sign_bias)); \
} \
} while (0)
// Checks that the given mi_row, mi_col and search point
// are inside the borders of the tile.
static INLINE int is_inside(const TileInfo *const tile,
int mi_col, int mi_row, int mi_rows,
const POSITION *mi_pos) {
return !(mi_row + mi_pos->row < 0 ||
mi_col + mi_pos->col < tile->mi_col_start ||
mi_row + mi_pos->row >= mi_rows ||
mi_col + mi_pos->col >= tile->mi_col_end);
}
// This function searches the neighbourhood of a given MB/SB
// to try and find candidate reference vectors.
static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
......
......@@ -21,6 +21,181 @@ extern "C" {
#define RIGHT_BOTTOM_MARGIN ((VP9_ENC_BORDER_IN_PIXELS -\
VP9_INTERP_EXTEND) << 3)
#define MVREF_NEIGHBOURS 8
typedef struct position {
int row;
int col;
} POSITION;
typedef enum {
BOTH_ZERO = 0,
ZERO_PLUS_PREDICTED = 1,
BOTH_PREDICTED = 2,
NEW_PLUS_NON_INTRA = 3,
BOTH_NEW = 4,
INTRA_PLUS_NON_INTRA = 5,
BOTH_INTRA = 6,
INVALID_CASE = 9
} motion_vector_context;
// This is used to figure out a context for the ref blocks. The code flattens
// an array that would have 3 possible counts (0, 1 & 2) for 3 choices by
// adding 9 for each intra block, 3 for each zero mv and 1 for each new
// motion vector. This single number is then converted into a context
// with a single lookup ( counter_to_context ).
static const int mode_2_counter[MB_MODE_COUNT] = {
9, // DC_PRED
9, // V_PRED
9, // H_PRED
9, // D45_PRED
9, // D135_PRED
9, // D117_PRED
9, // D153_PRED
9, // D207_PRED
9, // D63_PRED
9, // TM_PRED
0, // NEARESTMV
0, // NEARMV
3, // ZEROMV
1, // NEWMV
};
// There are 3^3 different combinations of 3 counts that can be either 0,1 or
// 2. However the actual count can never be greater than 2 so the highest
// counter we need is 18. 9 is an invalid counter that's never used.
static const int counter_to_context[19] = {
BOTH_PREDICTED, // 0
NEW_PLUS_NON_INTRA, // 1
BOTH_NEW, // 2
ZERO_PLUS_PREDICTED, // 3
NEW_PLUS_NON_INTRA, // 4
INVALID_CASE, // 5
BOTH_ZERO, // 6
INVALID_CASE, // 7
INVALID_CASE, // 8
INTRA_PLUS_NON_INTRA, // 9
INTRA_PLUS_NON_INTRA, // 10
INVALID_CASE, // 11
INTRA_PLUS_NON_INTRA, // 12
INVALID_CASE, // 13
INVALID_CASE, // 14
INVALID_CASE, // 15
INVALID_CASE, // 16
INVALID_CASE, // 17
BOTH_INTRA // 18
};
static const POSITION mv_ref_blocks[BLOCK_SIZES][MVREF_NEIGHBOURS] = {
// 4X4
{{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
// 4X8
{{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
// 8X4
{{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
// 8X8
{{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
// 8X16
{{0, -1}, {-1, 0}, {1, -1}, {-1, -1}, {0, -2}, {-2, 0}, {-2, -1}, {-1, -2}},
// 16X8
{{-1, 0}, {0, -1}, {-1, 1}, {-1, -1}, {-2, 0}, {0, -2}, {-1, -2}, {-2, -1}},
// 16X16
{{-1, 0}, {0, -1}, {-1, 1}, {1, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-3, -3}},
// 16X32
{{0, -1}, {-1, 0}, {2, -1}, {-1, -1}, {-1, 1}, {0, -3}, {-3, 0}, {-3, -3}},
// 32X16
{{-1, 0}, {0, -1}, {-1, 2}, {-1, -1}, {1, -1}, {-3, 0}, {0, -3}, {-3, -3}},
// 32X32
{{-1, 1}, {1, -1}, {-1, 2}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-3, -3}},
// 32X64
{{0, -1}, {-1, 0}, {4, -1}, {-1, 2}, {-1, -1}, {0, -3}, {-3, 0}, {2, -1}},
// 64X32
{{-1, 0}, {0, -1}, {-1, 4}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-1, 2}},
// 64X64
{{-1, 3}, {3, -1}, {-1, 4}, {4, -1}, {-1, -1}, {-1, 0}, {0, -1}, {-1, 6}}
};
static const int idx_n_column_to_subblock[4][2] = {
{1, 2},
{1, 3},
{3, 2},
{3, 3}
};
// clamp_mv_ref
#define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units
static void clamp_mv_ref(MV *mv, const MACROBLOCKD *xd) {
clamp_mv(mv, xd->mb_to_left_edge - MV_BORDER,
xd->mb_to_right_edge + MV_BORDER,
xd->mb_to_top_edge - MV_BORDER,
xd->mb_to_bottom_edge + MV_BORDER);
}
// This function returns either the appropriate sub block or block's mv
// on whether the block_size < 8x8 and we have check_sub_blocks set.
static INLINE int_mv get_sub_block_mv(const MODE_INFO *candidate, int which_mv,
int search_col, int block_idx) {
return block_idx >= 0 && candidate->mbmi.sb_type < BLOCK_8X8
? candidate->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]]
.as_mv[which_mv]
: candidate->mbmi.mv[which_mv];
}
// Performs mv sign inversion if indicated by the reference frame combination.
static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref,
const MV_REFERENCE_FRAME this_ref_frame,
const int *ref_sign_bias) {
int_mv mv = mbmi->mv[ref];
if (ref_sign_bias[mbmi->ref_frame[ref]] != ref_sign_bias[this_ref_frame]) {
mv.as_mv.row *= -1;
mv.as_mv.col *= -1;
}
return mv;
}
// This macro is used to add a motion vector mv_ref list if it isn't
// already in the list. If it's the second motion vector it will also
// skip all additional processing and jump to done!
#define ADD_MV_REF_LIST(mv) \
do { \
if (refmv_count) { \
if ((mv).as_int != mv_ref_list[0].as_int) { \
mv_ref_list[refmv_count] = (mv); \
goto Done; \
} \
} else { \
mv_ref_list[refmv_count++] = (mv); \
} \
} while (0)
// If either reference frame is different, not INTRA, and they
// are different from each other scale and add the mv to our list.
#define IF_DIFF_REF_FRAME_ADD_MV(mbmi) \
do { \
if (is_inter_block(mbmi)) { \
if ((mbmi)->ref_frame[0] != ref_frame) \
ADD_MV_REF_LIST(scale_mv((mbmi), 0, ref_frame, ref_sign_bias)); \
if (has_second_ref(mbmi) && \
(mbmi)->ref_frame[1] != ref_frame && \
(mbmi)->mv[1].as_int != (mbmi)->mv[0].as_int) \
ADD_MV_REF_LIST(scale_mv((mbmi), 1, ref_frame, ref_sign_bias)); \
} \
} while (0)
// Checks that the given mi_row, mi_col and search point
// are inside the borders of the tile.
static INLINE int is_inside(const TileInfo *const tile,
int mi_col, int mi_row, int mi_rows,
const POSITION *mi_pos) {
return !(mi_row + mi_pos->row < 0 ||
mi_col + mi_pos->col < tile->mi_col_start ||
mi_row + mi_pos->row >= mi_rows ||
mi_col + mi_pos->col >= tile->mi_col_end);
}
// TODO(jingning): this mv clamping function should be block size dependent.
static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN,
......
......@@ -27,6 +27,85 @@
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_rdopt.h"
static int mv_refs_rt(const VP9_COMMON *cm, const MACROBLOCKD *xd,
const TileInfo *const tile,
MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
int_mv *mv_ref_list,
int mi_row, int mi_col) {
const int *ref_sign_bias = cm->ref_frame_sign_bias;
int i, refmv_count = 0;
const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type];
int different_ref_found = 0;
int context_counter = 0;
int const_motion = 0;
// Blank the reference vector list
vpx_memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES);
// The nearest 2 blocks are treated differently
// if the size < 8x8 we get the mv from the bmi substructure,
// and we also need to keep a mode count.
for (i = 0; i < 2; ++i) {
const POSITION *const mv_ref = &mv_ref_search[i];
if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row *
xd->mi_stride];
const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
// Keep counts for entropy encoding.
context_counter += mode_2_counter[candidate->mode];
different_ref_found = 1;
if (candidate->ref_frame[0] == ref_frame)
ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, -1));
}
}
const_motion = 1;
// Check the rest of the neighbors in much the same way
// as before except we don't need to keep track of sub blocks or
// mode counts.
for (; i < MVREF_NEIGHBOURS && !refmv_count; ++i) {
const POSITION *const mv_ref = &mv_ref_search[i];
if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row *
xd->mi_stride]->mbmi;
different_ref_found = 1;
if (candidate->ref_frame[0] == ref_frame)
ADD_MV_REF_LIST(candidate->mv[0]);
}
}
// Since we couldn't find 2 mvs from the same reference frame
// go back through the neighbors and find motion vectors from
// different reference frames.
if (different_ref_found && !refmv_count) {
for (i = 0; i < MVREF_NEIGHBOURS; ++i) {
const POSITION *mv_ref = &mv_ref_search[i];
if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row
* xd->mi_stride]->mbmi;
// If the candidate is INTRA we don't want to consider its mv.
IF_DIFF_REF_FRAME_ADD_MV(candidate);
}
}
}
Done:
mi->mbmi.mode_context[ref_frame] = counter_to_context[context_counter];
// Clamp vectors
for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i)
clamp_mv_ref(&mv_ref_list[i].as_mv, xd);
return const_motion;
}
static void full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, int mi_row, int mi_col,
int_mv *tmp_mv, int *rate_mv) {
......@@ -245,6 +324,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int bsl = mi_width_log2_lookup[bsize];
const int pred_filter_search = (((mi_row + mi_col) >> bsl) +
get_chessboard_index(cm)) % 2;
int const_motion[MAX_REF_FRAMES] = { 0 };
// For speed 6, the result of interp filter is reused later in actual encoding
// process.
......@@ -292,9 +372,27 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
for (ref_frame = LAST_FRAME; ref_frame <= LAST_FRAME ; ++ref_frame) {
x->pred_mv_sad[ref_frame] = INT_MAX;
if (cpi->ref_frame_flags & flag_list[ref_frame]) {
vp9_setup_buffer_inter(cpi, x, tile,
ref_frame, bsize, mi_row, mi_col,
frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
int_mv *const candidates = mbmi->ref_mvs[ref_frame];
const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col,
sf, sf);
if (cm->coding_use_prev_mi)
vp9_find_mv_refs(cm, xd, tile, xd->mi[0], ref_frame,
candidates, mi_row, mi_col);
else
const_motion[ref_frame] = mv_refs_rt(cm, xd, tile, xd->mi[0],
ref_frame, candidates,
mi_row, mi_col);
vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
&frame_mv[NEARESTMV][ref_frame],
&frame_mv[NEARMV][ref_frame]);
if (!vp9_is_scaled(sf) && bsize >= BLOCK_8X8)
vp9_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride,
ref_frame, bsize);
}
frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
frame_mv[ZEROMV][ref_frame].as_int = 0;
......@@ -328,6 +426,10 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
int rate_mv = 0;
if (const_motion[ref_frame] &&
(this_mode == NEARMV || this_mode == ZEROMV))
continue;
if (!(cpi->sf.inter_mode_mask[bsize] & (1 << this_mode)))
continue;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment