Commit 91b8236c authored by Yunqing Wang, committed by Gerrit Code Review

Merge "Add high-precision sub-pixel search as a speed feature" into nextgenv2

parents 46cd6ee9 e6e2d886
@@ -284,7 +284,6 @@ EXPERIMENT_LIST="
ext_partition
ext_tile
obmc
affine_motion
"
CONFIG_LIST="
dependency_tracking
......
This diff is collapsed.
@@ -286,12 +286,10 @@ typedef struct IMAGE_STAT {
double worst;
} ImageStat;
#if CONFIG_AFFINE_MOTION
typedef struct {
int ref_count;
YV12_BUFFER_CONFIG buf;
} EncRefCntBuffer;
#endif
typedef struct VP10_COMP {
QUANTS quants;
@@ -311,11 +309,9 @@ typedef struct VP10_COMP {
YV12_BUFFER_CONFIG *unscaled_last_source;
YV12_BUFFER_CONFIG scaled_last_source;
#if CONFIG_AFFINE_MOTION
// Up-sampled reference buffers
EncRefCntBuffer upsampled_ref_bufs[MAX_REF_FRAMES];
int upsampled_ref_idx[MAX_REF_FRAMES];
#endif
TileDataEnc *tile_data;
int allocated_tiles; // Keep track of memory allocated for tiles.
@@ -702,7 +698,6 @@ void vp10_new_framerate(VP10_COMP *cpi, double framerate);
#define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl))
#if CONFIG_AFFINE_MOTION
// Update up-sampled reference frame index.
static INLINE void uref_cnt_fb(EncRefCntBuffer *ubufs, int *uidx,
int new_uidx) {
@@ -714,7 +709,6 @@ static INLINE void uref_cnt_fb(EncRefCntBuffer *ubufs, int *uidx,
*uidx = new_uidx;
ubufs[new_uidx].ref_count++;
}
#endif
#ifdef __cplusplus
} // extern "C"
......
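Note (not part of the diff): the body of uref_cnt_fb is split across the two encoder.h hunks above, and the release of the previously held buffer falls in the elided context. A minimal self-contained C sketch of the ref-counting idiom, with EncRefCntBuffer reduced to the one field the helper touches and the decrement guard being our reconstruction of the elided lines:

/* Sketch only -- not part of the patch. */
typedef struct {
  int ref_count; /* in the encoder this struct also holds the YV12 buffer */
} EncRefCntBufferSketch;

static void uref_cnt_fb_sketch(EncRefCntBufferSketch *ubufs, int *uidx,
                               int new_uidx) {
  const int ref_index = *uidx;
  /* Release the up-sampled buffer this slot currently references... */
  if (ref_index >= 0 && ubufs[ref_index].ref_count > 0)
    ubufs[ref_index].ref_count--;
  /* ...then point the slot at the new buffer and take a reference on it. */
  *uidx = new_uidx;
  ubufs[new_uidx].ref_count++;
}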
@@ -64,11 +64,7 @@ static unsigned int do_16x16_motion_iteration(VP10_COMP *cpi,
&v_fn_ptr, 0, mv_sf->subpel_iters_per_step,
cond_cost_list(cpi, cost_list),
NULL, NULL,
#if CONFIG_AFFINE_MOTION
&distortion, &sse, NULL, 0, 0, 0);
#else
&distortion, &sse, NULL, 0, 0);
#endif
}
#if CONFIG_EXT_INTER
......
@@ -210,7 +210,6 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c)
#if CONFIG_AFFINE_MOTION
static INLINE const uint8_t *upre(const uint8_t *buf, int stride,
int r, int c) {
return &buf[(r) * stride + (c)];
@@ -232,7 +231,6 @@ static INLINE const uint8_t *upre(const uint8_t *buf, int stride,
} else { \
v = INT_MAX; \
}
#endif
#define FIRST_LEVEL_CHECKS \
{ \
@@ -438,11 +436,7 @@ int vp10_find_best_sub_pixel_tree_pruned_evenmore(
int *distortion,
unsigned int *sse1,
const uint8_t *second_pred,
#if CONFIG_AFFINE_MOTION
int w, int h, int use_upsampled_ref) {
#else
int w, int h) {
#endif
SETUP_SUBPEL_SEARCH;
besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
z, src_stride, y, y_stride, second_pred,
@@ -455,9 +449,7 @@ int vp10_find_best_sub_pixel_tree_pruned_evenmore(
(void) allow_hp;
(void) forced_stop;
(void) hstep;
#if CONFIG_AFFINE_MOTION
(void) use_upsampled_ref;
#endif
if (cost_list &&
cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
@@ -524,16 +516,10 @@ int vp10_find_best_sub_pixel_tree_pruned_more(const MACROBLOCK *x,
int *distortion,
unsigned int *sse1,
const uint8_t *second_pred,
#if CONFIG_AFFINE_MOTION
int w, int h,
int use_upsampled_ref) {
#else
int w, int h) {
#endif
SETUP_SUBPEL_SEARCH;
#if CONFIG_AFFINE_MOTION
(void) use_upsampled_ref;
#endif
besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
z, src_stride, y, y_stride, second_pred,
@@ -607,15 +593,9 @@ int vp10_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x,
int *distortion,
unsigned int *sse1,
const uint8_t *second_pred,
#if CONFIG_AFFINE_MOTION
int w, int h, int use_upsampled_ref) {
#else
int w, int h) {
#endif
SETUP_SUBPEL_SEARCH;
#if CONFIG_AFFINE_MOTION
(void) use_upsampled_ref;
#endif
besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
z, src_stride, y, y_stride, second_pred,
@@ -705,9 +685,8 @@ static const MV search_step_table[12] = {
{0, -1}, {0, 1}, {-1, 0}, {1, 0}
};
#if CONFIG_AFFINE_MOTION
#if CONFIG_VP9_HIGHBITDEPTH
// TODO(yunqing): Optimize the following 2 functions.
static void highbd_comp_avg_upsampled_pred(uint16_t *comp_pred,
const uint8_t *pred8,
int width, int height,
@@ -798,7 +777,6 @@ static unsigned int upsampled_setup_center_error(
besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
return besterr;
}
#endif
int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x,
MV *bestmv, const MV *ref_mv,
@@ -812,11 +790,7 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x,
int *distortion,
unsigned int *sse1,
const uint8_t *second_pred,
#if CONFIG_AFFINE_MOTION
int w, int h, int use_upsampled_ref) {
#else
int w, int h) {
#endif
const uint8_t *const z = x->plane[0].src.buf;
const uint8_t *const src_address = z;
const int src_stride = x->plane[0].src.stride;
@@ -852,7 +826,6 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x,
bestmv->row *= 8;
bestmv->col *= 8;
#if CONFIG_AFFINE_MOTION
// use_upsampled_ref can be 0 or 1
if (use_upsampled_ref)
besterr = upsampled_setup_center_error(xd, bestmv, ref_mv, error_per_bit,
@@ -860,7 +833,6 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x,
second_pred, w, h, (offset << 3),
mvjcost, mvcost, sse1, distortion);
else
#endif
besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
z, src_stride, y, y_stride, second_pred,
w, h, offset, mvjcost, mvcost,
@@ -876,7 +848,6 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x,
if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
MV this_mv = {tr, tc};
#if CONFIG_AFFINE_MOTION
if (use_upsampled_ref) {
const uint8_t *const pre_address = y + tr * y_stride + tc;
@@ -884,7 +855,6 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x,
pre_address, y_stride, second_pred,
w, h, &sse);
} else {
#endif
const uint8_t *const pre_address = y + (tr >> 3) * y_stride +
(tc >> 3);
if (second_pred == NULL)
@@ -893,9 +863,7 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x,
else
thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, &sse, second_pred);
#if CONFIG_AFFINE_MOTION
}
#endif
cost_array[idx] = thismse +
mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
@@ -920,7 +888,6 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x,
if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
MV this_mv = {tr, tc};
#if CONFIG_AFFINE_MOTION
if (use_upsampled_ref) {
const uint8_t *const pre_address = y + tr * y_stride + tc;
@@ -928,7 +895,6 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x,
pre_address, y_stride, second_pred,
w, h, &sse);
} else {
#endif
const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
if (second_pred == NULL)
@@ -937,9 +903,7 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x,
else
thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, &sse, second_pred);
#if CONFIG_AFFINE_MOTION
}
#endif
cost_array[4] = thismse +
mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
@@ -963,15 +927,11 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x,
}
if (iters_per_step > 1 && best_idx != -1) {
#if CONFIG_AFFINE_MOTION
if (use_upsampled_ref) {
SECOND_LEVEL_CHECKS_BEST(1);
} else {
#endif
SECOND_LEVEL_CHECKS_BEST(0);
#if CONFIG_AFFINE_MOTION
}
#endif
}
tr = br;
......
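Note (not part of the diff): the recurring pattern in the mcomp.c hunks above is the per-candidate predictor addressing. A self-contained C sketch of that split, assuming the reference plane is up-sampled 8x in each dimension (consistent with the "(offset << 3)" scaling and the "y + tr * y_stride + tc" addressing shown): candidate motion vectors (tr, tc) are in 1/8-pel units, so the up-sampled path is a direct load, while the conventional path drops to integer-pel and lets vfp->svf/svaf interpolate the fractional part.

#include <stdint.h>

/* Sketch only -- not part of the patch. */
static const uint8_t *pred_address_sketch(const uint8_t *y, int y_stride,
                                          int tr, int tc,
                                          int use_upsampled_ref) {
  if (use_upsampled_ref) {
    /* Up-sampled plane: every 1/8-pel position exists as a real sample. */
    return y + tr * y_stride + tc;
  }
  /* Conventional plane: address the integer-pel sample; the fractional
   * part (tr & 7, tc & 7) is handled by the sub-pixel variance functions. */
  return y + (tr >> 3) * y_stride + (tc >> 3);
}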
@@ -116,11 +116,7 @@ typedef int (fractional_mv_step_fp) (
int *mvjcost, int *mvcost[2],
int *distortion, unsigned int *sse1,
const uint8_t *second_pred,
#if CONFIG_AFFINE_MOTION
int w, int h, int use_upsampled_ref);
#else
int w, int h);
#endif
extern fractional_mv_step_fp vp10_find_best_sub_pixel_tree;
extern fractional_mv_step_fp vp10_find_best_sub_pixel_tree_pruned;
......
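Note (not part of the diff): the typedef hunk above is the interface change the rest of the patch follows — the use_upsampled_ref flag becomes an unconditional trailing parameter of every fractional_mv_step_fp. A reduced, compilable model of just the affected tail (all other parameters elided; the reduced names are ours), with the zero-flag call matching the "...NULL, 0, 0, 0);" tails in the mbgraph.c and temporal_filter.c hunks on this page:

#include <stddef.h>
#include <stdint.h>

/* Sketch only -- not part of the patch: everything except the tail elided. */
typedef int (step_tail_fp)(const uint8_t *second_pred,
                           int w, int h, int use_upsampled_ref);

static int step_tail(const uint8_t *second_pred, int w, int h,
                     int use_upsampled_ref) {
  (void)second_pred; (void)w; (void)h;
  return use_upsampled_ref;
}

static int demo_call(void) {
  step_tail_fp *fn = step_tail;
  /* Callers that never use the up-sampled path pass w = 0, h = 0 and a
   * zero flag, so the old "NULL, 0, 0" tail simply gains a fourth 0. */
  return fn(NULL, 0, 0, 0);
}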
This diff is collapsed.
@@ -103,13 +103,16 @@ void vp10_build_prediction_by_left_preds(VP10_COMP *cpi,
int tmp_stride[MAX_MB_PLANE]);
#endif // CONFIG_OBMC
#if CONFIG_AFFINE_MOTION
static INLINE const YV12_BUFFER_CONFIG *get_upsampled_ref(VP10_COMP *cpi,
const int ref) {
// Use up-sampled reference frames.
int ref_idx = 0;
if (ref == LAST_FRAME)
#if CONFIG_EXT_REFS
ref_idx = cpi->lst_fb_idxes[ref - LAST_FRAME];
#else
ref_idx = cpi->lst_fb_idx;
#endif
else if (ref == GOLDEN_FRAME)
ref_idx = cpi->gld_fb_idx;
else if (ref == ALTREF_FRAME)
@@ -117,7 +120,6 @@ static INLINE const YV12_BUFFER_CONFIG *get_upsampled_ref(VP10_COMP *cpi,
return &cpi->upsampled_ref_bufs[cpi->upsampled_ref_idx[ref_idx]].buf;
}
#endif
#ifdef __cplusplus
} // extern "C"
......
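Note (not part of the diff): the ALTREF branch body of get_upsampled_ref is cut off at the hunk boundary above. A self-contained C sketch of the whole lookup, with the encoder context reduced to the fields the function reads; the alt_fb_idx assignment is our assumption by symmetry with lst_fb_idx and gld_fb_idx, and the frame-name values follow the usual vp10 ordering:

/* Sketch only -- not part of the patch. */
enum { LAST_FRAME = 1, GOLDEN_FRAME = 2, ALTREF_FRAME = 3,
       MAX_REF_FRAMES = 4 };

typedef struct { unsigned char *y_buffer; } Yv12Sketch;
typedef struct { int ref_count; Yv12Sketch buf; } EncRefCntBufSketch;

typedef struct {
  int lst_fb_idx, gld_fb_idx, alt_fb_idx;
  int upsampled_ref_idx[MAX_REF_FRAMES];
  EncRefCntBufSketch upsampled_ref_bufs[MAX_REF_FRAMES];
} EncoderSketch;

static const Yv12Sketch *get_upsampled_ref_sketch(EncoderSketch *cpi,
                                                  const int ref) {
  /* Map the named reference to its frame-buffer slot... */
  int ref_idx = 0;
  if (ref == LAST_FRAME)
    ref_idx = cpi->lst_fb_idx;
  else if (ref == GOLDEN_FRAME)
    ref_idx = cpi->gld_fb_idx;
  else if (ref == ALTREF_FRAME)
    ref_idx = cpi->alt_fb_idx; /* assumed: elided at the hunk boundary */
  /* ...then map the slot to the matching entry in the up-sampled pool. */
  return &cpi->upsampled_ref_bufs[cpi->upsampled_ref_idx[ref_idx]].buf;
}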
@@ -177,6 +177,7 @@ static void set_good_speed_feature(VP10_COMP *cpi, VP10_COMMON *cm,
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
sf->allow_partition_search_skip = 1;
sf->use_upsampled_references = 0;
#if CONFIG_EXT_TX
sf->tx_type_search = PRUNE_TWO;
#endif
@@ -279,6 +280,7 @@ static void set_rt_speed_feature(VP10_COMP *cpi, SPEED_FEATURES *sf,
sf->use_fast_coef_costing = 1;
sf->allow_exhaustive_searches = 0;
sf->exhaustive_searches_thresh = INT_MAX;
sf->use_upsampled_references = 0;
// Use transform domain distortion computation
// Note var-tx expt always uses pixel domain distortion.
@@ -495,6 +497,11 @@ void vp10_set_speed_features_framesize_independent(VP10_COMP *cpi) {
sf->disable_filter_search_var_thresh = 0;
sf->adaptive_interp_filter_search = 0;
sf->allow_partition_search_skip = 0;
#if CONFIG_EXT_REFS
sf->use_upsampled_references = 0;
#else
sf->use_upsampled_references = 1;
#endif
for (i = 0; i < TX_SIZES; i++) {
sf->intra_y_mode_mask[i] = INTRA_ALL;
......
@@ -479,6 +479,9 @@ typedef struct SPEED_FEATURES {
// Fast approximation of vp10_model_rd_from_var_lapndz
int simple_model_rd_from_var;
// Do sub-pixel search in up-sampled reference frames
int use_upsampled_references;
// Whether to compute distortion in the image domain (slower but
// more accurate), or in the transform domain (faster but less accurate).
int use_transform_domain_distortion;
......
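Note (not part of the diff): taken together, the three speed_features.c hunks above define the policy for the new flag — on by default in the framesize-independent settings unless the ext-refs experiment is built in, and switched off again both at the "good" speed level shown and for the real-time preset. A one-function C summary of that policy (the exact speed levels are not visible in the hunks, so boolean inputs stand in for them):

/* Sketch only -- not part of the patch. */
static int use_upsampled_references_sketch(int ext_refs_built,
                                           int fast_good_level,
                                           int rt_level) {
  /* Framesize-independent default from the third hunk above. */
  int use_upsampled_references = ext_refs_built ? 0 : 1;
  /* Both speed presets shown opt out of the slower high-precision search. */
  if (fast_good_level || rt_level)
    use_upsampled_references = 0;
  return use_upsampled_references;
}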
@@ -320,11 +320,7 @@ static int temporal_filter_find_matching_mb_c(VP10_COMP *cpi,
0, mv_sf->subpel_iters_per_step,
cond_cost_list(cpi, cost_list),
NULL, NULL,
#if CONFIG_AFFINE_MOTION
&distortion, &sse, NULL, 0, 0, 0);
#else
&distortion, &sse, NULL, 0, 0);
#endif
// Restore input state
x->plane[0].src = src;
......
@@ -267,7 +267,6 @@ void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred,
}
}
#if CONFIG_AFFINE_MOTION
// Get pred block from up-sampled reference.
void vpx_upsampled_pred_c(uint8_t *comp_pred,
int width, int height,
@@ -300,7 +299,6 @@ void vpx_comp_avg_upsampled_pred_c(uint8_t *comp_pred, const uint8_t *pred,
ref += stride;
}
}
#endif
#if CONFIG_VP9_HIGHBITDEPTH
static void highbd_variance64(const uint8_t *a8, int a_stride,
......
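Note (not part of the diff): only the loop tail ("ref += stride;") of the C reference survives in the variance.c hunk above. A hedged reconstruction of vpx_upsampled_pred_c under the assumption, consistent with the "(offset << 3)" scaling in mcomp.c, that the reference plane is up-sampled 8x in each dimension: producing a width-by-height prediction is then a decimating copy that takes every 8th sample of every 8th row.

#include <stdint.h>

/* Sketch only -- not part of the patch. */
void vpx_upsampled_pred_sketch(uint8_t *comp_pred, int width, int height,
                               const uint8_t *ref, int ref_stride) {
  int i, j, k;
  const int stride = ref_stride << 3; /* skip 8 up-sampled rows per output row */

  for (i = 0; i < height; i++) {
    for (j = 0, k = 0; j < width; j++, k += 8)
      comp_pred[j] = ref[k]; /* every 8th sample horizontally */
    comp_pred += width;      /* output is packed at the block width */
    ref += stride;
  }
}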
@@ -1233,12 +1233,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
#
# ...
#
if (vpx_config("CONFIG_AFFINE_MOTION") eq "yes") {
add_proto qw/void vpx_upsampled_pred/, "uint8_t *comp_pred, int width, int height, const uint8_t *ref, int ref_stride";
specialize qw/vpx_upsampled_pred sse2/;
add_proto qw/void vpx_comp_avg_upsampled_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
specialize qw/vpx_comp_avg_upsampled_pred sse2/;
}
add_proto qw/void vpx_upsampled_pred/, "uint8_t *comp_pred, int width, int height, const uint8_t *ref, int ref_stride";
specialize qw/vpx_upsampled_pred sse2/;
add_proto qw/void vpx_comp_avg_upsampled_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
specialize qw/vpx_comp_avg_upsampled_pred sse2/;
#
# ...
......
@@ -476,7 +476,6 @@ FNS(ssse3, ssse3);
#undef FN
#endif // CONFIG_USE_X86INC
#if CONFIG_AFFINE_MOTION
void vpx_upsampled_pred_sse2(uint8_t *comp_pred,
int width, int height,
const uint8_t *ref, int ref_stride) {
@@ -703,4 +702,3 @@ void vpx_comp_avg_upsampled_pred_sse2(uint8_t *comp_pred, const uint8_t *pred,
}
}
}
#endif