 ... ... @@ -245,6 +245,71 @@ void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) {
    }, \
    v = INT_MAX;)

/*
 * FIRST_LEVEL_CHECKS: one round of probes around the current search centre
 * (tr, tc) at step size hstep.
 *
 * The four axis-aligned neighbours are evaluated through CHECK_BETTER, with
 * left/right/up/down passed as its first argument.  whichdir then encodes the
 * cheaper side of each axis (bit 0: 0 = left, 1 = right; value 2: 0 = up,
 * 2 = down; a tie selects right / down), and the single diagonal neighbour in
 * that quadrant is evaluated.
 *
 * The macro expands in the caller's scope: tr, tc, hstep and whichdir must be
 * declared there, and whichdir is left set for SECOND_LEVEL_CHECKS.
 *
 * NOTE(review): CHECK_BETTER is defined above and only its tail is visible
 * here; presumably it stores the candidate's cost in its first argument and
 * updates br/bc/besterr when the candidate wins -- confirm.
 * NOTE(review): the expansion is a bare { } block, so "FIRST_LEVEL_CHECKS;"
 * leaves an empty statement behind and cannot be used as the unbraced body of
 * an if/else.  The switch needs no default because whichdir is always 0..3.
 */
#define FIRST_LEVEL_CHECKS \
  { \
    unsigned int left, right, up, down, diag; \
    CHECK_BETTER(left, tr, tc - hstep); \
    CHECK_BETTER(right, tr, tc + hstep); \
    CHECK_BETTER(up, tr - hstep, tc); \
    CHECK_BETTER(down, tr + hstep, tc); \
    whichdir = (left < right ? 0 : 1) + \
               (up < down ? 0 : 2); \
    switch (whichdir) { \
      case 0: /* left and up were cheaper */ \
        CHECK_BETTER(diag, tr - hstep, tc - hstep); \
        break; \
      case 1: /* right and up */ \
        CHECK_BETTER(diag, tr - hstep, tc + hstep); \
        break; \
      case 2: /* left and down */ \
        CHECK_BETTER(diag, tr + hstep, tc - hstep); \
        break; \
      case 3: /* right and down */ \
        CHECK_BETTER(diag, tr + hstep, tc + hstep); \
        break; \
    } \
  }

/*
 * SECOND_LEVEL_CHECKS: follow-up probes after FIRST_LEVEL_CHECKS, chosen from
 * where the current best (br, bc) lies relative to the centre (tr, tc):
 *   - row and column both differ: with kr = br - tr and kc = bc - tc, probes
 *     (tr + kr, tc + 2*kc) and (tr + 2*kr, tc + kc);
 *   - only the column differs: probes (tr + hstep, tc + 2*kc) and
 *     (tr - hstep, tc + 2*kc), then (tr +/- hstep, tc + kc) on the row side
 *     opposite to the diagonal FIRST_LEVEL_CHECKS evaluated (whichdir 0/1 ->
 *     tr + hstep, whichdir 2/3 -> tr - hstep);
 *   - only the row differs: the mirrored pattern, with the column side picked
 *     from bit 0 of whichdir;
 *   - best still equals the centre: nothing is probed.
 * kr and kc are read once, before the first probe of each branch.
 *
 * Expands in the caller's scope and requires tr, tc, br, bc, hstep and a
 * whichdir value left by a preceding FIRST_LEVEL_CHECKS.
 */
#define SECOND_LEVEL_CHECKS \
  { \
    int kr, kc; \
    unsigned int second; \
    if (tr != br && tc != bc) { \
      kr = br - tr; \
      kc = bc - tc; \
      CHECK_BETTER(second, tr + kr, tc + 2 * kc); \
      CHECK_BETTER(second, tr + 2 * kr, tc + kc); \
    } else if (tr == br && tc != bc) { \
      kc = bc - tc; \
      CHECK_BETTER(second, tr + hstep, tc + 2 * kc); \
      CHECK_BETTER(second, tr - hstep, tc + 2 * kc); \
      switch (whichdir) { \
        case 0: \
        case 1: /* first-level diagonal was on row tr - hstep */ \
          CHECK_BETTER(second, tr + hstep, tc + kc); \
          break; \
        case 2: \
        case 3: /* first-level diagonal was on row tr + hstep */ \
          CHECK_BETTER(second, tr - hstep, tc + kc); \
          break; \
      } \
    } else if (tr != br && tc == bc) { \
      kr = br - tr; \
      CHECK_BETTER(second, tr + 2 * kr, tc + hstep); \
      CHECK_BETTER(second, tr + 2 * kr, tc - hstep); \
      switch (whichdir) { \
        case 0: \
        case 2: /* first-level diagonal was on column tc - hstep */ \
          CHECK_BETTER(second, tr + kr, tc + hstep); \
          break; \
        case 1: \
        case 3: /* first-level diagonal was on column tc + hstep */ \
          CHECK_BETTER(second, tr + kr, tc - hstep); \
          break; \
      } \
    } \
  }

int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x,
                                      int_mv *bestmv, int_mv *ref_mv,
                                      int error_per_bit,
 ... ... @@ -261,7 +326,6 @@ int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x,
  int rr, rc, br, bc, hstep;
  int tr, tc;
  unsigned int besterr = INT_MAX;
  unsigned int left, right, up, down, diag;
  unsigned int sse;
  unsigned int whichdir;
  unsigned int halfiters = iters_per_step;
 ... ... 
@@ -306,32 +370,10 @@ int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x, // common with the last iteration could be 2 ( if diag selected) while (halfiters--) { // 1/2 pel CHECK_BETTER(left, tr, tc - hstep); CHECK_BETTER(right, tr, tc + hstep); CHECK_BETTER(up, tr - hstep, tc); CHECK_BETTER(down, tr + hstep, tc); whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); switch (whichdir) { case 0: CHECK_BETTER(diag, tr - hstep, tc - hstep); break; case 1: CHECK_BETTER(diag, tr - hstep, tc + hstep); break; case 2: CHECK_BETTER(diag, tr + hstep, tc - hstep); break; case 3: CHECK_BETTER(diag, tr + hstep, tc + hstep); break; } FIRST_LEVEL_CHECKS; // no reason to check the same one again. if (tr == br && tc == bc) break; tr = br; tc = bc; } ... ... @@ -343,32 +385,10 @@ int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x, if (forced_stop != 2) { hstep >>= 1; while (quarteriters--) { CHECK_BETTER(left, tr, tc - hstep); CHECK_BETTER(right, tr, tc + hstep); CHECK_BETTER(up, tr - hstep, tc); CHECK_BETTER(down, tr + hstep, tc); whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); switch (whichdir) { case 0: CHECK_BETTER(diag, tr - hstep, tc - hstep); break; case 1: CHECK_BETTER(diag, tr - hstep, tc + hstep); break; case 2: CHECK_BETTER(diag, tr + hstep, tc - hstep); break; case 3: CHECK_BETTER(diag, tr + hstep, tc + hstep); break; } FIRST_LEVEL_CHECKS; // no reason to check the same one again. if (tr == br && tc == bc) break; tr = br; tc = bc; } ... ... @@ -378,32 +398,10 @@ int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x, forced_stop == 0) { hstep >>= 1; while (eighthiters--) { CHECK_BETTER(left, tr, tc - hstep); CHECK_BETTER(right, tr, tc + hstep); CHECK_BETTER(up, tr - hstep, tc); CHECK_BETTER(down, tr + hstep, tc); whichdir = (left < right ? 0 : 1) + (up < down ? 
0 : 2);
    switch (whichdir) {
      case 0:
        CHECK_BETTER(diag, tr - hstep, tc - hstep);
        break;
      case 1:
        CHECK_BETTER(diag, tr - hstep, tc + hstep);
        break;
      case 2:
        CHECK_BETTER(diag, tr + hstep, tc - hstep);
        break;
      case 3:
        CHECK_BETTER(diag, tr + hstep, tc + hstep);
        break;
    }
    FIRST_LEVEL_CHECKS;
    // no reason to check the same one again.
    if (tr == br && tc == bc)
      break;
    tr = br;
    tc = bc;
  }
 ... ... @@ -419,6 +417,105 @@ int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x,
  return besterr;
}

// Sub-pixel motion-vector refinement with a fixed probe pattern per precision
// step: one FIRST_LEVEL_CHECKS round, followed by one SECOND_LEVEL_CHECKS
// round when iters_per_step > 1.  Unlike vp9_find_best_sub_pixel_iterative
// there is no loop; halfiters/quarteriters/eighthiters are only compared
// against 1.
//
//   x              macroblock; plane 0 source buffer/stride and the
//                  mv_{row,col}_{min,max} limits are read from it.
//   bestmv         in: full-pel vector (row/col index the reference buffer
//                  directly).  out: refined vector, scaled so that 8 units
//                  are one full pel (hstep 4 is the "1/2 pel" step).
//   ref_mv         reference vector for the MV cost and the range limits.
//   error_per_bit  forwarded to mv_err_cost.
//   vfp            variance function table; vf() scores the centre point.
//   forced_stop    0 - full, 1 - qtr only, 2 - half only.
//   iters_per_step a second-level round is run at each step only when > 1.
//   mvjcost/mvcost forwarded to mv_err_cost.
//   distortion     receives the centre variance before the MV cost is added.
//                  NOTE(review): CHECK_BETTER presumably overwrites it when a
//                  better candidate is found -- confirm.
//   sse1           passed to vf() as its output argument.
//
// Returns besterr, or INT_MAX when either component of the result differs from
// ref_mv by more than MAX_FULL_PEL_VAL << 3.
//
// NOTE(review): rr, rc, minc/maxc/minr/maxr, offset, thismse, sse, y, z and
// the strides are not referenced directly below; presumably they are consumed
// by name inside CHECK_BETTER and the MVC/PRE/DIST macros -- confirm before
// renaming or removing any of them.
int vp9_find_best_sub_pixel_tree(MACROBLOCK *x,
                                 int_mv *bestmv, int_mv *ref_mv,
                                 int error_per_bit,
                                 const vp9_variance_fn_ptr_t *vfp,
                                 int forced_stop,
                                 int iters_per_step,
                                 int *mvjcost, int *mvcost[2],
                                 int *distortion,
                                 unsigned int *sse1) {
  uint8_t *z = x->plane[0].src.buf;
  int src_stride = x->plane[0].src.stride;
  MACROBLOCKD *xd = &x->e_mbd;
  int rr, rc, br, bc, hstep;
  int tr, tc;
  unsigned int besterr = INT_MAX;
  unsigned int sse;
  unsigned int whichdir;
  int thismse;
  int maxc, minc, maxr, minr;
  int y_stride;
  int offset;
  unsigned int halfiters = iters_per_step;
  unsigned int quarteriters = iters_per_step;
  unsigned int eighthiters = iters_per_step;

  // Reference pixels at the full-pel position given by bestmv.
  uint8_t *y = xd->plane[0].pre[0].buf +
               (bestmv->as_mv.row) * xd->plane[0].pre[0].stride +
               bestmv->as_mv.col;

  y_stride = xd->plane[0].pre[0].stride;

  rr = ref_mv->as_mv.row;
  rc = ref_mv->as_mv.col;
  // NOTE(review): left-shifting a negative signed value is undefined behaviour
  // in C; row/col and the mv_*_min limits are presumably signed and can be
  // negative -- confirm, and consider "* 8" instead of "<< 3".
  br = bestmv->as_mv.row << 3;
  bc = bestmv->as_mv.col << 3;
  hstep = 4;
  // Search window: the block's MV limits intersected with the range
  // ref_mv +/- ((1 << MV_MAX_BITS) - 1).
  minc = MAX(x->mv_col_min << 3,
             (ref_mv->as_mv.col) - ((1 << MV_MAX_BITS) - 1));
  maxc = MIN(x->mv_col_max << 3,
             (ref_mv->as_mv.col) + ((1 << MV_MAX_BITS) - 1));
  minr = MAX(x->mv_row_min << 3,
             (ref_mv->as_mv.row) - ((1 << MV_MAX_BITS) - 1));
  maxr = MIN(x->mv_row_max << 3,
             (ref_mv->as_mv.row) + ((1 << MV_MAX_BITS) - 1));

  tr = br;
  tc = bc;

  // Computed from the full-pel bestmv, i.e. before the scaling just below.
  offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;

  // central mv
  bestmv->as_mv.row <<= 3;
  bestmv->as_mv.col <<= 3;

  // calculate central point error
  besterr = vfp->vf(y, y_stride, z, src_stride, sse1);
  *distortion = besterr;
  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);

  // 1/2 pel
  FIRST_LEVEL_CHECKS;
  if (halfiters > 1) {
    SECOND_LEVEL_CHECKS;
  }
  // Re-centre on the best point found so far before refining further.
  tr = br;
  tc = bc;

  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  if (forced_stop != 2) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (quarteriters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }

  // Finest step: needs high-precision MVs to be allowed, vp9_use_mv_hp() to
  // accept ref_mv, and forced_stop == 0.
  // NOTE(review): with forced_stop == 2 the halving above is skipped; the
  // forced_stop == 0 test here keeps this block from running in that case.
  if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) &&
      forced_stop == 0) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }

  bestmv->as_mv.row = br;
  bestmv->as_mv.col = bc;

  // Reject results that are too far from ref_mv in either component.
  if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
      (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3)))
    return INT_MAX;

  return besterr;
}

#undef DIST
/* returns subpixel variance error function */
#define DIST(r, c) \
 ... ... @@ -443,7 +540,6 @@ int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x,
  int rr, rc, br, bc, hstep;
  int tr, tc;
  unsigned int besterr = INT_MAX;
  unsigned int left, right, up, down, diag;
  unsigned int sse;
  unsigned int whichdir;
  unsigned int halfiters = iters_per_step;
 ... ... @@ -478,7 +574,6 @@ int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x,
  tr = br;
  tc = bc;
  offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
  // central mv
 ... ... @@ -497,32 +592,10 @@ int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x,
  // common with the last iteration could be 2 ( if diag selected)
  while (halfiters--) {
    // 1/2 pel
    CHECK_BETTER(left, tr, tc - hstep);
    CHECK_BETTER(right, tr, tc + hstep);
    CHECK_BETTER(up, tr - hstep, tc);
    CHECK_BETTER(down, tr + hstep, tc);
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    switch (whichdir) {
      case 0:
        CHECK_BETTER(diag, tr - hstep, tc - hstep);
        break;
      case 1:
        CHECK_BETTER(diag, tr - hstep, tc + hstep);
        break;
      case 2:
        CHECK_BETTER(diag, tr + hstep, tc - hstep);
        break;
      case 3:
        CHECK_BETTER(diag, tr + hstep, tc + hstep);
        break;
    }
    FIRST_LEVEL_CHECKS;
    // no reason to check the same one again. 
if (tr == br && tc == bc) break; tr = br; tc = bc; } ... ... @@ -534,32 +607,10 @@ int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x, if (forced_stop != 2) { hstep >>= 1; while (quarteriters--) { CHECK_BETTER(left, tr, tc - hstep); CHECK_BETTER(right, tr, tc + hstep); CHECK_BETTER(up, tr - hstep, tc); CHECK_BETTER(down, tr + hstep, tc); whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); switch (whichdir) { case 0: CHECK_BETTER(diag, tr - hstep, tc - hstep); break; case 1: CHECK_BETTER(diag, tr - hstep, tc + hstep); break; case 2: CHECK_BETTER(diag, tr + hstep, tc - hstep); break; case 3: CHECK_BETTER(diag, tr + hstep, tc + hstep); break; } FIRST_LEVEL_CHECKS; // no reason to check the same one again. if (tr == br && tc == bc) break; tr = br; tc = bc; } ... ... @@ -569,32 +620,10 @@ int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x, forced_stop == 0) { hstep >>= 1; while (eighthiters--) { CHECK_BETTER(left, tr, tc - hstep); CHECK_BETTER(right, tr, tc + hstep); CHECK_BETTER(up, tr - hstep, tc); CHECK_BETTER(down, tr + hstep, tc); whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); switch (whichdir) { case 0: CHECK_BETTER(diag, tr - hstep, tc - hstep); break; case 1: CHECK_BETTER(diag, tr - hstep, tc + hstep); break; case 2: CHECK_BETTER(diag, tr + hstep, tc - hstep); break; case 3: CHECK_BETTER(diag, tr + hstep, tc + hstep); break; } FIRST_LEVEL_CHECKS; // no reason to check the same one again. if (tr == br && tc == bc) break; tr = br; tc = bc; } ... ... 
@@ -609,6 +638,116 @@ int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x,
  return besterr;
}

// Compound-prediction variant of the tree-pattern sub-pixel search.  The
// centre point is scored on comp_pred, which comp_avg_pred() fills from
// second_pred and the reference block at bestmv, rather than on the reference
// block alone.  Each precision step runs one FIRST_LEVEL_CHECKS round and,
// when iters_per_step > 1, one SECOND_LEVEL_CHECKS round; there is no loop.
//
//   x              macroblock; plane 0 source buffer/stride and the
//                  mv_{row,col}_{min,max} limits are read from it.
//   bestmv         in: full-pel vector.  out: refined vector, scaled so that
//                  8 units are one full pel (hstep 4 is the "1/2 pel" step).
//   ref_mv         reference vector for the MV cost and the range limits.
//   error_per_bit  forwarded to mv_err_cost.
//   vfp            variance function table; vf() scores the centre point.
//   forced_stop    0 - full, 1 - qtr only, 2 - half only.
//   iters_per_step a second-level round is run at each step only when > 1.
//   mvjcost/mvcost forwarded to mv_err_cost.
//   distortion     receives the centre variance before the MV cost is added.
//                  NOTE(review): CHECK_BETTER presumably overwrites it when a
//                  better candidate is found -- confirm.
//   sse1           passed to vf() as its output argument.
//   second_pred    second predictor handed to comp_avg_pred().
//   w, h           block dimensions handed to comp_avg_pred(); w is also the
//                  stride used for comp_pred in the vf() call.
//
// Returns besterr, or INT_MAX when either component of the result differs from
// ref_mv by more than MAX_FULL_PEL_VAL << 3.
//
// NOTE(review): rr, rc, minc/maxc/minr/maxr, offset, thismse, sse and the
// strides are not referenced directly below; presumably they are consumed by
// name inside CHECK_BETTER and the MVC/PRE/DIST macros -- confirm before
// renaming or removing any of them.
int vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x,
                                      int_mv *bestmv, int_mv *ref_mv,
                                      int error_per_bit,
                                      const vp9_variance_fn_ptr_t *vfp,
                                      int forced_stop,
                                      int iters_per_step,
                                      int *mvjcost, int *mvcost[2],
                                      int *distortion,
                                      unsigned int *sse1,
                                      const uint8_t *second_pred,
                                      int w, int h) {
  uint8_t *z = x->plane[0].src.buf;
  int src_stride = x->plane[0].src.stride;
  MACROBLOCKD *xd = &x->e_mbd;
  int rr, rc, br, bc, hstep;
  int tr, tc;
  unsigned int besterr = INT_MAX;
  unsigned int sse;
  unsigned int whichdir;
  int thismse;
  int maxc, minc, maxr, minr;
  int y_stride;
  int offset;
  unsigned int halfiters = iters_per_step;
  unsigned int quarteriters = iters_per_step;
  unsigned int eighthiters = iters_per_step;

  // NOTE(review): fixed 64 * 64 scratch buffer; presumably w * h never exceeds
  // that -- confirm against the callers.
  DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
  // Reference pixels at the full-pel position given by bestmv.
  uint8_t *y = xd->plane[0].pre[0].buf +
               (bestmv->as_mv.row) * xd->plane[0].pre[0].stride +
               bestmv->as_mv.col;

  y_stride = xd->plane[0].pre[0].stride;

  rr = ref_mv->as_mv.row;
  rc = ref_mv->as_mv.col;
  // NOTE(review): left-shifting a negative signed value is undefined behaviour
  // in C; row/col and the mv_*_min limits are presumably signed and can be
  // negative -- confirm, and consider "* 8" instead of "<< 3".
  br = bestmv->as_mv.row << 3;
  bc = bestmv->as_mv.col << 3;
  hstep = 4;
  // Search window: the block's MV limits intersected with the range
  // ref_mv +/- ((1 << MV_MAX_BITS) - 1).
  minc = MAX(x->mv_col_min << 3,
             (ref_mv->as_mv.col) - ((1 << MV_MAX_BITS) - 1));
  maxc = MIN(x->mv_col_max << 3,
             (ref_mv->as_mv.col) + ((1 << MV_MAX_BITS) - 1));
  minr = MAX(x->mv_row_min << 3,
             (ref_mv->as_mv.row) - ((1 << MV_MAX_BITS) - 1));
  maxr = MIN(x->mv_row_max << 3,
             (ref_mv->as_mv.row) + ((1 << MV_MAX_BITS) - 1));

  tr = br;
  tc = bc;

  // Computed from the full-pel bestmv, i.e. before the scaling just below.
  offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;

  // central mv
  bestmv->as_mv.row <<= 3;
  bestmv->as_mv.col <<= 3;

  // calculate central point error
  // TODO(yunqingwang): central pointer error was already calculated in full-
  // pixel search, and can be passed in this function.
  comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride);
  besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
  *distortion = besterr;
  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);

  // NOTE(review): the "each subsequent iteration" remarks below describe a
  // loop, but this function contains none; they look carried over from
  // vp9_find_best_sub_pixel_comp_iterative.
  // Each subsequent iteration checks at least one point in
  // common with the last iteration could be 2 ( if diag selected)
  // 1/2 pel
  FIRST_LEVEL_CHECKS;
  if (halfiters > 1) {
    SECOND_LEVEL_CHECKS;
  }
  // Re-centre on the best point found so far before refining further.
  tr = br;
  tc = bc;

  // Each subsequent iteration checks at least one point in common with
  // the last iteration could be 2 ( if diag selected) 1/4 pel

  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  if (forced_stop != 2) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (quarteriters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }

  // Finest step: needs high-precision MVs to be allowed, vp9_use_mv_hp() to
  // accept ref_mv, and forced_stop == 0.
  if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) &&
      forced_stop == 0) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }
  bestmv->as_mv.row = br;
  bestmv->as_mv.col = bc;

  // Reject results that are too far from ref_mv in either component.
  if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
      (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3)))
    return INT_MAX;

  return besterr;
}

#undef MVC
#undef PRE
#undef DIST
 ... ...
 ... ... @@ -80,6 +80,21 @@ typedef int (fractional_mv_step_fp) (
    int *distortion,
    unsigned int *sse);
// Both single-prediction sub-pixel searches share the fractional_mv_step_fp
// signature.
extern fractional_mv_step_fp vp9_find_best_sub_pixel_iterative;
extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree;

// Function type shared by the compound-prediction sub-pixel searches declared
// just below.  It adds the second predictor and the block dimensions
// (second_pred, w, h) to the parameter list.  This is a function type, not a
// pointer type: it is used here to declare the functions themselves.
typedef int (fractional_mv_step_comp_fp) (
    MACROBLOCK *x,
    int_mv *bestmv,
    int_mv *ref_mv,
    int error_per_bit,
    const vp9_variance_fn_ptr_t *vfp,
    int forced_stop,  // 0 - full, 1 - qtr only, 2 - half only
    int iters_per_step,
    int *mvjcost,
    int *mvcost[2],
    int *distortion,
    unsigned int *sse1,
    const uint8_t *second_pred,
    int w, int h);
extern fractional_mv_step_comp_fp vp9_find_best_sub_pixel_comp_iterative;
extern fractional_mv_step_comp_fp vp9_find_best_sub_pixel_comp_tree;

typedef int (*vp9_full_search_fn_t)(MACROBLOCK *x,
                                    int_mv *ref_mv, int sad_per_bit,
 ... ... @@ -102,18 +117,6 @@ typedef int (*vp9_diamond_search_fn_t)(MACROBLOCK *x,
                                       int *mvjcost, int *mvcost[2],
                                       int_mv *center_mv);

int vp9_find_best_sub_pixel_comp_iterative(
    MACROBLOCK *x,
    int_mv *bestmv,
    int_mv *ref_mv,
    int error_per_bit,
    const vp9_variance_fn_ptr_t *vfp,
    int forced_stop,  // 0 - full, 1 - qtr only, 2 - half only
    int iters_per_step,
    int *mvjcost,
    int *mvcost[2],
    int *distortion,
    unsigned int *sse1,
    const uint8_t *second_pred,
    int w, int h);

int vp9_refining_search_8p_c(MACROBLOCK *x,
                             int_mv *ref_mv, int error_per_bit,
                             int search_range, vp9_variance_fn_ptr_t *fn_ptr,
 ... ...
 ... ... @@ -709,8 +709,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->search_method = NSTEP; sf->auto_filter = 1; sf->recode_loop = 1; sf->subpel_search_method = SUBPEL_ITERATIVE; sf->subpel_iters_per_step = 3; sf->subpel_search_method = SUBPEL_TREE; sf->subpel_iters_per_step = 2; sf->optimize_coefficients = !cpi->oxcf.lossless; sf->reduce_first_step_size = 0; sf->auto_mv_step_size = 0; ... ... @@ -829,7 +829,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { (MIN(cpi->common.width, cpi->common.height) >= 720)? 1 : 0; sf->auto_mv_step_size = 1; sf->search_method = SQUARE; sf->subpel_iters_per_step = 2; sf->subpel_iters_per_step = 1; } if (speed == 3) { sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES; ... ... @@ -923,9 +923,10 @@ void vp9_set_speed_features(VP9_COMP *cpi) { if (cpi->sf.subpel_search_method == SUBPEL_ITERATIVE) { cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_iterative; } else { // TODO(debargha): Other methods to come assert(0); cpi->find_fractional_mv_step_comp = vp9_find_best_sub_pixel_comp_iterative; } else if (cpi->sf.subpel_search_method == SUBPEL_TREE) { cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree; cpi->find_fractional_mv_step_comp = vp9_find_best_sub_pixel_comp_tree; } cpi->mb.optimize = cpi->sf.optimize_coefficients == 1 && cpi->pass != 1; ... ...
 ... ... @@ -234,6 +234,7 @@ typedef enum {

// Selects which sub-pixel motion search is used.
// NOTE(review): every value added here presumably needs a matching branch
// wherever find_fractional_mv_step / find_fractional_mv_step_comp are
// assigned -- confirm.
typedef enum {
  SUBPEL_ITERATIVE = 0,
  SUBPEL_TREE = 1,
  // Other methods to come
} SUBPEL_SEARCH_METHODS;
 ... ... @@ -534,6 +535,7 @@ typedef struct VP9_COMP {
  unsigned int active_map_enabled;

  // Sub-pixel search entry points: single prediction and compound prediction.
  fractional_mv_step_fp *find_fractional_mv_step;
  fractional_mv_step_comp_fp *find_fractional_mv_step_comp;
  vp9_full_search_fn_t full_search_sad;
  vp9_refining_search_fn_t refining_search_sad;
  vp9_diamond_search_fn_t diamond_search_sad;
 ... ...
 ... ... @@ -2663,7 +2663,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, int dis; /* TODO: use dis in distortion calculation later. */ unsigned int sse; bestsme = vp9_find_best_sub_pixel_comp_iterative(