diff --git a/configure b/configure index e7eb1521f1f146f735d50683490216fd3c585bcb..97366e478709c771a4fb7c44b314fe4132bf0be8 100755 --- a/configure +++ b/configure @@ -284,7 +284,6 @@ EXPERIMENT_LIST=" ext_partition ext_tile obmc - affine_motion " CONFIG_LIST=" dependency_tracking diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c index 34dd8d54b145ea20bde0ddf44828e62822d9a7a0..fc65e723e91dd5cb7104504a0f232b6ec696897d 100644 --- a/vp10/encoder/encoder.c +++ b/vp10/encoder/encoder.c @@ -358,9 +358,7 @@ void vp10_initialize_enc(void) { static void dealloc_compressor_data(VP10_COMP *cpi) { VP10_COMMON *const cm = &cpi->common; -#if CONFIG_REF_MV int i; -#endif vpx_free(cpi->mbmi_ext_base); cpi->mbmi_ext_base = NULL; @@ -413,14 +411,9 @@ static void dealloc_compressor_data(VP10_COMP *cpi) { vpx_free(cpi->active_map.map); cpi->active_map.map = NULL; -#if CONFIG_AFFINE_MOTION - { - // Free up-sampled reference buffers. - int i; - for (i = 0; i < MAX_REF_FRAMES; i++) - vpx_free_frame_buffer(&cpi->upsampled_ref_bufs[i].buf); - } -#endif + // Free up-sampled reference buffers. + for (i = 0; i < MAX_REF_FRAMES; i++) + vpx_free_frame_buffer(&cpi->upsampled_ref_bufs[i].buf); vp10_free_ref_frame_buffers(cm->buffer_pool); #if CONFIG_VP9_POSTPROC @@ -756,26 +749,6 @@ static void alloc_util_frame_buffers(VP10_COMP *cpi) { NULL, NULL, NULL)) vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate scaled last source buffer"); - -#if CONFIG_AFFINE_MOTION - { - // Allocate up-sampled reference buffers. 
- int i; - - for (i = 0; i < MAX_REF_FRAMES; i++) - if (vpx_realloc_frame_buffer(&cpi->upsampled_ref_bufs[i].buf, - (cm->width << 3), (cm->height << 3), - cm->subsampling_x, cm->subsampling_y, -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth, -#endif - (VP9_ENC_BORDER_IN_PIXELS << 3), - cm->byte_alignment, - NULL, NULL, NULL)) - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate up-sampled reference frame buffer"); - } -#endif } @@ -2069,6 +2042,14 @@ static void cal_nmvsadcosts_hp(int *mvsadcost[2]) { } while (++i <= MV_MAX); } +static INLINE void init_upsampled_ref_frame_bufs(VP10_COMP *cpi) { + int i; + + for (i = 0; i < MAX_REF_FRAMES; ++i) { + cpi->upsampled_ref_bufs[i].ref_count = 0; + cpi->upsampled_ref_idx[i] = INVALID_IDX; + } +} VP10_COMP *vp10_create_compressor(VP10EncoderConfig *oxcf, BufferPool *const pool) { @@ -2267,6 +2248,8 @@ VP10_COMP *vp10_create_compressor(VP10EncoderConfig *oxcf, vp10_init_second_pass(cpi); } + init_upsampled_ref_frame_bufs(cpi); + vp10_set_speed_features_framesize_independent(cpi); vp10_set_speed_features_framesize_dependent(cpi); @@ -2929,7 +2912,6 @@ static int recode_loop_test(VP10_COMP *cpi, return force_recode; } -#if CONFIG_AFFINE_MOTION static INLINE int get_free_upsampled_ref_buf(EncRefCntBuffer *ubufs) { int i; @@ -2941,50 +2923,59 @@ static INLINE int get_free_upsampled_ref_buf(EncRefCntBuffer *ubufs) { return INVALID_IDX; } -// Up-sample reference frames. -static INLINE int upsample_ref_frame(RefCntBuffer *bufs, -#if CONFIG_VP9_HIGHBITDEPTH - EncRefCntBuffer *ubufs, int new_idx, - int bit_depth) { -#else - EncRefCntBuffer *ubufs, int new_idx) { -#endif +// Up-sample 1 reference frame. 
+static INLINE int upsample_ref_frame(VP10_COMP *cpi, + const YV12_BUFFER_CONFIG *const ref) { + VP10_COMMON * const cm = &cpi->common; + EncRefCntBuffer *ubufs = cpi->upsampled_ref_bufs; int new_uidx = get_free_upsampled_ref_buf(ubufs); if (new_uidx == INVALID_IDX) { return INVALID_IDX; } else { - const YV12_BUFFER_CONFIG *const ref = &bufs[new_idx].buf; YV12_BUFFER_CONFIG *upsampled_ref = &ubufs[new_uidx].buf; + // Can allocate buffer for Y plane only. + if (upsampled_ref->buffer_alloc_sz < (ref->buffer_alloc_sz << 6)) + if (vpx_realloc_frame_buffer(upsampled_ref, + (cm->width << 3), (cm->height << 3), + cm->subsampling_x, cm->subsampling_y, +#if CONFIG_VP9_HIGHBITDEPTH + cm->use_highbitdepth, +#endif + (VP9_ENC_BORDER_IN_PIXELS << 3), + cm->byte_alignment, + NULL, NULL, NULL)) + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate up-sampled frame buffer"); + // Currently, only Y plane is up-sampled, U, V are not used. #if CONFIG_VP9_HIGHBITDEPTH - scale_and_extend_frame(ref, upsampled_ref, 1, bit_depth); + scale_and_extend_frame(ref, upsampled_ref, 1, (int)cm->bit_depth); #else scale_and_extend_frame(ref, upsampled_ref, 1); #endif return new_uidx; } } -#endif void vp10_update_reference_frames(VP10_COMP *cpi) { VP10_COMMON * const cm = &cpi->common; BufferPool *const pool = cm->buffer_pool; + const int use_upsampled_ref = cpi->sf.use_upsampled_references; + int new_uidx = 0; + #if CONFIG_EXT_REFS int ref_frame; #endif // CONFIG_EXT_REFS -#if CONFIG_AFFINE_MOTION - // Always up-sample the current encoded frame. -#if CONFIG_VP9_HIGHBITDEPTH - int new_uidx = upsample_ref_frame(pool->frame_bufs, cpi->upsampled_ref_bufs, - cm->new_fb_idx, (int)cm->bit_depth); -#else - int new_uidx = upsample_ref_frame(pool->frame_bufs, cpi->upsampled_ref_bufs, - cm->new_fb_idx); -#endif -#endif + if (use_upsampled_ref) { + // Up-sample the current encoded frame. 
+ RefCntBuffer *bufs = pool->frame_bufs; + const YV12_BUFFER_CONFIG *const ref = &bufs[cm->new_fb_idx].buf; + + new_uidx = upsample_ref_frame(cpi, ref); + } // At this point the new frame has been encoded. // If any buffer copy / swapping is signaled it should be done here. @@ -2994,12 +2985,12 @@ void vp10_update_reference_frames(VP10_COMP *cpi) { ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx); -#if CONFIG_AFFINE_MOTION - uref_cnt_fb(cpi->upsampled_ref_bufs, - &cpi->upsampled_ref_idx[cpi->gld_fb_idx], new_uidx); - uref_cnt_fb(cpi->upsampled_ref_bufs, - &cpi->upsampled_ref_idx[cpi->alt_fb_idx], new_uidx); -#endif + if (use_upsampled_ref) { + uref_cnt_fb(cpi->upsampled_ref_bufs, + &cpi->upsampled_ref_idx[cpi->gld_fb_idx], new_uidx); + uref_cnt_fb(cpi->upsampled_ref_bufs, + &cpi->upsampled_ref_idx[cpi->alt_fb_idx], new_uidx); + } } else if (vp10_preserve_existing_gf(cpi)) { // We have decided to preserve the previously existing golden frame as our // new ARF frame. 
However, in the short term in function @@ -3013,10 +3004,10 @@ void vp10_update_reference_frames(VP10_COMP *cpi) { ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx); -#if CONFIG_AFFINE_MOTION - uref_cnt_fb(cpi->upsampled_ref_bufs, - &cpi->upsampled_ref_idx[cpi->alt_fb_idx], new_uidx); -#endif + if (use_upsampled_ref) + uref_cnt_fb(cpi->upsampled_ref_bufs, + &cpi->upsampled_ref_idx[cpi->alt_fb_idx], new_uidx); + tmp = cpi->alt_fb_idx; cpi->alt_fb_idx = cpi->gld_fb_idx; cpi->gld_fb_idx = tmp; @@ -3030,10 +3021,10 @@ void vp10_update_reference_frames(VP10_COMP *cpi) { ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[arf_idx], cm->new_fb_idx); -#if CONFIG_AFFINE_MOTION - uref_cnt_fb(cpi->upsampled_ref_bufs, - &cpi->upsampled_ref_idx[cpi->alt_fb_idx], new_uidx); -#endif + if (use_upsampled_ref) + uref_cnt_fb(cpi->upsampled_ref_bufs, + &cpi->upsampled_ref_idx[cpi->alt_fb_idx], new_uidx); + memcpy(cpi->interp_filter_selected[ALTREF_FRAME], cpi->interp_filter_selected[0], sizeof(cpi->interp_filter_selected[0])); @@ -3042,10 +3033,10 @@ void vp10_update_reference_frames(VP10_COMP *cpi) { if (cpi->refresh_golden_frame) { ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx); -#if CONFIG_AFFINE_MOTION - uref_cnt_fb(cpi->upsampled_ref_bufs, - &cpi->upsampled_ref_idx[cpi->gld_fb_idx], new_uidx); -#endif + if (use_upsampled_ref) + uref_cnt_fb(cpi->upsampled_ref_bufs, + &cpi->upsampled_ref_idx[cpi->gld_fb_idx], new_uidx); + if (!cpi->rc.is_src_frame_alt_ref) memcpy(cpi->interp_filter_selected[GOLDEN_FRAME], cpi->interp_filter_selected[0], @@ -3080,10 +3071,10 @@ void vp10_update_reference_frames(VP10_COMP *cpi) { if (cpi->refresh_last_frame) { ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idx], cm->new_fb_idx); -#if CONFIG_AFFINE_MOTION - uref_cnt_fb(cpi->upsampled_ref_bufs, - &cpi->upsampled_ref_idx[cpi->lst_fb_idx], new_uidx); -#endif + if (use_upsampled_ref) + uref_cnt_fb(cpi->upsampled_ref_bufs, + 
&cpi->upsampled_ref_idx[cpi->lst_fb_idx], new_uidx); + if (!cpi->rc.is_src_frame_alt_ref) { memcpy(cpi->interp_filter_selected[LAST_FRAME], cpi->interp_filter_selected[0], @@ -3249,8 +3240,9 @@ void vp10_scale_references(VP10_COMP *cpi) { } #endif // CONFIG_VP9_HIGHBITDEPTH -#if CONFIG_AFFINE_MOTION - { + if (cpi->sf.use_upsampled_references && (force_scaling || + new_fb_ptr->buf.y_crop_width != cm->width || + new_fb_ptr->buf.y_crop_height != cm->height)) { const int map_idx = get_ref_frame_map_idx(cpi, ref_frame); EncRefCntBuffer *ubuf = &cpi->upsampled_ref_bufs[cpi->upsampled_ref_idx[map_idx]]; @@ -3267,15 +3259,12 @@ void vp10_scale_references(VP10_COMP *cpi) { vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate up-sampled frame buffer"); #if CONFIG_VP9_HIGHBITDEPTH - scale_and_extend_frame(&new_fb_ptr->buf, &ubuf->buf, MAX_MB_PLANE, + scale_and_extend_frame(&new_fb_ptr->buf, &ubuf->buf, 1, (int)cm->bit_depth); #else - scale_and_extend_frame(&new_fb_ptr->buf, &ubuf->buf, MAX_MB_PLANE); + scale_and_extend_frame(&new_fb_ptr->buf, &ubuf->buf, 1); #endif - cpi->scaled_ref_idx[ref_frame - LAST_FRAME] = new_fb; - alloc_frame_mvs(cm, new_fb); } -#endif } else { const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame); RefCntBuffer *const buf = &pool->frame_bufs[buf_idx]; @@ -3610,9 +3599,28 @@ static void set_frame_size(VP10_COMP *cpi) { set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME); } +static void reset_use_upsampled_references(VP10_COMP *cpi) { + MV_REFERENCE_FRAME ref_frame; + + // reset up-sampled reference buffer structure. + init_upsampled_ref_frame_bufs(cpi); + + for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { + const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi, + ref_frame); + int new_uidx = upsample_ref_frame(cpi, ref); + + // Update the up-sampled reference index. 
+ cpi->upsampled_ref_idx[get_ref_frame_map_idx(cpi, ref_frame)] = + new_uidx; + cpi->upsampled_ref_bufs[new_uidx].ref_count++; + } +} + static void encode_without_recode_loop(VP10_COMP *cpi) { VP10_COMMON *const cm = &cpi->common; int q = 0, bottom_index = 0, top_index = 0; // Dummy variables. + const int use_upsampled_ref = cpi->sf.use_upsampled_references; vpx_clear_system_state(); @@ -3647,6 +3655,12 @@ static void encode_without_recode_loop(VP10_COMP *cpi) { set_size_independent_vars(cpi); set_size_dependent_vars(cpi, &q, &bottom_index, &top_index); + // cpi->sf.use_upsampled_references can be different from frame to frame. + // Whenever cpi->sf.use_upsampled_references changes from 0 to 1, the + // reference frames for this frame have to be up-sampled before encoding. + if (!use_upsampled_ref && cpi->sf.use_upsampled_references) + reset_use_upsampled_references(cpi); + vp10_set_quantizer(cm, q); vp10_set_variance_partition_thresholds(cpi, q); @@ -3694,9 +3708,16 @@ static void encode_with_recode_loop(VP10_COMP *cpi, int frame_over_shoot_limit; int frame_under_shoot_limit; int q = 0, q_low = 0, q_high = 0; + const int use_upsampled_ref = cpi->sf.use_upsampled_references; set_size_independent_vars(cpi); + // cpi->sf.use_upsampled_references can be different from frame to frame. + // Whenever cpi->sf.use_upsampled_references changes from 0 to 1, the + // reference frames for this frame have to be up-sampled before encoding.
+ if (!use_upsampled_ref && cpi->sf.use_upsampled_references) + reset_use_upsampled_references(cpi); + do { vpx_clear_system_state(); @@ -4355,17 +4376,6 @@ static void init_ref_frame_bufs(VP10_COMMON *cm) { } } -#if CONFIG_AFFINE_MOTION -static INLINE void init_upsampled_ref_frame_bufs(VP10_COMP *cpi) { - int i; - - for (i = 0; i < MAX_REF_FRAMES; ++i) { - cpi->upsampled_ref_bufs[i].ref_count = 0; - cpi->upsampled_ref_idx[i] = INVALID_IDX; - } -} -#endif - static void check_initial_width(VP10_COMP *cpi, #if CONFIG_VP9_HIGHBITDEPTH int use_highbitdepth, @@ -4388,9 +4398,7 @@ static void check_initial_width(VP10_COMP *cpi, alloc_raw_frame_buffers(cpi); init_ref_frame_bufs(cm); alloc_util_frame_buffers(cpi); -#if CONFIG_AFFINE_MOTION - init_upsampled_ref_frame_bufs(cpi); -#endif + init_motion_estimation(cpi); // TODO(agrange) This can be removed. cpi->initial_width = cm->width; diff --git a/vp10/encoder/encoder.h b/vp10/encoder/encoder.h index b2c242c8c181f64ed004a46bb668722367a2292c..afe3292d37a3661eb808874082b7ad00b65f480d 100644 --- a/vp10/encoder/encoder.h +++ b/vp10/encoder/encoder.h @@ -286,12 +286,10 @@ typedef struct IMAGE_STAT { double worst; } ImageStat; -#if CONFIG_AFFINE_MOTION typedef struct { int ref_count; YV12_BUFFER_CONFIG buf; } EncRefCntBuffer; -#endif typedef struct VP10_COMP { QUANTS quants; @@ -311,11 +309,9 @@ typedef struct VP10_COMP { YV12_BUFFER_CONFIG *unscaled_last_source; YV12_BUFFER_CONFIG scaled_last_source; -#if CONFIG_AFFINE_MOTION // Up-sampled reference buffers EncRefCntBuffer upsampled_ref_bufs[MAX_REF_FRAMES]; int upsampled_ref_idx[MAX_REF_FRAMES]; -#endif TileDataEnc *tile_data; int allocated_tiles; // Keep track of memory allocated for tiles. @@ -702,7 +698,6 @@ void vp10_new_framerate(VP10_COMP *cpi, double framerate); #define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl)) -#if CONFIG_AFFINE_MOTION // Update up-sampled reference frame index. 
static INLINE void uref_cnt_fb(EncRefCntBuffer *ubufs, int *uidx, int new_uidx) { @@ -714,7 +709,6 @@ static INLINE void uref_cnt_fb(EncRefCntBuffer *ubufs, int *uidx, *uidx = new_uidx; ubufs[new_uidx].ref_count++; } -#endif #ifdef __cplusplus } // extern "C" diff --git a/vp10/encoder/mbgraph.c b/vp10/encoder/mbgraph.c index 32ff0faf67f465efbf2cbffdd47985bae9b0517e..5e66ce59a82fe82d57a4857fd09c83e35e18054d 100644 --- a/vp10/encoder/mbgraph.c +++ b/vp10/encoder/mbgraph.c @@ -64,11 +64,7 @@ static unsigned int do_16x16_motion_iteration(VP10_COMP *cpi, &v_fn_ptr, 0, mv_sf->subpel_iters_per_step, cond_cost_list(cpi, cost_list), NULL, NULL, -#if CONFIG_AFFINE_MOTION &distortion, &sse, NULL, 0, 0, 0); -#else - &distortion, &sse, NULL, 0, 0); -#endif } #if CONFIG_EXT_INTER diff --git a/vp10/encoder/mcomp.c b/vp10/encoder/mcomp.c index 2c9397640df9ff66886f88d61e45a85c151fd58c..1f147d7edd496dc889d2585b41a3356f1f8f2eb0 100644 --- a/vp10/encoder/mcomp.c +++ b/vp10/encoder/mcomp.c @@ -210,7 +210,6 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) { #define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c) -#if CONFIG_AFFINE_MOTION static INLINE const uint8_t *upre(const uint8_t *buf, int stride, int r, int c) { return &buf[(r) * stride + (c)]; @@ -232,7 +231,6 @@ static INLINE const uint8_t *upre(const uint8_t *buf, int stride, } else { \ v = INT_MAX; \ } -#endif #define FIRST_LEVEL_CHECKS \ { \ @@ -438,11 +436,7 @@ int vp10_find_best_sub_pixel_tree_pruned_evenmore( int *distortion, unsigned int *sse1, const uint8_t *second_pred, -#if CONFIG_AFFINE_MOTION int w, int h, int use_upsampled_ref) { -#else - int w, int h) { -#endif SETUP_SUBPEL_SEARCH; besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, src_stride, y, y_stride, second_pred, @@ -455,9 +449,7 @@ int vp10_find_best_sub_pixel_tree_pruned_evenmore( (void) allow_hp; (void) forced_stop; (void) hstep; -#if CONFIG_AFFINE_MOTION (void) use_upsampled_ref; -#endif if (cost_list && 
cost_list[0] != INT_MAX && cost_list[1] != INT_MAX && @@ -524,16 +516,10 @@ int vp10_find_best_sub_pixel_tree_pruned_more(const MACROBLOCK *x, int *distortion, unsigned int *sse1, const uint8_t *second_pred, -#if CONFIG_AFFINE_MOTION int w, int h, int use_upsampled_ref) { -#else - int w, int h) { -#endif SETUP_SUBPEL_SEARCH; -#if CONFIG_AFFINE_MOTION (void) use_upsampled_ref; -#endif besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, src_stride, y, y_stride, second_pred, @@ -607,15 +593,9 @@ int vp10_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x, int *distortion, unsigned int *sse1, const uint8_t *second_pred, -#if CONFIG_AFFINE_MOTION int w, int h, int use_upsampled_ref) { -#else - int w, int h) { -#endif SETUP_SUBPEL_SEARCH; -#if CONFIG_AFFINE_MOTION (void) use_upsampled_ref; -#endif besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, src_stride, y, y_stride, second_pred, @@ -705,9 +685,8 @@ static const MV search_step_table[12] = { {0, -1}, {0, 1}, {-1, 0}, {1, 0} }; - -#if CONFIG_AFFINE_MOTION #if CONFIG_VP9_HIGHBITDEPTH +// TODO(yunqing): Optimize the following 2 functions. 
static void highbd_comp_avg_upsampled_pred(uint16_t *comp_pred, const uint8_t *pred8, int width, int height, @@ -798,7 +777,6 @@ static unsigned int upsampled_setup_center_error( besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); return besterr; } -#endif int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, @@ -812,11 +790,7 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x, int *distortion, unsigned int *sse1, const uint8_t *second_pred, -#if CONFIG_AFFINE_MOTION int w, int h, int use_upsampled_ref) { -#else - int w, int h) { -#endif const uint8_t *const z = x->plane[0].src.buf; const uint8_t *const src_address = z; const int src_stride = x->plane[0].src.stride; @@ -852,7 +826,6 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x, bestmv->row *= 8; bestmv->col *= 8; -#if CONFIG_AFFINE_MOTION // use_upsampled_ref can be 0 or 1 if (use_upsampled_ref) besterr = upsampled_setup_center_error(xd, bestmv, ref_mv, error_per_bit, @@ -860,7 +833,6 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x, second_pred, w, h, (offset << 3), mvjcost, mvcost, sse1, distortion); else -#endif besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, src_stride, y, y_stride, second_pred, w, h, offset, mvjcost, mvcost, @@ -876,7 +848,6 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x, if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { MV this_mv = {tr, tc}; -#if CONFIG_AFFINE_MOTION if (use_upsampled_ref) { const uint8_t *const pre_address = y + tr * y_stride + tc; @@ -884,7 +855,6 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x, pre_address, y_stride, second_pred, w, h, &sse); } else { -#endif const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); if (second_pred == NULL) @@ -893,9 +863,7 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x, else thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr), src_address, src_stride, &sse, second_pred); -#if 
CONFIG_AFFINE_MOTION } -#endif cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); @@ -920,7 +888,6 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x, if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { MV this_mv = {tr, tc}; -#if CONFIG_AFFINE_MOTION if (use_upsampled_ref) { const uint8_t *const pre_address = y + tr * y_stride + tc; @@ -928,7 +895,6 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x, pre_address, y_stride, second_pred, w, h, &sse); } else { -#endif const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); if (second_pred == NULL) @@ -937,9 +903,7 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x, else thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr), src_address, src_stride, &sse, second_pred); -#if CONFIG_AFFINE_MOTION } -#endif cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); @@ -963,15 +927,11 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x, } if (iters_per_step > 1 && best_idx != -1) { -#if CONFIG_AFFINE_MOTION if (use_upsampled_ref) { SECOND_LEVEL_CHECKS_BEST(1); } else { -#endif SECOND_LEVEL_CHECKS_BEST(0); -#if CONFIG_AFFINE_MOTION } -#endif } tr = br; diff --git a/vp10/encoder/mcomp.h b/vp10/encoder/mcomp.h index a430c76c2d9e795bfdc84b91b2c631db76d4ff37..f99cd8b17edd21780988bf0199422a58967b034f 100644 --- a/vp10/encoder/mcomp.h +++ b/vp10/encoder/mcomp.h @@ -116,11 +116,7 @@ typedef int (fractional_mv_step_fp) ( int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1, const uint8_t *second_pred, -#if CONFIG_AFFINE_MOTION int w, int h, int use_upsampled_ref); -#else - int w, int h); -#endif extern fractional_mv_step_fp vp10_find_best_sub_pixel_tree; extern fractional_mv_step_fp vp10_find_best_sub_pixel_tree_pruned; diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c index bb62e32edfc9c4916de10f7d6f51c10ae91dd329..665ba7dc487f993282d18a0f91c48ede5aef6d1c 100644 --- a/vp10/encoder/rdopt.c 
+++ b/vp10/encoder/rdopt.c @@ -4664,52 +4664,52 @@ static void joint_motion_search(VP10_COMP *cpi, MACROBLOCK *x, if (bestsme < INT_MAX) { int dis; /* TODO: use dis in distortion calculation later. */ unsigned int sse; -#if CONFIG_AFFINE_MOTION - // Use up-sampled reference frames. - struct macroblockd_plane *const pd = &xd->plane[0]; - struct buf_2d backup_pred = pd->pre[0]; - const YV12_BUFFER_CONFIG *upsampled_ref = - get_upsampled_ref(cpi, refs[id]); - - // Set pred for Y plane - setup_pred_plane(&pd->pre[0], upsampled_ref->y_buffer, - upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3), - NULL, pd->subsampling_x, pd->subsampling_y); - - // If bsize < BLOCK_8X8, adjust pred pointer for this block - if (bsize < BLOCK_8X8) - pd->pre[0].buf = - &pd->pre[0].buf[(vp10_raster_block_offset(BLOCK_8X8, block, - pd->pre[0].stride)) << 3]; - - bestsme = cpi->find_fractional_mv_step( - x, &tmp_mv, - &ref_mv[id].as_mv, - cpi->common.allow_high_precision_mv, - x->errorperbit, - &cpi->fn_ptr[bsize], - 0, cpi->sf.mv.subpel_iters_per_step, - NULL, - x->nmvjointcost, x->mvcost, - &dis, &sse, second_pred, - pw, ph, 1); - - // Restore the reference frames. - pd->pre[0] = backup_pred; -#else - (void) block; - bestsme = cpi->find_fractional_mv_step( - x, &tmp_mv, - &ref_mv[id].as_mv, - cpi->common.allow_high_precision_mv, - x->errorperbit, - &cpi->fn_ptr[bsize], - 0, cpi->sf.mv.subpel_iters_per_step, - NULL, - x->nmvjointcost, x->mvcost, - &dis, &sse, second_pred, - pw, ph); -#endif + if (cpi->sf.use_upsampled_references) { + // Use up-sampled reference frames. 
+ struct macroblockd_plane *const pd = &xd->plane[0]; + struct buf_2d backup_pred = pd->pre[0]; + const YV12_BUFFER_CONFIG *upsampled_ref = + get_upsampled_ref(cpi, refs[id]); + + // Set pred for Y plane + setup_pred_plane(&pd->pre[0], upsampled_ref->y_buffer, + upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3), + NULL, pd->subsampling_x, pd->subsampling_y); + + // If bsize < BLOCK_8X8, adjust pred pointer for this block + if (bsize < BLOCK_8X8) + pd->pre[0].buf = + &pd->pre[0].buf[(vp10_raster_block_offset(BLOCK_8X8, block, + pd->pre[0].stride)) << 3]; + + bestsme = cpi->find_fractional_mv_step( + x, &tmp_mv, + &ref_mv[id].as_mv, + cpi->common.allow_high_precision_mv, + x->errorperbit, + &cpi->fn_ptr[bsize], + 0, cpi->sf.mv.subpel_iters_per_step, + NULL, + x->nmvjointcost, x->mvcost, + &dis, &sse, second_pred, + pw, ph, 1); + + // Restore the reference frames. + pd->pre[0] = backup_pred; + } else { + (void) block; + bestsme = cpi->find_fractional_mv_step( + x, &tmp_mv, + &ref_mv[id].as_mv, + cpi->common.allow_high_precision_mv, + x->errorperbit, + &cpi->fn_ptr[bsize], + 0, cpi->sf.mv.subpel_iters_per_step, + NULL, + x->nmvjointcost, x->mvcost, + &dis, &sse, second_pred, + pw, ph, 0); + } } // Restore the pointer to the first (possibly scaled) prediction buffer. @@ -4990,57 +4990,57 @@ static int64_t rd_pick_best_sub8x8_mode(VP10_COMP *cpi, MACROBLOCK *x, if (bestsme < INT_MAX) { int distortion; -#if CONFIG_AFFINE_MOTION - const int pw = 4 * num_4x4_blocks_wide_lookup[bsize]; - const int ph = 4 * num_4x4_blocks_high_lookup[bsize]; - // Use up-sampled reference frames. 
- struct macroblockd_plane *const pd = &xd->plane[0]; - struct buf_2d backup_pred = pd->pre[0]; - const YV12_BUFFER_CONFIG *upsampled_ref = - get_upsampled_ref(cpi, mbmi->ref_frame[0]); - - // Set pred for Y plane - setup_pred_plane(&pd->pre[0], upsampled_ref->y_buffer, - upsampled_ref->y_stride, - (mi_row << 3), (mi_col << 3), - NULL, pd->subsampling_x, pd->subsampling_y); - - // adjust pred pointer for this block - pd->pre[0].buf = - &pd->pre[0].buf[(vp10_raster_block_offset(BLOCK_8X8, i, - pd->pre[0].stride)) << 3]; - - cpi->find_fractional_mv_step( - x, - new_mv, - &bsi->ref_mv[0]->as_mv, - cm->allow_high_precision_mv, - x->errorperbit, &cpi->fn_ptr[bsize], - cpi->sf.mv.subpel_force_stop, - cpi->sf.mv.subpel_iters_per_step, - cond_cost_list(cpi, cost_list), - x->nmvjointcost, x->mvcost, - &distortion, - &x->pred_sse[mbmi->ref_frame[0]], - NULL, pw, ph, 1); - - // Restore the reference frames. - pd->pre[0] = backup_pred; -#else - cpi->find_fractional_mv_step( - x, - new_mv, - &bsi->ref_mv[0]->as_mv, - cm->allow_high_precision_mv, - x->errorperbit, &cpi->fn_ptr[bsize], - cpi->sf.mv.subpel_force_stop, - cpi->sf.mv.subpel_iters_per_step, - cond_cost_list(cpi, cost_list), - x->nmvjointcost, x->mvcost, - &distortion, - &x->pred_sse[mbmi->ref_frame[0]], - NULL, 0, 0); -#endif + if (cpi->sf.use_upsampled_references) { + const int pw = 4 * num_4x4_blocks_wide_lookup[bsize]; + const int ph = 4 * num_4x4_blocks_high_lookup[bsize]; + // Use up-sampled reference frames. 
+ struct macroblockd_plane *const pd = &xd->plane[0]; + struct buf_2d backup_pred = pd->pre[0]; + const YV12_BUFFER_CONFIG *upsampled_ref = + get_upsampled_ref(cpi, mbmi->ref_frame[0]); + + // Set pred for Y plane + setup_pred_plane(&pd->pre[0], upsampled_ref->y_buffer, + upsampled_ref->y_stride, + (mi_row << 3), (mi_col << 3), + NULL, pd->subsampling_x, pd->subsampling_y); + + // adjust pred pointer for this block + pd->pre[0].buf = + &pd->pre[0].buf[(vp10_raster_block_offset(BLOCK_8X8, i, + pd->pre[0].stride)) << 3]; + + cpi->find_fractional_mv_step( + x, + new_mv, + &bsi->ref_mv[0]->as_mv, + cm->allow_high_precision_mv, + x->errorperbit, &cpi->fn_ptr[bsize], + cpi->sf.mv.subpel_force_stop, + cpi->sf.mv.subpel_iters_per_step, + cond_cost_list(cpi, cost_list), + x->nmvjointcost, x->mvcost, + &distortion, + &x->pred_sse[mbmi->ref_frame[0]], + NULL, pw, ph, 1); + + // Restore the reference frames. + pd->pre[0] = backup_pred; + } else { + cpi->find_fractional_mv_step( + x, + new_mv, + &bsi->ref_mv[0]->as_mv, + cm->allow_high_precision_mv, + x->errorperbit, &cpi->fn_ptr[bsize], + cpi->sf.mv.subpel_force_stop, + cpi->sf.mv.subpel_iters_per_step, + cond_cost_list(cpi, cost_list), + x->nmvjointcost, x->mvcost, + &distortion, + &x->pred_sse[mbmi->ref_frame[0]], + NULL, 0, 0, 0); + } // save motion search result for use in compound prediction #if CONFIG_EXT_INTER @@ -5637,43 +5637,43 @@ static void single_motion_search(VP10_COMP *cpi, MACROBLOCK *x, if (bestsme < INT_MAX) { int dis; /* TODO: use dis in distortion calculation later. */ -#if CONFIG_AFFINE_MOTION - const int pw = 4 * num_4x4_blocks_wide_lookup[bsize]; - const int ph = 4 * num_4x4_blocks_high_lookup[bsize]; - // Use up-sampled reference frames. 
- struct macroblockd_plane *const pd = &xd->plane[0]; - struct buf_2d backup_pred = pd->pre[ref_idx]; - const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref); - - // Set pred for Y plane - setup_pred_plane(&pd->pre[ref_idx], upsampled_ref->y_buffer, - upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3), - NULL, pd->subsampling_x, pd->subsampling_y); - - bestsme = cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv, - cm->allow_high_precision_mv, - x->errorperbit, - &cpi->fn_ptr[bsize], - cpi->sf.mv.subpel_force_stop, - cpi->sf.mv.subpel_iters_per_step, - cond_cost_list(cpi, cost_list), - x->nmvjointcost, x->mvcost, - &dis, &x->pred_sse[ref], NULL, - pw, ph, 1); - - // Restore the reference frames. - pd->pre[ref_idx] = backup_pred; -#else - cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv, - cm->allow_high_precision_mv, - x->errorperbit, - &cpi->fn_ptr[bsize], - cpi->sf.mv.subpel_force_stop, - cpi->sf.mv.subpel_iters_per_step, - cond_cost_list(cpi, cost_list), - x->nmvjointcost, x->mvcost, - &dis, &x->pred_sse[ref], NULL, 0, 0); -#endif + if (cpi->sf.use_upsampled_references) { + const int pw = 4 * num_4x4_blocks_wide_lookup[bsize]; + const int ph = 4 * num_4x4_blocks_high_lookup[bsize]; + // Use up-sampled reference frames. 
+ struct macroblockd_plane *const pd = &xd->plane[0]; + struct buf_2d backup_pred = pd->pre[ref_idx]; + const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref); + + // Set pred for Y plane + setup_pred_plane(&pd->pre[ref_idx], upsampled_ref->y_buffer, + upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3), + NULL, pd->subsampling_x, pd->subsampling_y); + + bestsme = cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv, + cm->allow_high_precision_mv, + x->errorperbit, + &cpi->fn_ptr[bsize], + cpi->sf.mv.subpel_force_stop, + cpi->sf.mv.subpel_iters_per_step, + cond_cost_list(cpi, cost_list), + x->nmvjointcost, x->mvcost, + &dis, &x->pred_sse[ref], NULL, + pw, ph, 1); + + // Restore the reference frames. + pd->pre[ref_idx] = backup_pred; + } else { + cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv, + cm->allow_high_precision_mv, + x->errorperbit, + &cpi->fn_ptr[bsize], + cpi->sf.mv.subpel_force_stop, + cpi->sf.mv.subpel_iters_per_step, + cond_cost_list(cpi, cost_list), + x->nmvjointcost, x->mvcost, + &dis, &x->pred_sse[ref], NULL, 0, 0, 0); + } } *rate_mv = vp10_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); diff --git a/vp10/encoder/rdopt.h b/vp10/encoder/rdopt.h index f4d9b9553734a58d9cf6b940d8e8c04f3aa32cd3..174ad4dadb816109bb8a8b595a2a555159c00fdc 100644 --- a/vp10/encoder/rdopt.h +++ b/vp10/encoder/rdopt.h @@ -103,13 +103,16 @@ void vp10_build_prediction_by_left_preds(VP10_COMP *cpi, int tmp_stride[MAX_MB_PLANE]); #endif // CONFIG_OBMC -#if CONFIG_AFFINE_MOTION static INLINE const YV12_BUFFER_CONFIG *get_upsampled_ref(VP10_COMP *cpi, const int ref) { // Use up-sampled reference frames. 
int ref_idx = 0; if (ref == LAST_FRAME) +#if CONFIG_EXT_REFS + ref_idx = cpi->lst_fb_idxes[ref - LAST_FRAME]; +#else ref_idx = cpi->lst_fb_idx; +#endif else if (ref == GOLDEN_FRAME) ref_idx = cpi->gld_fb_idx; else if (ref == ALTREF_FRAME) @@ -117,7 +120,6 @@ static INLINE const YV12_BUFFER_CONFIG *get_upsampled_ref(VP10_COMP *cpi, return &cpi->upsampled_ref_bufs[cpi->upsampled_ref_idx[ref_idx]].buf; } -#endif #ifdef __cplusplus } // extern "C" diff --git a/vp10/encoder/speed_features.c b/vp10/encoder/speed_features.c index ec8acdae29fee4e3b596e28413b8d853cb6a662a..169ae2cfc47e0f3d097f289d8c9b8e2045e05c35 100644 --- a/vp10/encoder/speed_features.c +++ b/vp10/encoder/speed_features.c @@ -177,6 +177,7 @@ static void set_good_speed_feature(VP10_COMP *cpi, VP10_COMMON *cm, sf->comp_inter_joint_search_thresh = BLOCK_SIZES; sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; sf->allow_partition_search_skip = 1; + sf->use_upsampled_references = 0; #if CONFIG_EXT_TX sf->tx_type_search = PRUNE_TWO; #endif @@ -279,6 +280,7 @@ static void set_rt_speed_feature(VP10_COMP *cpi, SPEED_FEATURES *sf, sf->use_fast_coef_costing = 1; sf->allow_exhaustive_searches = 0; sf->exhaustive_searches_thresh = INT_MAX; + sf->use_upsampled_references = 0; // Use transform domain distortion computation // Note var-tx expt always uses pixel domain distortion. 
@@ -495,6 +497,11 @@ void vp10_set_speed_features_framesize_independent(VP10_COMP *cpi) { sf->disable_filter_search_var_thresh = 0; sf->adaptive_interp_filter_search = 0; sf->allow_partition_search_skip = 0; +#if CONFIG_EXT_REFS + sf->use_upsampled_references = 0; +#else + sf->use_upsampled_references = 1; +#endif for (i = 0; i < TX_SIZES; i++) { sf->intra_y_mode_mask[i] = INTRA_ALL; diff --git a/vp10/encoder/speed_features.h b/vp10/encoder/speed_features.h index fbb69882c488c21f6b6662d76832fb17e0ace44e..02ee204a97b9cc76a4464ec8ef8636d8e510e6be 100644 --- a/vp10/encoder/speed_features.h +++ b/vp10/encoder/speed_features.h @@ -479,6 +479,9 @@ typedef struct SPEED_FEATURES { // Fast approximation of vp10_model_rd_from_var_lapndz int simple_model_rd_from_var; + // Do sub-pixel search in up-sampled reference frames + int use_upsampled_references; + // Whether to compute distortion in the image domain (slower but // more accurate), or in the transform domain (faster but less acurate). int use_transform_domain_distortion; diff --git a/vp10/encoder/temporal_filter.c b/vp10/encoder/temporal_filter.c index 3e1246a807e37f3066a02c51fe816cba5031a893..b3cf8999b45acc695fdd27ce67b523a4be28ba87 100644 --- a/vp10/encoder/temporal_filter.c +++ b/vp10/encoder/temporal_filter.c @@ -320,11 +320,7 @@ static int temporal_filter_find_matching_mb_c(VP10_COMP *cpi, 0, mv_sf->subpel_iters_per_step, cond_cost_list(cpi, cost_list), NULL, NULL, -#if CONFIG_AFFINE_MOTION &distortion, &sse, NULL, 0, 0, 0); -#else - &distortion, &sse, NULL, 0, 0); -#endif // Restore input state x->plane[0].src = src; diff --git a/vpx_dsp/variance.c b/vpx_dsp/variance.c index 169769a1ad8adb958e762d72e830ffa8afc95472..ee1e3054a3d49a5f655cc44adea8fb743ad97698 100644 --- a/vpx_dsp/variance.c +++ b/vpx_dsp/variance.c @@ -267,7 +267,6 @@ void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, } } -#if CONFIG_AFFINE_MOTION // Get pred block from up-sampled reference. 
void vpx_upsampled_pred_c(uint8_t *comp_pred, int width, int height, @@ -300,7 +299,6 @@ void vpx_comp_avg_upsampled_pred_c(uint8_t *comp_pred, const uint8_t *pred, ref += stride; } } -#endif #if CONFIG_VP9_HIGHBITDEPTH static void highbd_variance64(const uint8_t *a8, int a_stride, diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index 583d9fa8950df20af9373cfd42ad4d4f58a12b0a..e5c002a70fbc555a40f971ea4258548c21d7427a 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -1233,12 +1233,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # # ... # -if (vpx_config("CONFIG_AFFINE_MOTION") eq "yes") { - add_proto qw/void vpx_upsampled_pred/, "uint8_t *comp_pred, int width, int height, const uint8_t *ref, int ref_stride"; - specialize qw/vpx_upsampled_pred sse2/; - add_proto qw/void vpx_comp_avg_upsampled_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride"; - specialize qw/vpx_comp_avg_upsampled_pred sse2/; -} +add_proto qw/void vpx_upsampled_pred/, "uint8_t *comp_pred, int width, int height, const uint8_t *ref, int ref_stride"; +specialize qw/vpx_upsampled_pred sse2/; +add_proto qw/void vpx_comp_avg_upsampled_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride"; +specialize qw/vpx_comp_avg_upsampled_pred sse2/; # # ... diff --git a/vpx_dsp/x86/variance_sse2.c b/vpx_dsp/x86/variance_sse2.c index 7943c843c408197afff8c6adaa6f9e3d27539555..63fc1e6741690089e9061f01d499c6a5dddb16a5 100644 --- a/vpx_dsp/x86/variance_sse2.c +++ b/vpx_dsp/x86/variance_sse2.c @@ -476,7 +476,6 @@ FNS(ssse3, ssse3); #undef FN #endif // CONFIG_USE_X86INC -#if CONFIG_AFFINE_MOTION void vpx_upsampled_pred_sse2(uint8_t *comp_pred, int width, int height, const uint8_t *ref, int ref_stride) { @@ -703,4 +702,3 @@ void vpx_comp_avg_upsampled_pred_sse2(uint8_t *comp_pred, const uint8_t *pred, } } } -#endif