diff --git a/test/resize_test.cc b/test/resize_test.cc index bc91fe22602a5dd8dbbcd2b11f73a488bc16688d..c5f05f31048eeaa2b9964d1c84494057b8917501 100644 --- a/test/resize_test.cc +++ b/test/resize_test.cc @@ -94,13 +94,53 @@ unsigned int ScaleForFrameNumber(unsigned int frame, unsigned int val) { if (frame < 10) return val; if (frame < 20) - return val / 2; + return val * 3 / 4; if (frame < 30) - return val * 2 / 3; + return val / 2; if (frame < 40) - return val / 4; + return val; if (frame < 50) - return val * 7 / 8; + return val * 3 / 4; + if (frame < 60) + return val / 2; + if (frame < 70) + return val * 3 / 4; + if (frame < 80) + return val; + if (frame < 90) + return val * 3 / 4; + if (frame < 100) + return val / 2; + if (frame < 110) + return val * 3 / 4; + if (frame < 120) + return val; + if (frame < 130) + return val * 3 / 4; + if (frame < 140) + return val / 2; + if (frame < 150) + return val * 3 / 4; + if (frame < 160) + return val; + if (frame < 170) + return val / 2; + if (frame < 180) + return val * 3 / 4; + if (frame < 190) + return val; + if (frame < 200) + return val * 3 / 4; + if (frame < 210) + return val / 2; + if (frame < 220) + return val * 3 / 4; + if (frame < 230) + return val; + if (frame < 240) + return val / 2; + if (frame < 250) + return val * 3 / 4; return val; } @@ -108,7 +148,7 @@ class ResizingVideoSource : public ::libvpx_test::DummyVideoSource { public: ResizingVideoSource() { SetSize(kInitialWidth, kInitialHeight); - limit_ = 60; + limit_ = 300; } virtual ~ResizingVideoSource() {} @@ -347,6 +387,8 @@ class ResizeRealtimeTest : public ::libvpx_test::EncoderTest, TEST_P(ResizeRealtimeTest, TestExternalResizeWorks) { ResizingVideoSource video; DefaultConfig(); + // Disable internal resize for this test. 
+ cfg_.rc_resize_allowed = 0; change_bitrate_ = false; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); diff --git a/vp8/common/threading.h b/vp8/common/threading.h index a433d03a098a49b43c391d63210eea2905d532e6..c00e517a78d73256f2d745e9cade7d04ce92ee5f 100644 --- a/vp8/common/threading.h +++ b/vp8/common/threading.h @@ -12,6 +12,7 @@ #ifndef VP8_COMMON_THREADING_H_ #define VP8_COMMON_THREADING_H_ +#include "./vpx_config.h" #ifdef __cplusplus extern "C" { @@ -20,7 +21,7 @@ extern "C" { #if CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD /* Thread management macros */ -#ifdef _WIN32 +#if defined(_WIN32) && !HAVE_PTHREAD_H /* Win32 */ #include #include @@ -77,8 +78,8 @@ extern "C" { #define ts_key_create(ts_key, destructor) pthread_key_create (&(ts_key), destructor); #endif -/* Syncrhronization macros: Win32 and Pthreads */ -#ifdef _WIN32 +/* Synchronization macros: Win32 and Pthreads */ +#if defined(_WIN32) && !HAVE_PTHREAD_H #define sem_t HANDLE #define pause(voidpara) __asm PAUSE #define sem_init(sem, sem_attr1, sem_init_value) (int)((*sem = CreateSemaphore(NULL,0,32768,NULL))==NULL) diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index 39710c830f7fac4e05070d399c7facd2ae98a07b..8a492d56237a4d410358974c23ea403eb3da5fa2 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -737,7 +737,8 @@ static void dec_build_inter_predictors_sb(VP9Decoder *const pbi, "Reference frame has invalid dimensions"); is_scaled = vp9_is_scaled(sf); - vp9_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col, sf); + vp9_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col, + is_scaled ? 
sf : NULL); xd->block_refs[ref] = ref_buf; if (sb_type < BLOCK_8X8) { diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index 743ab396e5abe33754f2900143db639390bdf970..86044207c21eec9fb83d89e23293a9fd9ceafc0e 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -81,8 +81,8 @@ static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, return (TX_SIZE)tx_size; } -static TX_SIZE read_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, - int allow_select, vpx_reader *r) { +static INLINE TX_SIZE read_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, + int allow_select, vpx_reader *r) { TX_MODE tx_mode = cm->tx_mode; BLOCK_SIZE bsize = xd->mi[0]->sb_type; const TX_SIZE max_tx_size = max_txsize_lookup[bsize]; @@ -416,6 +416,14 @@ static INLINE int is_mv_valid(const MV *mv) { mv->col > MV_LOW && mv->col < MV_UPP; } +static INLINE void copy_mv_pair(int_mv *dst, const int_mv *src) { + memcpy(dst, src, sizeof(*dst) * 2); +} + +static INLINE void zero_mv_pair(int_mv *dst) { + memset(dst, 0, sizeof(*dst) * 2); +} + static INLINE int assign_mv(VP9_COMMON *cm, MACROBLOCKD *xd, PREDICTION_MODE mode, int_mv mv[2], int_mv ref_mv[2], @@ -437,13 +445,11 @@ static INLINE int assign_mv(VP9_COMMON *cm, MACROBLOCKD *xd, } case NEARMV: case NEARESTMV: { - mv[0].as_int = near_nearest_mv[0].as_int; - mv[1].as_int = near_nearest_mv[1].as_int; + copy_mv_pair(mv, near_nearest_mv); break; } case ZEROMV: { - mv[0].as_int = 0; - mv[1].as_int = 0; + zero_mv_pair(mv); break; } default: { @@ -824,8 +830,7 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi, mi->mode = b_mode; - mi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int; - mi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int; + copy_mv_pair(mi->mv, mi->bmi[3].as_mv); } else { xd->corrupted |= !assign_mv(cm, xd, mi->mode, mi->mv, best_ref_mvs, best_ref_mvs, is_compound, allow_hp, r); @@ -839,8 +844,6 @@ static void read_inter_frame_mode_info(VP9Decoder *const pbi, MODE_INFO *const mi = xd->mi[0]; int 
inter_block; - mi->mv[0].as_int = 0; - mi->mv[1].as_int = 0; mi->segment_id = read_inter_segment_id(cm, xd, mi_row, mi_col, r); mi->skip = read_skip(cm, xd, mi->segment_id, r); inter_block = read_is_inter_block(cm, xd, mi->segment_id, r); @@ -852,6 +855,11 @@ static void read_inter_frame_mode_info(VP9Decoder *const pbi, read_intra_block_mode_info(cm, xd, mi, r); } +static INLINE void copy_ref_frame_pair(MV_REFERENCE_FRAME *dst, + const MV_REFERENCE_FRAME *src) { + memcpy(dst, src, sizeof(*dst) * 2); +} + void vpx_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd, int mi_row, int mi_col, vpx_reader *r, int x_mis, int y_mis) { @@ -866,14 +874,12 @@ void vpx_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd, read_inter_frame_mode_info(pbi, xd, mi_row, mi_col, r); for (h = 0; h < y_mis; ++h) { - MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols; for (w = 0; w < x_mis; ++w) { - MV_REF *const mv = frame_mv + w; - mv->ref_frame[0] = mi->ref_frame[0]; - mv->ref_frame[1] = mi->ref_frame[1]; - mv->mv[0].as_int = mi->mv[0].as_int; - mv->mv[1].as_int = mi->mv[1].as_int; + MV_REF *const mv = frame_mvs + w; + copy_ref_frame_pair(mv->ref_frame, mi->ref_frame); + copy_mv_pair(mv->mv, mi->mv); } + frame_mvs += cm->mi_cols; } } } diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c index 99e2c5a2875fc44d870bbdf2a8eec2c9c67ed301..ebc850de74d0f91994fd0ca49b2d9c6d037b279b 100644 --- a/vp9/encoder/vp9_aq_cyclicrefresh.c +++ b/vp9/encoder/vp9_aq_cyclicrefresh.c @@ -58,29 +58,6 @@ void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr) { vpx_free(cr); } -// Check if we should turn off cyclic refresh based on bitrate condition. -// TODO(marpan): May be better in some cases to just reduce the amount/delta-qp -// instead of completely shutting off. -static int apply_cyclic_refresh_bitrate(const VP9_COMMON *cm, - const RATE_CONTROL *rc) { - // Turn off cyclic refresh if bits available per frame is not sufficiently - // larger than bit cost of segmentation. 
Segment map bit cost should scale - // with number of seg blocks, so compare available bits to number of blocks. - // Average bits available per frame = avg_frame_bandwidth - // Number of (8x8) blocks in frame = mi_rows * mi_cols; - const float factor = 0.15f; - const int number_blocks = cm->mi_rows * cm->mi_cols; - // The condition below corresponds to turning off at target bitrates: - // (at 30fps), ~8kbps for CIF, 20kbps for VGA, 60kps for HD/720p. - // Also turn off at very small frame sizes, to avoid too large fraction of - // superblocks to be refreshed per frame. Threshold below is less than QCIF. - if (rc->avg_frame_bandwidth < factor * number_blocks || - number_blocks / 64 < 5) - return 0; - else - return 1; -} - // Check if this coding block, of size bsize, should be considered for refresh // (lower-qp coding). Decision can be based on various factors, such as // size of the coding block (i.e., below min_block size rejected), coding @@ -534,7 +511,10 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) { const RATE_CONTROL *const rc = &cpi->rc; CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; struct segmentation *const seg = &cm->seg; - const int apply_cyclic_refresh = apply_cyclic_refresh_bitrate(cm, rc); + // TODO(marpan): Look into whether we should reduce the amount/delta-qp + // instead of completely shutting off at low bitrates. For now keep it on. + // const int apply_cyclic_refresh = apply_cyclic_refresh_bitrate(cm, rc); + const int apply_cyclic_refresh = 1; if (cm->current_video_frame == 0) cr->low_content_avg = 0.0; // Don't apply refresh on key frame or temporal enhancement layer frames. 
@@ -627,4 +607,5 @@ void vp9_cyclic_refresh_reset_resize(VP9_COMP *const cpi) { memset(cr->consec_zero_mv, 0, cm->mi_rows * cm->mi_cols); cr->sb_index = 0; cpi->refresh_golden_frame = 1; + cpi->refresh_alt_ref_frame = 1; } diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 3eaa9deb846c298a573f6861a6c4f34d64dde290..147743e8d81337236179540a04dbba125494fa3e 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -65,8 +65,14 @@ struct macroblock { int skip_optimize; int q_index; + // The equivalent error at the current rdmult of one whole bit (not one + // bitcost unit). int errorperbit; + // The equivalent SAD error of one (whole) bit at the current quantizer + // for large blocks. int sadperbit16; + // The equivalent SAD error of one (whole) bit at the current quantizer + // for sub-8x8 blocks. int sadperbit4; int rddiv; int rdmult; diff --git a/vp9/encoder/vp9_denoiser.c b/vp9/encoder/vp9_denoiser.c index 99118f5dfed64650c8e6567ced0a16fcc1f0a50f..e419cffd8fa4c5114d16230a84e4484ac5aa0db9 100644 --- a/vp9/encoder/vp9_denoiser.c +++ b/vp9/encoder/vp9_denoiser.c @@ -332,7 +332,7 @@ void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb, struct buf_2d src = mb->plane[0].src; int is_skin = 0; - if (bs <= BLOCK_16X16 && denoiser->denoising_level >= kDenLow) { + if (bs <= BLOCK_32X32 && denoiser->denoising_level >= kDenLow) { is_skin = vp9_compute_skin_block(mb->plane[0].src.buf, mb->plane[1].src.buf, mb->plane[2].src.buf, diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 5c67f51df4a957b8178211fece110b8baf5edbc8..f3147e9e9558756697f3e12644eaf1f7cf98f4d3 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -1525,6 +1525,7 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { if (last_w != cpi->oxcf.width || last_h != cpi->oxcf.height) { cm->width = cpi->oxcf.width; cm->height = cpi->oxcf.height; + cpi->external_resize = 1; } if (cpi->initial_width) { @@ -1536,10 
+1537,15 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { alloc_compressor_data(cpi); realloc_segmentation_maps(cpi); cpi->initial_width = cpi->initial_height = 0; + cpi->external_resize = 0; } } update_frame_size(cpi); + if ((last_w != cpi->oxcf.width || last_h != cpi->oxcf.height) && + cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) + vp9_cyclic_refresh_reset_resize(cpi); + if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) || ((cpi->svc.number_temporal_layers > 1 || @@ -1667,6 +1673,7 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, cpi->use_svc = 0; cpi->resize_state = 0; + cpi->external_resize = 0; cpi->resize_avg_qp = 0; cpi->resize_buffer_underflow = 0; cpi->use_skin_detection = 0; @@ -2959,8 +2966,19 @@ void vp9_scale_references(VP9_COMP *cpi) { } #endif // CONFIG_VP9_HIGHBITDEPTH } else { - const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame); - RefCntBuffer *const buf = &pool->frame_bufs[buf_idx]; + int buf_idx; + RefCntBuffer *buf = NULL; + if (cpi->oxcf.pass == 0 && !cpi->use_svc) { + // Check for release of scaled reference. + buf_idx = cpi->scaled_ref_idx[ref_frame - 1]; + buf = (buf_idx != INVALID_IDX) ? 
&pool->frame_bufs[buf_idx] : NULL; + if (buf != NULL) { + --buf->ref_count; + cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX; + } + } + buf_idx = get_ref_frame_buf_idx(cpi, ref_frame); + buf = &pool->frame_bufs[buf_idx]; buf->buf.y_crop_width = ref->y_crop_width; buf->buf.y_crop_height = ref->y_crop_height; cpi->scaled_ref_idx[ref_frame - 1] = buf_idx; @@ -4129,7 +4147,7 @@ int vp9_receive_raw_frame(VP9_COMP *cpi, unsigned int frame_flags, const int subsampling_x = sd->subsampling_x; const int subsampling_y = sd->subsampling_y; #if CONFIG_VP9_HIGHBITDEPTH - const int use_highbitdepth = sd->flags & YV12_FLAG_HIGHBITDEPTH; + const int use_highbitdepth = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0; check_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y); #else check_initial_width(cpi, subsampling_x, subsampling_y); diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index 3b74bb8e3048ea4f3b8b597c85b78a60a9820716..8759cbe10c196ad7e1e4e3ae5a6f24d8bfa197be 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -485,6 +485,7 @@ typedef struct VP9_COMP { int resize_pending; int resize_state; + int external_resize; int resize_scale_num; int resize_scale_den; int resize_avg_qp; diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 607941cfa5fdafed151d7487d426d05edf30dea7..8b7825e7b69ddabdbf2e09bd051615fe57dbe159 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -80,27 +80,29 @@ int vp9_mv_bit_cost(const MV *mv, const MV *ref, return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7); } -static int mv_err_cost(const MV *mv, const MV *ref, - const int *mvjcost, int *mvcost[2], - int error_per_bit) { +#define PIXEL_TRANSFORM_ERROR_SCALE 4 +static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost, + int *mvcost[2], int error_per_bit) { if (mvcost) { - const MV diff = { mv->row - ref->row, - mv->col - ref->col }; - // TODO(aconverse): See if this shift needs to be tied 
to - // VP9_PROB_COST_SHIFT. - return ROUND_POWER_OF_TWO((unsigned)mv_cost(&diff, mvjcost, mvcost) * - error_per_bit, 13); + const MV diff = {mv->row - ref->row, mv->col - ref->col}; + // This product sits at a 32-bit ceiling right now and any additional + // accuracy in either bit cost or error cost will cause it to overflow. + return ROUND_POWER_OF_TWO( + (unsigned)mv_cost(&diff, mvjcost, mvcost) * error_per_bit, + RDDIV_BITS + VP9_PROB_COST_SHIFT - RD_EPB_SHIFT + + PIXEL_TRANSFORM_ERROR_SCALE); } return 0; } static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref, - int error_per_bit) { + int sad_per_bit) { const MV diff = { mv->row - ref->row, mv->col - ref->col }; - // TODO(aconverse): See if this shift needs to be tied to VP9_PROB_COST_SHIFT. - return ROUND_POWER_OF_TWO((unsigned)mv_cost(&diff, x->nmvjointsadcost, - x->nmvsadcost) * error_per_bit, 8); + return ROUND_POWER_OF_TWO( + (unsigned)mv_cost(&diff, x->nmvjointsadcost, x->nmvsadcost) * + sad_per_bit, + VP9_PROB_COST_SHIFT); } void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) { @@ -152,12 +154,13 @@ void vp9_init3smotion_compensation(search_site_config *cfg, int stride) { * could reduce the area. */ -/* estimated cost of a motion vector (r,c) */ +/* Estimated (square) error cost of a motion vector (r,c). The 14 scale comes + * from the same math as in mv_err_cost(). */ #define MVC(r, c) \ (mvcost ? 
\ ((unsigned)(mvjcost[((r) != rr) * 2 + ((c) != rc)] + \ mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \ - error_per_bit + 4096) >> 13 : 0) + error_per_bit + 8192) >> 14 : 0) // convert motion vector component to offset for sv[a]f calc diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 829066c9feb6841ef68b6f1c4d9df3bc8729119b..d861f8096713d3ebd3e55018588401dbaa64242a 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -949,7 +949,8 @@ static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, // TODO(jingning): Skip is signalled per prediciton block not per tx block. rate += vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), is_skippable); } else { - unsigned int var, sse; + unsigned int var = 0; + unsigned int sse = 0; model_rd_for_sb_uv(cpi, plane_bsize, x, xd, &rate, &dist, &var, &sse, plane, plane); } diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c index 980a49f0a5e8d95b8a107805c208f5454bee77ca..91f877ed7eea94b3b5a0525a073c60a29f012d5b 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -342,8 +342,7 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) { x->skip_block = segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP); x->q_index = qindex; - x->errorperbit = rdmult >> 6; - x->errorperbit += (x->errorperbit == 0); + set_error_per_bit(x, rdmult); vp9_initialize_me_consts(cpi, x, x->q_index); } diff --git a/vp9/encoder/vp9_rd.c b/vp9/encoder/vp9_rd.c index ad2027c1c80f26fc3745a31ea83acedeb6cefa76..44b6ae71a3d5318c6a9a6a353fe1a6e1c8969676 100644 --- a/vp9/encoder/vp9_rd.c +++ b/vp9/encoder/vp9_rd.c @@ -41,7 +41,6 @@ #include "vp9/encoder/vp9_tokenize.h" #define RD_THRESH_POW 1.25 -#define RD_MULT_EPB_RATIO 64 // Factor to weigh the rate for switchable interp filters. #define SWITCHABLE_INTERP_RATE_FACTOR 1 @@ -279,8 +278,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) { rd->RDDIV = RDDIV_BITS; // In bits (to multiply D by 128). 
rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q); - x->errorperbit = rd->RDMULT / RD_MULT_EPB_RATIO; - x->errorperbit += (x->errorperbit == 0); + set_error_per_bit(x, rd->RDMULT); x->select_tx_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL && cm->frame_type != KEY_FRAME) ? 0 : 1; diff --git a/vp9/encoder/vp9_rd.h b/vp9/encoder/vp9_rd.h index a92b14edf0e015ffc14e9c0e7866911b4c1d8642..9b8e2732c5ef736811cc1ce5d87067b2fb56c0c4 100644 --- a/vp9/encoder/vp9_rd.h +++ b/vp9/encoder/vp9_rd.h @@ -24,6 +24,7 @@ extern "C" { #endif #define RDDIV_BITS 7 +#define RD_EPB_SHIFT 6 #define RDCOST(RM, DM, R, D) \ (ROUND_POWER_OF_TWO(((int64_t)R) * (RM), VP9_PROB_COST_SHIFT) + (D << DM)) @@ -168,6 +169,11 @@ static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh, return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX; } +static INLINE void set_error_per_bit(MACROBLOCK *x, int rdmult) { + x->errorperbit = rdmult >> RD_EPB_SHIFT; + x->errorperbit += (x->errorperbit == 0); +} + void vp9_mv_pred(struct VP9_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer, int ref_y_stride, int ref_frame, BLOCK_SIZE block_size); diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index f00a58ce247003f886a324d7d4f577b9310e1d5a..1480ea4182477a2d91faea353a549f0cfaca964d 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -3355,24 +3355,25 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, } if (!disable_skip) { - vpx_prob skip_prob = vp9_get_skip_prob(cm, xd); + const vpx_prob skip_prob = vp9_get_skip_prob(cm, xd); + const int skip_cost0 = vp9_cost_bit(skip_prob, 0); + const int skip_cost1 = vp9_cost_bit(skip_prob, 1); + if (skippable) { // Back out the coefficient coding costs rate2 -= (rate_y + rate_uv); // Cost the skip mb case - rate2 += vp9_cost_bit(skip_prob, 1); + rate2 += skip_cost1; } else if (ref_frame != INTRA_FRAME && !xd->lossless) { if (RDCOST(x->rdmult, x->rddiv, - rate_y + rate_uv + 
vp9_cost_bit(skip_prob, 0), - distortion2) < - RDCOST(x->rdmult, x->rddiv, - vp9_cost_bit(skip_prob, 1), total_sse)) { + rate_y + rate_uv + skip_cost0, distortion2) < + RDCOST(x->rdmult, x->rddiv, skip_cost1, total_sse)) { // Add in the cost of the no skip flag. - rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0); + rate2 += skip_cost0; } else { // FIXME(rbultje) make this work for splitmv also - rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); + rate2 += skip_cost1; distortion2 = total_sse; assert(total_sse >= 0); rate2 -= (rate_y + rate_uv); @@ -3380,7 +3381,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, } } else { // Add in the cost of the no skip flag. - rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0); + rate2 += skip_cost0; } // Calculate the final RD estimate for this mode. @@ -4152,17 +4153,21 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, } if (!disable_skip) { + const vpx_prob skip_prob = vp9_get_skip_prob(cm, xd); + const int skip_cost0 = vp9_cost_bit(skip_prob, 0); + const int skip_cost1 = vp9_cost_bit(skip_prob, 1); + // Skip is never coded at the segment level for sub8x8 blocks and instead // always coded in the bitstream at the mode info level. - if (ref_frame != INTRA_FRAME && !xd->lossless) { - if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) < - RDCOST(x->rdmult, x->rddiv, 0, total_sse)) { + if (RDCOST(x->rdmult, x->rddiv, + rate_y + rate_uv + skip_cost0, distortion2) < + RDCOST(x->rdmult, x->rddiv, skip_cost1, total_sse)) { // Add in the cost of the no skip flag. - rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0); + rate2 += skip_cost0; } else { // FIXME(rbultje) make this work for splitmv also - rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); + rate2 += skip_cost1; distortion2 = total_sse; assert(total_sse >= 0); rate2 -= (rate_y + rate_uv); @@ -4172,7 +4177,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, } } else { // Add in the cost of the no skip flag. 
- rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0); + rate2 += skip_cost0; } // Calculate the final RD estimate for this mode. diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index b4f20fcd62b6b0781bda113aa0c08fa1f5985264..8a34fd9add1bb505230b700b8f0ceb3cb8f1e483 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -303,11 +303,13 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, FLAG_SKIP_INTRA_LOWVAR; sf->adaptive_pred_interp_filter = 2; - // Disable reference masking if using spatial scaling since - // pred_mv_sad will not be set (since vp9_mv_pred will not - // be called). - // TODO(marpan/agrange): Fix this condition. - sf->reference_masking = (cpi->oxcf.resize_mode != RESIZE_DYNAMIC && + // Disable reference masking if using spatial scaling or for dynamic + // resizing (internal or external) since pred_mv_sad will not be set + // (since vp9_mv_pred will not be called). + // TODO(marpan): Fix this condition to allow reference masking for when + // all references have same resolution as source frame. + sf->reference_masking = (cpi->external_resize == 0 && + cpi->oxcf.resize_mode != RESIZE_DYNAMIC && cpi->svc.number_spatial_layers == 1) ? 
1 : 0; sf->disable_filter_search_var_thresh = 50; diff --git a/vp9/encoder/x86/vp9_diamond_search_sad_avx.c b/vp9/encoder/x86/vp9_diamond_search_sad_avx.c index b475f8db1131b124d85c4d594751b37106835a9d..0bc417fc15ad17ff44ab5178f44e6094d028085e 100644 --- a/vp9/encoder/x86/vp9_diamond_search_sad_avx.c +++ b/vp9/encoder/x86/vp9_diamond_search_sad_avx.c @@ -47,12 +47,12 @@ static INLINE int mv_cost(const int_mv mv, } static int mvsad_err_cost(const MACROBLOCK *x, const int_mv mv, const MV *ref, - int error_per_bit) { + int sad_per_bit) { const int_mv diff = pack_int_mv(mv.as_mv.row - ref->row, mv.as_mv.col - ref->col); return ROUND_POWER_OF_TWO((unsigned)mv_cost(diff, x->nmvjointsadcost, x->nmvsadcost) * - error_per_bit, 8); + sad_per_bit, VP9_PROB_COST_SHIFT); } /***************************************************************************** diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk index 83a91e87007a6d31c386e47f8d351878aa231511..2930c23ddf77b01b4146220710e0de40f5796388 100644 --- a/vp9/vp9cx.mk +++ b/vp9/vp9cx.mk @@ -119,7 +119,9 @@ endif VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.c VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_dct_ssse3.c +ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes) VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_frame_scale_ssse3.c +endif ifeq ($(CONFIG_VP9_TEMPORAL_DENOISING),yes) VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_denoiser_sse2.c diff --git a/vpx_dsp/intrapred.c b/vpx_dsp/intrapred.c index 18bcd87f2852892837c162f46c130aaf1da5fa7c..dcc9b304c51fc06470fc58760e0768fa002d756f 100644 --- a/vpx_dsp/intrapred.c +++ b/vpx_dsp/intrapred.c @@ -152,20 +152,29 @@ static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bs, static INLINE void d135_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { - int r, c; - dst[0] = AVG3(left[0], above[-1], above[0]); - for (c = 1; c < bs; c++) - dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); + int i; +#if defined(__GNUC__) && __GNUC__ == 4 && 
__GNUC_MINOR__ > 7 + // silence a spurious -Warray-bounds warning, possibly related to: + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56273 + uint8_t border[69]; +#else + uint8_t border[32 + 32 - 1]; // outer border from bottom-left to top-right +#endif - dst[stride] = AVG3(above[-1], left[0], left[1]); - for (r = 2; r < bs; ++r) - dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]); + // dst(bs, bs - 2)[0], i.e., border starting at bottom-left + for (i = 0; i < bs - 2; ++i) { + border[i] = AVG3(left[bs - 3 - i], left[bs - 2 - i], left[bs - 1 - i]); + } + border[bs - 2] = AVG3(above[-1], left[0], left[1]); + border[bs - 1] = AVG3(left[0], above[-1], above[0]); + border[bs - 0] = AVG3(above[-1], above[0], above[1]); + // dst[0][2, size), i.e., remaining top border ascending + for (i = 0; i < bs - 2; ++i) { + border[bs + 1 + i] = AVG3(above[i], above[i + 1], above[i + 2]); + } - dst += stride; - for (r = 1; r < bs; ++r) { - for (c = 1; c < bs; c++) - dst[c] = dst[-stride + c - 1]; - dst += stride; + for (i = 0; i < bs; ++i) { + memcpy(dst + i * stride, border + bs - 1 - i, bs); } }