Commit 18b6e9a3 authored by Yaowu Xu

Merge branch 'masterbase' into nextgenv2

Conflicts:
	vp10/encoder/rdopt.c

Change-Id: If720e7f9810378d24bf9fd51a95fd29c3bc5d774
parents 13efa8a0 34d12d11
......@@ -94,13 +94,53 @@ unsigned int ScaleForFrameNumber(unsigned int frame, unsigned int val) {
if (frame < 10)
return val;
if (frame < 20)
return val / 2;
return val * 3 / 4;
if (frame < 30)
return val * 2 / 3;
return val / 2;
if (frame < 40)
return val / 4;
return val;
if (frame < 50)
return val * 7 / 8;
return val * 3 / 4;
if (frame < 60)
return val / 2;
if (frame < 70)
return val * 3 / 4;
if (frame < 80)
return val;
if (frame < 90)
return val * 3 / 4;
if (frame < 100)
return val / 2;
if (frame < 110)
return val * 3 / 4;
if (frame < 120)
return val;
if (frame < 130)
return val * 3 / 4;
if (frame < 140)
return val / 2;
if (frame < 150)
return val * 3 / 4;
if (frame < 160)
return val;
if (frame < 170)
return val / 2;
if (frame < 180)
return val * 3 / 4;
if (frame < 190)
return val;
if (frame < 200)
return val * 3 / 4;
if (frame < 210)
return val / 2;
if (frame < 220)
return val * 3 / 4;
if (frame < 230)
return val;
if (frame < 240)
return val / 2;
if (frame < 250)
return val * 3 / 4;
return val;
}
......@@ -108,7 +148,7 @@ class ResizingVideoSource : public ::libvpx_test::DummyVideoSource {
public:
ResizingVideoSource() {
SetSize(kInitialWidth, kInitialHeight);
limit_ = 60;
limit_ = 300;
}
virtual ~ResizingVideoSource() {}
......@@ -347,6 +387,8 @@ class ResizeRealtimeTest : public ::libvpx_test::EncoderTest,
TEST_P(ResizeRealtimeTest, TestExternalResizeWorks) {
ResizingVideoSource video;
DefaultConfig();
// Disable internal resize for this test.
cfg_.rc_resize_allowed = 0;
change_bitrate_ = false;
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
......
......@@ -12,6 +12,7 @@
#ifndef VP8_COMMON_THREADING_H_
#define VP8_COMMON_THREADING_H_
#include "./vpx_config.h"
#ifdef __cplusplus
extern "C" {
......@@ -20,7 +21,7 @@ extern "C" {
#if CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD
/* Thread management macros */
#ifdef _WIN32
#if defined(_WIN32) && !HAVE_PTHREAD_H
/* Win32 */
#include <process.h>
#include <windows.h>
......@@ -77,8 +78,8 @@ extern "C" {
#define ts_key_create(ts_key, destructor) pthread_key_create (&(ts_key), destructor);
#endif
/* Syncrhronization macros: Win32 and Pthreads */
#ifdef _WIN32
/* Synchronization macros: Win32 and Pthreads */
#if defined(_WIN32) && !HAVE_PTHREAD_H
#define sem_t HANDLE
#define pause(voidpara) __asm PAUSE
#define sem_init(sem, sem_attr1, sem_init_value) (int)((*sem = CreateSemaphore(NULL,0,32768,NULL))==NULL)
......
......@@ -737,7 +737,8 @@ static void dec_build_inter_predictors_sb(VP9Decoder *const pbi,
"Reference frame has invalid dimensions");
is_scaled = vp9_is_scaled(sf);
vp9_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col, sf);
vp9_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col,
is_scaled ? sf : NULL);
xd->block_refs[ref] = ref_buf;
if (sb_type < BLOCK_8X8) {
......
......@@ -81,8 +81,8 @@ static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd,
return (TX_SIZE)tx_size;
}
static TX_SIZE read_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd,
int allow_select, vpx_reader *r) {
static INLINE TX_SIZE read_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd,
int allow_select, vpx_reader *r) {
TX_MODE tx_mode = cm->tx_mode;
BLOCK_SIZE bsize = xd->mi[0]->sb_type;
const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
......@@ -416,6 +416,14 @@ static INLINE int is_mv_valid(const MV *mv) {
mv->col > MV_LOW && mv->col < MV_UPP;
}
// Copies two consecutive int_mv entries from |src| into |dst| with a single
// memcpy. As with any memcpy, the two regions must not overlap.
static INLINE void copy_mv_pair(int_mv *dst, const int_mv *src) {
  const size_t pair_bytes = 2 * sizeof(dst[0]);
  memcpy(dst, src, pair_bytes);
}
// Clears two consecutive int_mv entries starting at |dst| by setting every
// byte of both entries to zero.
static INLINE void zero_mv_pair(int_mv *dst) {
  const size_t pair_bytes = 2 * sizeof(dst[0]);
  memset(dst, 0, pair_bytes);
}
static INLINE int assign_mv(VP9_COMMON *cm, MACROBLOCKD *xd,
PREDICTION_MODE mode,
int_mv mv[2], int_mv ref_mv[2],
......@@ -437,13 +445,11 @@ static INLINE int assign_mv(VP9_COMMON *cm, MACROBLOCKD *xd,
}
case NEARMV:
case NEARESTMV: {
mv[0].as_int = near_nearest_mv[0].as_int;
mv[1].as_int = near_nearest_mv[1].as_int;
copy_mv_pair(mv, near_nearest_mv);
break;
}
case ZEROMV: {
mv[0].as_int = 0;
mv[1].as_int = 0;
zero_mv_pair(mv);
break;
}
default: {
......@@ -824,8 +830,7 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi,
mi->mode = b_mode;
mi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
mi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
copy_mv_pair(mi->mv, mi->bmi[3].as_mv);
} else {
xd->corrupted |= !assign_mv(cm, xd, mi->mode, mi->mv, best_ref_mvs,
best_ref_mvs, is_compound, allow_hp, r);
......@@ -839,8 +844,6 @@ static void read_inter_frame_mode_info(VP9Decoder *const pbi,
MODE_INFO *const mi = xd->mi[0];
int inter_block;
mi->mv[0].as_int = 0;
mi->mv[1].as_int = 0;
mi->segment_id = read_inter_segment_id(cm, xd, mi_row, mi_col, r);
mi->skip = read_skip(cm, xd, mi->segment_id, r);
inter_block = read_is_inter_block(cm, xd, mi->segment_id, r);
......@@ -852,6 +855,11 @@ static void read_inter_frame_mode_info(VP9Decoder *const pbi,
read_intra_block_mode_info(cm, xd, mi, r);
}
// Copies two consecutive MV_REFERENCE_FRAME entries from |src| into |dst|
// with a single memcpy. The two regions must not overlap.
static INLINE void copy_ref_frame_pair(MV_REFERENCE_FRAME *dst,
                                       const MV_REFERENCE_FRAME *src) {
  const size_t pair_bytes = 2 * sizeof(dst[0]);
  memcpy(dst, src, pair_bytes);
}
void vpx_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd,
int mi_row, int mi_col, vpx_reader *r,
int x_mis, int y_mis) {
......@@ -866,14 +874,12 @@ void vpx_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd,
read_inter_frame_mode_info(pbi, xd, mi_row, mi_col, r);
for (h = 0; h < y_mis; ++h) {
MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
for (w = 0; w < x_mis; ++w) {
MV_REF *const mv = frame_mv + w;
mv->ref_frame[0] = mi->ref_frame[0];
mv->ref_frame[1] = mi->ref_frame[1];
mv->mv[0].as_int = mi->mv[0].as_int;
mv->mv[1].as_int = mi->mv[1].as_int;
MV_REF *const mv = frame_mvs + w;
copy_ref_frame_pair(mv->ref_frame, mi->ref_frame);
copy_mv_pair(mv->mv, mi->mv);
}
frame_mvs += cm->mi_cols;
}
}
}
......@@ -58,29 +58,6 @@ void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr) {
vpx_free(cr);
}
// Decides, from the bitrate and the frame size, whether cyclic refresh should
// stay enabled. Returns 1 to keep it on and 0 to turn it off.
// TODO(marpan): May be better in some cases to just reduce the amount/delta-qp
// instead of completely shutting off.
static int apply_cyclic_refresh_bitrate(const VP9_COMMON *cm,
                                        const RATE_CONTROL *rc) {
  // Segment map bit cost should scale with the number of segmentation blocks,
  // so the bits available per frame (avg_frame_bandwidth) are compared against
  // the number of (8x8) blocks in the frame (mi_rows * mi_cols).
  const float factor = 0.15f;
  const int number_blocks = cm->mi_rows * cm->mi_cols;
  // Not enough bits per frame relative to the segmentation cost. This
  // corresponds to turning off at target bitrates of (at 30fps) ~8kbps for
  // CIF, 20kbps for VGA, 60kbps for HD/720p.
  const int too_few_bits = rc->avg_frame_bandwidth < factor * number_blocks;
  // Very small frame: avoid refreshing too large a fraction of superblocks
  // per frame. The threshold is less than QCIF.
  const int frame_too_small = number_blocks / 64 < 5;
  return !(too_few_bits || frame_too_small);
}
// Check if this coding block, of size bsize, should be considered for refresh
// (lower-qp coding). Decision can be based on various factors, such as
// size of the coding block (i.e., below min_block size rejected), coding
......@@ -534,7 +511,10 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) {
const RATE_CONTROL *const rc = &cpi->rc;
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
struct segmentation *const seg = &cm->seg;
const int apply_cyclic_refresh = apply_cyclic_refresh_bitrate(cm, rc);
// TODO(marpan): Look into whether we should reduce the amount/delta-qp
// instead of completely shutting off at low bitrates. For now keep it on.
// const int apply_cyclic_refresh = apply_cyclic_refresh_bitrate(cm, rc);
const int apply_cyclic_refresh = 1;
if (cm->current_video_frame == 0)
cr->low_content_avg = 0.0;
// Don't apply refresh on key frame or temporal enhancement layer frames.
......@@ -627,4 +607,5 @@ void vp9_cyclic_refresh_reset_resize(VP9_COMP *const cpi) {
memset(cr->consec_zero_mv, 0, cm->mi_rows * cm->mi_cols);
cr->sb_index = 0;
cpi->refresh_golden_frame = 1;
cpi->refresh_alt_ref_frame = 1;
}
......@@ -65,8 +65,14 @@ struct macroblock {
int skip_optimize;
int q_index;
// The equivalent error at the current rdmult of one whole bit (not one
// bitcost unit).
int errorperbit;
// The equivalent SAD error of one (whole) bit at the current quantizer
// for large blocks.
int sadperbit16;
// The equivalent SAD error of one (whole) bit at the current quantizer
// for sub-8x8 blocks.
int sadperbit4;
int rddiv;
int rdmult;
......
......@@ -332,7 +332,7 @@ void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
struct buf_2d src = mb->plane[0].src;
int is_skin = 0;
if (bs <= BLOCK_16X16 && denoiser->denoising_level >= kDenLow) {
if (bs <= BLOCK_32X32 && denoiser->denoising_level >= kDenLow) {
is_skin = vp9_compute_skin_block(mb->plane[0].src.buf,
mb->plane[1].src.buf,
mb->plane[2].src.buf,
......
......@@ -1525,6 +1525,7 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
if (last_w != cpi->oxcf.width || last_h != cpi->oxcf.height) {
cm->width = cpi->oxcf.width;
cm->height = cpi->oxcf.height;
cpi->external_resize = 1;
}
if (cpi->initial_width) {
......@@ -1536,10 +1537,15 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
alloc_compressor_data(cpi);
realloc_segmentation_maps(cpi);
cpi->initial_width = cpi->initial_height = 0;
cpi->external_resize = 0;
}
}
update_frame_size(cpi);
if ((last_w != cpi->oxcf.width || last_h != cpi->oxcf.height) &&
cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
vp9_cyclic_refresh_reset_resize(cpi);
if ((cpi->svc.number_temporal_layers > 1 &&
cpi->oxcf.rc_mode == VPX_CBR) ||
((cpi->svc.number_temporal_layers > 1 ||
......@@ -1667,6 +1673,7 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
cpi->use_svc = 0;
cpi->resize_state = 0;
cpi->external_resize = 0;
cpi->resize_avg_qp = 0;
cpi->resize_buffer_underflow = 0;
cpi->use_skin_detection = 0;
......@@ -2959,8 +2966,19 @@ void vp9_scale_references(VP9_COMP *cpi) {
}
#endif // CONFIG_VP9_HIGHBITDEPTH
} else {
const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
RefCntBuffer *const buf = &pool->frame_bufs[buf_idx];
int buf_idx;
RefCntBuffer *buf = NULL;
if (cpi->oxcf.pass == 0 && !cpi->use_svc) {
// Check for release of scaled reference.
buf_idx = cpi->scaled_ref_idx[ref_frame - 1];
buf = (buf_idx != INVALID_IDX) ? &pool->frame_bufs[buf_idx] : NULL;
if (buf != NULL) {
--buf->ref_count;
cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
}
}
buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
buf = &pool->frame_bufs[buf_idx];
buf->buf.y_crop_width = ref->y_crop_width;
buf->buf.y_crop_height = ref->y_crop_height;
cpi->scaled_ref_idx[ref_frame - 1] = buf_idx;
......@@ -4129,7 +4147,7 @@ int vp9_receive_raw_frame(VP9_COMP *cpi, unsigned int frame_flags,
const int subsampling_x = sd->subsampling_x;
const int subsampling_y = sd->subsampling_y;
#if CONFIG_VP9_HIGHBITDEPTH
const int use_highbitdepth = sd->flags & YV12_FLAG_HIGHBITDEPTH;
const int use_highbitdepth = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
check_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y);
#else
check_initial_width(cpi, subsampling_x, subsampling_y);
......
......@@ -485,6 +485,7 @@ typedef struct VP9_COMP {
int resize_pending;
int resize_state;
int external_resize;
int resize_scale_num;
int resize_scale_den;
int resize_avg_qp;
......
......@@ -80,27 +80,29 @@ int vp9_mv_bit_cost(const MV *mv, const MV *ref,
return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
}
static int mv_err_cost(const MV *mv, const MV *ref,
const int *mvjcost, int *mvcost[2],
int error_per_bit) {
#define PIXEL_TRANSFORM_ERROR_SCALE 4
static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost,
int *mvcost[2], int error_per_bit) {
if (mvcost) {
const MV diff = { mv->row - ref->row,
mv->col - ref->col };
// TODO(aconverse): See if this shift needs to be tied to
// VP9_PROB_COST_SHIFT.
return ROUND_POWER_OF_TWO((unsigned)mv_cost(&diff, mvjcost, mvcost) *
error_per_bit, 13);
const MV diff = {mv->row - ref->row, mv->col - ref->col};
// This product sits at a 32-bit ceiling right now and any additional
// accuracy in either bit cost or error cost will cause it to overflow.
return ROUND_POWER_OF_TWO(
(unsigned)mv_cost(&diff, mvjcost, mvcost) * error_per_bit,
RDDIV_BITS + VP9_PROB_COST_SHIFT - RD_EPB_SHIFT +
PIXEL_TRANSFORM_ERROR_SCALE);
}
return 0;
}
static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
int error_per_bit) {
int sad_per_bit) {
const MV diff = { mv->row - ref->row,
mv->col - ref->col };
// TODO(aconverse): See if this shift needs to be tied to VP9_PROB_COST_SHIFT.
return ROUND_POWER_OF_TWO((unsigned)mv_cost(&diff, x->nmvjointsadcost,
x->nmvsadcost) * error_per_bit, 8);
return ROUND_POWER_OF_TWO(
(unsigned)mv_cost(&diff, x->nmvjointsadcost, x->nmvsadcost) *
sad_per_bit,
VP9_PROB_COST_SHIFT);
}
void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) {
......@@ -152,12 +154,13 @@ void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
* could reduce the area.
*/
/* estimated cost of a motion vector (r,c) */
/* Estimated (square) error cost of a motion vector (r,c). The 14 scale comes
* from the same math as in mv_err_cost(). */
#define MVC(r, c) \
(mvcost ? \
((unsigned)(mvjcost[((r) != rr) * 2 + ((c) != rc)] + \
mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \
error_per_bit + 4096) >> 13 : 0)
error_per_bit + 8192) >> 14 : 0)
// convert motion vector component to offset for sv[a]f calc
......
......@@ -949,7 +949,8 @@ static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
// TODO(jingning): Skip is signalled per prediction block not per tx block.
rate += vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), is_skippable);
} else {
unsigned int var, sse;
unsigned int var = 0;
unsigned int sse = 0;
model_rd_for_sb_uv(cpi, plane_bsize, x, xd, &rate, &dist, &var, &sse,
plane, plane);
}
......
......@@ -342,8 +342,7 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) {
x->skip_block = segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP);
x->q_index = qindex;
x->errorperbit = rdmult >> 6;
x->errorperbit += (x->errorperbit == 0);
set_error_per_bit(x, rdmult);
vp9_initialize_me_consts(cpi, x, x->q_index);
}
......
......@@ -41,7 +41,6 @@
#include "vp9/encoder/vp9_tokenize.h"
#define RD_THRESH_POW 1.25
#define RD_MULT_EPB_RATIO 64
// Factor to weigh the rate for switchable interp filters.
#define SWITCHABLE_INTERP_RATE_FACTOR 1
......@@ -279,8 +278,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
rd->RDDIV = RDDIV_BITS; // In bits (to multiply D by 128).
rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);
x->errorperbit = rd->RDMULT / RD_MULT_EPB_RATIO;
x->errorperbit += (x->errorperbit == 0);
set_error_per_bit(x, rd->RDMULT);
x->select_tx_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
cm->frame_type != KEY_FRAME) ? 0 : 1;
......
......@@ -24,6 +24,7 @@ extern "C" {
#endif
#define RDDIV_BITS 7
#define RD_EPB_SHIFT 6
#define RDCOST(RM, DM, R, D) \
(ROUND_POWER_OF_TWO(((int64_t)R) * (RM), VP9_PROB_COST_SHIFT) + (D << DM))
......@@ -168,6 +169,11 @@ static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh,
return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX;
}
// Derives x->errorperbit from |rdmult| by shifting right by RD_EPB_SHIFT.
// A result of exactly zero is replaced by 1 so the stored value is never zero.
static INLINE void set_error_per_bit(MACROBLOCK *x, int rdmult) {
  const int epb = rdmult >> RD_EPB_SHIFT;
  x->errorperbit = (epb == 0) ? 1 : epb;
}
void vp9_mv_pred(struct VP9_COMP *cpi, MACROBLOCK *x,
uint8_t *ref_y_buffer, int ref_y_stride,
int ref_frame, BLOCK_SIZE block_size);
......
......@@ -3355,24 +3355,25 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi,
}
if (!disable_skip) {
vpx_prob skip_prob = vp9_get_skip_prob(cm, xd);
const vpx_prob skip_prob = vp9_get_skip_prob(cm, xd);
const int skip_cost0 = vp9_cost_bit(skip_prob, 0);
const int skip_cost1 = vp9_cost_bit(skip_prob, 1);
if (skippable) {
// Back out the coefficient coding costs
rate2 -= (rate_y + rate_uv);
// Cost the skip mb case
rate2 += vp9_cost_bit(skip_prob, 1);
rate2 += skip_cost1;
} else if (ref_frame != INTRA_FRAME && !xd->lossless) {
if (RDCOST(x->rdmult, x->rddiv,
rate_y + rate_uv + vp9_cost_bit(skip_prob, 0),
distortion2) <
RDCOST(x->rdmult, x->rddiv,
vp9_cost_bit(skip_prob, 1), total_sse)) {
rate_y + rate_uv + skip_cost0, distortion2) <
RDCOST(x->rdmult, x->rddiv, skip_cost1, total_sse)) {
// Add in the cost of the no skip flag.
rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
rate2 += skip_cost0;
} else {
// FIXME(rbultje) make this work for splitmv also
rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
rate2 += skip_cost1;
distortion2 = total_sse;
assert(total_sse >= 0);
rate2 -= (rate_y + rate_uv);
......@@ -3380,7 +3381,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi,
}
} else {
// Add in the cost of the no skip flag.
rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
rate2 += skip_cost0;
}
// Calculate the final RD estimate for this mode.
......@@ -4152,17 +4153,21 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi,
}
if (!disable_skip) {
const vpx_prob skip_prob = vp9_get_skip_prob(cm, xd);
const int skip_cost0 = vp9_cost_bit(skip_prob, 0);
const int skip_cost1 = vp9_cost_bit(skip_prob, 1);
// Skip is never coded at the segment level for sub8x8 blocks and instead
// always coded in the bitstream at the mode info level.
if (ref_frame != INTRA_FRAME && !xd->lossless) {
if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
if (RDCOST(x->rdmult, x->rddiv,
rate_y + rate_uv + skip_cost0, distortion2) <
RDCOST(x->rdmult, x->rddiv, skip_cost1, total_sse)) {
// Add in the cost of the no skip flag.
rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
rate2 += skip_cost0;
} else {
// FIXME(rbultje) make this work for splitmv also
rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
rate2 += skip_cost1;
distortion2 = total_sse;
assert(total_sse >= 0);
rate2 -= (rate_y + rate_uv);
......@@ -4172,7 +4177,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi,
}
} else {
// Add in the cost of the no skip flag.
rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
rate2 += skip_cost0;
}
// Calculate the final RD estimate for this mode.
......
......@@ -303,11 +303,13 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
FLAG_SKIP_INTRA_LOWVAR;
sf->adaptive_pred_interp_filter = 2;
// Disable reference masking if using spatial scaling since
// pred_mv_sad will not be set (since vp9_mv_pred will not
// be called).
// TODO(marpan/agrange): Fix this condition.
sf->reference_masking = (cpi->oxcf.resize_mode != RESIZE_DYNAMIC &&
// Disable reference masking if using spatial scaling or for dynamic
// resizing (internal or external) since pred_mv_sad will not be set
// (since vp9_mv_pred will not be called).
// TODO(marpan): Fix this condition to allow reference masking for when
// all references have same resolution as source frame.
sf->reference_masking = (cpi->external_resize == 0 &&
cpi->oxcf.resize_mode != RESIZE_DYNAMIC &&
cpi->svc.number_spatial_layers == 1) ? 1 : 0;
sf->disable_filter_search_var_thresh = 50;
......
......@@ -47,12 +47,12 @@ static INLINE int mv_cost(const int_mv mv,
}
static int mvsad_err_cost(const MACROBLOCK *x, const int_mv mv, const MV *ref,
int error_per_bit) {
int sad_per_bit) {
const int_mv diff = pack_int_mv(mv.as_mv.row - ref->row,
mv.as_mv.col - ref->col);
return ROUND_POWER_OF_TWO((unsigned)mv_cost(diff, x->nmvjointsadcost,
x->nmvsadcost) *
error_per_bit, 8);
sad_per_bit, VP9_PROB_COST_SHIFT);
}
/*****************************************************************************
......
......@@ -119,7 +119,9 @@ endif
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.c
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_dct_ssse3.c
ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_frame_scale_ssse3.c
endif
ifeq ($(CONFIG_VP9_TEMPORAL_DENOISING),yes)
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_denoiser_sse2.c
......
......@@ -152,20 +152,29 @@ static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
static INLINE void d135_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above, const uint8_t *left) {
int r, c;
dst[0] = AVG3(left[0], above[-1], above[0]);
for (c = 1; c < bs; c++)
dst[c] = AVG3(above[c - 2], above[c - 1], above[c]);
int i;
#if defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ > 7
// silence a spurious -Warray-bounds warning, possibly related to:
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56273
uint8_t border[69];
#else
uint8_t border[32 + 32 - 1]; // outer border from bottom-left to top-right
#endif
dst[stride] = AVG3(above[-1], left[0], left[1]);
for (r = 2; r < bs; ++r)
dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]);
// dst(bs, bs - 2)[0], i.e., border starting at bottom-left
for (i = 0; i < bs - 2; ++i) {
border[i] = AVG3(left[bs - 3 - i], left[bs - 2 - i], left[bs - 1 - i]);
}
border[bs - 2] = AVG3(above[-1], left[0], left[1]);
border[bs - 1] = AVG3(left[0], above[-1], above[0]);
border[bs - 0] = AVG3(above[-1], above[0], above[1]);
// dst[0][2, size), i.e., remaining top border ascending
for (i = 0; i < bs - 2; ++i) {
border[bs + 1 + i] = AVG3(above[i], above[i + 1], above[i + 2]);
}
dst += stride;
for (r = 1; r < bs; ++r) {
for (c = 1; c < bs; c++)
dst[