Commit 28b1db92 authored by Deb Mukherjee

Refactoring of switchable filter search for speed

Refactors the switchable filter search in the rd loop to
improve encode speed.

Uses a piecewise approximation to a closed form expression to estimate
rd cost for a Laplacian source with a given variance and quantization
step-size.
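
For reference, the piecewise model (coefficients rounded from
model_rd_from_var_lapndz() in the diff below; x = qstep/sigma with
sigma^2 = var/n, and the x256 scaling in the code suggests R is in bits
per sample) is approximately:

  x > 1, with y = e^(-x/2):
    D/sigma^2 ~= 2.0700 y^2 - 2.7643 y + 1.0040
    R         ~= 0.9241 y^2 + 2.7386 y - 0.0052
  x <= 1:
    D/sigma^2 ~= 0.0753 x^2 + 0.0043 x - 0.0004
    R         ~= 1 / (-0.0346 x^2 + 0.3656 x + 0.1627)   for x > 0.125
    R         ~= -1.4423 ln(x) + 1.9446                  otherwise
                 (roughly log2(1/x) + 1.9)
  A negative R is clipped to rate = 0, dist = var.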

About 40% encode time reduction is achieved.

Results (on a Feb 12 baseline) show a slight drop:

derf: -0.019%
yt: +0.010%
std-hd: -0.162%
hd: -0.050%

Change-Id: Ie861badf5bba1e3b1052e29a0ef1b7e256edbcd0
parent 1d60b6bc
......@@ -423,6 +423,14 @@ const int vp9_switchable_interp_map[SWITCHABLE+1] = {-1, -1, 0, 1, -1, -1};
#else
const int vp9_switchable_interp_map[SWITCHABLE+1] = {-1, 0, 1, -1, -1};
#endif
#endif // VP9_SWITCHABLE_FILTERS
// Indicates whether the filter is interpolating or non-interpolating
// Note: currently only EIGHTTAP_SMOOTH is non-interpolating
#if CONFIG_ENABLE_6TAP
const int vp9_is_interpolating_filter[SWITCHABLE + 1] = {1, 0, 1, 1, 1, -1};
#else
const int vp9_is_interpolating_filter[SWITCHABLE + 1] = {0, 1, 1, 1, -1};
#endif
void vp9_entropy_mode_init() {
......
......@@ -104,6 +104,8 @@ extern const INTERPOLATIONFILTERTYPE vp9_switchable_interp
extern const int vp9_switchable_interp_map[SWITCHABLE + 1];
extern const int vp9_is_interpolating_filter[SWITCHABLE + 1];
extern const vp9_tree_index vp9_switchable_interp_tree
[2 * (VP9_SWITCHABLE_FILTERS - 1)];
......
......@@ -3210,6 +3210,49 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
}
static void model_rd_from_var_lapndz(int var, int n, int qstep,
int *rate, int *dist) {
// This function models the rate and distortion for a Laplacian
// source with a given variance when quantized with a uniform quantizer
// with a given step size. The closed form expressions are in:
// Hang and Chen, "Source Model for Transform Video Coder and its
// Application - Part I: Fundamental Theory", IEEE Trans. Circ.
// Sys. for Video Tech., April 1997.
// The function is implemented as a piecewise approximation to the
// exact computation.
// TODO(debargha): Implement the functions by interpolating from a
// look-up table
vp9_clear_system_state();
{
double D, R;
double s2 = (double) var / n;
double s = sqrt(s2);
double x = qstep / s;
if (x > 1.0) {
double y = exp(-x / 2);
double y2 = y * y;
D = 2.069981728764738 * y2 - 2.764286806516079 * y + 1.003956960819275;
R = 0.924056758535089 * y2 + 2.738636469814024 * y - 0.005169662030017;
} else {
double x2 = x * x;
D = 0.075303187668830 * x2 + 0.004296954321112 * x - 0.000413209252807;
if (x > 0.125)
R = 1 / (-0.03459733614226 * x2 + 0.36561675733603 * x +
0.1626989668625);
else
R = -1.442252874826093 * log(x) + 1.944647760719664;
}
if (R < 0) {
*rate = 0;
*dist = var;
} else {
*rate = (n * R * 256 + 0.5);
*dist = (n * D * s2 + 0.5);
}
}
vp9_clear_system_state();
}
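
As a quick feel for how the model behaves, the sketch below copies the
formulas from model_rd_from_var_lapndz() into a standalone program (the
vp9_clear_system_state() calls and the libvpx context are dropped, and the
variance value and the main() driver are just hypothetical inputs) and sweeps
the effective quantizer step for a fixed 16x16 block variance. The modelled
rate falls and the modelled distortion rises toward the source variance as
the step size grows.

/* Standalone sketch: evaluate the piecewise Laplacian RD model for a fixed
 * block variance across a range of quantizer step sizes. Formulas copied
 * from model_rd_from_var_lapndz() above; vp9_clear_system_state() omitted. */
#include <math.h>
#include <stdio.h>

static void model_rd_from_var_lapndz(int var, int n, int qstep,
                                     int *rate, int *dist) {
  double D, R;
  double s2 = (double) var / n;
  double s = sqrt(s2);
  double x = qstep / s;
  if (x > 1.0) {
    double y = exp(-x / 2);
    double y2 = y * y;
    D = 2.069981728764738 * y2 - 2.764286806516079 * y + 1.003956960819275;
    R = 0.924056758535089 * y2 + 2.738636469814024 * y - 0.005169662030017;
  } else {
    double x2 = x * x;
    D = 0.075303187668830 * x2 + 0.004296954321112 * x - 0.000413209252807;
    if (x > 0.125)
      R = 1 / (-0.03459733614226 * x2 + 0.36561675733603 * x +
               0.1626989668625);
    else
      R = -1.442252874826093 * log(x) + 1.944647760719664;
  }
  if (R < 0) {
    *rate = 0;
    *dist = var;
  } else {
    *rate = (int)(n * R * 256 + 0.5);  /* rate scaled by 256, as in the patch */
    *dist = (int)(n * D * s2 + 0.5);
  }
}

int main(void) {
  const int n = 16 * 16;   /* samples in a 16x16 luma block */
  const int var = 64 * n;  /* hypothetical error SSE (per-sample variance 64) */
  int qstep;
  for (qstep = 1; qstep <= 64; qstep *= 2) {
    int rate, dist;
    model_rd_from_var_lapndz(var, n, qstep, &rate, &dist);
    printf("qstep=%2d  rate=%7d (x256)  dist=%7d\n", qstep, rate, dist);
  }
  return 0;
}
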
static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
enum BlockSize block_size,
int *saddone, int near_sadidx[],
......@@ -3223,6 +3266,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int *rate_uv, int *distortion_uv,
int *mode_excluded, int *disable_skip,
int mode_index,
INTERPOLATIONFILTERTYPE *best_filter,
int_mv frame_mv[MB_MODE_COUNT]
[MAX_REF_FRAMES]) {
VP9_COMMON *cm = &cpi->common;
......@@ -3242,6 +3286,13 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int_mv cur_mv[2];
int_mv ref_mv[2];
int64_t this_rd = 0;
unsigned char tmp_ybuf[64 * 64];
unsigned char tmp_ubuf[32 * 32];
unsigned char tmp_vbuf[32 * 32];
int pred_exists = 0;
int interpolating_intpel_seen = 0;
int intpel_mv;
int64_t rd, best_rd = INT64_MAX;
switch (this_mode) {
case NEWMV:
......@@ -3331,11 +3382,6 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
mbmi->mv[i].as_int = cur_mv[i].as_int;
}
if (cpi->common.mcomp_filter_type == SWITCHABLE) {
const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP);
const int m = vp9_switchable_interp_map[mbmi->interp_filter];
*rate2 += SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m];
}
/* We don't include the cost of the second reference here, because there
* are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
......@@ -3360,36 +3406,358 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
#endif
pred_exists = 0;
interpolating_intpel_seen = 0;
// Are all MVs integer pel for Y and UV
intpel_mv = (mbmi->mv[0].as_mv.row & 15) == 0 &&
(mbmi->mv[0].as_mv.col & 15) == 0;
if (is_comp_pred)
intpel_mv &= (mbmi->mv[1].as_mv.row & 15) == 0 &&
(mbmi->mv[1].as_mv.col & 15) == 0;
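// Note: with integer-pel MVs, any interpolating filter should produce the
// same prediction, since such filters pass integer-position samples through
// unchanged; interpolating_intpel_seen below therefore lets later
// interpolating filters reuse the first one's modelled rate/distortion
// instead of rebuilding the predictor.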
// Search for the best switchable filter by checking the variance of the
// prediction error, irrespective of whether the filter will actually be used
if (block_size == BLOCK_64X64) {
vp9_build_inter64x64_predictors_sb(xd,
xd->dst.y_buffer,
xd->dst.u_buffer,
xd->dst.v_buffer,
xd->dst.y_stride,
xd->dst.uv_stride);
int switchable_filter_index, newbest;
int tmp_rate_y_i = 0, tmp_rate_u_i = 0, tmp_rate_v_i = 0;
int tmp_dist_y_i = 0, tmp_dist_u_i = 0, tmp_dist_v_i = 0;
for (switchable_filter_index = 0;
switchable_filter_index < VP9_SWITCHABLE_FILTERS;
++switchable_filter_index) {
int rs = 0;
mbmi->interp_filter = vp9_switchable_interp[switchable_filter_index];
vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
if (cpi->common.mcomp_filter_type == SWITCHABLE) {
const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP);
const int m = vp9_switchable_interp_map[mbmi->interp_filter];
rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m];
}
if (interpolating_intpel_seen && intpel_mv &&
vp9_is_interpolating_filter[mbmi->interp_filter]) {
rd = RDCOST(x->rdmult, x->rddiv,
rs + tmp_rate_y_i + tmp_rate_u_i + tmp_rate_v_i,
tmp_dist_y_i + tmp_dist_u_i + tmp_dist_v_i);
} else {
unsigned int sse, var;
int tmp_rate_y, tmp_rate_u, tmp_rate_v;
int tmp_dist_y, tmp_dist_u, tmp_dist_v;
vp9_build_inter64x64_predictors_sb(xd,
xd->dst.y_buffer,
xd->dst.u_buffer,
xd->dst.v_buffer,
xd->dst.y_stride,
xd->dst.uv_stride);
var = vp9_variance64x64(*(b->base_src), b->src_stride,
xd->dst.y_buffer, xd->dst.y_stride, &sse);
// Note that our transform coefficients are 8 times those of an orthogonal
// transform, so the quantizer step is also scaled by 8. Divide by 8 to get
// the effective quantizer before passing it to the modeling function.
model_rd_from_var_lapndz(var, 64 * 64, xd->block[0].dequant[1] >> 3,
&tmp_rate_y, &tmp_dist_y);
var = vp9_variance32x32(x->src.u_buffer, x->src.uv_stride,
xd->dst.u_buffer, xd->dst.uv_stride, &sse);
model_rd_from_var_lapndz(var, 32 * 32, xd->block[16].dequant[1] >> 3,
&tmp_rate_u, &tmp_dist_u);
var = vp9_variance32x32(x->src.v_buffer, x->src.uv_stride,
xd->dst.v_buffer, xd->dst.uv_stride, &sse);
model_rd_from_var_lapndz(var, 32 * 32, xd->block[20].dequant[1] >> 3,
&tmp_rate_v, &tmp_dist_v);
rd = RDCOST(x->rdmult, x->rddiv,
rs + tmp_rate_y + tmp_rate_u + tmp_rate_v,
tmp_dist_y + tmp_dist_u + tmp_dist_v);
if (!interpolating_intpel_seen && intpel_mv &&
vp9_is_interpolating_filter[mbmi->interp_filter]) {
tmp_rate_y_i = tmp_rate_y;
tmp_rate_u_i = tmp_rate_u;
tmp_rate_v_i = tmp_rate_v;
tmp_dist_y_i = tmp_dist_y;
tmp_dist_u_i = tmp_dist_u;
tmp_dist_v_i = tmp_dist_v;
}
}
newbest = (switchable_filter_index == 0 || rd < best_rd);
if (newbest) {
best_rd = rd;
*best_filter = mbmi->interp_filter;
}
if ((cm->mcomp_filter_type == SWITCHABLE && newbest) ||
(cm->mcomp_filter_type != SWITCHABLE &&
cm->mcomp_filter_type == mbmi->interp_filter)) {
int i;
for (i = 0; i < 64; ++i)
vpx_memcpy(tmp_ybuf + i * 64,
xd->dst.y_buffer + i * xd->dst.y_stride,
sizeof(unsigned char) * 64);
for (i = 0; i < 32; ++i)
vpx_memcpy(tmp_ubuf + i * 32,
xd->dst.u_buffer + i * xd->dst.uv_stride,
sizeof(unsigned char) * 32);
for (i = 0; i < 32; ++i)
vpx_memcpy(tmp_vbuf + i * 32,
xd->dst.v_buffer + i * xd->dst.uv_stride,
sizeof(unsigned char) * 32);
pred_exists = 1;
}
interpolating_intpel_seen |=
intpel_mv && vp9_is_interpolating_filter[mbmi->interp_filter];
}
} else if (block_size == BLOCK_32X32) {
vp9_build_inter32x32_predictors_sb(xd,
xd->dst.y_buffer,
xd->dst.u_buffer,
xd->dst.v_buffer,
xd->dst.y_stride,
xd->dst.uv_stride);
int switchable_filter_index, newbest;
int tmp_rate_y_i = 0, tmp_rate_u_i = 0, tmp_rate_v_i = 0;
int tmp_dist_y_i = 0, tmp_dist_u_i = 0, tmp_dist_v_i = 0;
for (switchable_filter_index = 0;
switchable_filter_index < VP9_SWITCHABLE_FILTERS;
++switchable_filter_index) {
int rs = 0;
mbmi->interp_filter = vp9_switchable_interp[switchable_filter_index];
vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
if (cpi->common.mcomp_filter_type == SWITCHABLE) {
const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP);
const int m = vp9_switchable_interp_map[mbmi->interp_filter];
rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m];
}
if (interpolating_intpel_seen && intpel_mv &&
vp9_is_interpolating_filter[mbmi->interp_filter]) {
rd = RDCOST(x->rdmult, x->rddiv,
rs + tmp_rate_y_i + tmp_rate_u_i + tmp_rate_v_i,
tmp_dist_y_i + tmp_dist_u_i + tmp_dist_v_i);
} else {
unsigned int sse, var;
int tmp_rate_y, tmp_rate_u, tmp_rate_v;
int tmp_dist_y, tmp_dist_u, tmp_dist_v;
vp9_build_inter32x32_predictors_sb(xd,
xd->dst.y_buffer,
xd->dst.u_buffer,
xd->dst.v_buffer,
xd->dst.y_stride,
xd->dst.uv_stride);
var = vp9_variance32x32(*(b->base_src), b->src_stride,
xd->dst.y_buffer, xd->dst.y_stride, &sse);
// Note that our transform coefficients are 8 times those of an orthogonal
// transform, so the quantizer step is also scaled by 8. Divide by 8 to get
// the effective quantizer before passing it to the modeling function.
model_rd_from_var_lapndz(var, 32 * 32, xd->block[0].dequant[1] >> 3,
&tmp_rate_y, &tmp_dist_y);
var = vp9_variance16x16(x->src.u_buffer, x->src.uv_stride,
xd->dst.u_buffer, xd->dst.uv_stride, &sse);
model_rd_from_var_lapndz(var, 16 * 16, xd->block[16].dequant[1] >> 3,
&tmp_rate_u, &tmp_dist_u);
var = vp9_variance16x16(x->src.v_buffer, x->src.uv_stride,
xd->dst.v_buffer, xd->dst.uv_stride, &sse);
model_rd_from_var_lapndz(var, 16 * 16, xd->block[20].dequant[1] >> 3,
&tmp_rate_v, &tmp_dist_v);
rd = RDCOST(x->rdmult, x->rddiv,
rs + tmp_rate_y + tmp_rate_u + tmp_rate_v,
tmp_dist_y + tmp_dist_u + tmp_dist_v);
if (!interpolating_intpel_seen && intpel_mv &&
vp9_is_interpolating_filter[mbmi->interp_filter]) {
tmp_rate_y_i = tmp_rate_y;
tmp_rate_u_i = tmp_rate_u;
tmp_rate_v_i = tmp_rate_v;
tmp_dist_y_i = tmp_dist_y;
tmp_dist_u_i = tmp_dist_u;
tmp_dist_v_i = tmp_dist_v;
}
}
newbest = (switchable_filter_index == 0 || rd < best_rd);
if (newbest) {
best_rd = rd;
*best_filter = mbmi->interp_filter;
}
if ((cm->mcomp_filter_type == SWITCHABLE && newbest) ||
(cm->mcomp_filter_type != SWITCHABLE &&
cm->mcomp_filter_type == mbmi->interp_filter)) {
int i;
for (i = 0; i < 32; ++i)
vpx_memcpy(tmp_ybuf + i * 64,
xd->dst.y_buffer + i * xd->dst.y_stride,
sizeof(unsigned char) * 32);
for (i = 0; i < 16; ++i)
vpx_memcpy(tmp_ubuf + i * 32,
xd->dst.u_buffer + i * xd->dst.uv_stride,
sizeof(unsigned char) * 16);
for (i = 0; i < 16; ++i)
vpx_memcpy(tmp_vbuf + i * 32,
xd->dst.v_buffer + i * xd->dst.uv_stride,
sizeof(unsigned char) * 16);
pred_exists = 1;
}
interpolating_intpel_seen |=
intpel_mv && vp9_is_interpolating_filter[mbmi->interp_filter];
}
} else {
int switchable_filter_index, newbest;
int tmp_rate_y_i = 0, tmp_rate_u_i = 0, tmp_rate_v_i = 0;
int tmp_dist_y_i = 0, tmp_dist_u_i = 0, tmp_dist_v_i = 0;
assert(block_size == BLOCK_16X16);
vp9_build_1st_inter16x16_predictors_mby(xd, xd->predictor, 16, 0);
if (is_comp_pred)
vp9_build_2nd_inter16x16_predictors_mby(xd, xd->predictor, 16);
for (switchable_filter_index = 0;
switchable_filter_index < VP9_SWITCHABLE_FILTERS;
++switchable_filter_index) {
int rs = 0;
mbmi->interp_filter = vp9_switchable_interp[switchable_filter_index];
vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
if (cpi->common.mcomp_filter_type == SWITCHABLE) {
const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP);
const int m = vp9_switchable_interp_map[mbmi->interp_filter];
rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m];
}
if (interpolating_intpel_seen && intpel_mv &&
vp9_is_interpolating_filter[mbmi->interp_filter]) {
rd = RDCOST(x->rdmult, x->rddiv,
rs + tmp_rate_y_i + tmp_rate_u_i + tmp_rate_v_i,
tmp_dist_y_i + tmp_dist_u_i + tmp_dist_v_i);
} else {
unsigned int sse, var;
int tmp_rate_y, tmp_rate_u, tmp_rate_v;
int tmp_dist_y, tmp_dist_u, tmp_dist_v;
vp9_build_1st_inter16x16_predictors_mby(xd, xd->predictor, 16, 0);
if (is_comp_pred)
vp9_build_2nd_inter16x16_predictors_mby(xd, xd->predictor, 16);
#if CONFIG_COMP_INTERINTRA_PRED
if (is_comp_interintra_pred) {
vp9_build_interintra_16x16_predictors_mby(xd, xd->predictor, 16);
}
#endif
vp9_build_1st_inter16x16_predictors_mbuv(xd, xd->predictor + 256,
xd->predictor + 320, 8);
if (is_comp_pred)
vp9_build_2nd_inter16x16_predictors_mbuv(xd, xd->predictor + 256,
xd->predictor + 320, 8);
#if CONFIG_COMP_INTERINTRA_PRED
if (is_comp_interintra_pred) {
vp9_build_interintra_16x16_predictors_mbuv(xd, xd->predictor + 256,
xd->predictor + 320, 8);
}
#endif
var = vp9_variance16x16(*(b->base_src), b->src_stride,
xd->predictor, 16, &sse);
// Note that our transform coefficients are 8 times those of an orthogonal
// transform, so the quantizer step is also scaled by 8. Divide by 8 to get
// the effective quantizer before passing it to the modeling function.
model_rd_from_var_lapndz(var, 16 * 16, xd->block[0].dequant[1] >> 3,
&tmp_rate_y, &tmp_dist_y);
var = vp9_variance8x8(x->src.u_buffer, x->src.uv_stride,
&xd->predictor[256], 8, &sse);
model_rd_from_var_lapndz(var, 8 * 8, xd->block[16].dequant[1] >> 3,
&tmp_rate_u, &tmp_dist_u);
var = vp9_variance8x8(x->src.v_buffer, x->src.uv_stride,
&xd->predictor[320], 8, &sse);
model_rd_from_var_lapndz(var, 8 * 8, xd->block[20].dequant[1] >> 3,
&tmp_rate_v, &tmp_dist_v);
rd = RDCOST(x->rdmult, x->rddiv,
rs + tmp_rate_y + tmp_rate_u + tmp_rate_v,
tmp_dist_y + tmp_dist_u + tmp_dist_v);
if (!interpolating_intpel_seen && intpel_mv &&
vp9_is_interpolating_filter[mbmi->interp_filter]) {
tmp_rate_y_i = tmp_rate_y;
tmp_rate_u_i = tmp_rate_u;
tmp_rate_v_i = tmp_rate_v;
tmp_dist_y_i = tmp_dist_y;
tmp_dist_u_i = tmp_dist_u;
tmp_dist_v_i = tmp_dist_v;
}
}
newbest = (switchable_filter_index == 0 || rd < best_rd);
if (newbest) {
best_rd = rd;
*best_filter = mbmi->interp_filter;
}
if ((cm->mcomp_filter_type == SWITCHABLE && newbest) ||
(cm->mcomp_filter_type != SWITCHABLE &&
cm->mcomp_filter_type == mbmi->interp_filter)) {
vpx_memcpy(tmp_ybuf, xd->predictor, sizeof(unsigned char) * 256);
vpx_memcpy(tmp_ubuf, xd->predictor + 256, sizeof(unsigned char) * 64);
vpx_memcpy(tmp_vbuf, xd->predictor + 320, sizeof(unsigned char) * 64);
pred_exists = 1;
}
interpolating_intpel_seen |=
intpel_mv && vp9_is_interpolating_filter[mbmi->interp_filter];
}
}
// Set the appropriate filter
if (cm->mcomp_filter_type != SWITCHABLE)
mbmi->interp_filter = cm->mcomp_filter_type;
else
mbmi->interp_filter = *best_filter;
vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
if (pred_exists) {
if (block_size == BLOCK_64X64) {
for (i = 0; i < 64; ++i)
vpx_memcpy(xd->dst.y_buffer + i * xd->dst.y_stride, tmp_ybuf + i * 64,
sizeof(unsigned char) * 64);
for (i = 0; i < 32; ++i)
vpx_memcpy(xd->dst.u_buffer + i * xd->dst.uv_stride, tmp_ubuf + i * 32,
sizeof(unsigned char) * 32);
for (i = 0; i < 32; ++i)
vpx_memcpy(xd->dst.v_buffer + i * xd->dst.uv_stride, tmp_vbuf + i * 32,
sizeof(unsigned char) * 32);
} else if (block_size == BLOCK_32X32) {
for (i = 0; i < 32; ++i)
vpx_memcpy(xd->dst.y_buffer + i * xd->dst.y_stride, tmp_ybuf + i * 64,
sizeof(unsigned char) * 32);
for (i = 0; i < 16; ++i)
vpx_memcpy(xd->dst.u_buffer + i * xd->dst.uv_stride, tmp_ubuf + i * 32,
sizeof(unsigned char) * 16);
for (i = 0; i < 16; ++i)
vpx_memcpy(xd->dst.v_buffer + i * xd->dst.uv_stride, tmp_vbuf + i * 32,
sizeof(unsigned char) * 16);
} else {
vpx_memcpy(xd->predictor, tmp_ybuf, sizeof(unsigned char) * 256);
vpx_memcpy(xd->predictor + 256, tmp_ubuf, sizeof(unsigned char) * 64);
vpx_memcpy(xd->predictor + 320, tmp_vbuf, sizeof(unsigned char) * 64);
}
} else {
// Handles the special case when a filter that is not in the
// switchable list (e.g. bilinear, 6-tap) is indicated at the frame level
if (block_size == BLOCK_64X64) {
vp9_build_inter64x64_predictors_sb(xd,
xd->dst.y_buffer,
xd->dst.u_buffer,
xd->dst.v_buffer,
xd->dst.y_stride,
xd->dst.uv_stride);
} else if (block_size == BLOCK_32X32) {
vp9_build_inter32x32_predictors_sb(xd,
xd->dst.y_buffer,
xd->dst.u_buffer,
xd->dst.v_buffer,
xd->dst.y_stride,
xd->dst.uv_stride);
} else {
vp9_build_1st_inter16x16_predictors_mby(xd, xd->predictor, 16, 0);
if (is_comp_pred)
vp9_build_2nd_inter16x16_predictors_mby(xd, xd->predictor, 16);
#if CONFIG_COMP_INTERINTRA_PRED
if (is_comp_interintra_pred) {
vp9_build_interintra_16x16_predictors_mby(xd, xd->predictor, 16);
}
#endif
vp9_build_1st_inter16x16_predictors_mbuv(xd, &xd->predictor[256],
&xd->predictor[320], 8);
if (is_comp_pred)
vp9_build_2nd_inter16x16_predictors_mbuv(xd, &xd->predictor[256],
&xd->predictor[320], 8);
#if CONFIG_COMP_INTERINTRA_PRED
if (is_comp_interintra_pred) {
vp9_build_interintra_16x16_predictors_mbuv(xd, &xd->predictor[256],
&xd->predictor[320], 8);
}
#endif
}
}
if (cpi->common.mcomp_filter_type == SWITCHABLE) {
const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP);
const int m = vp9_switchable_interp_map[mbmi->interp_filter];
*rate2 += SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m];
}
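// The switchable-filter signalling cost is added to rate2 once here, for the
// filter actually selected above, rather than before the filter search.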
if (cpi->active_map_enabled && x->active_ptr[0] == 0)
x->skip = 1;
else if (x->encode_breakout) {
unsigned int sse, var;
unsigned int var, sse;
int threshold = (xd->block[0].dequant[1]
* xd->block[0].dequant[1] >> 4);
......@@ -3411,7 +3779,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if ((int)sse < threshold) {
unsigned int q2dc = xd->block[24].dequant[0];
/* If there is no codeable 2nd order dc
or a very small uniform pixel change */
if ((sse - var < q2dc * q2dc >> 4) ||
(sse / 2 > var && sse - var < 64)) {
// Check u and v to make sure skip is ok
......@@ -3452,17 +3820,6 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
}
if (!(*mode_excluded)) {
if (is_comp_pred) {
*mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY);
} else {
*mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY);
}
#if CONFIG_COMP_INTERINTRA_PRED
if (is_comp_interintra_pred && !cm->use_interintra) *mode_excluded = 1;
#endif
}
if (!x->skip) {
if (block_size == BLOCK_64X64) {
int skippable_y, skippable_uv;
......@@ -3496,23 +3853,23 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
*skippable = skippable_y && skippable_uv;
} else {
assert(block_size == BLOCK_16X16);
vp9_build_1st_inter16x16_predictors_mbuv(xd, &xd->predictor[256],
&xd->predictor[320], 8);
if (is_comp_pred)
vp9_build_2nd_inter16x16_predictors_mbuv(xd, &xd->predictor[256],
&xd->predictor[320], 8);
#if CONFIG_COMP_INTERINTRA_PRED
if (is_comp_interintra_pred) {
vp9_build_interintra_16x16_predictors_mbuv(xd, &xd->predictor[256],
&xd->predictor[320], 8);
}
#endif
inter_mode_cost(cpi, x, rate2, distortion,
rate_y, distortion_y, rate_uv, distortion_uv,
skippable, txfm_cache);
}
}
if (!(*mode_excluded)) {
if (is_comp_pred) {
*mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY);
} else {
*mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY);
}
#if CONFIG_COMP_INTERINTRA_PRED
if (is_comp_interintra_pred && !cm->use_interintra) *mode_excluded = 1;
#endif
}
return this_rd; // if 0, this will be re-calculated by caller
}
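
The 64x64, 32x32 and 16x16 branches of the new filter search above repeat the
same per-filter loop: rate the filter choice, build (or reuse) the prediction,
model the RD cost of the prediction error, and keep the best. Purely as an
illustration of that shared structure (not as part of this change), the core
loop could be expressed with a block-size-agnostic helper along the lines of
the sketch below; filter_search_hooks, eval_filter, save_pred and the
simplified rd_cost() combiner are hypothetical stand-ins, and the integer-pel
caching is omitted for brevity.

/* Self-contained sketch of the shared per-filter search structure. All names
 * here are hypothetical stand-ins, not libvpx APIs. */
#include <limits.h>

enum { N_SWITCHABLE_FILTERS = 3 };  /* stand-in for VP9_SWITCHABLE_FILTERS */

typedef struct {
  /* Build the prediction with the given filter and return its modelled rate
   * (including the filter signalling cost) and distortion. */
  void (*eval_filter)(void *ctx, int filter, int *rate, int *dist);
  /* Remember the current prediction as the best one found so far. */
  void (*save_pred)(void *ctx);
  long long rdmult;  /* rate weight, in the spirit of x->rdmult */
  int rddiv;         /* distortion scaling, in the spirit of x->rddiv */
} filter_search_hooks;

/* Simplified stand-in for the encoder's RDCOST() combination. */
static long long rd_cost(long long rdmult, int rddiv, int rate, int dist) {
  return (((long long)rate * rdmult) >> 8) + ((long long)dist << rddiv);
}

static int search_best_filter(void *ctx, const filter_search_hooks *h) {
  int best_filter = 0;
  long long best_rd = LLONG_MAX;
  int filter;
  for (filter = 0; filter < N_SWITCHABLE_FILTERS; ++filter) {
    int rate, dist;
    long long rd;
    h->eval_filter(ctx, filter, &rate, &dist);
    rd = rd_cost(h->rdmult, h->rddiv, rate, dist);
    if (rd < best_rd) {
      best_rd = rd;
      best_filter = filter;
      h->save_pred(ctx);  /* analogous to the tmp_*buf copies above */
    }
  }
  return best_filter;
}
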
......@@ -3521,7 +3878,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int *returnrate, int *returndistortion,
int64_t *returnintra) {
static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
VP9_ALT_FLAG };
VP9_COMMON *cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
union b_mode_info best_bmodes[16];
......@@ -3551,6 +3908,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
#endif
int64_t best_overall_rd = INT64_MAX;
INTERPOLATIONFILTERTYPE best_filter = SWITCHABLE;
INTERPOLATIONFILTERTYPE tmp_best_filter = SWITCHABLE;
int uv_intra_rate, uv_intra_distortion, uv_intra_rate_tokenonly;
int uv_intra_skippable = 0;
int uv_intra_rate_8x8 = 0, uv_intra_distortion_8x8 = 0, uv_intra_rate_tokenonly_8x8 = 0;
......@@ -3558,7 +3916,6 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int rate_y, UNINITIALIZED_IS_SAFE(rate_uv);
int distortion_uv = INT_MAX;
int64_t best_yrd = INT64_MAX;
int switchable_filter_index = 0;
MB_PREDICTION_MODE uv_intra_mode;
MB_PREDICTION_MODE uv_intra_mode_8x8 = 0;
......@@ -3645,8 +4002,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
// that depend on the current prediction etc.
estimate_ref_frame_costs(cpi, segment_id, ref_costs);
for (mode_index = 0; mode_index < MAX_MODES;
mode_index += (!switchable_filter_index)) {
for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
int64_t this_rd = INT64_MAX;
int disable_skip = 0, skippable = 0;
int other_cost = 0;
......@@ -3671,19 +4027,8 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
mbmi->ref_frame = vp9_mode_order[mode_index].ref_frame;
mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame;
// Evaluate all sub-pel filters irrespective of whether we can use
// them for this frame.
if (this_mode >= NEARESTMV && this_mode <= SPLITMV) {
mbmi->interp_filter =
vp9_switchable_interp[switchable_filter_index++];