Commit b6b91432 authored by Jingning Han
Browse files

Dual buffer encoding for intra modes

The overall change (using the dual buffer scheme for superblocks of both
inter and intra modes) reduces the runtime at speed 2:
bluesky_1080p at 6000kbps:   263553ms -> 257441ms
riverbed_1080p at 8000kbps:  233230ms -> 225308ms.

Change-Id: Idf8d70f768a4b0d97b2a8506372c57b7b4022119
parent 8ce0967d
...@@ -27,16 +27,16 @@ typedef struct { ...@@ -27,16 +27,16 @@ typedef struct {
typedef struct { typedef struct {
MODE_INFO mic; MODE_INFO mic;
uint8_t *zcoeff_blk; uint8_t *zcoeff_blk;
int16_t *coeff[MAX_MB_PLANE][2]; int16_t *coeff[MAX_MB_PLANE][3];
int16_t *qcoeff[MAX_MB_PLANE][2]; int16_t *qcoeff[MAX_MB_PLANE][3];
int16_t *dqcoeff[MAX_MB_PLANE][2]; int16_t *dqcoeff[MAX_MB_PLANE][3];
uint16_t *eobs[MAX_MB_PLANE][2]; uint16_t *eobs[MAX_MB_PLANE][3];
// dual buffer pointers, 0: in use, 1: best in store // dual buffer pointers, 0: in use, 1: best in store
int16_t *coeff_pbuf[MAX_MB_PLANE][2]; int16_t *coeff_pbuf[MAX_MB_PLANE][3];
int16_t *qcoeff_pbuf[MAX_MB_PLANE][2]; int16_t *qcoeff_pbuf[MAX_MB_PLANE][3];
int16_t *dqcoeff_pbuf[MAX_MB_PLANE][2]; int16_t *dqcoeff_pbuf[MAX_MB_PLANE][3];
uint16_t *eobs_pbuf[MAX_MB_PLANE][2]; uint16_t *eobs_pbuf[MAX_MB_PLANE][3];
int is_coded; int is_coded;
int num_4x4_blk; int num_4x4_blk;
...@@ -94,6 +94,7 @@ struct macroblock { ...@@ -94,6 +94,7 @@ struct macroblock {
MACROBLOCKD e_mbd; MACROBLOCKD e_mbd;
int skip_block; int skip_block;
int select_txfm_size; int select_txfm_size;
int skip_recode;
int skip_optimize; int skip_optimize;
int q_index; int q_index;
......
...@@ -377,6 +377,7 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, ...@@ -377,6 +377,7 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
const int mis = cm->mode_info_stride; const int mis = cm->mode_info_stride;
const int mi_width = num_8x8_blocks_wide_lookup[bsize]; const int mi_width = num_8x8_blocks_wide_lookup[bsize];
const int mi_height = num_8x8_blocks_high_lookup[bsize]; const int mi_height = num_8x8_blocks_high_lookup[bsize];
int max_plane;
assert(mi->mbmi.mode < MB_MODE_COUNT); assert(mi->mbmi.mode < MB_MODE_COUNT);
assert(mi->mbmi.ref_frame[0] < MAX_REF_FRAMES); assert(mi->mbmi.ref_frame[0] < MAX_REF_FRAMES);
...@@ -385,13 +386,21 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, ...@@ -385,13 +386,21 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
*mi_addr = *mi; *mi_addr = *mi;
for (i = 0; i < MAX_MB_PLANE; ++i) { max_plane = is_inter_block(mbmi) ? MAX_MB_PLANE : 1;
for (i = 0; i < max_plane; ++i) {
p[i].coeff = ctx->coeff_pbuf[i][1]; p[i].coeff = ctx->coeff_pbuf[i][1];
pd[i].qcoeff = ctx->qcoeff_pbuf[i][1]; pd[i].qcoeff = ctx->qcoeff_pbuf[i][1];
pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1]; pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
pd[i].eobs = ctx->eobs_pbuf[i][1]; pd[i].eobs = ctx->eobs_pbuf[i][1];
} }
for (i = max_plane; i < MAX_MB_PLANE; ++i) {
p[i].coeff = ctx->coeff_pbuf[i][2];
pd[i].qcoeff = ctx->qcoeff_pbuf[i][2];
pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
pd[i].eobs = ctx->eobs_pbuf[i][2];
}
// Restore the coding context of the MB to that that was in place // Restore the coding context of the MB to that that was in place
// when the mode was picked for it // when the mode was picked for it
for (y = 0; y < mi_height; y++) for (y = 0; y < mi_height; y++)
...@@ -619,6 +628,7 @@ static void pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, ...@@ -619,6 +628,7 @@ static void pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
pd[i].eobs = ctx->eobs_pbuf[i][0]; pd[i].eobs = ctx->eobs_pbuf[i][0];
} }
ctx->is_coded = 0; ctx->is_coded = 0;
x->skip_recode = 0;
// Set to zero to make sure we do not use the previous encoded frame stats // Set to zero to make sure we do not use the previous encoded frame stats
xd->mi_8x8[0]->mbmi.skip_coeff = 0; xd->mi_8x8[0]->mbmi.skip_coeff = 0;
...@@ -2406,6 +2416,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, ...@@ -2406,6 +2416,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
const int mis = cm->mode_info_stride; const int mis = cm->mode_info_stride;
const int mi_width = num_8x8_blocks_wide_lookup[bsize]; const int mi_width = num_8x8_blocks_wide_lookup[bsize];
const int mi_height = num_8x8_blocks_high_lookup[bsize]; const int mi_height = num_8x8_blocks_high_lookup[bsize];
x->skip_recode = !x->select_txfm_size && mbmi->sb_type >= BLOCK_8X8;
x->skip_optimize = ctx->is_coded; x->skip_optimize = ctx->is_coded;
ctx->is_coded = 1; ctx->is_coded = 1;
x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct; x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct;
......
...@@ -432,19 +432,18 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, ...@@ -432,19 +432,18 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
// TODO(jingning): per transformed block zero forcing only enabled for // TODO(jingning): per transformed block zero forcing only enabled for
// luma component. will integrate chroma components as well. // luma component. will integrate chroma components as well.
if (x->zcoeff_blk[tx_size][block] && plane == 0) { if (x->zcoeff_blk[tx_size][block] && plane == 0) {
int i, k; int i, j;
pd->eobs[block] = 0; pd->eobs[block] = 0;
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &k); txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
ctx->ta[plane][i] = 0; ctx->ta[plane][i] = 0;
ctx->tl[plane][k] = 0; ctx->tl[plane][j] = 0;
return; return;
} }
if (x->select_txfm_size || xd->mi_8x8[0]->mbmi.sb_type < BLOCK_8X8) if (!x->skip_recode)
vp9_xform_quant(plane, block, plane_bsize, tx_size, arg); vp9_xform_quant(plane, block, plane_bsize, tx_size, arg);
if (x->optimize && (x->select_txfm_size || if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
xd->mi_8x8[0]->mbmi.sb_type < BLOCK_8X8|| !x->skip_optimize)) {
vp9_optimize_b(plane, block, plane_bsize, tx_size, x, ctx); vp9_optimize_b(plane, block, plane_bsize, tx_size, x, ctx);
} else { } else {
int i, k; int i, k;
...@@ -515,10 +514,10 @@ void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { ...@@ -515,10 +514,10 @@ void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
struct optimize_ctx ctx; struct optimize_ctx ctx;
struct encode_b_args arg = {x, &ctx}; struct encode_b_args arg = {x, &ctx};
if (x->select_txfm_size || xd->mi_8x8[0]->mbmi.sb_type < BLOCK_8X8) if (!x->skip_recode)
vp9_subtract_sb(x, bsize); vp9_subtract_sb(x, bsize);
if (x->optimize) { if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
int i; int i;
for (i = 0; i < MAX_MB_PLANE; ++i) for (i = 0; i < MAX_MB_PLANE; ++i)
optimize_init_b(i, bsize, &arg); optimize_init_b(i, bsize, &arg);
...@@ -563,19 +562,22 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, ...@@ -563,19 +562,22 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
xoff = 32 * (block & twmask); xoff = 32 * (block & twmask);
yoff = 32 * (block >> twl); yoff = 32 * (block >> twl);
dst = pd->dst.buf + yoff * pd->dst.stride + xoff; dst = pd->dst.buf + yoff * pd->dst.stride + xoff;
src = p->src.buf + yoff * p->src.stride + xoff;
src_diff = p->src_diff + 4 * bw * yoff + xoff;
vp9_predict_intra_block(xd, block, bwl, TX_32X32, mode, vp9_predict_intra_block(xd, block, bwl, TX_32X32, mode,
dst, pd->dst.stride, dst, pd->dst.stride); dst, pd->dst.stride, dst, pd->dst.stride);
vp9_subtract_block(32, 32, src_diff, bw * 4,
src, p->src.stride, dst, pd->dst.stride); if (!x->skip_recode) {
if (x->use_lp32x32fdct) src = p->src.buf + yoff * p->src.stride + xoff;
vp9_fdct32x32_rd(src_diff, coeff, bw * 4); src_diff = p->src_diff + 4 * bw * yoff + xoff;
else vp9_subtract_block(32, 32, src_diff, bw * 4,
vp9_fdct32x32(src_diff, coeff, bw * 4); src, p->src.stride, dst, pd->dst.stride);
vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, if (x->use_lp32x32fdct)
p->quant, p->quant_shift, qcoeff, dqcoeff, vp9_fdct32x32_rd(src_diff, coeff, bw * 4);
pd->dequant, p->zbin_extra, eob, scan, iscan); else
vp9_fdct32x32(src_diff, coeff, bw * 4);
vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan, iscan);
}
if (!x->skip_encode && *eob) if (!x->skip_encode && *eob)
vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, *eob); vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, *eob);
break; break;
...@@ -588,16 +590,18 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, ...@@ -588,16 +590,18 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
xoff = 16 * (block & twmask); xoff = 16 * (block & twmask);
yoff = 16 * (block >> twl); yoff = 16 * (block >> twl);
dst = pd->dst.buf + yoff * pd->dst.stride + xoff; dst = pd->dst.buf + yoff * pd->dst.stride + xoff;
src = p->src.buf + yoff * p->src.stride + xoff;
src_diff = p->src_diff + 4 * bw * yoff + xoff;
vp9_predict_intra_block(xd, block, bwl, TX_16X16, mode, vp9_predict_intra_block(xd, block, bwl, TX_16X16, mode,
dst, pd->dst.stride, dst, pd->dst.stride); dst, pd->dst.stride, dst, pd->dst.stride);
vp9_subtract_block(16, 16, src_diff, bw * 4, if (!x->skip_recode) {
src, p->src.stride, dst, pd->dst.stride); src = p->src.buf + yoff * p->src.stride + xoff;
vp9_fht16x16(tx_type, src_diff, coeff, bw * 4); src_diff = p->src_diff + 4 * bw * yoff + xoff;
vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, vp9_subtract_block(16, 16, src_diff, bw * 4,
p->quant, p->quant_shift, qcoeff, dqcoeff, src, p->src.stride, dst, pd->dst.stride);
pd->dequant, p->zbin_extra, eob, scan, iscan); vp9_fht16x16(tx_type, src_diff, coeff, bw * 4);
vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan, iscan);
}
if (!x->skip_encode && *eob) if (!x->skip_encode && *eob)
vp9_iht16x16_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob); vp9_iht16x16_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob);
break; break;
...@@ -610,16 +614,18 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, ...@@ -610,16 +614,18 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
xoff = 8 * (block & twmask); xoff = 8 * (block & twmask);
yoff = 8 * (block >> twl); yoff = 8 * (block >> twl);
dst = pd->dst.buf + yoff * pd->dst.stride + xoff; dst = pd->dst.buf + yoff * pd->dst.stride + xoff;
src = p->src.buf + yoff * p->src.stride + xoff;
src_diff = p->src_diff + 4 * bw * yoff + xoff;
vp9_predict_intra_block(xd, block, bwl, TX_8X8, mode, vp9_predict_intra_block(xd, block, bwl, TX_8X8, mode,
dst, pd->dst.stride, dst, pd->dst.stride); dst, pd->dst.stride, dst, pd->dst.stride);
vp9_subtract_block(8, 8, src_diff, bw * 4, if (!x->skip_recode) {
src, p->src.stride, dst, pd->dst.stride); src = p->src.buf + yoff * p->src.stride + xoff;
vp9_fht8x8(tx_type, src_diff, coeff, bw * 4); src_diff = p->src_diff + 4 * bw * yoff + xoff;
vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, vp9_subtract_block(8, 8, src_diff, bw * 4,
p->quant_shift, qcoeff, dqcoeff, src, p->src.stride, dst, pd->dst.stride);
pd->dequant, p->zbin_extra, eob, scan, iscan); vp9_fht8x8(tx_type, src_diff, coeff, bw * 4);
vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan, iscan);
}
if (!x->skip_encode && *eob) if (!x->skip_encode && *eob)
vp9_iht8x8_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob); vp9_iht8x8_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob);
break; break;
...@@ -635,19 +641,23 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, ...@@ -635,19 +641,23 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
xoff = 4 * (block & twmask); xoff = 4 * (block & twmask);
yoff = 4 * (block >> twl); yoff = 4 * (block >> twl);
dst = pd->dst.buf + yoff * pd->dst.stride + xoff; dst = pd->dst.buf + yoff * pd->dst.stride + xoff;
src = p->src.buf + yoff * p->src.stride + xoff;
src_diff = p->src_diff + 4 * bw * yoff + xoff;
vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode, vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode,
dst, pd->dst.stride, dst, pd->dst.stride); dst, pd->dst.stride, dst, pd->dst.stride);
vp9_subtract_block(4, 4, src_diff, bw * 4,
src, p->src.stride, dst, pd->dst.stride); if (!x->skip_recode) {
if (tx_type != DCT_DCT) src = p->src.buf + yoff * p->src.stride + xoff;
vp9_short_fht4x4(src_diff, coeff, bw * 4, tx_type); src_diff = p->src_diff + 4 * bw * yoff + xoff;
else vp9_subtract_block(4, 4, src_diff, bw * 4,
x->fwd_txm4x4(src_diff, coeff, bw * 4); src, p->src.stride, dst, pd->dst.stride);
vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, if (tx_type != DCT_DCT)
p->quant_shift, qcoeff, dqcoeff, vp9_short_fht4x4(src_diff, coeff, bw * 4, tx_type);
pd->dequant, p->zbin_extra, eob, scan, iscan); else
x->fwd_txm4x4(src_diff, coeff, bw * 4);
vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan, iscan);
}
if (!x->skip_encode && *eob) { if (!x->skip_encode && *eob) {
if (tx_type == DCT_DCT) if (tx_type == DCT_DCT)
// this is like vp9_short_idct4x4 but has a special case around eob<=1 // this is like vp9_short_idct4x4 but has a special case around eob<=1
......
...@@ -535,6 +535,7 @@ void vp9_first_pass(VP9_COMP *cpi) { ...@@ -535,6 +535,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1]; pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
pd[i].eobs = ctx->eobs_pbuf[i][1]; pd[i].eobs = ctx->eobs_pbuf[i][1];
} }
x->skip_recode = 0;
// Initialise the MV cost table to the defaults // Initialise the MV cost table to the defaults
......
...@@ -1452,7 +1452,7 @@ static void alloc_mode_context(VP9_COMMON *cm, int num_4x4_blk, ...@@ -1452,7 +1452,7 @@ static void alloc_mode_context(VP9_COMMON *cm, int num_4x4_blk,
CHECK_MEM_ERROR(cm, ctx->zcoeff_blk, CHECK_MEM_ERROR(cm, ctx->zcoeff_blk,
vpx_calloc(num_4x4_blk, sizeof(uint8_t))); vpx_calloc(num_4x4_blk, sizeof(uint8_t)));
for (i = 0; i < MAX_MB_PLANE; ++i) { for (i = 0; i < MAX_MB_PLANE; ++i) {
for (k = 0; k < 2; ++k) { for (k = 0; k < 3; ++k) {
CHECK_MEM_ERROR(cm, ctx->coeff[i][k], CHECK_MEM_ERROR(cm, ctx->coeff[i][k],
vpx_memalign(16, num_pix * sizeof(int16_t))); vpx_memalign(16, num_pix * sizeof(int16_t)));
CHECK_MEM_ERROR(cm, ctx->qcoeff[i][k], CHECK_MEM_ERROR(cm, ctx->qcoeff[i][k],
...@@ -1474,7 +1474,7 @@ static void free_mode_context(PICK_MODE_CONTEXT *ctx) { ...@@ -1474,7 +1474,7 @@ static void free_mode_context(PICK_MODE_CONTEXT *ctx) {
vpx_free(ctx->zcoeff_blk); vpx_free(ctx->zcoeff_blk);
ctx->zcoeff_blk = 0; ctx->zcoeff_blk = 0;
for (i = 0; i < MAX_MB_PLANE; ++i) { for (i = 0; i < MAX_MB_PLANE; ++i) {
for (k = 0; k < 2; ++k) { for (k = 0; k < 3; ++k) {
vpx_free(ctx->coeff[i][k]); vpx_free(ctx->coeff[i][k]);
ctx->coeff[i][k] = 0; ctx->coeff[i][k] = 0;
vpx_free(ctx->qcoeff[i][k]); vpx_free(ctx->qcoeff[i][k]);
......
...@@ -246,7 +246,8 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) { ...@@ -246,7 +246,8 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
vp9_set_speed_features(cpi); vp9_set_speed_features(cpi);
cpi->mb.select_txfm_size = cpi->sf.tx_size_search_method == USE_LARGESTALL ? cpi->mb.select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
cm->frame_type != KEY_FRAME) ?
0 : 1; 0 : 1;
set_block_thresholds(cpi); set_block_thresholds(cpi);
...@@ -1329,6 +1330,7 @@ static void super_block_uvrd(VP9_COMP *const cpi, MACROBLOCK *x, ...@@ -1329,6 +1330,7 @@ static void super_block_uvrd(VP9_COMP *const cpi, MACROBLOCK *x,
} }
static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
PICK_MODE_CONTEXT *ctx,
int *rate, int *rate_tokenonly, int *rate, int *rate_tokenonly,
int64_t *distortion, int *skippable, int64_t *distortion, int *skippable,
BLOCK_SIZE bsize) { BLOCK_SIZE bsize) {
...@@ -1364,6 +1366,27 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, ...@@ -1364,6 +1366,27 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
*rate_tokenonly = this_rate_tokenonly; *rate_tokenonly = this_rate_tokenonly;
*distortion = this_distortion; *distortion = this_distortion;
*skippable = s; *skippable = s;
if (!x->select_txfm_size) {
int i;
struct macroblock_plane *const p = x->plane;
struct macroblockd_plane *const pd = x->e_mbd.plane;
for (i = 1; i < MAX_MB_PLANE; ++i) {
p[i].coeff = ctx->coeff_pbuf[i][2];
pd[i].qcoeff = ctx->qcoeff_pbuf[i][2];
pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
pd[i].eobs = ctx->eobs_pbuf[i][2];
ctx->coeff_pbuf[i][2] = ctx->coeff_pbuf[i][0];
ctx->qcoeff_pbuf[i][2] = ctx->qcoeff_pbuf[i][0];
ctx->dqcoeff_pbuf[i][2] = ctx->dqcoeff_pbuf[i][0];
ctx->eobs_pbuf[i][2] = ctx->eobs_pbuf[i][0];
ctx->coeff_pbuf[i][0] = p[i].coeff;
ctx->qcoeff_pbuf[i][0] = pd[i].qcoeff;
ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
ctx->eobs_pbuf[i][0] = pd[i].eobs;
}
}
} }
} }
...@@ -1389,8 +1412,9 @@ static int64_t rd_sbuv_dcpred(VP9_COMP *cpi, MACROBLOCK *x, ...@@ -1389,8 +1412,9 @@ static int64_t rd_sbuv_dcpred(VP9_COMP *cpi, MACROBLOCK *x,
return this_rd; return this_rd;
} }
static void choose_intra_uv_mode(VP9_COMP *cpi, BLOCK_SIZE bsize, static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
int *rate_uv, int *rate_uv_tokenonly, BLOCK_SIZE bsize, int *rate_uv,
int *rate_uv_tokenonly,
int64_t *dist_uv, int *skip_uv, int64_t *dist_uv, int *skip_uv,
MB_PREDICTION_MODE *mode_uv) { MB_PREDICTION_MODE *mode_uv) {
MACROBLOCK *const x = &cpi->mb; MACROBLOCK *const x = &cpi->mb;
...@@ -1403,7 +1427,7 @@ static void choose_intra_uv_mode(VP9_COMP *cpi, BLOCK_SIZE bsize, ...@@ -1403,7 +1427,7 @@ static void choose_intra_uv_mode(VP9_COMP *cpi, BLOCK_SIZE bsize,
// Else do a proper rd search for each possible transform size that may // Else do a proper rd search for each possible transform size that may
// be considered in the main rd loop. // be considered in the main rd loop.
} else { } else {
rd_pick_intra_sbuv_mode(cpi, x, rd_pick_intra_sbuv_mode(cpi, x, ctx,
rate_uv, rate_uv_tokenonly, dist_uv, skip_uv, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize); bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
} }
...@@ -3033,12 +3057,13 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, ...@@ -3033,12 +3057,13 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
return this_rd; // if 0, this will be re-calculated by caller return this_rd; // if 0, this will be re-calculated by caller
} }
static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
int i; int max_plane) {
struct macroblock_plane *const p = x->plane; struct macroblock_plane *const p = x->plane;
struct macroblockd_plane *const pd = x->e_mbd.plane; struct macroblockd_plane *const pd = x->e_mbd.plane;
int i;
for (i = 0; i < MAX_MB_PLANE; ++i) { for (i = 0; i < max_plane; ++i) {
p[i].coeff = ctx->coeff_pbuf[i][1]; p[i].coeff = ctx->coeff_pbuf[i][1];
pd[i].qcoeff = ctx->qcoeff_pbuf[i][1]; pd[i].qcoeff = ctx->qcoeff_pbuf[i][1];
pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1]; pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
...@@ -3075,7 +3100,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, ...@@ -3075,7 +3100,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
*returnrate = INT_MAX; *returnrate = INT_MAX;
return; return;
} }
rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
&dist_uv, &uv_skip, bsize); &dist_uv, &uv_skip, bsize);
} else { } else {
y_skip = 0; y_skip = 0;
...@@ -3084,7 +3109,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, ...@@ -3084,7 +3109,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
*returnrate = INT_MAX; *returnrate = INT_MAX;
return; return;
} }
rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
&dist_uv, &uv_skip, BLOCK_8X8); &dist_uv, &uv_skip, BLOCK_8X8);
} }
...@@ -3450,7 +3475,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, ...@@ -3450,7 +3475,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
uv_tx = MIN(mbmi->tx_size, max_uv_txsize_lookup[bsize]); uv_tx = MIN(mbmi->tx_size, max_uv_txsize_lookup[bsize]);
if (rate_uv_intra[uv_tx] == INT_MAX) { if (rate_uv_intra[uv_tx] == INT_MAX) {
choose_intra_uv_mode(cpi, bsize, &rate_uv_intra[uv_tx], choose_intra_uv_mode(cpi, ctx, bsize, &rate_uv_intra[uv_tx],
&rate_uv_tokenonly[uv_tx], &rate_uv_tokenonly[uv_tx],
&dist_uv[uv_tx], &skip_uv[uv_tx], &dist_uv[uv_tx], &skip_uv[uv_tx],
&mode_uv[uv_tx]); &mode_uv[uv_tx]);
...@@ -3584,6 +3609,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, ...@@ -3584,6 +3609,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
// Did this mode help.. i.e. is it the new best mode // Did this mode help.. i.e. is it the new best mode
if (this_rd < best_rd || x->skip) { if (this_rd < best_rd || x->skip) {
int max_plane = MAX_MB_PLANE;
if (!mode_excluded) { if (!mode_excluded) {
// Note index of best mode so far // Note index of best mode so far
best_mode_index = mode_index; best_mode_index = mode_index;
...@@ -3591,6 +3617,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, ...@@ -3591,6 +3617,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
if (ref_frame == INTRA_FRAME) { if (ref_frame == INTRA_FRAME) {
/* required for left and above block mv */ /* required for left and above block mv */
mbmi->mv[0].as_int = 0; mbmi->mv[0].as_int = 0;
max_plane = 1;
} }
*returnrate = rate2; *returnrate = rate2;
...@@ -3599,7 +3626,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, ...@@ -3599,7 +3626,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
best_mbmode = *mbmi; best_mbmode = *mbmi;
best_skip2 = this_skip2; best_skip2 = this_skip2;
if (!x->select_txfm_size) if (!x->select_txfm_size)
swap_block_ptr(x, ctx); swap_block_ptr(x, ctx, max_plane);
vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size], vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
sizeof(uint8_t) * ctx->num_4x4_blk); sizeof(uint8_t) * ctx->num_4x4_blk);
...@@ -3706,7 +3733,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, ...@@ -3706,7 +3733,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
// Do Intra UV best rd mode selection if best mode choice above was intra. // Do Intra UV best rd mode selection if best mode choice above was intra.
if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME) { if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME) {
TX_SIZE uv_tx_size = get_uv_tx_size(mbmi); TX_SIZE uv_tx_size = get_uv_tx_size(mbmi);
rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_intra[uv_tx_size], rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
&rate_uv_tokenonly[uv_tx_size], &rate_uv_tokenonly[uv_tx_size],
&dist_uv[uv_tx_size], &dist_uv[uv_tx_size],
&skip_uv[uv_tx_size], &skip_uv[uv_tx_size],
...@@ -4075,7 +4102,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, ...@@ -4075,7 +4102,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
distortion2 += distortion_y; distortion2 += distortion_y;
if (rate_uv_intra[TX_4X4] == INT_MAX) { if (rate_uv_intra[TX_4X4] == INT_MAX) {
choose_intra_uv_mode(cpi, bsize, &rate_uv_intra[TX_4X4], choose_intra_uv_mode(cpi, ctx, bsize, &rate_uv_intra[TX_4X4],
&rate_uv_tokenonly[TX_4X4], &rate_uv_tokenonly[TX_4X4],
&dist_uv[TX_4X4], &skip_uv[TX_4X4], &dist_uv[TX_4X4], &skip_uv[TX_4X4],
&mode_uv[TX_4X4]); &mode_uv[TX_4X4]);
...@@ -4329,12 +4356,14 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, ...@@ -4329,12 +4356,14 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
// Did this mode help.. i.e. is it the new best mode // Did this mode help.. i.e. is it the new best mode
if (this_rd < best_rd || x->skip) { if (this_rd < best_rd || x->skip) {
if (!mode_excluded) { if (!mode_excluded) {
int max_plane = MAX_MB_PLANE;
// Note index of best mode so far // Note index of best mode so far
best_mode_index = mode_index; best_mode_index = mode_index;
if (ref_frame == INTRA_FRAME) { if<