Commit 82775f61 authored by Cheng Chen

Record total rate cost in trellis

Record the total rate cost while computing the trellis optimization, and
reuse it in later stages to avoid redundant rate computation.

Speed impact: ~6% speed up
Coding performance should not be affected.

Change-Id: I9e940a2d126bb55930fcf22ea04d061eee1fc944
parent 28e9ce29
......@@ -443,7 +443,7 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
int av1_optimize_b(const struct AV1_COMP *cpi, MACROBLOCK *mb, int plane,
int blk_row, int blk_col, int block, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
const ENTROPY_CONTEXT *l, int fast_mode) {
const ENTROPY_CONTEXT *l, int fast_mode, int *rate_cost) {
MACROBLOCKD *const xd = &mb->e_mbd;
struct macroblock_plane *const p = &mb->plane[plane];
const int eob = p->eobs[block];
......@@ -462,7 +462,7 @@ int av1_optimize_b(const struct AV1_COMP *cpi, MACROBLOCK *mb, int plane,
TXB_CTX txb_ctx;
get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx);
return av1_optimize_txb(cpi, mb, plane, blk_row, blk_col, block, tx_size,
&txb_ctx, fast_mode);
&txb_ctx, fast_mode, rate_cost);
#endif // !CONFIG_LV_MAP
}
......@@ -587,6 +587,7 @@ static void encode_block(int plane, int block, int blk_row, int blk_col,
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
uint8_t *dst;
ENTROPY_CONTEXT *a, *l;
int dummy_rate_cost = 0;
int bw = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
dst = &pd->dst
......@@ -603,7 +604,7 @@ static void encode_block(int plane, int block, int blk_row, int blk_col,
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize,
tx_size, AV1_XFORM_QUANT_FP);
av1_optimize_b(args->cpi, x, plane, blk_row, blk_col, block, plane_bsize,
tx_size, a, l, CONFIG_LV_MAP);
tx_size, a, l, CONFIG_LV_MAP, &dummy_rate_cost);
} else {
av1_xform_quant(
cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
......@@ -884,6 +885,7 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
const int dst_stride = pd->dst.stride;
uint8_t *dst =
&pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
int dummy_rate_cost = 0;
av1_predict_intra_block_facade(cm, xd, plane, blk_col, blk_row, tx_size);
......@@ -914,7 +916,7 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
AV1_XFORM_QUANT_FP);
av1_optimize_b(args->cpi, x, plane, blk_row, blk_col, block, plane_bsize,
tx_size, a, l, CONFIG_LV_MAP);
tx_size, a, l, CONFIG_LV_MAP, &dummy_rate_cost);
#if CONFIG_TXK_SEL
if (plane == 0 && p->eobs[block] == 0) {
......
......@@ -53,7 +53,7 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
int av1_optimize_b(const struct AV1_COMP *cpi, MACROBLOCK *mb, int plane,
int blk_row, int blk_col, int block, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
const ENTROPY_CONTEXT *l, int fast_mode);
const ENTROPY_CONTEXT *l, int fast_mode, int *rate_cost);
void av1_subtract_txb(MACROBLOCK *x, int plane, BLOCK_SIZE plane_bsize,
int blk_col, int blk_row, TX_SIZE tx_size);
......
......@@ -1538,7 +1538,8 @@ void test_try_change_eob(TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs,
#if 1
static int optimize_txb(TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs,
const LV_MAP_EOB_COST *txb_eob_costs,
TxbCache *txb_cache, int dry_run, int fast_mode) {
TxbCache *txb_cache, int dry_run, int fast_mode,
int *rate_cost) {
(void)fast_mode;
(void)txb_cache;
int update = 0;
......@@ -1586,7 +1587,7 @@ static int optimize_txb(TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs,
txb_info->tx_type);
// backward optimize the level-k map
int64_t accu_rate = eob_cost;
int accu_rate = eob_cost;
int64_t accu_dist = 0;
int64_t prev_eob_rd_cost = INT64_MAX;
int64_t cur_eob_rd_cost = 0;
......@@ -1690,6 +1691,11 @@ static int optimize_txb(TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs,
txb_info->eob = 0;
}
// record total rate cost
*rate_cost = zero_blk_rd_cost <= prev_eob_rd_cost
? zero_blk_rate
: accu_rate + non_zero_blk_rate;
#if TEST_OPTIMIZE_TXB
int cost_diff = 0;
int64_t dist_diff = 0;
......@@ -1718,7 +1724,8 @@ static int optimize_txb(TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs,
#else
static int optimize_txb(TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs,
TxbCache *txb_cache, int dry_run, int fast_mode) {
TxbCache *txb_cache, int dry_run, int fast_mode,
int *rate_cost) {
int update = 0;
if (txb_info->eob == 0) return update;
int cost_diff = 0;
......@@ -1849,12 +1856,13 @@ int hbt_hash_miss(int found_index, uint16_t hbt_hash_index,
const LV_MAP_EOB_COST *txb_eob_costs,
const struct macroblock_plane *p, int block, int fast_mode) {
const int16_t *scan = txb_info->scan_order->scan;
int dummy_rate_cost;
av1_txb_init_levels(txb_info->qcoeff, txb_info->width, txb_info->height,
txb_info->levels);
// The hash_based_trellis speed feature requires lv_map_multi, so always true.
const int update =
optimize_txb(txb_info, txb_costs, txb_eob_costs, NULL, 0, fast_mode);
const int update = optimize_txb(txb_info, txb_costs, txb_eob_costs, NULL, 0,
fast_mode, &dummy_rate_cost);
if (update) {
// Overwrite old lowest entry
......@@ -2026,7 +2034,7 @@ int hash_based_trellis_mode(TxbInfo *txb_info,
int av1_optimize_txb(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane,
int blk_row, int blk_col, int block, TX_SIZE tx_size,
TXB_CTX *txb_ctx, int fast_mode) {
TXB_CTX *txb_ctx, int fast_mode, int *rate_cost) {
const AV1_COMMON *cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
const PLANE_TYPE plane_type = get_plane_type(plane);
......@@ -2109,8 +2117,8 @@ int av1_optimize_txb(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane,
av1_txb_init_levels(qcoeff, width, height, levels);
const int update =
optimize_txb(&txb_info, &txb_costs, &txb_eob_costs, NULL, 0, fast_mode);
const int update = optimize_txb(&txb_info, &txb_costs, &txb_eob_costs, NULL,
0, fast_mode, rate_cost);
if (update) {
p->eobs[block] = txb_info.eob;
......@@ -2372,6 +2380,7 @@ int64_t av1_search_txk_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
uint16_t best_eob = 0;
RD_STATS best_rd_stats;
TX_TYPE tx_type;
int rate_cost = 0;
av1_invalid_rd_stats(&best_rd_stats);
......@@ -2396,15 +2405,23 @@ int64_t av1_search_txk_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize,
tx_size, AV1_XFORM_QUANT_FP);
av1_optimize_b(cpi, x, plane, blk_row, blk_col, block, plane_bsize,
tx_size, a, l, 1);
tx_size, a, l, 1, &rate_cost);
}
av1_dist_block(cpi, x, plane, plane_bsize, block, blk_row, blk_col, tx_size,
&this_rd_stats.dist, &this_rd_stats.sse,
OUTPUT_HAS_PREDICTED_PIXELS);
const int eob = x->plane[plane].eobs[block];
const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, mbmi);
this_rd_stats.rate =
av1_cost_coeffs(cpi, x, plane, blk_row, blk_col, block, tx_size,
scan_order, a, l, use_fast_coef_costing);
if (eob)
rate_cost +=
av1_tx_type_cost(cm, x, xd, mbmi->sb_type, plane, tx_size, tx_type);
else
rate_cost =
av1_cost_coeffs(cpi, x, plane, blk_row, blk_col, block, tx_size,
scan_order, a, l, use_fast_coef_costing);
this_rd_stats.rate = rate_cost;
int64_t rd = RDCOST(x->rdmult, this_rd_stats.rate, this_rd_stats.dist);
if (rd < best_rd) {
......@@ -2436,7 +2453,7 @@ int64_t av1_search_txk_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize,
tx_size, AV1_XFORM_QUANT_FP);
av1_optimize_b(cpi, x, plane, blk_row, blk_col, block, plane_bsize,
tx_size, a, l, 1);
tx_size, a, l, 1, &rate_cost);
}
av1_inverse_transform_block_facade(xd, plane, block, blk_row, blk_col,
......
......@@ -104,7 +104,7 @@ int64_t av1_search_txk_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
int av1_optimize_txb(const AV1_COMP *cpi, MACROBLOCK *x, int plane, int blk_row,
int blk_col, int block, TX_SIZE tx_size, TXB_CTX *txb_ctx,
int fast_mode);
int fast_mode, int *rate_cost);
#ifdef __cplusplus
}
#endif
......
......@@ -1855,11 +1855,20 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
}
#if !CONFIG_TXK_SEL
const PLANE_TYPE plane_type = get_plane_type(plane);
const TX_TYPE tx_type =
av1_get_tx_type(plane_type, xd, blk_row, blk_col, tx_size);
const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, mbmi);
int rate_cost = 0;
// full forward transform and quantization
if (cpi->sf.optimize_coefficients != FULL_TRELLIS_OPT) {
av1_xform_quant(
cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
USE_B_QUANT_NO_TRELLIS ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP);
rate_cost = av1_cost_coeffs(cpi, x, plane, blk_row, blk_col, block, tx_size,
scan_order, a, l, args->use_fast_coef_costing);
} else {
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
AV1_XFORM_QUANT_FP);
......@@ -1894,7 +1903,13 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
#endif
RDCOST(x->rdmult, 0, tmp_dist) + args->this_rd < args->best_rd) {
av1_optimize_b(cpi, x, plane, blk_row, blk_col, block, plane_bsize,
tx_size, a, l, CONFIG_LV_MAP);
tx_size, a, l, CONFIG_LV_MAP, &rate_cost);
const int eob = x->plane[plane].eobs[block];
if (!eob)
rate_cost =
av1_cost_coeffs(cpi, x, plane, blk_row, blk_col, block, tx_size,
scan_order, a, l, args->use_fast_coef_costing);
} else {
args->exit_early = 1;
return;
......@@ -1917,14 +1932,8 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
args->exit_early = 1;
return;
}
const PLANE_TYPE plane_type = get_plane_type(plane);
const TX_TYPE tx_type =
av1_get_tx_type(plane_type, xd, blk_row, blk_col, tx_size);
const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, mbmi);
this_rd_stats.rate =
av1_cost_coeffs(cpi, x, plane, blk_row, blk_col, block, tx_size,
scan_order, a, l, args->use_fast_coef_costing);
this_rd_stats.rate = rate_cost;
#else // !CONFIG_TXK_SEL
av1_search_txk_type(cpi, x, plane, block, blk_row, blk_col, plane_bsize,
tx_size, a, l, args->use_fast_coef_costing,
......@@ -3539,6 +3548,7 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
const int16_t *diff =
&p->src_diff[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
int txb_coeff_cost;
int rate_cost = 0;
assert(tx_size < TX_SIZES_ALL);
......@@ -3603,6 +3613,8 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
USE_B_QUANT_NO_TRELLIS ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP);
rate_cost = av1_cost_coeffs(cpi, x, plane, blk_row, blk_col, block, tx_size,
scan_order, a, l, 0);
} else {
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
AV1_XFORM_QUANT_FP);
......@@ -3645,7 +3657,18 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
#endif
RDCOST(x->rdmult, 0, tmp_dist) < rd_stats->ref_rdcost) {
av1_optimize_b(cpi, x, plane, blk_row, blk_col, block, plane_bsize,
tx_size, a, l, fast);
tx_size, a, l, fast, &rate_cost);
const int eob = x->plane[plane].eobs[block];
if (eob) {
#if CONFIG_TXK_SEL
rate_cost += av1_tx_type_cost(cm, x, xd, xd->mi[0]->mbmi.sb_type, plane,
tx_size, tx_type);
#endif
} else {
rate_cost = av1_cost_coeffs(cpi, x, plane, blk_row, blk_col, block,
tx_size, scan_order, a, l, 0);
}
} else {
rd_stats->rate += rd_stats->zero_rate;
rd_stats->dist += tmp << 4;
......@@ -3687,8 +3710,7 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
blk_row, blk_col, plane_bsize, txm_bsize);
}
cur_dist = tmp * 16;
txb_coeff_cost = av1_cost_coeffs(cpi, x, plane, blk_row, blk_col, block,
tx_size, scan_order, a, l, 0);
txb_coeff_cost = rate_cost;
cur_rate = txb_coeff_cost;
cur_skip = (eob == 0);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment