Commit b8b3f1a4 authored by Deb Mukherjee's avatar Deb Mukherjee

Balancing coef-tree to reduce bool decodes

This patch changes the coefficient tree to move the EOB node below
the ZERO node in order to reduce the number of bool decodes.

The advantages of moving EOB one step down as opposed to two steps down
in the other parallel patch are: 1. The coef modeling based on
the One-node becomes independent of the tree structure above it, and
2. Fewer context/counter increases are needed.

The drawback is that the potential savings in bool decodes will be
smaller, but assuming that 0s are much more predominant than 1s, the
potential savings are still likely to be substantial.

Results on derf300: -0.237%

Change-Id: Ie784be13dc98291306b338e8228703a4c2ea2242
parent 38cb616f
......@@ -244,6 +244,7 @@ EXPERIMENT_LIST="
multiple_arf
non420
alpha
balanced_coeftree
"
CONFIG_LIST="
external_build
......
This diff is collapsed.
......@@ -201,11 +201,16 @@ DECLARE_ALIGNED(16, const int, vp9_default_scan_32x32[1024]) = {
const vp9_tree_index vp9_coef_tree[ 22] = /* corresponding _CONTEXT_NODEs */
{
-DCT_EOB_TOKEN, 2, /* 0 = EOB */
-ZERO_TOKEN, 4, /* 1 = ZERO */
-ONE_TOKEN, 6, /* 2 = ONE */
8, 12, /* 3 = LOW_VAL */
-TWO_TOKEN, 10, /* 4 = TWO */
#if CONFIG_BALANCED_COEFTREE
-ZERO_TOKEN, 2, /* 0 = ZERO */
-DCT_EOB_TOKEN, 4, /* 1 = EOB */
#else
-DCT_EOB_TOKEN, 2, /* 0 = EOB */
-ZERO_TOKEN, 4, /* 1 = ZERO */
#endif
-ONE_TOKEN, 6, /* 2 = ONE */
8, 12, /* 3 = LOW_VAL */
-TWO_TOKEN, 10, /* 4 = TWO */
-THREE_TOKEN, -FOUR_TOKEN, /* 5 = THREE */
14, 16, /* 6 = HIGH_LOW */
-DCT_VAL_CATEGORY1, -DCT_VAL_CATEGORY2, /* 7 = CAT_ONE */
......@@ -229,9 +234,14 @@ static const vp9_prob Pcat6[] = {
};
/*
 * Token tree for the coefficient model: three nodes, two entries per node,
 * six entries in total. The trailing comment on each row gives the node
 * index.
 *
 * With CONFIG_BALANCED_COEFTREE the ZERO decision is node 0 and the EOB
 * decision is node 1; otherwise the two are swapped. The ONE node (node 2)
 * is shared by both layouts, so it is listed once after the conditional.
 * Listing it inside the #else branch as well would supply eight initializers
 * for a six-entry array.
 */
const vp9_tree_index vp9_coefmodel_tree[6] = {
#if CONFIG_BALANCED_COEFTREE
  -ZERO_TOKEN, 2,                 /* 0 = ZERO */
  -DCT_EOB_MODEL_TOKEN, 4,        /* 1 = EOB */
#else
  -DCT_EOB_MODEL_TOKEN, 2,        /* 0 = EOB */
  -ZERO_TOKEN, 4,                 /* 1 = ZERO */
#endif
  -ONE_TOKEN, -TWO_TOKEN,         /* 2 = ONE */
};
// Model obtained from a 2-sided zero-centered distribution derived
......@@ -690,8 +700,13 @@ static void adapt_coef_probs(
vp9_coefmodel_tree,
coef_probs, branch_ct,
coef_counts[i][j][k][l], 0);
#if CONFIG_BALANCED_COEFTREE
branch_ct[1][1] = eob_branch_count[i][j][k][l] - branch_ct[1][0];
coef_probs[1] = get_binary_prob(branch_ct[1][0], branch_ct[1][1]);
#else
branch_ct[0][1] = eob_branch_count[i][j][k][l] - branch_ct[0][0];
coef_probs[0] = get_binary_prob(branch_ct[0][0], branch_ct[0][1]);
#endif
for (t = 0; t < entropy_nodes_adapt; ++t) {
count = branch_ct[t][0] + branch_ct[t][1];
count = count > count_sat ? count_sat : count;
......
......@@ -85,8 +85,7 @@ extern vp9_extra_bit vp9_extra_bits[12]; /* indexed by token value */
/*# define DC_TOKEN_CONTEXTS 3*/ /* 00, 0!0, !0!0 */
#define PREV_COEF_CONTEXTS 6
// Disables backward coef probs adaption
// #define DISABLE_COEF_ADAPT
// #define ENTROPY_STATS
typedef unsigned int vp9_coeff_count[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS]
[MAX_ENTROPY_TOKENS];
......@@ -162,6 +161,7 @@ const int *vp9_get_coef_neighbors_handle(const int *scan, int *pad);
#define UNCONSTRAINED_NODES 3
#define MODEL_NODES (ENTROPY_NODES - UNCONSTRAINED_NODES)
#define PIVOT_NODE 2 // which node is pivot
typedef vp9_prob vp9_coeff_probs_model[REF_TYPES][COEF_BANDS]
......
......@@ -1073,9 +1073,6 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
if (vp9_read(&header_bc, 252))
pc->fc.vp9_mode_contexts[i][j] = vp9_read_prob(&header_bc);
}
// Is this needed ?
if (keyframe)
vp9_default_coef_probs(pc);
update_frame_context(&pc->fc);
......
......@@ -17,8 +17,13 @@
#include "vp9/decoder/vp9_detokenize.h"
#include "vp9/common/vp9_seg_common.h"
#if CONFIG_BALANCED_COEFTREE
#define ZERO_CONTEXT_NODE 0
#define EOB_CONTEXT_NODE 1
#else
#define EOB_CONTEXT_NODE 0
#define ZERO_CONTEXT_NODE 1
#endif
#define ONE_CONTEXT_NODE 2
#define LOW_VAL_CONTEXT_NODE 3
#define TWO_CONTEXT_NODE 4
......@@ -111,6 +116,9 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd,
const int *scan, *nb;
uint8_t token_cache[1024];
const uint8_t * band_translate;
#if CONFIG_BALANCED_COEFTREE
int skip_eob_node = 0;
#endif
switch (txfm_size) {
default:
......@@ -182,11 +190,13 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd,
c, default_eob);
band = get_coef_band(band_translate, c);
prob = coef_probs[band][pt];
#if !CONFIG_BALANCED_COEFTREE
fc->eob_branch_counts[txfm_size][type][ref][band][pt]++;
if (!vp9_read(r, prob[EOB_CONTEXT_NODE]))
break;
SKIP_START:
#endif
if (c >= seg_eob)
break;
if (c)
......@@ -198,8 +208,22 @@ SKIP_START:
if (!vp9_read(r, prob[ZERO_CONTEXT_NODE])) {
INCREMENT_COUNT(ZERO_TOKEN);
++c;
#if CONFIG_BALANCED_COEFTREE
skip_eob_node = 1;
continue;
#else
goto SKIP_START;
#endif
}
#if CONFIG_BALANCED_COEFTREE
if (!skip_eob_node) {
fc->eob_branch_counts[txfm_size][type][ref][band][pt]++;
if (!vp9_read(r, prob[EOB_CONTEXT_NODE]))
break;
}
skip_eob_node = 0;
#endif
// ONE_CONTEXT_NODE_0_
if (!vp9_read(r, prob[ONE_CONTEXT_NODE])) {
WRITE_COEF_CONTINUE(1, ONE_TOKEN);
......
......@@ -473,7 +473,6 @@ static void pack_mb_tokens(vp9_writer* const bc,
const vp9_prob *pp;
int v = a->value;
int n = a->len;
int ncount = n;
vp9_prob probs[ENTROPY_NODES];
if (t == EOSB_TOKEN) {
......@@ -489,18 +488,25 @@ static void pack_mb_tokens(vp9_writer* const bc,
assert(pp != 0);
/* skip one or two nodes */
#if !CONFIG_BALANCED_COEFTREE
if (p->skip_eob_node) {
n -= p->skip_eob_node;
i = 2 * p->skip_eob_node;
ncount -= p->skip_eob_node;
}
#endif
do {
const int bb = (v >> --n) & 1;
#if CONFIG_BALANCED_COEFTREE
if (i == 2 && p->skip_eob_node) {
i += 2;
assert(bb == 1);
continue;
}
#endif
vp9_write(bc, bb, pp[i >> 1]);
i = vp9_coef_tree[i + bb];
ncount--;
} while (n && ncount);
} while (n);
if (b->base_val) {
const int e = p->extra, l = b->len;
......@@ -871,8 +877,11 @@ static void write_mb_modes_kf(const VP9_COMP *cpi,
const MB_PREDICTION_MODE A = above_block_mode(m, i, mis);
const MB_PREDICTION_MODE L = (xd->left_available || idx) ?
left_block_mode(m, i) : DC_PRED;
write_kf_bmode(bc, m->bmi[i].as_mode.first,
c->kf_bmode_prob[A][L]);
const int bm = m->bmi[i].as_mode.first;
#ifdef ENTROPY_STATS
++intra_mode_stats[A][L][bm];
#endif
write_kf_bmode(bc, bm, c->kf_bmode_prob[A][L]);
}
}
}
......@@ -1075,11 +1084,19 @@ static void build_tree_distribution(vp9_coeff_probs_model *coef_probs,
coef_probs[i][j][k][l],
coef_branch_ct[i][j][k][l],
model_counts, 0);
#if CONFIG_BALANCED_COEFTREE
coef_branch_ct[i][j][k][l][1][1] = eob_branch_ct[i][j][k][l] -
coef_branch_ct[i][j][k][l][1][0];
coef_probs[i][j][k][l][1] =
get_binary_prob(coef_branch_ct[i][j][k][l][1][0],
coef_branch_ct[i][j][k][l][1][1]);
#else
coef_branch_ct[i][j][k][l][0][1] = eob_branch_ct[i][j][k][l] -
coef_branch_ct[i][j][k][l][0][0];
coef_probs[i][j][k][l][0] =
get_binary_prob(coef_branch_ct[i][j][k][l][0][0],
coef_branch_ct[i][j][k][l][0][1]);
#endif
#ifdef ENTROPY_STATS
if (!cpi->dummy_packing) {
for (t = 0; t < MAX_ENTROPY_TOKENS; ++t)
......
......@@ -133,7 +133,11 @@ struct macroblock {
unsigned char *active_ptr;
// note that token_costs is the cost when eob node is skipped
vp9_coeff_count token_costs[TX_SIZE_MAX_SB][BLOCK_TYPES];
#if CONFIG_BALANCED_COEFTREE
vp9_coeff_count token_costs_noskip[TX_SIZE_MAX_SB][BLOCK_TYPES];
#endif
int optimize;
......
......@@ -10,6 +10,7 @@
#include <assert.h>
#include "vp9/encoder/vp9_boolhuff.h"
#include "vp9/common/vp9_entropy.h"
#if defined(SECTIONBITS_OUTPUT)
unsigned __int64 Sectionbits[500];
......
......@@ -215,10 +215,21 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
band = get_coef_band(band_translate, i + 1);
pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache,
pad, default_eob);
#if CONFIG_BALANCED_COEFTREE
rate0 +=
mb->token_costs[tx_size][type][ref][band][pt][tokens[next][0].token];
mb->token_costs_noskip[tx_size][type][ref][band][pt]
[tokens[next][0].token];
rate1 +=
mb->token_costs[tx_size][type][ref][band][pt][tokens[next][1].token];
mb->token_costs_noskip[tx_size][type][ref][band][pt]
[tokens[next][1].token];
#else
rate0 +=
mb->token_costs[tx_size][type][ref][band][pt]
[tokens[next][0].token];
rate1 +=
mb->token_costs[tx_size][type][ref][band][pt]
[tokens[next][1].token];
#endif
}
UPDATE_RD_COST();
/* And pick the best. */
......@@ -266,14 +277,32 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
if (t0 != DCT_EOB_TOKEN) {
pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache,
pad, default_eob);
#if CONFIG_BALANCED_COEFTREE
if (!x)
rate0 += mb->token_costs[tx_size][type][ref][band][pt][
tokens[next][0].token];
else
rate0 += mb->token_costs_noskip[tx_size][type][ref][band][pt][
tokens[next][0].token];
#else
rate0 += mb->token_costs[tx_size][type][ref][band][pt][
tokens[next][0].token];
#endif
}
if (t1 != DCT_EOB_TOKEN) {
pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache,
pad, default_eob);
#if CONFIG_BALANCED_COEFTREE
if (!x)
rate1 += mb->token_costs[tx_size][type][ref][band][pt][
tokens[next][1].token];
else
rate1 += mb->token_costs_noskip[tx_size][type][ref][band][pt][
tokens[next][1].token];
#else
rate1 += mb->token_costs[tx_size][type][ref][band][pt][
tokens[next][1].token];
#endif
}
}
......@@ -326,8 +355,13 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
error1 = tokens[next][1].error;
t0 = tokens[next][0].token;
t1 = tokens[next][1].token;
#if CONFIG_BALANCED_COEFTREE
rate0 += mb->token_costs_noskip[tx_size][type][ref][band][pt][t0];
rate1 += mb->token_costs_noskip[tx_size][type][ref][band][pt][t1];
#else
rate0 += mb->token_costs[tx_size][type][ref][band][pt][t0];
rate1 += mb->token_costs[tx_size][type][ref][band][pt][t1];
#endif
UPDATE_RD_COST();
best = rd_cost1 < rd_cost0;
final_eob = i0 - 1;
......
......@@ -284,7 +284,6 @@ static void setup_features(VP9_COMP *cpi) {
set_default_lf_deltas(cpi);
}
static void dealloc_compressor_data(VP9_COMP *cpi) {
// Delete sementation map
vpx_free(cpi->segmentation_map);
......@@ -2935,9 +2934,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
#endif
// transform / motion compensation build reconstruction frame
if (cm->frame_type == KEY_FRAME) {
vp9_default_coef_probs(cm);
}
vp9_encode_frame(cpi);
......
......@@ -108,11 +108,31 @@ const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
{SPLITMV, GOLDEN_FRAME, ALTREF_FRAME},
};
#if CONFIG_BALANCED_COEFTREE
/*
 * Fill both token-cost tables from the model probabilities.
 *
 *   c        - receives the costs computed after the probability of node 1
 *              has been overwritten with 1 (see the comment in the loop).
 *   cnoskip  - receives the costs computed from the unmodified
 *              probabilities.
 *   p        - model probabilities; each context is expanded with
 *              vp9_model_to_full_probs() before costing.
 *   tx_size  - not referenced in this body.
 */
static void fill_token_costs(vp9_coeff_count *c,
                             vp9_coeff_count *cnoskip,
                             vp9_coeff_probs_model *p,
                             TX_SIZE tx_size) {
  int type, ref, band, ctx;

  for (type = 0; type < BLOCK_TYPES; ++type) {
    for (ref = 0; ref < REF_TYPES; ++ref) {
      for (band = 0; band < COEF_BANDS; ++band) {
        for (ctx = 0; ctx < PREV_COEF_CONTEXTS; ++ctx) {
          vp9_prob full_probs[ENTROPY_NODES];
          int *const cost_noskip = (int *)cnoskip[type][ref][band][ctx];
          int *const cost_skip = (int *)c[type][ref][band][ctx];

          vp9_model_to_full_probs(p[type][ref][band][ctx], full_probs);

          /* Costs with every node, including the EOB node, accounted for. */
          vp9_cost_tokens(cost_noskip, full_probs, vp9_coef_tree);

          /*
           * Overwrite the EOB node probability with a very small value so
           * the resulting cost is approximately the cost without the EOB
           * node.
           */
          full_probs[1] = 1;
          vp9_cost_tokens(cost_skip, full_probs, vp9_coef_tree);
        }
      }
    }
  }
}
#else
static void fill_token_costs(vp9_coeff_count *c,
vp9_coeff_probs_model *p,
TX_SIZE tx_size) {
int i, j, k, l;
for (i = 0; i < BLOCK_TYPES; i++)
for (j = 0; j < REF_TYPES; j++)
for (k = 0; k < COEF_BANDS; k++)
......@@ -123,6 +143,7 @@ static void fill_token_costs(vp9_coeff_count *c,
vp9_coef_tree);
}
}
#endif
static int rd_iifactor[32] = { 4, 4, 3, 2, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
......@@ -213,14 +234,29 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
}
}
#if CONFIG_BALANCED_COEFTREE
fill_token_costs(cpi->mb.token_costs[TX_4X4],
cpi->mb.token_costs_noskip[TX_4X4],
cpi->common.fc.coef_probs_4x4, TX_4X4);
fill_token_costs(cpi->mb.token_costs[TX_8X8],
cpi->mb.token_costs_noskip[TX_8X8],
cpi->common.fc.coef_probs_8x8, TX_8X8);
fill_token_costs(cpi->mb.token_costs[TX_16X16],
cpi->mb.token_costs_noskip[TX_16X16],
cpi->common.fc.coef_probs_16x16, TX_16X16);
fill_token_costs(cpi->mb.token_costs[TX_32X32],
cpi->mb.token_costs_noskip[TX_32X32],
cpi->common.fc.coef_probs_32x32, TX_32X32);
#else
fill_token_costs(cpi->mb.token_costs[TX_4X4],
cpi->common.fc.coef_probs_4x4, TX_4X4);
fill_token_costs(cpi->mb.token_costs[TX_8X8],
cpi->common.fc.coef_probs_8x8, TX_8X8);
fill_token_costs(cpi->mb.token_costs[TX_16X16],
cpi->common.fc.coef_probs_16x16, TX_16X16);
fill_token_costs(cpi->mb.token_costs[TX_32X32],
cpi->common.fc.coef_probs_32x32, TX_32X32);
#endif
for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
vp9_cost_tokens(cpi->mb.partition_cost[i],
......@@ -274,7 +310,13 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
TX_TYPE tx_type = DCT_DCT;
const int segment_id = xd->mode_info_context->mbmi.segment_id;
#if CONFIG_BALANCED_COEFTREE
unsigned int (*token_costs_noskip)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
mb->token_costs_noskip[tx_size][type][ref];
#else
vp9_prob coef_probs[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
#endif
int seg_eob, default_eob;
uint8_t token_cache[1024];
const uint8_t * band_translate;
......@@ -294,8 +336,10 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
get_tx_type_4x4(xd, block) : DCT_DCT;
above_ec = A[0] != 0;
left_ec = L[0] != 0;
#if !CONFIG_BALANCED_COEFTREE
vp9_model_to_full_probs_sb(cm->fc.coef_probs_4x4[type][ref],
coef_probs);
#endif
seg_eob = 16;
scan = get_scan_4x4(tx_type);
band_translate = vp9_coefband_trans_4x4;
......@@ -310,8 +354,10 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
above_ec = (A[0] + A[1]) != 0;
left_ec = (L[0] + L[1]) != 0;
scan = get_scan_8x8(tx_type);
#if !CONFIG_BALANCED_COEFTREE
vp9_model_to_full_probs_sb(cm->fc.coef_probs_8x8[type][ref],
coef_probs);
#endif
seg_eob = 64;
band_translate = vp9_coefband_trans_8x8plus;
break;
......@@ -323,8 +369,10 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT;
scan = get_scan_16x16(tx_type);
#if !CONFIG_BALANCED_COEFTREE
vp9_model_to_full_probs_sb(cm->fc.coef_probs_16x16[type][ref],
coef_probs);
#endif
seg_eob = 256;
above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
......@@ -333,8 +381,10 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
}
case TX_32X32:
scan = vp9_default_scan_32x32;
#if !CONFIG_BALANCED_COEFTREE
vp9_model_to_full_probs_sb(cm->fc.coef_probs_32x32[type][ref],
coef_probs);
#endif
seg_eob = 1024;
above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;
......@@ -365,18 +415,30 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
if (c)
pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
#if CONFIG_BALANCED_COEFTREE
if (!c || token_cache[scan[c - 1]]) // do not skip eob
cost += token_costs_noskip[band][pt][t] + vp9_dct_value_cost_ptr[v];
else
cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
#else
cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
if (!c || token_cache[scan[c - 1]])
cost += vp9_cost_bit(coef_probs[band][pt][0], 1);
#endif
token_cache[scan[c]] = vp9_pt_energy_class[t];
}
if (c < seg_eob) {
if (c)
pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
#if CONFIG_BALANCED_COEFTREE
cost += mb->token_costs_noskip[tx_size][type][ref]
[get_coef_band(band_translate, c)]
[pt][DCT_EOB_TOKEN];
#else
cost += mb->token_costs[tx_size][type][ref]
[get_coef_band(band_translate, c)]
[pt][DCT_EOB_TOKEN];
#endif
}
}
......
......@@ -224,11 +224,21 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
t->token = token;
t->context_tree = coef_probs[type][ref][band][pt];
t->skip_eob_node = (c > 0) && (token_cache[scan[c - 1]] == 0);
#if CONFIG_BALANCED_COEFTREE
assert(token <= ZERO_TOKEN ||
vp9_coef_encodings[t->token].len - t->skip_eob_node > 0);
#else
assert(vp9_coef_encodings[t->token].len - t->skip_eob_node > 0);
#endif
if (!dry_run) {
++counts[type][ref][band][pt][token];
#if CONFIG_BALANCED_COEFTREE
if (!t->skip_eob_node && token > ZERO_TOKEN)
#else
if (!t->skip_eob_node)
#endif
++cpi->common.fc.eob_branch_counts[tx_size][type][ref][band][pt];
}
token_cache[scan[c]] = vp9_pt_energy_class[token];
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment