From 3829cd2f2f9904572019aa047d068baeee843767 Mon Sep 17 00:00:00 2001 From: Alex Converse Date: Wed, 13 Apr 2016 11:48:37 -0700 Subject: [PATCH] Move ZERO_TOKEN into the ANS coef tokenset. Change-Id: I87943e027437543ab31fa3ae1aa8b2de3a063ae5 --- vp10/common/ans.h | 53 ++++++++++++++- vp10/common/entropy.c | 19 ++++-- vp10/decoder/detokenize.c | 128 ++++++++++++++++++------------------- vp10/encoder/bitstream.c | 61 +++++++++--------- vp10/encoder/cost.c | 12 ++-- vp10/encoder/cost.h | 5 +- vp10/encoder/encodeframe.c | 6 +- vp10/encoder/rd.c | 14 ++-- vp10/encoder/rd.h | 6 ++ vp10/encoder/tokenize.c | 21 ++---- 10 files changed, 191 insertions(+), 134 deletions(-) diff --git a/vp10/common/ans.h b/vp10/common/ans.h index eaa7d5893..32babee34 100644 --- a/vp10/common/ans.h +++ b/vp10/common/ans.h @@ -266,7 +266,7 @@ struct rans_dec_sym { AnsP8 cum_prob; // not-inclusive }; -// This is now just a boring cdf. It starts with an explict zero. +// This is now just a boring cdf. It starts with an explicit zero. // TODO(aconverse): Remove starting zero. typedef uint16_t rans_dec_lut[16]; @@ -277,6 +277,57 @@ static INLINE void rans_build_cdf_from_pdf(const AnsP8 token_probs[], for (i = 1; cdf_tab[i - 1] < ans_p8_precision; ++i) { cdf_tab[i] = cdf_tab[i - 1] + token_probs[i - 1]; } + assert(cdf_tab[i - 1] == ans_p8_precision); +} + +static INLINE int ans_find_largest(const AnsP8 *const pdf_tab, + int num_syms) { + int largest_idx = -1; + int largest_p = -1; + int i; + for (i = 0; i < num_syms; ++i) { + int p = pdf_tab[i]; + if (p > largest_p) { + largest_p = p; + largest_idx = i; + } + } + return largest_idx; +} + +static INLINE void rans_merge_prob_pdf(AnsP8 *const out_pdf, + const AnsP8 node_prob, + const AnsP8 *const src_pdf, + int in_syms) { + int i; + int adjustment = ans_p8_precision; + const int round_fact = ans_p8_precision >> 1; + const AnsP8 p1 = ans_p8_precision - node_prob; + const int out_syms = in_syms + 1; + assert(src_pdf != out_pdf); + + out_pdf[0] = node_prob; + adjustment -= node_prob; + for (i = 0; i < in_syms; ++i) { + int p = (p1 * src_pdf[i] + round_fact) >> ans_p8_shift; + p = VPXMIN(p, (int)ans_p8_precision - in_syms); + p = VPXMAX(p, 1); + out_pdf[i + 1] = p; + adjustment -= p; + } + + // Adjust probabilities so they sum to the total probability + if (adjustment > 0) { + i = ans_find_largest(out_pdf, out_syms); + out_pdf[i] += adjustment; + } else { + while (adjustment < 0) { + i = ans_find_largest(out_pdf, out_syms); + --out_pdf[i]; + assert(out_pdf[i] > 0); + adjustment++; + } + } } // rANS with normalization diff --git a/vp10/common/entropy.c b/vp10/common/entropy.c index 909c3bd8e..d17b8549d 100644 --- a/vp10/common/entropy.c +++ b/vp10/common/entropy.c @@ -2799,6 +2799,15 @@ void vp10_model_to_full_probs(const vpx_prob *model, vpx_prob *full) { } #if CONFIG_ANS +void vp10_build_token_cdfs(const vpx_prob *pdf_model, rans_dec_lut cdf) { + AnsP8 pdf_tab[ENTROPY_TOKENS - 1]; + assert(pdf_model[2] != 0); + rans_merge_prob_pdf(pdf_tab, pdf_model[1], + vp10_pareto8_token_probs[pdf_model[2] - 1], + ENTROPY_TOKENS - 2); + rans_build_cdf_from_pdf(pdf_tab, cdf); +} + void vp10_coef_pareto_cdfs(FRAME_CONTEXT *fc) { TX_SIZE t; int i, j, k, l; @@ -2806,13 +2815,9 @@ void vp10_coef_pareto_cdfs(FRAME_CONTEXT *fc) { for (i = 0; i < PLANE_TYPES; ++i) for (j = 0; j < REF_TYPES; ++j) for (k = 0; k < COEF_BANDS; ++k) - for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) { - const vpx_prob *const tree_probs = fc->coef_probs[t][i][j][k][l]; - vpx_prob pivot = tree_probs[PIVOT_NODE]; - assert(pivot != 0); - rans_build_cdf_from_pdf(vp10_pareto8_token_probs[pivot - 1], - fc->coef_cdfs[t][i][j][k][l]); - } + for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) + vp10_build_token_cdfs(fc->coef_probs[t][i][j][k][l], + fc->coef_cdfs[t][i][j][k][l]); } #endif // CONFIG_ANS diff --git a/vp10/decoder/detokenize.c b/vp10/decoder/detokenize.c index 44cb58d32..953af567c 100644 --- a/vp10/decoder/detokenize.c +++ b/vp10/decoder/detokenize.c @@ -231,6 +231,7 @@ static int decode_coefs_ans(const MACROBLOCKD *const xd, const FRAME_CONTEXT *const fc = xd->fc; const int ref = is_inter_block(&xd->mi[0]->mbmi); int band, c = 0; + int skip_eob = 0; const vpx_prob (*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] = fc->coef_probs[tx_size][type][ref]; const rans_dec_lut(*coef_cdfs)[COEFF_CONTEXTS] = @@ -296,87 +297,82 @@ static int decode_coefs_ans(const MACROBLOCKD *const xd, int val = -1; band = *band_translate++; prob = coef_probs[band][ctx]; - if (counts) - ++eob_branch_count[band][ctx]; - if (!uabs_read(ans, prob[EOB_CONTEXT_NODE])) { - INCREMENT_COUNT(EOB_MODEL_TOKEN); - break; + if (!skip_eob) { + if (counts) + ++eob_branch_count[band][ctx]; + if (!uabs_read(ans, prob[EOB_CONTEXT_NODE])) { + INCREMENT_COUNT(EOB_MODEL_TOKEN); + break; + } } - while (!uabs_read(ans, prob[ZERO_CONTEXT_NODE])) { + cdf = &coef_cdfs[band][ctx]; + token = ZERO_TOKEN + rans_read(ans, *cdf); + if (token == ZERO_TOKEN) { INCREMENT_COUNT(ZERO_TOKEN); - dqv = dq[1]; token_cache[scan[c]] = 0; - ++c; - if (c >= max_eob) - return c; // zero tokens at the end (no eob token) - ctx = get_coef_context(nb, token_cache, c); - band = *band_translate++; - prob = coef_probs[band][ctx]; - } - cdf = &coef_cdfs[band][ctx]; - - token = ONE_TOKEN + rans_read(ans, *cdf); - INCREMENT_COUNT(ONE_TOKEN + (token > ONE_TOKEN)); - switch (token) { - case ONE_TOKEN: - case TWO_TOKEN: - case THREE_TOKEN: - case FOUR_TOKEN: - val = token; - break; - case CATEGORY1_TOKEN: - val = CAT1_MIN_VAL + read_coeff(cat1_prob, 1, ans); - break; - case CATEGORY2_TOKEN: - val = CAT2_MIN_VAL + read_coeff(cat2_prob, 2, ans); - break; - case CATEGORY3_TOKEN: - val = CAT3_MIN_VAL + read_coeff(cat3_prob, 3, ans); - break; - case CATEGORY4_TOKEN: - val = CAT4_MIN_VAL + read_coeff(cat4_prob, 4, ans); - break; - case CATEGORY5_TOKEN: - val = CAT5_MIN_VAL + read_coeff(cat5_prob, 5, ans); - break; - case CATEGORY6_TOKEN: - { + skip_eob = 1; + } else { + INCREMENT_COUNT(ONE_TOKEN + (token > ONE_TOKEN)); + switch (token) { + case ONE_TOKEN: + case TWO_TOKEN: + case THREE_TOKEN: + case FOUR_TOKEN: + val = token; + break; + case CATEGORY1_TOKEN: + val = CAT1_MIN_VAL + read_coeff(cat1_prob, 1, ans); + break; + case CATEGORY2_TOKEN: + val = CAT2_MIN_VAL + read_coeff(cat2_prob, 2, ans); + break; + case CATEGORY3_TOKEN: + val = CAT3_MIN_VAL + read_coeff(cat3_prob, 3, ans); + break; + case CATEGORY4_TOKEN: + val = CAT4_MIN_VAL + read_coeff(cat4_prob, 4, ans); + break; + case CATEGORY5_TOKEN: + val = CAT5_MIN_VAL + read_coeff(cat5_prob, 5, ans); + break; + case CATEGORY6_TOKEN: { const int skip_bits = TX_SIZES - 1 - tx_size; const uint8_t *cat6p = cat6_prob + skip_bits; #if CONFIG_VP9_HIGHBITDEPTH - switch (xd->bd) { - case VPX_BITS_8: - val = CAT6_MIN_VAL + read_coeff(cat6p, 14 - skip_bits, ans); - break; - case VPX_BITS_10: - val = CAT6_MIN_VAL + read_coeff(cat6p, 16 - skip_bits, ans); - break; - case VPX_BITS_12: - val = CAT6_MIN_VAL + read_coeff(cat6p, 18 - skip_bits, ans); - break; - default: - assert(0); - return -1; - } + switch (xd->bd) { + case VPX_BITS_8: + val = CAT6_MIN_VAL + read_coeff(cat6p, 14 - skip_bits, ans); + break; + case VPX_BITS_10: + val = CAT6_MIN_VAL + read_coeff(cat6p, 16 - skip_bits, ans); + break; + case VPX_BITS_12: + val = CAT6_MIN_VAL + read_coeff(cat6p, 18 - skip_bits, ans); + break; + default: + assert(0); + return -1; + } #else - val = CAT6_MIN_VAL + read_coeff(cat6p, 14 - skip_bits, ans); + val = CAT6_MIN_VAL + read_coeff(cat6p, 14 - skip_bits, ans); #endif - } - break; - } - v = (val * dqv) >> dq_shift; + } break; + } + v = (val * dqv) >> dq_shift; #if CONFIG_COEFFICIENT_RANGE_CHECKING #if CONFIG_VP9_HIGHBITDEPTH - dqcoeff[scan[c]] = highbd_check_range((uabs_read_bit(ans) ? -v : v), - xd->bd); + dqcoeff[scan[c]] = + highbd_check_range((uabs_read_bit(ans) ? -v : v), xd->bd); #else - dqcoeff[scan[c]] = check_range(uabs_read_bit(ans) ? -v : v); + dqcoeff[scan[c]] = check_range(uabs_read_bit(ans) ? -v : v); #endif // CONFIG_VP9_HIGHBITDEPTH #else - dqcoeff[scan[c]] = uabs_read_bit(ans) ? -v : v; + dqcoeff[scan[c]] = uabs_read_bit(ans) ? -v : v; #endif // CONFIG_COEFFICIENT_RANGE_CHECKING - token_cache[scan[c]] = vp10_pt_energy_class[token]; + token_cache[scan[c]] = vp10_pt_energy_class[token]; + skip_eob = 0; + } ++c; ctx = get_coef_context(nb, token_cache, c); dqv = dq[1]; diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c index 47c09423f..6d903ea46 100644 --- a/vp10/encoder/bitstream.c +++ b/vp10/encoder/bitstream.c @@ -706,40 +706,37 @@ static void pack_mb_tokens_ans(struct BufAnsCoder *ans, buf_uabs_write(ans, t != EOB_TOKEN, p->context_tree[0]); if (t != EOB_TOKEN) { - buf_uabs_write(ans, t != ZERO_TOKEN, p->context_tree[1]); - - if (t != ZERO_TOKEN) { - struct rans_sym s; - const rans_dec_lut *token_cdf = p->token_cdf; - s.cum_prob = (*token_cdf)[t - ONE_TOKEN]; - s.prob = (*token_cdf)[t - ONE_TOKEN + 1] - s.cum_prob; - buf_rans_write(ans, &s); - } - } - - if (b->base_val) { - const int e = p->extra, l = b->len; - int skip_bits = (b->base_val == CAT6_MIN_VAL) ? TX_SIZES - 1 - tx : 0; - - if (l) { - const unsigned char *pb = b->prob; - int v = e >> 1; - int n = l; /* number of bits in v, assumed nonzero */ - int i = 0; + struct rans_sym s; + const rans_dec_lut *token_cdf = p->token_cdf; + assert(token_cdf); + s.cum_prob = (*token_cdf)[t - ZERO_TOKEN]; + s.prob = (*token_cdf)[t - ZERO_TOKEN + 1] - s.cum_prob; + buf_rans_write(ans, &s); + + if (b->base_val) { + const int e = p->extra, l = b->len; + int skip_bits = (b->base_val == CAT6_MIN_VAL) ? TX_SIZES - 1 - tx : 0; + + if (l) { + const unsigned char *pb = b->prob; + int v = e >> 1; + int n = l; /* number of bits in v, assumed nonzero */ + int i = 0; + + do { + const int bb = (v >> --n) & 1; + if (skip_bits) { + skip_bits--; + assert(!bb); + } else { + buf_uabs_write(ans, bb, pb[i >> 1]); + } + i = b->tree[i + bb]; + } while (n); + } - do { - const int bb = (v >> --n) & 1; - if (skip_bits) { - skip_bits--; - assert(!bb); - } else { - buf_uabs_write(ans, bb, pb[i >> 1]); - } - i = b->tree[i + bb]; - } while (n); + buf_uabs_write(ans, e & 1, 128); } - - buf_uabs_write(ans, e & 1, 128); } ++p; diff --git a/vp10/encoder/cost.c b/vp10/encoder/cost.c index 0ed41405e..6318ad300 100644 --- a/vp10/encoder/cost.c +++ b/vp10/encoder/cost.c @@ -10,6 +10,9 @@ #include #include "vp10/encoder/cost.h" +#if CONFIG_ANS +#include "vp10/common/ans.h" +#endif // CONFIG_ANS #include "vp10/common/entropy.h" /* round(-log2(i/256.) * (1 << VP9_PROB_COST_SHIFT)) @@ -57,16 +60,15 @@ static void cost(int *costs, vpx_tree tree, const vpx_prob *probs, #if CONFIG_ANS void vp10_cost_tokens_ans(int *costs, const vpx_prob *tree_probs, - const vpx_prob *token_probs, int skip_eob) { + const rans_dec_lut *token_cdf, int skip_eob) { int c_tree = 0; // Cost of the "tree" nodes EOB and ZERO. int i; costs[EOB_TOKEN] = vp10_cost_bit(tree_probs[0], 0); if (!skip_eob) c_tree = vp10_cost_bit(tree_probs[0], 1); - costs[ZERO_TOKEN] = c_tree + vp10_cost_bit(tree_probs[1], 0); - c_tree += vp10_cost_bit(tree_probs[1], 1); - for (i = ONE_TOKEN; i <= CATEGORY6_TOKEN; ++i) { - costs[i] = c_tree + vp10_cost_bit(token_probs[i - ONE_TOKEN], 0); + for (i = ZERO_TOKEN; i <= CATEGORY6_TOKEN; ++i) { + const int p = (*token_cdf)[i + 1] - (*token_cdf)[i]; + costs[i] = c_tree + vp10_cost_bit(p, 0); } } #endif // CONFIG_ANS diff --git a/vp10/encoder/cost.h b/vp10/encoder/cost.h index 56d91001f..3863875f3 100644 --- a/vp10/encoder/cost.h +++ b/vp10/encoder/cost.h @@ -13,6 +13,9 @@ #include "vpx_dsp/prob.h" #include "vpx/vpx_integer.h" +#if CONFIG_ANS +#include "vp10/common/ans.h" +#endif // CONFIG_ANS #ifdef __cplusplus extern "C" { @@ -54,7 +57,7 @@ void vp10_cost_tokens_skip(int *costs, const vpx_prob *probs, vpx_tree tree); #if CONFIG_ANS void vp10_cost_tokens_ans(int *costs, const vpx_prob *tree_probs, - const vpx_prob *token_probs, int skip_eob); + const rans_dec_lut *token_cdf, int skip_eob); #endif #ifdef __cplusplus diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c index 45505f166..a663df9ff 100644 --- a/vp10/encoder/encodeframe.c +++ b/vp10/encoder/encodeframe.c @@ -4214,7 +4214,11 @@ static void encode_rd_sb_row(VP10_COMP *cpi, cpi->td.rd_counts.coef_counts); vp10_copy(subframe_stats->eob_counts_buf[cm->coef_probs_update_idx], cm->counts.eob_branch); - vp10_fill_token_costs(x->token_costs, cm->fc->coef_probs); + vp10_fill_token_costs(x->token_costs, +#if CONFIG_ANS + cm->fc->coef_cdfs, +#endif // CONFIG_ANS + cm->fc->coef_probs); } } #endif // CONFIG_ENTROPY diff --git a/vp10/encoder/rd.c b/vp10/encoder/rd.c index dc34f1f54..3e48d28a2 100644 --- a/vp10/encoder/rd.c +++ b/vp10/encoder/rd.c @@ -153,6 +153,9 @@ static void fill_mode_costs(VP10_COMP *cpi) { } void vp10_fill_token_costs(vp10_coeff_cost *c, +#if CONFIG_ANS + coeff_cdf_model (*cdf)[PLANE_TYPES], +#endif // CONFIG_ANS vp10_coeff_probs_model (*p)[PLANE_TYPES]) { int i, j, k, l; TX_SIZE t; @@ -163,11 +166,10 @@ void vp10_fill_token_costs(vp10_coeff_cost *c, for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) { #if CONFIG_ANS const vpx_prob *const tree_probs = p[t][i][j][k][l]; - vpx_prob pivot = tree_probs[PIVOT_NODE]; vp10_cost_tokens_ans((int *)c[t][i][j][k][0][l], tree_probs, - vp10_pareto8_token_probs[pivot - 1], 0); + &cdf[t][i][j][k][l], 0); vp10_cost_tokens_ans((int *)c[t][i][j][k][1][l], tree_probs, - vp10_pareto8_token_probs[pivot - 1], 1); + &cdf[t][i][j][k][l], 1); #else vpx_prob probs[ENTROPY_NODES]; vp10_model_to_full_probs(p[t][i][j][k][l], probs); @@ -392,7 +394,11 @@ void vp10_initialize_rd_consts(VP10_COMP *cpi) { #endif } if (cpi->oxcf.pass != 1) { - vp10_fill_token_costs(x->token_costs, cm->fc->coef_probs); + vp10_fill_token_costs(x->token_costs, +#if CONFIG_ANS + cm->fc->coef_cdfs, +#endif // CONFIG_ANS + cm->fc->coef_probs); if (cpi->sf.partition_search_type != VAR_BASED_PARTITION || cm->frame_type == KEY_FRAME) { diff --git a/vp10/encoder/rd.h b/vp10/encoder/rd.h index 7aad9ebf0..e63863874 100644 --- a/vp10/encoder/rd.h +++ b/vp10/encoder/rd.h @@ -13,6 +13,9 @@ #include +#if CONFIG_ANS +#include "vp10/common/ans.h" +#endif // CONFIG_ANS #include "vp10/common/blockd.h" #include "vp10/encoder/block.h" @@ -342,6 +345,9 @@ void vp10_update_rd_thresh_fact(const VP10_COMMON *const cm, int bsize, int best_mode_index); void vp10_fill_token_costs(vp10_coeff_cost *c, +#if CONFIG_ANS + coeff_cdf_model (*cdf)[PLANE_TYPES], +#endif // CONFIG_ANS vp10_coeff_probs_model (*p)[PLANE_TYPES]); static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh, diff --git a/vp10/encoder/tokenize.c b/vp10/encoder/tokenize.c index 5aed80c45..7847e9696 100644 --- a/vp10/encoder/tokenize.c +++ b/vp10/encoder/tokenize.c @@ -476,6 +476,7 @@ static void tokenize_b(int plane, int block, int blk_row, int blk_col, td->counts->eob_branch[tx_size][type][ref]; const uint8_t *const band = get_band_translate(tx_size); const int seg_eob = get_tx_eob(&cpi->common.seg, segment_id, tx_size); + int skip_eob = 0; int16_t token; EXTRABIT extra; pt = get_entropy_context(tx_size, pd->above_context + blk_col, @@ -485,22 +486,8 @@ static void tokenize_b(int plane, int block, int blk_row, int blk_col, c = 0; while (c < eob) { - int v = 0; - int skip_eob = 0; - v = qcoeff[scan[c]]; - - while (!v) { - add_token_no_extra(&t, coef_probs[band[c]][pt], ZERO_TOKEN, skip_eob, - counts[band[c]][pt]); - eob_branch[band[c]][pt] += !skip_eob; - - skip_eob = 1; - token_cache[scan[c]] = 0; - ++c; - pt = get_coef_context(nb, token_cache, c); - v = qcoeff[scan[c]]; - } - assert(c < eob); + const int v = qcoeff[scan[c]]; + eob_branch[band[c]][pt] += !skip_eob; vp10_get_token_extra(v, &token, &extra); @@ -509,11 +496,11 @@ static void tokenize_b(int plane, int block, int blk_row, int blk_col, &coef_cdfs[band[c]][pt], #endif // CONFIG_ANS extra, (uint8_t)token, (uint8_t)skip_eob, counts[band[c]][pt]); - eob_branch[band[c]][pt] += !skip_eob; token_cache[scan[c]] = vp10_pt_energy_class[token]; ++c; pt = get_coef_context(nb, token_cache, c); + skip_eob = (token == ZERO_TOKEN); } if (c < seg_eob) { add_token_no_extra(&t, coef_probs[band[c]][pt], EOB_TOKEN, 0, -- GitLab