diff --git a/test/vp10_ans_test.cc b/test/vp10_ans_test.cc index 441583ad1b09df30c88340ab4131b0cef1377ccc..363161d7a428aefa9ee041e5cbc53dd289f75e88 100644 --- a/test/vp10_ans_test.cc +++ b/test/vp10_ans_test.cc @@ -147,7 +147,7 @@ bool check_vpxbool(const PvVec &pv_vec, uint8_t *buf) { } const rans_sym rans_sym_tab[] = { - {70, 186}, {70, 116}, {100, 16}, {16, 0}, + {16, 0}, {100, 16}, {70, 116}, {70, 186}, }; const int kDistinctSyms = sizeof(rans_sym_tab) / sizeof(rans_sym_tab[0]); @@ -170,13 +170,9 @@ std::vector<int> ans_encode_build_vals(const rans_sym *tab, int iters) { void rans_build_dec_tab(const struct rans_sym sym_tab[], rans_dec_lut dec_tab) { - int val = 0; - int i; - for (i = ans_p8_precision - 1; i >= 0; --i) { - dec_tab[i].val = val; - dec_tab[i].prob = sym_tab[val].prob; - dec_tab[i].cum_prob = sym_tab[val].cum_prob; - if (i == sym_tab[val].cum_prob) ++val; + dec_tab[0] = 0; + for (int i = 1; dec_tab[i - 1] < ans_p8_precision; ++i) { + dec_tab[i] = dec_tab[i - 1] + sym_tab[i - 1].prob; } } diff --git a/vp10/common/ans.h b/vp10/common/ans.h index a1862f30d057273db5d2c3d87cab949db0676453..163a7a956afb4c9bdec0c803648d6500e2951e05 100644 --- a/vp10/common/ans.h +++ b/vp10/common/ans.h @@ -241,23 +241,16 @@ struct rans_dec_sym { AnsP8 cum_prob; // not-inclusive }; -typedef struct rans_dec_sym rans_dec_lut[ans_p8_precision]; +// This is now just a boring cdf. It starts with an explicit zero. +// TODO(aconverse): Remove starting zero.
+typedef uint16_t rans_dec_lut[16]; -static INLINE void rans_build_dec_tab(const AnsP8 token_probs[], - rans_dec_lut dec_tab) { - int val = 0; - int cum_prob = 0; - int sym_end = token_probs[0]; +static INLINE void rans_build_cdf_from_pdf(const AnsP8 token_probs[], + rans_dec_lut cdf_tab) { int i; - for (i = 0; i < 256; ++i) { - if (i == sym_end) { - ++val; - cum_prob = sym_end; - sym_end += token_probs[val]; - } - dec_tab[i].val = val; - dec_tab[i].prob = token_probs[val]; - dec_tab[i].cum_prob = cum_prob; + cdf_tab[0] = 0; + for (i = 1; cdf_tab[i - 1] < ans_p8_precision; ++i) { + cdf_tab[i] = cdf_tab[i - 1] + token_probs[i - 1]; } } @@ -275,20 +268,32 @@ static INLINE void rans_write(struct AnsCoder *ans, (ans->state / p) * ans_p8_precision + ans->state % p + sym->cum_prob; } +static INLINE void fetch_sym(struct rans_dec_sym *out, const rans_dec_lut cdf, + AnsP8 rem) { + int i = 0; + // TODO(skal): if critical, could be a binary search. + // Or, better, an O(1) alias-table. + while (rem >= cdf[i]) { + ++i; + } + out->val = i - 1; + out->prob = cdf[i] - cdf[i - 1]; + out->cum_prob = cdf[i - 1]; +} + static INLINE int rans_read(struct AnsDecoder *ans, const rans_dec_lut tab) { unsigned rem; unsigned quo; - int val; + struct rans_dec_sym sym; if (ans->state < l_base && ans->buf_offset > 0) { ans->state = ans->state * io_base + ans->buf[--ans->buf_offset]; } quo = ans->state / ans_p8_precision; rem = ans->state % ans_p8_precision; - val = tab[rem].val; - - ans->state = quo * tab[rem].prob + rem - tab[rem].cum_prob; - return val; + fetch_sym(&sym, tab, rem); + ans->state = quo * sym.prob + rem - sym.cum_prob; + return sym.val; } static INLINE int ans_read_init(struct AnsDecoder *const ans, diff --git a/vp10/common/entropy.c b/vp10/common/entropy.c index 8c7e27a9bc471ace02b2f4b271be494247621a31..df03224bfcfdf4d817a5c1c099ac3c852eefcd07 100644 --- a/vp10/common/entropy.c +++ b/vp10/common/entropy.c @@ -676,12 +676,12 @@ const vpx_prob 
vp10_pareto8_token_probs[COEFF_PROB_MODELS] {247, 1, 1, 1, 1, 1, 1, 1, 1, 1}, }; -void vp10_build_pareto8_dec_tab( +void vp10_build_pareto8_cdf_tab( const vpx_prob token_probs[COEFF_PROB_MODELS][ENTROPY_TOKENS - 2], - rans_dec_lut dec_tab[COEFF_PROB_MODELS]) { + rans_dec_lut cdf_tab[COEFF_PROB_MODELS]) { int p; for (p = 0; p < COEFF_PROB_MODELS; ++p) { - rans_build_dec_tab(token_probs[p], dec_tab[p]); + rans_build_cdf_from_pdf(token_probs[p], cdf_tab[p]); } } #endif // CONFIG_ANS diff --git a/vp10/common/entropy.h b/vp10/common/entropy.h index 4fa330e33ff530f3104d5dd714c6add4647ee322..8ea01be4b639f2bf547b8a9a3b2477673a970878 100644 --- a/vp10/common/entropy.h +++ b/vp10/common/entropy.h @@ -170,9 +170,9 @@ extern const vpx_prob vp10_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES]; extern const vpx_prob vp10_pareto8_token_probs[COEFF_PROB_MODELS][ENTROPY_TOKENS - 2]; -void vp10_build_pareto8_dec_tab( +void vp10_build_pareto8_cdf_tab( const vpx_prob token_probs[COEFF_PROB_MODELS][ENTROPY_TOKENS - 2], - rans_dec_lut dec_tab[COEFF_PROB_MODELS]); + rans_dec_lut cdf_tab[COEFF_PROB_MODELS]); #endif // CONFIG_ANS typedef vpx_prob vp10_coeff_probs_model[REF_TYPES][COEF_BANDS] diff --git a/vp10/common/onyxc_int.h b/vp10/common/onyxc_int.h index 02d7e1761ce7415f1fa012a84ddc25cc5431b7ae..bcc69f3bd8fa64aeb02bfadc9ddbc920395fd2ea 100644 --- a/vp10/common/onyxc_int.h +++ b/vp10/common/onyxc_int.h @@ -324,6 +324,9 @@ typedef struct VP10Common { // - this is intentionally not placed in FRAME_CONTEXT since it's reset upon // each keyframe and not used afterwards vpx_prob kf_y_prob[INTRA_MODES][INTRA_MODES][INTRA_MODES - 1]; +#if CONFIG_ANS + rans_dec_lut token_tab[COEFF_PROB_MODELS]; +#endif // CONFIG_ANS } VP10_COMMON; // TODO(hkuang): Don't need to lock the whole pool after implementing atomic diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c index 5c5c31caf81cf304cb19b57745da752a1fa2ea40..64ac3ccf3b5539a0b1c99f71a09a32200c5c7d83 100644 --- 
a/vp10/decoder/decodeframe.c +++ b/vp10/decoder/decodeframe.c @@ -1858,7 +1858,7 @@ static void decode_block(VP10Decoder *const pbi, MACROBLOCKD *const xd, for (col = 0; col < max_blocks_wide; col += step) predict_and_reconstruct_intra_block(xd, #if CONFIG_ANS - pbi->token_tab, tok, + cm->token_tab, tok, #else r, #endif @@ -1959,7 +1959,7 @@ static void decode_block(VP10Decoder *const pbi, MACROBLOCKD *const xd, for (col = 0; col < max_blocks_wide; col += step) eobtotal += reconstruct_inter_block(xd, #if CONFIG_ANS - pbi->token_tab, tok, + cm->token_tab, tok, #else r, #endif diff --git a/vp10/decoder/decoder.c b/vp10/decoder/decoder.c index 13a590a05dcf986bf5fd3bca9ac3d4495f2a3971..35c53df6d5902e8fa0f6d3d362902d9fd93b1d77 100644 --- a/vp10/decoder/decoder.c +++ b/vp10/decoder/decoder.c @@ -119,7 +119,7 @@ VP10Decoder *vp10_decoder_create(BufferPool *const pool) { vp10_loop_restoration_precal(); #endif // CONFIG_LOOP_RESTORATION #if CONFIG_ANS - vp10_build_pareto8_dec_tab(vp10_pareto8_token_probs, pbi->token_tab); + vp10_build_pareto8_cdf_tab(vp10_pareto8_token_probs, cm->token_tab); #endif // CONFIG_ANS cm->error.setjmp = 0; diff --git a/vp10/decoder/decoder.h b/vp10/decoder/decoder.h index e590d8be16e5db82c64c7b47061aaefcf0e11d5b..a69d05f07b0e3d8231e763f4887dfa1a3f546303 100644 --- a/vp10/decoder/decoder.h +++ b/vp10/decoder/decoder.h @@ -89,9 +89,6 @@ typedef struct VP10Decoder { int inv_tile_order; int need_resync; // wait for key/intra-only frame. int hold_ref_buf; // hold the reference buffer. 
-#if CONFIG_ANS - rans_dec_lut token_tab[COEFF_PROB_MODELS]; -#endif // CONFIG_ANS } VP10Decoder; int vp10_receive_compressed_data(struct VP10Decoder *pbi, diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c index fa6c2cbe7c6961d396803fe03c84c74e2a7a8d1f..2603b6b48659d9f91a3ea235d944a0edd8b9da72 100644 --- a/vp10/encoder/bitstream.c +++ b/vp10/encoder/bitstream.c @@ -634,6 +634,7 @@ static void pack_mb_tokens(vpx_writer *w, // This function serializes the tokens backwards both in token order and // bit order in each token. static void pack_mb_tokens_ans(struct AnsCoder *const ans, + rans_dec_lut token_tab[COEFF_PROB_MODELS], const TOKENEXTRA *const start, const TOKENEXTRA *const stop, vpx_bit_depth_t bit_depth) { @@ -676,14 +677,10 @@ static void pack_mb_tokens_ans(struct AnsCoder *const ans, { struct rans_sym s; - int j; - const vpx_prob *token_probs = - vp10_pareto8_token_probs[p->context_tree[PIVOT_NODE] - 1]; - s.cum_prob = 0; - for (j = ONE_TOKEN; j < t; ++j) { - s.cum_prob += token_probs[j - ONE_TOKEN]; - } - s.prob = token_probs[t - ONE_TOKEN]; + const rans_dec_lut *token_cdf = + &token_tab[p->context_tree[PIVOT_NODE] - 1]; + s.cum_prob = (*token_cdf)[t - ONE_TOKEN]; + s.prob = (*token_cdf)[t - ONE_TOKEN + 1] - s.cum_prob; rans_write(ans, &s); } } @@ -2200,7 +2197,8 @@ static size_t encode_tiles(VP10_COMP *cpi, uint8_t *data_ptr, NULL, NULL); vpx_stop_encode(&mode_bc); ans_write_init(&token_ans, mode_data_start + mode_bc.pos); - pack_mb_tokens_ans(&token_ans, tok, tok_end, cm->bit_depth); + pack_mb_tokens_ans(&token_ans, cm->token_tab, tok, tok_end, + cm->bit_depth); token_section_size = ans_write_end(&token_ans); if (put_tile_size) { // size of this tile diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c index fc9e2e9242a0eabf8c0760e7dfd69c2345998cdc..ac8d2770c9a68896d52c40ba139d5d160cbed0ef 100644 --- a/vp10/encoder/encoder.c +++ b/vp10/encoder/encoder.c @@ -1986,6 +1986,9 @@ VP10_COMP *vp10_create_compressor(VP10EncoderConfig *oxcf, 
#if CONFIG_LOOP_RESTORATION vp10_loop_restoration_precal(); #endif // CONFIG_LOOP_RESTORATION +#if CONFIG_ANS + vp10_build_pareto8_cdf_tab(vp10_pareto8_token_probs, cm->token_tab); +#endif // CONFIG_ANS cm->error.setjmp = 0;