Commit 6bbbe316 authored by Alex Converse, committed by Pascal Massimino

ANS: Switch from PDFs to CDFs.

Make the RANS implementation operate on cumulative distribution
functions rather than individual probability distribution functions.
CDFs have shown themselves more flexible to work with.

Reduces decoding memory usage from scaling O(num_distributions *
symbol_resolution) to O(num_distributions).

No bitstream change. This is purely an implementation change.

Change-Id: I4e18d3a0a3d37a36a61487c3d778f9d088b0b374
parent 339ef0ce
......@@ -147,7 +147,7 @@ bool check_vpxbool(const PvVec &pv_vec, uint8_t *buf) {
}
const rans_sym rans_sym_tab[] = {
{70, 186}, {70, 116}, {100, 16}, {16, 0},
{16, 0}, {100, 16}, {70, 116}, {70, 186},
};
const int kDistinctSyms = sizeof(rans_sym_tab) / sizeof(rans_sym_tab[0]);
......@@ -170,13 +170,9 @@ std::vector<int> ans_encode_build_vals(const rans_sym *tab, int iters) {
void rans_build_dec_tab(const struct rans_sym sym_tab[],
rans_dec_lut dec_tab) {
int val = 0;
int i;
for (i = ans_p8_precision - 1; i >= 0; --i) {
dec_tab[i].val = val;
dec_tab[i].prob = sym_tab[val].prob;
dec_tab[i].cum_prob = sym_tab[val].cum_prob;
if (i == sym_tab[val].cum_prob) ++val;
dec_tab[0] = 0;
for (int i = 1; dec_tab[i - 1] < ans_p8_precision; ++i) {
dec_tab[i] = dec_tab[i - 1] + sym_tab[i - 1].prob;
}
}
......
......@@ -241,23 +241,16 @@ struct rans_dec_sym {
AnsP8 cum_prob; // not-inclusive
};
typedef struct rans_dec_sym rans_dec_lut[ans_p8_precision];
// This is now just a boring cdf. It starts with an explicit zero.
// TODO(aconverse): Remove starting zero.
typedef uint16_t rans_dec_lut[16];
static INLINE void rans_build_dec_tab(const AnsP8 token_probs[],
rans_dec_lut dec_tab) {
int val = 0;
int cum_prob = 0;
int sym_end = token_probs[0];
static INLINE void rans_build_cdf_from_pdf(const AnsP8 token_probs[],
rans_dec_lut cdf_tab) {
int i;
for (i = 0; i < 256; ++i) {
if (i == sym_end) {
++val;
cum_prob = sym_end;
sym_end += token_probs[val];
}
dec_tab[i].val = val;
dec_tab[i].prob = token_probs[val];
dec_tab[i].cum_prob = cum_prob;
cdf_tab[0] = 0;
for (i = 1; cdf_tab[i - 1] < ans_p8_precision; ++i) {
cdf_tab[i] = cdf_tab[i - 1] + token_probs[i - 1];
}
}
......@@ -275,20 +268,32 @@ static INLINE void rans_write(struct AnsCoder *ans,
(ans->state / p) * ans_p8_precision + ans->state % p + sym->cum_prob;
}
// Finds the symbol whose CDF interval [cdf[k], cdf[k + 1]) contains |rem| and
// stores its description in |out|:
//   out->val      - index k of the symbol,
//   out->prob     - width of the interval, cdf[k + 1] - cdf[k],
//   out->cum_prob - lower bound of the interval, cdf[k].
// The scan has no bounds check: it stops at the first entry strictly greater
// than |rem|, so the table must contain such an entry, and cdf[0] must not
// exceed |rem| (otherwise index -1 would be read).
static INLINE void fetch_sym(struct rans_dec_sym *out, const rans_dec_lut cdf,
                             AnsP8 rem) {
  int upper_idx;
  int lower;
  int upper;
  // TODO(skal): if critical, could be a binary search.
  // Or, better, an O(1) alias-table.
  for (upper_idx = 0; cdf[upper_idx] <= rem; ++upper_idx) {
    // Linear scan; all the work is done in the loop header.
  }
  lower = cdf[upper_idx - 1];
  upper = cdf[upper_idx];
  out->val = upper_idx - 1;
  out->prob = upper - lower;
  out->cum_prob = lower;
}
static INLINE int rans_read(struct AnsDecoder *ans,
const rans_dec_lut tab) {
unsigned rem;
unsigned quo;
int val;
struct rans_dec_sym sym;
if (ans->state < l_base && ans->buf_offset > 0) {
ans->state = ans->state * io_base + ans->buf[--ans->buf_offset];
}
quo = ans->state / ans_p8_precision;
rem = ans->state % ans_p8_precision;
val = tab[rem].val;
ans->state = quo * tab[rem].prob + rem - tab[rem].cum_prob;
return val;
fetch_sym(&sym, tab, rem);
ans->state = quo * sym.prob + rem - sym.cum_prob;
return sym.val;
}
static INLINE int ans_read_init(struct AnsDecoder *const ans,
......
......@@ -676,12 +676,12 @@ const vpx_prob vp10_pareto8_token_probs[COEFF_PROB_MODELS]
{247, 1, 1, 1, 1, 1, 1, 1, 1, 1},
};
void vp10_build_pareto8_dec_tab(
void vp10_build_pareto8_cdf_tab(
const vpx_prob token_probs[COEFF_PROB_MODELS][ENTROPY_TOKENS - 2],
rans_dec_lut dec_tab[COEFF_PROB_MODELS]) {
rans_dec_lut cdf_tab[COEFF_PROB_MODELS]) {
int p;
for (p = 0; p < COEFF_PROB_MODELS; ++p) {
rans_build_dec_tab(token_probs[p], dec_tab[p]);
rans_build_cdf_from_pdf(token_probs[p], cdf_tab[p]);
}
}
#endif // CONFIG_ANS
......
......@@ -170,9 +170,9 @@ extern const vpx_prob vp10_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES];
extern const vpx_prob
vp10_pareto8_token_probs[COEFF_PROB_MODELS][ENTROPY_TOKENS - 2];
void vp10_build_pareto8_dec_tab(
void vp10_build_pareto8_cdf_tab(
const vpx_prob token_probs[COEFF_PROB_MODELS][ENTROPY_TOKENS - 2],
rans_dec_lut dec_tab[COEFF_PROB_MODELS]);
rans_dec_lut cdf_tab[COEFF_PROB_MODELS]);
#endif // CONFIG_ANS
typedef vpx_prob vp10_coeff_probs_model[REF_TYPES][COEF_BANDS]
......
......@@ -324,6 +324,9 @@ typedef struct VP10Common {
// - this is intentionally not placed in FRAME_CONTEXT since it's reset upon
// each keyframe and not used afterwards
vpx_prob kf_y_prob[INTRA_MODES][INTRA_MODES][INTRA_MODES - 1];
#if CONFIG_ANS
rans_dec_lut token_tab[COEFF_PROB_MODELS];
#endif // CONFIG_ANS
} VP10_COMMON;
// TODO(hkuang): Don't need to lock the whole pool after implementing atomic
......
......@@ -1858,7 +1858,7 @@ static void decode_block(VP10Decoder *const pbi, MACROBLOCKD *const xd,
for (col = 0; col < max_blocks_wide; col += step)
predict_and_reconstruct_intra_block(xd,
#if CONFIG_ANS
pbi->token_tab, tok,
cm->token_tab, tok,
#else
r,
#endif
......@@ -1959,7 +1959,7 @@ static void decode_block(VP10Decoder *const pbi, MACROBLOCKD *const xd,
for (col = 0; col < max_blocks_wide; col += step)
eobtotal += reconstruct_inter_block(xd,
#if CONFIG_ANS
pbi->token_tab, tok,
cm->token_tab, tok,
#else
r,
#endif
......
......@@ -119,7 +119,7 @@ VP10Decoder *vp10_decoder_create(BufferPool *const pool) {
vp10_loop_restoration_precal();
#endif // CONFIG_LOOP_RESTORATION
#if CONFIG_ANS
vp10_build_pareto8_dec_tab(vp10_pareto8_token_probs, pbi->token_tab);
vp10_build_pareto8_cdf_tab(vp10_pareto8_token_probs, cm->token_tab);
#endif // CONFIG_ANS
cm->error.setjmp = 0;
......
......@@ -89,9 +89,6 @@ typedef struct VP10Decoder {
int inv_tile_order;
int need_resync; // wait for key/intra-only frame.
int hold_ref_buf; // hold the reference buffer.
#if CONFIG_ANS
rans_dec_lut token_tab[COEFF_PROB_MODELS];
#endif // CONFIG_ANS
} VP10Decoder;
int vp10_receive_compressed_data(struct VP10Decoder *pbi,
......
......@@ -634,6 +634,7 @@ static void pack_mb_tokens(vpx_writer *w,
// This function serializes the tokens backwards both in token order and
// bit order in each token.
static void pack_mb_tokens_ans(struct AnsCoder *const ans,
rans_dec_lut token_tab[COEFF_PROB_MODELS],
const TOKENEXTRA *const start,
const TOKENEXTRA *const stop,
vpx_bit_depth_t bit_depth) {
......@@ -676,14 +677,10 @@ static void pack_mb_tokens_ans(struct AnsCoder *const ans,
{
struct rans_sym s;
int j;
const vpx_prob *token_probs =
vp10_pareto8_token_probs[p->context_tree[PIVOT_NODE] - 1];
s.cum_prob = 0;
for (j = ONE_TOKEN; j < t; ++j) {
s.cum_prob += token_probs[j - ONE_TOKEN];
}
s.prob = token_probs[t - ONE_TOKEN];
const rans_dec_lut *token_cdf =
&token_tab[p->context_tree[PIVOT_NODE] - 1];
s.cum_prob = (*token_cdf)[t - ONE_TOKEN];
s.prob = (*token_cdf)[t - ONE_TOKEN + 1] - s.cum_prob;
rans_write(ans, &s);
}
}
......@@ -2200,7 +2197,8 @@ static size_t encode_tiles(VP10_COMP *cpi, uint8_t *data_ptr,
NULL, NULL);
vpx_stop_encode(&mode_bc);
ans_write_init(&token_ans, mode_data_start + mode_bc.pos);
pack_mb_tokens_ans(&token_ans, tok, tok_end, cm->bit_depth);
pack_mb_tokens_ans(&token_ans, cm->token_tab, tok, tok_end,
cm->bit_depth);
token_section_size = ans_write_end(&token_ans);
if (put_tile_size) {
// size of this tile
......
......@@ -1986,6 +1986,9 @@ VP10_COMP *vp10_create_compressor(VP10EncoderConfig *oxcf,
#if CONFIG_LOOP_RESTORATION
vp10_loop_restoration_precal();
#endif // CONFIG_LOOP_RESTORATION
#if CONFIG_ANS
vp10_build_pareto8_cdf_tab(vp10_pareto8_token_probs, cm->token_tab);
#endif // CONFIG_ANS
cm->error.setjmp = 0;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment