Commit f6c807c5 authored by Timothy B. Terriberry, committed by Tim Terriberry

ec_smallmul: Convert CDFs to iCDFs.

Hoists the iCDF conversion outside of the daala code.
We directly store 32768 - cdf[i] in each cdf, to avoid having to
convert the whole array every time a symbol is coded.

This works with ec_multisymbol, new_tokenset, and ec_adapt.

Compared to Change-Id Idbbd3743e9189146cb519d5b984bdabd69e3f4c0,
this improves decoder runtimes by 1.15% at QP=55 and 2.64% at
QP=20.

The overall slowdown of ec_smallmul is now 0.12% at QP=55 and
0.44% at QP=20.

Encoder output should not change, and all streams should remain
decodable without decoder changes.

Change-Id: I06b8b75b667bb1bc4ddffc78f895e48a09f4c578
parent f8e87b46
......@@ -113,16 +113,7 @@ static INLINE int aom_daala_reader_has_error(daala_reader *r) {
static INLINE int daala_read_symbol(daala_reader *r, const aom_cdf_prob *cdf,
int nsymbs) {
int symb;
#if CONFIG_EC_SMALLMUL
{
aom_cdf_prob icdf[16];
int i;
for (i = 0; i < nsymbs; i++) icdf[i] = OD_ICDF(cdf[i]);
symb = od_ec_decode_cdf_q15(&r->ec, icdf, nsymbs);
}
#else
symb = od_ec_decode_cdf_q15(&r->ec, cdf, nsymbs);
#endif
#if CONFIG_BITSTREAM_DEBUG
{
......
......@@ -77,16 +77,7 @@ static INLINE void daala_write_symbol(daala_writer *w, int symb,
bitstream_queue_push(symb, cdf, nsymbs);
#endif
#if CONFIG_EC_SMALLMUL
{
aom_cdf_prob icdf[16];
int i;
for (i = 0; i < nsymbs; i++) icdf[i] = OD_ICDF(cdf[i]);
od_ec_encode_cdf_q15(&w->ec, symb, icdf, nsymbs);
}
#else
od_ec_encode_cdf_q15(&w->ec, symb, cdf, nsymbs);
#endif
}
#ifdef __cplusplus
......
......@@ -17,10 +17,6 @@
#include "aom_dsp/prob.h"
#if CONFIG_DAALA_EC
#include "aom_dsp/entcode.h"
#endif
const uint8_t aom_norm[256] = {
0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
......@@ -202,8 +198,9 @@ int tree_to_cdf(const aom_tree_index *tree, const aom_prob *probs,
/* Extract the cdf, index, path and length */
tree_node_extract(symb, 0, 0, cdf, index, path, len);
/* Convert to CDF */
cdf[0] = AOM_ICDF(cdf[0]);
for (i = 1; i < nsymbs; i++) {
cdf[i] = cdf[i - 1] + cdf[i];
cdf[i] = AOM_ICDF(AOM_ICDF(cdf[i - 1]) + cdf[i]);
}
// Store symbol count at the end of the CDF
#if CONFIG_EC_ADAPT
......
......@@ -20,6 +20,10 @@
#include "aom_ports/bitops.h"
#include "aom_ports/mem.h"
#if CONFIG_DAALA_EC
#include "aom_dsp/entcode.h"
#endif
#ifdef __cplusplus
extern "C" {
#endif
......@@ -36,6 +40,12 @@ typedef uint16_t aom_cdf_prob;
#define CDF_PROB_BITS 15
#define CDF_PROB_TOP (1 << CDF_PROB_BITS)
/* AOM_ICDF(x): conversion applied to a CDF value before it is stored in a
 * CDF table.
 * - With CONFIG_DAALA_EC, it is an alias for OD_ICDF, which is not defined
 *   here (presumably it yields the inverse CDF, 32768 - x, when
 *   CONFIG_EC_SMALLMUL is enabled -- TODO confirm in aom_dsp/entcode.h).
 * - Otherwise it is the identity and the value is stored unchanged. */
#if CONFIG_DAALA_EC
#define AOM_ICDF OD_ICDF
#else
#define AOM_ICDF(x) (x)
#endif
#define MAX_PROB 255
#define aom_prob_half ((aom_prob)128)
......@@ -152,13 +162,20 @@ static INLINE void update_cdf(aom_cdf_prob *cdf, int val, int nsymbs) {
int diff;
#if 1
const int tmp0 = 1 << rate2;
tmp = tmp0;
tmp = AOM_ICDF(tmp0);
diff = ((CDF_PROB_TOP - (nsymbs << rate2)) >> rate) << rate;
// Single loop (faster)
// Single loop (faster)
#if CONFIG_DAALA_EC && CONFIG_EC_SMALLMUL
for (i = 0; i < nsymbs - 1; ++i, tmp -= tmp0) {
tmp -= (i == val ? diff : 0);
cdf[i] += ((tmp - cdf[i]) >> rate);
}
#else
for (i = 0; i < nsymbs - 1; ++i, tmp += tmp0) {
tmp += (i == val ? diff : 0);
cdf[i] -= ((cdf[i] - tmp) >> rate);
}
#endif
#else
for (i = 0; i < nsymbs; ++i) {
tmp = (i + 1) << rate2;
......
This diff is collapsed.
This diff is collapsed.
......@@ -44,24 +44,29 @@ const aom_tree_index av1_mv_fp_tree[TREE_SIZE(MV_FP_SIZE)] = { -0, 2, -1,
static const nmv_context default_nmv_context = {
{ 32, 64, 96 }, // joints
#if CONFIG_EC_MULTISYMBOL
{ 4096, 11264, 19328, 32768, 0 }, // joint_cdf
{ AOM_ICDF(4096), AOM_ICDF(11264), AOM_ICDF(19328), AOM_ICDF(32768),
0 }, // joint_cdf
#endif
{ {
// Vertical component
128, // sign
{ 224, 144, 192, 168, 192, 176, 192, 198, 198, 245 }, // class
#if CONFIG_EC_MULTISYMBOL
{ 28672, 30976, 31858, 32320, 32551, 32656, 32740, 32757, 32762, 32767,
32768, 0 }, // class_cdf
{ AOM_ICDF(28672), AOM_ICDF(30976), AOM_ICDF(31858), AOM_ICDF(32320),
AOM_ICDF(32551), AOM_ICDF(32656), AOM_ICDF(32740), AOM_ICDF(32757),
AOM_ICDF(32762), AOM_ICDF(32767), AOM_ICDF(32768), 0 }, // class_cdf
#endif
{ 216 }, // class0
{ 136, 140, 148, 160, 176, 192, 224, 234, 234, 240 }, // bits
{ { 128, 128, 64 }, { 96, 112, 64 } }, // class0_fp
{ 64, 96, 64 }, // fp
#if CONFIG_EC_MULTISYMBOL
{ { 16384, 24576, 26624, 32768, 0 },
{ 12288, 21248, 24128, 32768, 0 } }, // class0_fp_cdf
{ 8192, 17408, 21248, 32768, 0 }, // fp_cdf
{ { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(26624), AOM_ICDF(32768),
0 },
{ AOM_ICDF(12288), AOM_ICDF(21248), AOM_ICDF(24128), AOM_ICDF(32768),
0 } }, // class0_fp_cdf
{ AOM_ICDF(8192), AOM_ICDF(17408), AOM_ICDF(21248), AOM_ICDF(32768),
0 }, // fp_cdf
#endif
160, // class0_hp bit
128, // hp
......@@ -71,17 +76,21 @@ static const nmv_context default_nmv_context = {
128, // sign
{ 216, 128, 176, 160, 176, 176, 192, 198, 198, 208 }, // class
#if CONFIG_EC_MULTISYMBOL
{ 28672, 30976, 31858, 32320, 32551, 32656, 32740, 32757, 32762, 32767,
32768, 0 }, // class_cdf
{ AOM_ICDF(28672), AOM_ICDF(30976), AOM_ICDF(31858), AOM_ICDF(32320),
AOM_ICDF(32551), AOM_ICDF(32656), AOM_ICDF(32740), AOM_ICDF(32757),
AOM_ICDF(32762), AOM_ICDF(32767), AOM_ICDF(32768), 0 }, // class_cdf
#endif
{ 208 }, // class0
{ 136, 140, 148, 160, 176, 192, 224, 234, 234, 240 }, // bits
{ { 128, 128, 64 }, { 96, 112, 64 } }, // class0_fp
{ 64, 96, 64 }, // fp
#if CONFIG_EC_MULTISYMBOL
{ { 16384, 24576, 26624, 32768, 0 },
{ 12288, 21248, 24128, 32768, 0 } }, // class0_fp_cdf
{ 8192, 17408, 21248, 32768, 0 }, // fp_cdf
{ { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(26624), AOM_ICDF(32768),
0 },
{ AOM_ICDF(12288), AOM_ICDF(21248), AOM_ICDF(24128), AOM_ICDF(32768),
0 } }, // class0_fp_cdf
{ AOM_ICDF(8192), AOM_ICDF(17408), AOM_ICDF(21248), AOM_ICDF(32768),
0 }, // fp_cdf
#endif
160, // class0_hp bit
128, // hp
......
......@@ -20,7 +20,7 @@
void aom_cdf_init_q15_1D(uint16_t *cdf, int nsyms, int cdf_size) {
int i;
for (i = 0; i < nsyms; i++)
cdf[i] = (i + 1)*CDF_PROB_TOP/nsyms;
cdf[i] = AOM_ICDF((i + 1)*CDF_PROB_TOP/nsyms);
#if CONFIG_EC_ADAPT
cdf[cdf_size - 1] = 0;
......@@ -31,7 +31,7 @@ void aom_cdf_init_q15_1D(uint16_t *cdf, int nsyms, int cdf_size) {
void aom_cdf_adapt_q15(int val, uint16_t *cdf, int n, int *count, int rate) {
int i;
*count = OD_MINI(*count + 1, 1 << rate);
OD_ASSERT(cdf[n - 1] == 32768);
OD_ASSERT(AOM_ICDF(cdf[n - 1]) == 32768);
if (*count >= 1 << rate) {
/* Steady-state adaptation based on a simple IIR with dyadic rate. */
for (i = 0; i < n; i++) {
......@@ -55,7 +55,7 @@ void aom_cdf_adapt_q15(int val, uint16_t *cdf, int n, int *count, int rate) {
stored in a lookup table indexed by n and rate to avoid the
arithmetic. */
tmp = 2 - (1<<rate) + i + (32767 + (1<<rate) - n)*(i >= val);
cdf[i] -= (cdf[i] - tmp) >> rate;
cdf[i] = AOM_ICDF(AOM_ICDF(cdf[i]) - ((AOM_ICDF(cdf[i]) - tmp) >> rate));
}
}
else {
......@@ -67,10 +67,11 @@ void aom_cdf_adapt_q15(int val, uint16_t *cdf, int n, int *count, int rate) {
for (i = 0; i < n; i++) {
int tmp;
tmp = (32768 - n)*(i >= val) + i + 1;
cdf[i] -= ((cdf[i] - tmp)*alpha) >> 15;
cdf[i] = AOM_ICDF(AOM_ICDF(cdf[i])
- (((AOM_ICDF(cdf[i]) - tmp)*alpha) >> 15));
}
}
OD_ASSERT(cdf[n - 1] == 32768);
OD_ASSERT(AOM_ICDF(cdf[n - 1]) == 32768);
}
/** Takes the base-2 log of E(x) in Q1.
......
This diff is collapsed.
......@@ -40,7 +40,7 @@ int aom_decode_cdf_adapt_q15_(aom_reader *r, uint16_t *cdf, int n,
int ft;
ft = cdf[n - 1];
for (i = 0; i < n; i++) {
cdf[i] = cdf[i]*32768/ft;
cdf[i] = AOM_ICDF(cdf[i]*32768/ft);
}
}
val = aom_read_cdf(r, cdf, n, ACCT_STR_NAME);
......
......@@ -42,7 +42,7 @@ void aom_encode_cdf_adapt_q15(aom_writer *w, int val, uint16_t *cdf, int n,
int ft;
ft = cdf[n - 1];
for (i = 0; i < n; i++) {
cdf[i] = cdf[i]*32768/ft;
cdf[i] = AOM_ICDF(cdf[i]*32768/ft);
}
}
aom_write_cdf(w, val, cdf, n);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment