Commit eda17976 authored by Yaowu Xu's avatar Yaowu Xu

Replace divide with look-up

This commit replaces an integer divide with a table-lookup. It is
to improve decoding speed, and at the same time, to reduce possible
complications with a bug in AMD Family 12h processors:

"665 Integer Divide Instruction May Cause Unpredictable Behavior"

Change-Id: I678b707a538798a923850bac467e66e847e6def7
parent cf320213
......@@ -334,20 +334,6 @@ const vp9_tree_index vp9_switchable_interp_tree
-EIGHTTAP_SMOOTH, -EIGHTTAP_SHARP
};
#define COUNT_SAT 20
#define MAX_UPDATE_FACTOR 128
static int adapt_prob(vp9_prob pre_prob, const unsigned int ct[2]) {
return merge_probs(pre_prob, ct, COUNT_SAT, MAX_UPDATE_FACTOR);
}
static void adapt_probs(const vp9_tree_index *tree,
const vp9_prob *pre_probs, const unsigned int *counts,
vp9_prob *probs) {
vp9_tree_merge_probs(tree, pre_probs, counts, COUNT_SAT, MAX_UPDATE_FACTOR,
probs);
}
void vp9_adapt_mode_probs(VP9_COMMON *cm) {
int i, j;
FRAME_CONTEXT *fc = cm->fc;
......@@ -355,39 +341,41 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) {
const FRAME_COUNTS *counts = &cm->counts;
for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
fc->intra_inter_prob[i] = adapt_prob(pre_fc->intra_inter_prob[i],
counts->intra_inter[i]);
fc->intra_inter_prob[i] = mode_mv_merge_probs(pre_fc->intra_inter_prob[i],
counts->intra_inter[i]);
for (i = 0; i < COMP_INTER_CONTEXTS; i++)
fc->comp_inter_prob[i] = adapt_prob(pre_fc->comp_inter_prob[i],
counts->comp_inter[i]);
fc->comp_inter_prob[i] = mode_mv_merge_probs(pre_fc->comp_inter_prob[i],
counts->comp_inter[i]);
for (i = 0; i < REF_CONTEXTS; i++)
fc->comp_ref_prob[i] = adapt_prob(pre_fc->comp_ref_prob[i],
counts->comp_ref[i]);
fc->comp_ref_prob[i] = mode_mv_merge_probs(pre_fc->comp_ref_prob[i],
counts->comp_ref[i]);
for (i = 0; i < REF_CONTEXTS; i++)
for (j = 0; j < 2; j++)
fc->single_ref_prob[i][j] = adapt_prob(pre_fc->single_ref_prob[i][j],
counts->single_ref[i][j]);
fc->single_ref_prob[i][j] = mode_mv_merge_probs(
pre_fc->single_ref_prob[i][j], counts->single_ref[i][j]);
for (i = 0; i < INTER_MODE_CONTEXTS; i++)
adapt_probs(vp9_inter_mode_tree, pre_fc->inter_mode_probs[i],
vp9_tree_merge_probs(vp9_inter_mode_tree, pre_fc->inter_mode_probs[i],
counts->inter_mode[i], fc->inter_mode_probs[i]);
for (i = 0; i < BLOCK_SIZE_GROUPS; i++)
adapt_probs(vp9_intra_mode_tree, pre_fc->y_mode_prob[i],
vp9_tree_merge_probs(vp9_intra_mode_tree, pre_fc->y_mode_prob[i],
counts->y_mode[i], fc->y_mode_prob[i]);
for (i = 0; i < INTRA_MODES; ++i)
adapt_probs(vp9_intra_mode_tree, pre_fc->uv_mode_prob[i],
counts->uv_mode[i], fc->uv_mode_prob[i]);
vp9_tree_merge_probs(vp9_intra_mode_tree, pre_fc->uv_mode_prob[i],
counts->uv_mode[i], fc->uv_mode_prob[i]);
for (i = 0; i < PARTITION_CONTEXTS; i++)
adapt_probs(vp9_partition_tree, pre_fc->partition_prob[i],
counts->partition[i], fc->partition_prob[i]);
vp9_tree_merge_probs(vp9_partition_tree, pre_fc->partition_prob[i],
counts->partition[i], fc->partition_prob[i]);
if (cm->interp_filter == SWITCHABLE) {
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
adapt_probs(vp9_switchable_interp_tree, pre_fc->switchable_interp_prob[i],
counts->switchable_interp[i], fc->switchable_interp_prob[i]);
vp9_tree_merge_probs(vp9_switchable_interp_tree,
pre_fc->switchable_interp_prob[i],
counts->switchable_interp[i],
fc->switchable_interp_prob[i]);
}
if (cm->tx_mode == TX_MODE_SELECT) {
......@@ -399,23 +387,24 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) {
for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
tx_counts_to_branch_counts_8x8(counts->tx.p8x8[i], branch_ct_8x8p);
for (j = 0; j < TX_SIZES - 3; ++j)
fc->tx_probs.p8x8[i][j] = adapt_prob(pre_fc->tx_probs.p8x8[i][j],
branch_ct_8x8p[j]);
fc->tx_probs.p8x8[i][j] = mode_mv_merge_probs(
pre_fc->tx_probs.p8x8[i][j], branch_ct_8x8p[j]);
tx_counts_to_branch_counts_16x16(counts->tx.p16x16[i], branch_ct_16x16p);
for (j = 0; j < TX_SIZES - 2; ++j)
fc->tx_probs.p16x16[i][j] = adapt_prob(pre_fc->tx_probs.p16x16[i][j],
branch_ct_16x16p[j]);
fc->tx_probs.p16x16[i][j] = mode_mv_merge_probs(
pre_fc->tx_probs.p16x16[i][j], branch_ct_16x16p[j]);
tx_counts_to_branch_counts_32x32(counts->tx.p32x32[i], branch_ct_32x32p);
for (j = 0; j < TX_SIZES - 1; ++j)
fc->tx_probs.p32x32[i][j] = adapt_prob(pre_fc->tx_probs.p32x32[i][j],
branch_ct_32x32p[j]);
fc->tx_probs.p32x32[i][j] = mode_mv_merge_probs(
pre_fc->tx_probs.p32x32[i][j], branch_ct_32x32p[j]);
}
}
for (i = 0; i < SKIP_CONTEXTS; ++i)
fc->skip_probs[i] = adapt_prob(pre_fc->skip_probs[i], counts->skip[i]);
fc->skip_probs[i] = mode_mv_merge_probs(
pre_fc->skip_probs[i], counts->skip[i]);
}
static void set_default_lf_deltas(struct loopfilter *lf) {
......
......@@ -11,9 +11,6 @@
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_entropymv.h"
#define MV_COUNT_SAT 20
#define MV_MAX_UPDATE_FACTOR 128
// Integer pel reference mv threshold for use of high-precision 1/8 mv
#define COMPANDED_MVREF_THRESH 8
......@@ -183,16 +180,6 @@ void vp9_inc_mv(const MV *mv, nmv_context_counts *counts) {
}
}
static vp9_prob adapt_prob(vp9_prob prep, const unsigned int ct[2]) {
return merge_probs(prep, ct, MV_COUNT_SAT, MV_MAX_UPDATE_FACTOR);
}
static void adapt_probs(const vp9_tree_index *tree, const vp9_prob *pre_probs,
const unsigned int *counts, vp9_prob *probs) {
vp9_tree_merge_probs(tree, pre_probs, counts, MV_COUNT_SAT,
MV_MAX_UPDATE_FACTOR, probs);
}
void vp9_adapt_mv_probs(VP9_COMMON *cm, int allow_hp) {
int i, j;
......@@ -200,30 +187,32 @@ void vp9_adapt_mv_probs(VP9_COMMON *cm, int allow_hp) {
const nmv_context *pre_fc = &cm->frame_contexts[cm->frame_context_idx].nmvc;
const nmv_context_counts *counts = &cm->counts.mv;
adapt_probs(vp9_mv_joint_tree, pre_fc->joints, counts->joints, fc->joints);
vp9_tree_merge_probs(vp9_mv_joint_tree, pre_fc->joints, counts->joints,
fc->joints);
for (i = 0; i < 2; ++i) {
nmv_component *comp = &fc->comps[i];
const nmv_component *pre_comp = &pre_fc->comps[i];
const nmv_component_counts *c = &counts->comps[i];
comp->sign = adapt_prob(pre_comp->sign, c->sign);
adapt_probs(vp9_mv_class_tree, pre_comp->classes, c->classes,
comp->classes);
adapt_probs(vp9_mv_class0_tree, pre_comp->class0, c->class0, comp->class0);
comp->sign = mode_mv_merge_probs(pre_comp->sign, c->sign);
vp9_tree_merge_probs(vp9_mv_class_tree, pre_comp->classes, c->classes,
comp->classes);
vp9_tree_merge_probs(vp9_mv_class0_tree, pre_comp->class0, c->class0,
comp->class0);
for (j = 0; j < MV_OFFSET_BITS; ++j)
comp->bits[j] = adapt_prob(pre_comp->bits[j], c->bits[j]);
comp->bits[j] = mode_mv_merge_probs(pre_comp->bits[j], c->bits[j]);
for (j = 0; j < CLASS0_SIZE; ++j)
adapt_probs(vp9_mv_fp_tree, pre_comp->class0_fp[j], c->class0_fp[j],
comp->class0_fp[j]);
vp9_tree_merge_probs(vp9_mv_fp_tree, pre_comp->class0_fp[j],
c->class0_fp[j], comp->class0_fp[j]);
adapt_probs(vp9_mv_fp_tree, pre_comp->fp, c->fp, comp->fp);
vp9_tree_merge_probs(vp9_mv_fp_tree, pre_comp->fp, c->fp, comp->fp);
if (allow_hp) {
comp->class0_hp = adapt_prob(pre_comp->class0_hp, c->class0_hp);
comp->hp = adapt_prob(pre_comp->hp, c->hp);
comp->class0_hp = mode_mv_merge_probs(pre_comp->class0_hp, c->class0_hp);
comp->hp = mode_mv_merge_probs(pre_comp->hp, c->hp);
}
}
}
......
......@@ -29,33 +29,25 @@ const uint8_t vp9_norm[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
static unsigned int tree_merge_probs_impl(unsigned int i,
const vp9_tree_index *tree,
const vp9_prob *pre_probs,
const unsigned int *counts,
unsigned int count_sat,
unsigned int max_update,
vp9_prob *probs) {
const int l = tree[i];
const unsigned int left_count = (l <= 0)
? counts[-l]
: tree_merge_probs_impl(l, tree, pre_probs, counts,
count_sat, max_update, probs);
: tree_merge_probs_impl(l, tree, pre_probs, counts, probs);
const int r = tree[i + 1];
const unsigned int right_count = (r <= 0)
? counts[-r]
: tree_merge_probs_impl(r, tree, pre_probs, counts,
count_sat, max_update, probs);
: tree_merge_probs_impl(r, tree, pre_probs, counts, probs);
const unsigned int ct[2] = { left_count, right_count };
probs[i >> 1] = merge_probs(pre_probs[i >> 1], ct,
count_sat, max_update);
probs[i >> 1] = mode_mv_merge_probs(pre_probs[i >> 1], ct);
return left_count + right_count;
}
void vp9_tree_merge_probs(const vp9_tree_index *tree, const vp9_prob *pre_probs,
const unsigned int *counts, unsigned int count_sat,
unsigned int max_update_factor, vp9_prob *probs) {
tree_merge_probs_impl(0, tree, pre_probs, counts, count_sat,
max_update_factor, probs);
const unsigned int *counts, vp9_prob *probs) {
tree_merge_probs_impl(0, tree, pre_probs, counts, probs);
}
......@@ -33,6 +33,8 @@ typedef int8_t vp9_tree_index;
#define vp9_complement(x) (255 - x)
#define MODE_MV_COUNT_SAT 20
/* We build coding trees compactly in arrays.
Each node of the tree is a pair of vp9_tree_indices.
Array index often references a corresponding probability table.
......@@ -69,9 +71,28 @@ static INLINE vp9_prob merge_probs(vp9_prob pre_prob,
return weighted_prob(pre_prob, prob, factor);
}
// MODE_MV_MAX_UPDATE_FACTOR (128) * count / MODE_MV_COUNT_SAT;
static const int count_to_update_factor[MODE_MV_COUNT_SAT + 1] = {
0, 6, 12, 19, 25, 32, 38, 44, 51, 57, 64,
70, 76, 83, 89, 96, 102, 108, 115, 121, 128
};
static INLINE vp9_prob mode_mv_merge_probs(vp9_prob pre_prob,
const unsigned int ct[2]) {
const unsigned int den = ct[0] + ct[1];
if (den == 0) {
return pre_prob;
} else {
const unsigned int count = MIN(den, MODE_MV_COUNT_SAT);
const unsigned int factor = count_to_update_factor[count];
const vp9_prob prob =
clip_prob(((int64_t)(ct[0]) * 256 + (den >> 1)) / den);
return weighted_prob(pre_prob, prob, factor);
}
}
void vp9_tree_merge_probs(const vp9_tree_index *tree, const vp9_prob *pre_probs,
const unsigned int *counts, unsigned int count_sat,
unsigned int max_update_factor, vp9_prob *probs);
const unsigned int *counts, vp9_prob *probs);
DECLARE_ALIGNED(16, extern const uint8_t, vp9_norm[256]);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment