Commit 56310196 authored by James Zern

aom_dsp/get_prob: make clip_prob branchless

+ inline the function directly as there was only one consumer
(get_prob())

This is an attempt to reduce the number of branches to work around an AMD
bug. This change is mildly faster or neutral across x86-64 and Arm.

http://support.amd.com/TechDocs/44739_12h_Rev_Gd.pdf
665 Integer Divide Instruction May Cause Unpredictable Behavior

cherry-picked from libvpx:
7481edb33 vpx_dsp/get_prob: make clip_prob branchless

Change-Id: I433059c61ce43ec5058cc16ca590d186bfa8aab5
parent 19d78227
......@@ -53,13 +53,14 @@ typedef int8_t aom_tree_index;
// An aom_tree is an array of unspecified length whose elements are const
// aom_tree_index values.
typedef const aom_tree_index aom_tree[];
// Clamps p to the closed range [1, 255] and returns it as an aom_prob.
static INLINE aom_prob clip_prob(int p) {
  if (p > 255) return 255;  // saturate at the upper bound
  if (p < 1) return 1;      // zero and negative inputs map to the lower bound
  return p;
}
// Returns num / den scaled by 256, rounded to nearest, and saturated to the
// closed range [1, 255]. A zero denominator yields 128.
//
// The clamp is expressed without if/?: in the source on purpose: the change
// that introduced it aimed to reduce branches around the integer divide as a
// workaround for an AMD erratum ("665 Integer Divide Instruction May Cause
// Unpredictable Behavior").
static INLINE aom_prob get_prob(unsigned int num, unsigned int den) {
  if (den == 0) return 128u;
  {
    // Widen before scaling so num * 256 cannot overflow, and keep the
    // quotient in 64 bits: it can reach about 2^40 (large num, den == 1),
    // which must saturate to 255 rather than be truncated by a narrowing
    // conversion to int before the clamp.
    const uint64_t p = ((uint64_t)num * 256 + (den >> 1)) / den;
    // Equivalent to (p > 255) ? 255 : (p < 1) ? 1 : p.
    // saturate is all ones when p > 255 and zero otherwise; unsigned
    // arithmetic keeps this well defined for every possible quotient.
    const uint64_t saturate = (uint64_t)0 - (uint64_t)(p > 255);
    const uint64_t clipped_prob = (p | saturate | (uint64_t)(p == 0)) & 0xFF;
    return (aom_prob)clipped_prob;
  }
}
static INLINE aom_prob get_binary_prob(unsigned int n0, unsigned int n1) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment