Unverified commit 7a479a0c, authored by David Michael Barr and committed by GitHub

Inline often called and almost-trivial functions (#1124)

* Inline constrain and msb for cdef_filter_block
  This reduces its average time by around 42%.
* Inline round_shift for pred_directional and others
  This reduces its average time by around 10%.
* Inline sgrproj_sum_finish to its various callers
  It is at the lowest level of a hot call graph and almost trivial.
* Inline get_mv_rate in motion estimation
  It is almost trivial and called often.
parent a6aedef7
......@@ -109,6 +109,7 @@ fn cdef_find_dir<T: Pixel>(img: &PlaneSlice<'_, T>, var: &mut i32, coeff_shift:
best_dir as i32
}
#[inline(always)]
fn constrain(diff: i32, threshold: i32, damping: i32) -> i32 {
if threshold != 0 {
let shift = cmp::max(0, damping - msb(threshold));
......
......@@ -68,6 +68,7 @@ impl RestorationFilter {
}
}
#[inline(always)]
fn sgrproj_sum_finish(ssq: i32, sum: i32, n: i32, one_over_n: i32, s: i32, bdm8: usize) -> (i32, i32) {
let scaled_ssq = ssq + (1 << 2*bdm8 >> 1) >> 2*bdm8;
let scaled_sum = sum + (1 << bdm8 >> 1) >> bdm8;
......
......@@ -762,7 +762,9 @@ fn adjust_bo<T: Pixel>(bo: &BlockOffset, fi: &FrameInvariants<T>, blk_w: usize,
}
}
#[inline(always)]
fn get_mv_rate(a: MotionVector, b: MotionVector, allow_high_precision_mv: bool) -> u32 {
#[inline(always)]
fn diff_to_rate(diff: i16, allow_high_precision_mv: bool) -> u32 {
let d = if allow_high_precision_mv { diff } else { diff >> 1 };
if d == 0 {
......
......@@ -176,11 +176,13 @@ pub trait ILog: PrimInt {
// Blanket implementation: every type that satisfies `PrimInt` also
// implements `ILog`. The body is empty, so nothing from the trait is
// overridden here.
impl<T: PrimInt> ILog for T {}
/// Returns the zero-based index of the most significant set bit of `x`.
///
/// `x` must be strictly positive; this is checked in builds with debug
/// assertions enabled. For any positive `i32` the number of leading zeros
/// lies in `1..=31`, so XOR-ing it with 31 is the same as `31 - leading`.
#[inline(always)]
pub fn msb(x: i32) -> i32 {
  debug_assert!(x > 0);
  let leading = x.leading_zeros() as i32;
  leading ^ 31
}
/// Arithmetic right shift of `value` by `bit` positions with rounding.
///
/// Half of the divisor (`2^bit / 2`) is added before shifting, so the result
/// is `value / 2^bit` rounded to the nearest integer, with exact halves
/// rounded toward positive infinity. When `bit` is 0 the offset is 0 and
/// `value` is returned unchanged.
///
/// `bit` is expected to be small enough that neither `1 << bit` nor the
/// addition overflows an `i32`.
#[inline(always)]
pub fn round_shift(value: i32, bit: usize) -> i32 {
  // Shifts are left-associative: build 2^bit first, then halve it.
  let half: i32 = (1 << bit) >> 1;
  let biased = value + half;
  biased >> bit
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment