From 6db4fd68439319df6389d0deaac327dafc6d3c2f Mon Sep 17 00:00:00 2001
From: rzumer <rzumer@tebako.net>
Date: Wed, 1 Aug 2018 19:54:47 -0400
Subject: [PATCH] Add optional comparative benchmarks

Run with `cargo bench --features=comparative_bench`.
Applies to the intra prediction benchmarks and disables inlining of the
native predictors while the feature is enabled.
Also refactor benchmarks and reduce code duplication.
---
 Cargo.toml                     |   1 +
 README.md                      |  10 +
 benches/bench.rs               | 364 ++-------------------------------
 benches/comparative/mod.rs     |  30 +++
 benches/comparative/predict.rs | 124 +++++++++++
 benches/predict.rs             | 102 +++++++++
 src/partition.rs               |   6 +-
 src/predict.rs                 |  28 ++-
 8 files changed, 309 insertions(+), 356 deletions(-)
 mode change 100644 => 100755 Cargo.toml
 mode change 100644 => 100755 benches/bench.rs
 create mode 100755 benches/comparative/mod.rs
 create mode 100755 benches/comparative/predict.rs
 create mode 100755 benches/predict.rs
 mode change 100644 => 100755 src/partition.rs
 mode change 100644 => 100755 src/predict.rs

diff --git a/Cargo.toml b/Cargo.toml
old mode 100644
new mode 100755
index 1c48de10..f65d26bd
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -8,6 +8,7 @@ include = ["/src/**", "/aom_build/**", "/Cargo.toml"]
 [features]
 repl = ["rustyline"]
 decode_test = ["bindgen"]
+comparative_bench = []
 
 [dependencies]
 bitstream-io = "0.6"
diff --git a/README.md b/README.md
index 2f0c8ec5..eb410ed7 100755
--- a/README.md
+++ b/README.md
@@ -104,6 +104,16 @@ Run encode-decode integration tests with:
 cargo test --release --features=decode_test -- --ignored
 ```
 
+Run regular benchmarks with:
+```
+cargo bench
+```
+
+Run comparative benchmarks with:
+```
+cargo bench --features=comparative_bench
+```
+
 # Getting in Touch
 
 Come chat with us on the IRC channel #daala on Freenode! If you don't have IRC set
diff --git a/benches/bench.rs b/benches/bench.rs
old mode 100644
new mode 100755
index c8f5bb71..8125e0a6
--- a/benches/bench.rs
+++ b/benches/bench.rs
@@ -9,341 +9,20 @@
 
 #[macro_use]
 extern crate bencher;
-extern crate libc;
 extern crate rand;
 extern crate rav1e;
 
-use bencher::*;
-use rand::{ChaChaRng, Rng, SeedableRng};
-use rav1e::predict::*;
-
-extern {
-  fn highbd_dc_predictor(
-    dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int, bh: libc::c_int,
-    above: *const u16, left: *const u16, bd: libc::c_int
-  );
-
-  fn highbd_h_predictor(
-    dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int, bh: libc::c_int,
-    above: *const u16, left: *const u16, bd: libc::c_int
-  );
-
-  fn highbd_v_predictor(
-    dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int, bh: libc::c_int,
-    above: *const u16, left: *const u16, bd: libc::c_int
-  );
-
-  fn highbd_paeth_predictor(
-    dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int, bh: libc::c_int,
-    above: *const u16, left: *const u16, bd: libc::c_int
-  );
-
-  fn highbd_smooth_predictor(
-    dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int, bh: libc::c_int,
-    above: *const u16, left: *const u16, bd: libc::c_int
-  );
-
-  fn highbd_smooth_h_predictor(
-    dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int, bh: libc::c_int,
-    above: *const u16, left: *const u16, bd: libc::c_int
-  );
-
-  fn highbd_smooth_v_predictor(
-    dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int, bh: libc::c_int,
-    above: *const u16, left: *const u16, bd: libc::c_int
-  );
-}
-
-#[inline(always)]
-fn pred_dc_4x4(
-  output: &mut [u16], stride: usize, above: &[u16], left: &[u16]
-) {
-  unsafe {
-    highbd_dc_predictor(
-      output.as_mut_ptr(),
-      stride as libc::ptrdiff_t,
-      4,
-      4,
-      above.as_ptr(),
-      left.as_ptr(),
-      8
-    );
-  }
-}
-
-#[inline(always)]
-fn pred_h_4x4(output: &mut [u16], stride: usize, above: &[u16], left: &[u16]) {
-  unsafe {
-    highbd_h_predictor(
-      output.as_mut_ptr(),
-      stride as libc::ptrdiff_t,
-      4,
-      4,
-      above.as_ptr(),
-      left.as_ptr(),
-      8
-    );
-  }
-}
-
-#[inline(always)]
-fn pred_v_4x4(output: &mut [u16], stride: usize, above: &[u16], left: &[u16]) {
-  unsafe {
-    highbd_v_predictor(
-      output.as_mut_ptr(),
-      stride as libc::ptrdiff_t,
-      4,
-      4,
-      above.as_ptr(),
-      left.as_ptr(),
-      8
-    );
-  }
-}
-
-#[inline(always)]
-fn pred_paeth_4x4(
-  output: &mut [u16], stride: usize, above: &[u16], left: &[u16]
-) {
-  unsafe {
-    highbd_paeth_predictor(
-      output.as_mut_ptr(),
-      stride as libc::ptrdiff_t,
-      4,
-      4,
-      above.as_ptr(),
-      left.as_ptr(),
-      8
-    );
-  }
-}
-
-#[inline(always)]
-fn pred_smooth_4x4(
-  output: &mut [u16], stride: usize, above: &[u16], left: &[u16]
-) {
-  unsafe {
-    highbd_smooth_predictor(
-      output.as_mut_ptr(),
-      stride as libc::ptrdiff_t,
-      4,
-      4,
-      above.as_ptr(),
-      left.as_ptr(),
-      8
-    );
-  }
-}
-
-#[inline(always)]
-fn pred_smooth_h_4x4(
-  output: &mut [u16], stride: usize, above: &[u16], left: &[u16]
-) {
-  unsafe {
-    highbd_smooth_h_predictor(
-      output.as_mut_ptr(),
-      stride as libc::ptrdiff_t,
-      4,
-      4,
-      above.as_ptr(),
-      left.as_ptr(),
-      8
-    );
-  }
-}
-
-#[inline(always)]
-fn pred_smooth_v_4x4(
-  output: &mut [u16], stride: usize, above: &[u16], left: &[u16]
-) {
-  unsafe {
-    highbd_smooth_v_predictor(
-      output.as_mut_ptr(),
-      stride as libc::ptrdiff_t,
-      4,
-      4,
-      above.as_ptr(),
-      left.as_ptr(),
-      8
-    );
-  }
-}
-
-const MAX_ITER: usize = 50000;
-
-fn setup_pred(ra: &mut ChaChaRng) -> (Vec<u16>, Vec<u16>, Vec<u16>) {
-  let output = vec![0u16; 32 * 32];
-  let above: Vec<u16> = (0..32).map(|_| ra.gen()).collect();
-  let left: Vec<u16> = (0..32).map(|_| ra.gen()).collect();
-
-  (above, left, output)
-}
-
-fn intra_dc_pred_native(b: &mut Bencher) {
-  let mut ra = ChaChaRng::from_seed([0; 32]);
-  let (above, left, mut output) = setup_pred(&mut ra);
-
-  b.iter(|| {
-    for _ in 0..MAX_ITER {
-      Block4x4::pred_dc(&mut output, 32, &above[..4], &left[..4]);
-    }
-  })
-}
-
-fn intra_dc_pred_aom(b: &mut Bencher) {
-  let mut ra = ChaChaRng::from_seed([0; 32]);
-  let (above, left, mut output) = setup_pred(&mut ra);
-
-  b.iter(|| {
-    for _ in 0..MAX_ITER {
-      pred_dc_4x4(&mut output, 32, &above[..4], &left[..4]);
-    }
-  })
-}
-
-fn intra_h_pred_native(b: &mut Bencher) {
-  let mut ra = ChaChaRng::from_seed([0; 32]);
-  let (_above, left, mut output) = setup_pred(&mut ra);
-
-  b.iter(|| {
-    for _ in 0..MAX_ITER {
-      Block4x4::pred_h(&mut output, 32, &left[..4]);
-    }
-  })
-}
-
-fn intra_h_pred_aom(b: &mut Bencher) {
-  let mut ra = ChaChaRng::from_seed([0; 32]);
-  let (above, left, mut output) = setup_pred(&mut ra);
-
-  b.iter(|| {
-    for _ in 0..MAX_ITER {
-      pred_h_4x4(&mut output, 32, &above[..4], &left[..4]);
-    }
-  })
-}
-
-fn intra_v_pred_native(b: &mut Bencher) {
-  let mut ra = ChaChaRng::from_seed([0; 32]);
-  let (above, _left, mut output) = setup_pred(&mut ra);
-
-  b.iter(|| {
-    for _ in 0..MAX_ITER {
-      Block4x4::pred_v(&mut output, 32, &above[..4]);
-    }
-  })
-}
-
-fn intra_v_pred_aom(b: &mut Bencher) {
-  let mut ra = ChaChaRng::from_seed([0; 32]);
-  let (above, left, mut output) = setup_pred(&mut ra);
-
-  b.iter(|| {
-    for _ in 0..MAX_ITER {
-      pred_v_4x4(&mut output, 32, &above[..4], &left[..4]);
-    }
-  })
-}
-
-fn intra_paeth_pred_native(b: &mut Bencher) {
-  let mut ra = ChaChaRng::from_seed([0; 32]);
-  let (above, left, mut output) = setup_pred(&mut ra);
-  let above_left = unsafe { *above.as_ptr().offset(-1) };
-
-  b.iter(|| {
-    for _ in 0..MAX_ITER {
-      Block4x4::pred_paeth(
-        &mut output,
-        32,
-        &above[..4],
-        &left[..4],
-        above_left
-      );
-    }
-  })
-}
-
-fn intra_paeth_pred_aom(b: &mut Bencher) {
-  let mut ra = ChaChaRng::from_seed([0; 32]);
-  let (above, left, mut output) = setup_pred(&mut ra);
-
-  b.iter(|| {
-    for _ in 0..MAX_ITER {
-      pred_paeth_4x4(&mut output, 32, &above[..4], &left[..4]);
-    }
-  })
-}
-
-fn intra_smooth_pred_native(b: &mut Bencher) {
-  let mut ra = ChaChaRng::from_seed([0; 32]);
-  let (above, left, mut output) = setup_pred(&mut ra);
-
-  b.iter(|| {
-    for _ in 0..MAX_ITER {
-      Block4x4::pred_smooth(&mut output, 32, &above[..4], &left[..4], 8);
-    }
-  })
-}
-
-fn intra_smooth_pred_aom(b: &mut Bencher) {
-  let mut ra = ChaChaRng::from_seed([0; 32]);
-  let (above, left, mut output) = setup_pred(&mut ra);
-
-  b.iter(|| {
-    for _ in 0..MAX_ITER {
-      pred_smooth_4x4(&mut output, 32, &above[..4], &left[..4]);
-    }
-  })
-}
-
-fn intra_smooth_h_pred_native(b: &mut Bencher) {
-  let mut ra = ChaChaRng::from_seed([0; 32]);
-  let (above, left, mut output) = setup_pred(&mut ra);
-
-  b.iter(|| {
-    for _ in 0..MAX_ITER {
-      Block4x4::pred_smooth_h(&mut output, 32, &above[..4], &left[..4], 8);
-    }
-  })
-}
-
-fn intra_smooth_h_pred_aom(b: &mut Bencher) {
-  let mut ra = ChaChaRng::from_seed([0; 32]);
-  let (above, left, mut output) = setup_pred(&mut ra);
-
-  b.iter(|| {
-    for _ in 0..MAX_ITER {
-      pred_smooth_h_4x4(&mut output, 32, &above[..4], &left[..4]);
-    }
-  })
-}
-
-fn intra_smooth_v_pred_native(b: &mut Bencher) {
-  let mut ra = ChaChaRng::from_seed([0; 32]);
-  let (above, left, mut output) = setup_pred(&mut ra);
-
-  b.iter(|| {
-    for _ in 0..MAX_ITER {
-      Block4x4::pred_smooth_v(&mut output, 32, &above[..4], &left[..4], 8);
-    }
-  })
-}
-
-fn intra_smooth_v_pred_aom(b: &mut Bencher) {
-  let mut ra = ChaChaRng::from_seed([0; 32]);
-  let (above, left, mut output) = setup_pred(&mut ra);
-
-  b.iter(|| {
-    for _ in 0..MAX_ITER {
-      pred_smooth_v_4x4(&mut output, 32, &above[..4], &left[..4]);
-    }
-  })
-}
+mod predict;
 
+use bencher::*;
+use rav1e::*;
 use rav1e::context::*;
 use rav1e::ec;
 use rav1e::partition::*;
-use rav1e::*;
+use rav1e::predict::*;
+
+#[cfg(feature = "comparative_bench")]
+mod comparative;
 
 struct WriteB {
   tx_size: TxSize,
@@ -423,21 +102,18 @@ fn write_b_bench(b: &mut Bencher, tx_size: TxSize, qindex: usize) {
 }
 
 benchmark_group!(
-  intra,
-  intra_dc_pred_native,
-  intra_dc_pred_aom,
-  intra_h_pred_native,
-  intra_h_pred_aom,
-  intra_v_pred_native,
-  intra_v_pred_aom,
-  intra_paeth_pred_native,
-  intra_paeth_pred_aom,
-  intra_smooth_pred_native,
-  intra_smooth_pred_aom,
-  intra_smooth_h_pred_native,
-  intra_smooth_h_pred_aom,
-  intra_smooth_v_pred_native,
-  intra_smooth_v_pred_aom
+  intra_prediction,
+  predict::intra_dc_4x4,
+  predict::intra_h_4x4,
+  predict::intra_v_4x4,
+  predict::intra_paeth_4x4,
+  predict::intra_smooth_4x4,
+  predict::intra_smooth_h_4x4,
+  predict::intra_smooth_v_4x4
 );
 
-benchmark_main!(intra, write_b);
+#[cfg(feature = "comparative_bench")]
+benchmark_main!(comparative::intra_prediction);
+// Without the feature, the regular groups defined in this file are run instead.
+#[cfg(not(feature = "comparative_bench"))]
+benchmark_main!(write_b, intra_prediction);
diff --git a/benches/comparative/mod.rs b/benches/comparative/mod.rs
new file mode 100755
index 00000000..691e4ddb
--- /dev/null
+++ b/benches/comparative/mod.rs
@@ -0,0 +1,30 @@
+// Copyright (c) 2017-2018, The rav1e contributors. All rights reserved
+//
+// This source code is subject to the terms of the BSD 2 Clause License and
+// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+// was not distributed with this source code in the LICENSE file, you can
+// obtain it at www.aomedia.org/license/software. If the Alliance for Open
+// Media Patent License 1.0 was not distributed with this source code in the
+// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+
+extern crate libc;
+
+mod predict;
+// Benchmark group listing every native 4x4 intra predictor benchmark next to its `_aom` variant.
+benchmark_group!(
+  intra_prediction,
+  predict::intra_dc_4x4_native,
+  predict::intra_dc_4x4_aom,
+  predict::intra_h_4x4_native,
+  predict::intra_h_4x4_aom,
+  predict::intra_v_4x4_native,
+  predict::intra_v_4x4_aom,
+  predict::intra_paeth_4x4_native,
+  predict::intra_paeth_4x4_aom,
+  predict::intra_smooth_4x4_native,
+  predict::intra_smooth_4x4_aom,
+  predict::intra_smooth_h_4x4_native,
+  predict::intra_smooth_h_4x4_aom,
+  predict::intra_smooth_v_4x4_native,
+  predict::intra_smooth_v_4x4_aom
+);
diff --git a/benches/comparative/predict.rs b/benches/comparative/predict.rs
new file mode 100755
index 00000000..f2d76589
--- /dev/null
+++ b/benches/comparative/predict.rs
@@ -0,0 +1,124 @@
+// Copyright (c) 2017-2018, The rav1e contributors. All rights reserved
+//
+// This source code is subject to the terms of the BSD 2 Clause License and
+// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+// was not distributed with this source code in the LICENSE file, you can
+// obtain it at www.aomedia.org/license/software. If the Alliance for Open
+// Media Patent License 1.0 was not distributed with this source code in the
+// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+
+use bencher::*;
+use comparative::libc;
+use predict as predict_native;
+use predict::*;
+use rand::{ChaChaRng, SeedableRng};
+// Foreign predictors resolved at link time; all share the signature taken by `predict_intra_4x4_aom`.
+extern "C" {
+  fn highbd_dc_predictor(
+    dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int, bh: libc::c_int,
+    above: *const u16, left: *const u16, bd: libc::c_int
+  );
+
+  fn highbd_h_predictor(
+    dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int, bh: libc::c_int,
+    above: *const u16, left: *const u16, bd: libc::c_int
+  );
+
+  fn highbd_v_predictor(
+    dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int, bh: libc::c_int,
+    above: *const u16, left: *const u16, bd: libc::c_int
+  );
+
+  fn highbd_paeth_predictor(
+    dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int, bh: libc::c_int,
+    above: *const u16, left: *const u16, bd: libc::c_int
+  );
+
+  fn highbd_smooth_predictor(
+    dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int, bh: libc::c_int,
+    above: *const u16, left: *const u16, bd: libc::c_int
+  );
+
+  fn highbd_smooth_h_predictor(
+    dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int, bh: libc::c_int,
+    above: *const u16, left: *const u16, bd: libc::c_int
+  );
+
+  fn highbd_smooth_v_predictor(
+    dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int, bh: libc::c_int,
+    above: *const u16, left: *const u16, bd: libc::c_int
+  );
+}
+
+/// Benchmarks a foreign predictor on a 4x4 block at bit depth 8, MAX_ITER calls per timed iteration.
+fn predict_intra_4x4_aom(
+  b: &mut Bencher,
+  predictor: unsafe extern "C" fn(*mut u16, libc::ptrdiff_t, libc::c_int, libc::c_int, *const u16, *const u16, libc::c_int)
+) {
+  let (mut block, above_context, left_context) = generate_block(&mut ChaChaRng::from_seed([0; 32]));
+  let stride = BLOCK_SIZE.width() as libc::ptrdiff_t;
+  // Start one sample in, so a predictor that reads above[-1] (top-left pixel, e.g. Paeth) stays in bounds.
+  let above = above_context[1..].as_ptr();
+  b.iter(|| {
+    for _ in 0..MAX_ITER {
+      // SAFETY: the buffers cover the 4x4 window at this stride; assumes the C side touches nothing beyond it.
+      unsafe { predictor(block.as_mut_ptr(), stride, 4, 4, above, left_context.as_ptr(), 8) };
+    }
+  })
+}
+
+/// Exposes the native DC benchmark under a `_native` name so the
+/// comparative group can list it beside the `_aom` variant.
+pub fn intra_dc_4x4_native(b: &mut Bencher) { predict_native::intra_dc_4x4(b) }
+
+/// Benchmarks the foreign `highbd_dc_predictor` through the shared
+/// `predict_intra_4x4_aom` driver.
+pub fn intra_dc_4x4_aom(b: &mut Bencher) { predict_intra_4x4_aom(b, highbd_dc_predictor) }
+
+/// Exposes the native horizontal benchmark under a `_native` name so the
+/// comparative group can list it beside the `_aom` variant.
+pub fn intra_h_4x4_native(b: &mut Bencher) { predict_native::intra_h_4x4(b) }
+
+/// Benchmarks the foreign `highbd_h_predictor` through the shared
+/// `predict_intra_4x4_aom` driver.
+pub fn intra_h_4x4_aom(b: &mut Bencher) { predict_intra_4x4_aom(b, highbd_h_predictor) }
+
+/// Exposes the native vertical benchmark under a `_native` name so the
+/// comparative group can list it beside the `_aom` variant.
+pub fn intra_v_4x4_native(b: &mut Bencher) { predict_native::intra_v_4x4(b) }
+
+/// Benchmarks the foreign `highbd_v_predictor` through the shared
+/// `predict_intra_4x4_aom` driver.
+pub fn intra_v_4x4_aom(b: &mut Bencher) { predict_intra_4x4_aom(b, highbd_v_predictor) }
+
+/// Exposes the native Paeth benchmark under a `_native` name so the
+/// comparative group can list it beside the `_aom` variant.
+pub fn intra_paeth_4x4_native(b: &mut Bencher) { predict_native::intra_paeth_4x4(b) }
+
+/// Benchmarks the foreign `highbd_paeth_predictor` through the shared
+/// `predict_intra_4x4_aom` driver.
+pub fn intra_paeth_4x4_aom(b: &mut Bencher) { predict_intra_4x4_aom(b, highbd_paeth_predictor) }
+
+/// Exposes the native smooth benchmark under a `_native` name so the
+/// comparative group can list it beside the `_aom` variant.
+pub fn intra_smooth_4x4_native(b: &mut Bencher) { predict_native::intra_smooth_4x4(b) }
+
+/// Benchmarks the foreign `highbd_smooth_predictor` through the shared
+/// `predict_intra_4x4_aom` driver.
+pub fn intra_smooth_4x4_aom(b: &mut Bencher) { predict_intra_4x4_aom(b, highbd_smooth_predictor) }
+
+/// Exposes the native smooth-horizontal benchmark under a `_native` name so
+/// the comparative group can list it beside the `_aom` variant.
+pub fn intra_smooth_h_4x4_native(b: &mut Bencher) { predict_native::intra_smooth_h_4x4(b) }
+
+/// Benchmarks the foreign `highbd_smooth_h_predictor` through the shared
+/// `predict_intra_4x4_aom` driver.
+pub fn intra_smooth_h_4x4_aom(b: &mut Bencher) { predict_intra_4x4_aom(b, highbd_smooth_h_predictor) }
+
+/// Exposes the native smooth-vertical benchmark under a `_native` name so
+/// the comparative group can list it beside the `_aom` variant.
+pub fn intra_smooth_v_4x4_native(b: &mut Bencher) { predict_native::intra_smooth_v_4x4(b) }
+
+/// Benchmarks the foreign `highbd_smooth_v_predictor` through the shared
+/// `predict_intra_4x4_aom` driver.
+pub fn intra_smooth_v_4x4_aom(b: &mut Bencher) { predict_intra_4x4_aom(b, highbd_smooth_v_predictor) }
diff --git a/benches/predict.rs b/benches/predict.rs
new file mode 100755
index 00000000..74c7c95f
--- /dev/null
+++ b/benches/predict.rs
@@ -0,0 +1,102 @@
+// Copyright (c) 2017-2018, The rav1e contributors. All rights reserved
+//
+// This source code is subject to the terms of the BSD 2 Clause License and
+// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+// was not distributed with this source code in the LICENSE file, you can
+// obtain it at www.aomedia.org/license/software. If the Alliance for Open
+// Media Patent License 1.0 was not distributed with this source code in the
+// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+
+use bencher::*;
+use rand::{ChaChaRng, Rng, SeedableRng};
+use rav1e::partition::BlockSize;
+use rav1e::predict::{Block4x4, Intra};
+
+pub const MAX_ITER: usize = 50000;
+pub const BLOCK_SIZE: BlockSize = BlockSize::BLOCK_32X32;
+
+/// Returns (zeroed BLOCK_SIZE buffer, random width-long row above, random height-long column to the left).
+pub fn generate_block(rng: &mut ChaChaRng) -> (Vec<u16>, Vec<u16>, Vec<u16>) {
+  let block = vec![0u16; BLOCK_SIZE.width() * BLOCK_SIZE.height()];
+  let above_context: Vec<u16> = (0..BLOCK_SIZE.width()).map(|_| rng.gen()).collect();
+  let left_context: Vec<u16> = (0..BLOCK_SIZE.height()).map(|_| rng.gen()).collect();
+  (block, above_context, left_context)
+}
+
+/// Benchmarks the native 4x4 DC intra predictor, MAX_ITER calls per timed iteration.
+pub fn intra_dc_4x4(b: &mut Bencher) {
+  let (mut block, above, left) = generate_block(&mut ChaChaRng::from_seed([0; 32]));
+  let stride = BLOCK_SIZE.width();
+  b.iter(|| {
+    for _ in 0..MAX_ITER {
+      Block4x4::pred_dc(&mut block, stride, &above[..4], &left[..4]);
+    }
+  })
+}
+
+/// Benchmarks the native 4x4 horizontal intra predictor, MAX_ITER calls per timed iteration.
+pub fn intra_h_4x4(b: &mut Bencher) {
+  let (mut block, _above, left) = generate_block(&mut ChaChaRng::from_seed([0; 32]));
+  let stride = BLOCK_SIZE.width();
+  b.iter(|| {
+    for _ in 0..MAX_ITER {
+      Block4x4::pred_h(&mut block, stride, &left[..4]);
+    }
+  })
+}
+
+/// Benchmarks the native 4x4 vertical intra predictor, MAX_ITER calls per timed iteration.
+pub fn intra_v_4x4(b: &mut Bencher) {
+  let (mut block, above, _left) = generate_block(&mut ChaChaRng::from_seed([0; 32]));
+  let stride = BLOCK_SIZE.width();
+  b.iter(|| {
+    for _ in 0..MAX_ITER {
+      Block4x4::pred_v(&mut block, stride, &above[..4]);
+    }
+  })
+}
+
+/// Benchmarks the native 4x4 Paeth intra predictor, MAX_ITER calls per timed iteration.
+pub fn intra_paeth_4x4(b: &mut Bencher) {
+  let (mut block, above, left) = generate_block(&mut ChaChaRng::from_seed([0; 32]));
+  // above[0] is the top-left sample and the row starts at above[1]; reading above[-1] would be out of bounds (UB).
+  let (above_left, above_row) = (above[0], &above[1..5]);
+  b.iter(|| {
+    for _ in 0..MAX_ITER {
+      Block4x4::pred_paeth(&mut block, BLOCK_SIZE.width(), above_row, &left[..4], above_left);
+    }
+  })
+}
+
+/// Benchmarks the native 4x4 smooth intra predictor, MAX_ITER calls per timed iteration.
+pub fn intra_smooth_4x4(b: &mut Bencher) {
+  let (mut block, above, left) = generate_block(&mut ChaChaRng::from_seed([0; 32]));
+  let stride = BLOCK_SIZE.width();
+  b.iter(|| {
+    for _ in 0..MAX_ITER {
+      Block4x4::pred_smooth(&mut block, stride, &above[..4], &left[..4]);
+    }
+  })
+}
+
+/// Benchmarks the native 4x4 smooth-horizontal intra predictor, MAX_ITER calls per timed iteration.
+pub fn intra_smooth_h_4x4(b: &mut Bencher) {
+  let (mut block, above, left) = generate_block(&mut ChaChaRng::from_seed([0; 32]));
+  let stride = BLOCK_SIZE.width();
+  b.iter(|| {
+    for _ in 0..MAX_ITER {
+      Block4x4::pred_smooth_h(&mut block, stride, &above[..4], &left[..4]);
+    }
+  })
+}
+
+/// Benchmarks the native 4x4 smooth-vertical intra predictor, MAX_ITER calls per timed iteration.
+pub fn intra_smooth_v_4x4(b: &mut Bencher) {
+  let (mut block, above, left) = generate_block(&mut ChaChaRng::from_seed([0; 32]));
+  let stride = BLOCK_SIZE.width();
+  b.iter(|| {
+    for _ in 0..MAX_ITER {
+      Block4x4::pred_smooth_v(&mut block, stride, &above[..4], &left[..4]);
+    }
+  })
+}
diff --git a/src/partition.rs b/src/partition.rs
old mode 100644
new mode 100755
index fbb88853..57016c15
--- a/src/partition.rs
+++ b/src/partition.rs
@@ -435,11 +435,11 @@ impl PredictionMode {
       PredictionMode::PAETH_PRED =>
         B::pred_paeth(slice, stride, above_slice, left_slice, above[0]),
       PredictionMode::SMOOTH_PRED =>
-        B::pred_smooth(slice, stride, above_slice, left_slice, 8),
+        B::pred_smooth(slice, stride, above_slice, left_slice),
       PredictionMode::SMOOTH_H_PRED =>
-        B::pred_smooth_h(slice, stride, above_slice, left_slice, 8),
+        B::pred_smooth_h(slice, stride, above_slice, left_slice),
       PredictionMode::SMOOTH_V_PRED =>
-        B::pred_smooth_v(slice, stride, above_slice, left_slice, 8),
+        B::pred_smooth_v(slice, stride, above_slice, left_slice),
       _ => unimplemented!()
     }
   }
diff --git a/src/predict.rs b/src/predict.rs
old mode 100644
new mode 100755
index ea6ac12e..6fc452e9
--- a/src/predict.rs
+++ b/src/predict.rs
@@ -178,6 +178,7 @@ impl Dim for Block32x32 {
 }
 
 pub trait Intra: Dim {
+  #[cfg_attr(feature = "comparative_bench", inline(never))]
   fn pred_dc(output: &mut [u16], stride: usize, above: &[u16], left: &[u16]) {
     let edges = left[..Self::H].iter().chain(above[..Self::W].iter());
     let len = (Self::W + Self::H) as u32;
@@ -191,6 +192,7 @@ pub trait Intra: Dim {
     }
   }
 
+  #[cfg_attr(feature = "comparative_bench", inline(never))]
   fn pred_dc_128(output: &mut [u16], stride: usize) {
     for y in 0..Self::H {
       for x in 0..Self::W {
@@ -199,6 +201,7 @@ pub trait Intra: Dim {
     }
   }
 
+  #[cfg_attr(feature = "comparative_bench", inline(never))]
   fn pred_dc_left(
     output: &mut [u16], stride: usize, above: &[u16], left: &[u16]
   ) {
@@ -215,6 +218,7 @@ pub trait Intra: Dim {
     }
   }
 
+  #[cfg_attr(feature = "comparative_bench", inline(never))]
   fn pred_dc_top(
     output: &mut [u16], stride: usize, above: &[u16], left: &[u16]
   ) {
@@ -231,6 +235,7 @@ pub trait Intra: Dim {
     }
   }
 
+  #[cfg_attr(feature = "comparative_bench", inline(never))]
   fn pred_h(output: &mut [u16], stride: usize, left: &[u16]) {
     for (line, l) in output.chunks_mut(stride).zip(left[..Self::H].iter()) {
       for v in &mut line[..Self::W] {
@@ -239,12 +244,14 @@ pub trait Intra: Dim {
     }
   }
 
+  #[cfg_attr(feature = "comparative_bench", inline(never))]
   fn pred_v(output: &mut [u16], stride: usize, above: &[u16]) {
     for line in output.chunks_mut(stride).take(Self::H) {
       line[..Self::W].clone_from_slice(&above[..Self::W])
     }
   }
 
+  #[cfg_attr(feature = "comparative_bench", inline(never))]
   fn pred_paeth(
     output: &mut [u16], stride: usize, above: &[u16], left: &[u16],
     above_left: u16
@@ -275,8 +282,9 @@ pub trait Intra: Dim {
     }
   }
 
+  #[cfg_attr(feature = "comparative_bench", inline(never))]
   fn pred_smooth(
-    output: &mut [u16], stride: usize, above: &[u16], left: &[u16], _bd: u8
+    output: &mut [u16], stride: usize, above: &[u16], left: &[u16]
   ) {
     let below_pred = left[Self::H - 1]; // estimated by bottom-left pixel
     let right_pred = above[Self::W - 1]; // estimated by top-right pixel
@@ -325,8 +333,9 @@ pub trait Intra: Dim {
     }
   }
 
+  #[cfg_attr(feature = "comparative_bench", inline(never))]
   fn pred_smooth_h(
-    output: &mut [u16], stride: usize, above: &[u16], left: &[u16], _bd: u8
+    output: &mut [u16], stride: usize, above: &[u16], left: &[u16]
   ) {
     let right_pred = above[Self::W - 1]; // estimated by top-right pixel
     let sm_weights = &sm_weight_arrays[Self::W..];
@@ -361,8 +370,9 @@ pub trait Intra: Dim {
     }
   }
 
+  #[cfg_attr(feature = "comparative_bench", inline(never))]
   fn pred_smooth_v(
-    output: &mut [u16], stride: usize, above: &[u16], left: &[u16], _bd: u8
+    output: &mut [u16], stride: usize, above: &[u16], left: &[u16]
   ) {
     let below_pred = left[Self::H - 1]; // estimated by bottom-left pixel
     let sm_weights = &sm_weight_arrays[Self::H..];
@@ -576,7 +586,7 @@ pub mod test {
     let (above, left, mut o1, mut o2) = setup_pred(ra);
 
     pred_smooth_4x4(&mut o1, 32, &above[..4], &left[..4]);
-    Block4x4::pred_smooth(&mut o2, 32, &above[..4], &left[..4], 8);
+    Block4x4::pred_smooth(&mut o2, 32, &above[..4], &left[..4]);
 
     (o1, o2)
   }
@@ -585,7 +595,7 @@ pub mod test {
     let (above, left, mut o1, mut o2) = setup_pred(ra);
 
     pred_smooth_h_4x4(&mut o1, 32, &above[..4], &left[..4]);
-    Block4x4::pred_smooth_h(&mut o2, 32, &above[..4], &left[..4], 8);
+    Block4x4::pred_smooth_h(&mut o2, 32, &above[..4], &left[..4]);
 
     (o1, o2)
   }
@@ -594,7 +604,7 @@ pub mod test {
     let (above, left, mut o1, mut o2) = setup_pred(ra);
 
     pred_smooth_v_4x4(&mut o1, 32, &above[..4], &left[..4]);
-    Block4x4::pred_smooth_v(&mut o2, 32, &above[..4], &left[..4], 8);
+    Block4x4::pred_smooth_v(&mut o2, 32, &above[..4], &left[..4]);
 
     (o1, o2)
   }
@@ -686,7 +696,7 @@ pub mod test {
       }
     }
 
-    Block4x4::pred_smooth(&mut o, 32, &above[..4], &left[..4], 12);
+    Block4x4::pred_smooth(&mut o, 32, &above[..4], &left[..4]);
 
     for l in o.chunks(32).take(4) {
       for v in l[..4].iter() {
@@ -694,7 +704,7 @@ pub mod test {
       }
     }
 
-    Block4x4::pred_smooth_h(&mut o, 32, &above[..4], &left[..4], 12);
+    Block4x4::pred_smooth_h(&mut o, 32, &above[..4], &left[..4]);
 
     for l in o.chunks(32).take(4) {
       for v in l[..4].iter() {
@@ -702,7 +712,7 @@ pub mod test {
       }
     }
 
-    Block4x4::pred_smooth_v(&mut o, 32, &above[..4], &left[..4], 12);
+    Block4x4::pred_smooth_v(&mut o, 32, &above[..4], &left[..4]);
 
     for l in o.chunks(32).take(4) {
       for v in l[..4].iter() {
-- 
GitLab