Compare revisions

d2de1211 · 5ac63282 · 186efb07 · 8e6d7a09 · e06321d1 · 6bfa0aa8
--- a/.editorconfig
+++ b/.editorconfig
+root = true
+
+[*]
+charset = utf-8
+end_of_line = lf
+indent_size = 2
+indent_style = space
+# Some tools that work with text may not work correctly without the newline
+# control character at the end of the last line. This character helps them to
+# understand that the line is complete and can be considered as a line.
+insert_final_newline = true
+trim_trailing_whitespace = true
+
+[*.md]
+# Trailing whitespace may have a special meaning. For example, two spaces at the
+# end of a line mean a line break.
+trim_trailing_whitespace = false
+
+[*.py]
+indent_size = 4
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,4 @@ target
 Cargo.lock
 *.y4m
 *.ivf
+secret_key
--- a/.gitmodules
+++ b/.gitmodules
-[submodule "aom"]
-	path = aom_build/aom
-	url = https://gitlab.xiph.org/xiph/aom-rav1e.git
-	branch = rav1e_16b
--- a/.mailmap
+++ b/.mailmap
+Luca Barbato <lu_zero@gentoo.org> <luca.barbato@gmail.com>
+Frank Bossen <fbossen@gmail.com> <frank@bossentech.com>
+Yushin Cho <ycho@mozilla.com> <cho.yushin@gmail.com>
+Yushin Cho <ycho@mozilla.com> <ycho@users.noreply.github.com>
+Thomas Daede <tdaede@xiph.org> <daede003@umn.edu>
+Nathan E. Egge <negge@xiph.org> <negge@dgql.org>
+Josh Holmer <jholmer.in@gmail.com>
+Josh Holmer <jholmer.in@gmail.com> <jholmer@dminc.com>
+Monty Montgomery <monty@xiph.org> <xiphmont@gmail.com>
+Thomas Szymczak <11669680+tszymczak@users.noreply.github.com> Thomas Szymczak <you@example.com>
+Raphaël Zumer <rzumer@tebako.net>
+Raphaël Zumer <rzumer@tebako.net> <rzumer@gmail.com>
--- a/.travis.yml
+++ b/.travis.yml
 language: rust
+rust:
+  - 1.35.0
+env:
+  - RUST_BACKTRACE=1
 addons:
  apt:
-    packages:
-      - nasm
-
+    packages: binutils-dev libcurl4-openssl-dev zlib1g-dev libdw-dev libiberty-dev ninja-build
 before_install:
-    - wget -O cmake.sh https://cmake.org/files/v3.10/cmake-3.10.2-Linux-x86_64.sh
+    - wget -O cmake.sh https://cmake.org/files/v3.13/cmake-3.13.3-Linux-x86_64.sh
    - sudo sh cmake.sh --skip-license --exclude-subdir --prefix=/usr
    - sudo rm -fR /usr/local/cmake*
    - hash -r
    - which cmake
    - cmake --version
-    - wget https://www.nasm.us/pub/nasm/releasebuilds/2.13.03/nasm-2.13.03.tar.xz
-    - tar -xvf nasm-2.13.03.tar.xz
-    - cd nasm-2.13.03
-    - ./configure
-    - make
-    - sudo make install
+    - curl -L https://github.com/mozilla/sccache/releases/download/0.2.8/sccache-0.2.8-x86_64-unknown-linux-musl.tar.gz | tar xvz
+    - export PATH=$PATH:`pwd`/sccache-0.2.8-x86_64-unknown-linux-musl
+    - export RUSTC_WRAPPER=sccache
+    - export SCCACHE_CACHE_SIZE=500M
+    - export SCCACHE_DIR=~/.cache/sccache
+    - curl -L https://download.videolan.org/contrib/nasm/nasm-2.14.tar.gz | tar xvz
+    - cd nasm-2.14
+    - ./configure CC='sccache gcc' && make -j2 && sudo make install
    - nasm --version
-script:
-    - |
-        cargo build --verbose &&
-        cargo test --verbose &&
-        cargo test --verbose --release --features=decode_test -- --ignored &&
-        cargo bench --verbose &&
-        cargo doc --verbose
+    - cd ..
+    - curl -L https://github.com/SimonKagstrom/kcov/archive/v36.tar.gz | tar xvz
+    - cd kcov-36
+    - mkdir .build && cd .build
+    - cmake -GNinja -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache .. && ninja && sudo ninja install
+    - cd ../..
+    - git clone --depth 1 -b v1.0.0-errata1 https://aomedia.googlesource.com/aom
+    - cd aom
+    - rm -rf CMakeCache.txt CMakeFiles
+    - mkdir -p .build
+    - cd .build
+    - cmake -GNinja .. -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE=Release -DENABLE_TESTS=0 -DENABLE_DOCS=0 -DCONFIG_LOWBITDEPTH=1 -DCMAKE_INSTALL_PREFIX=/usr -DCONFIG_PIC=1
+    - ninja && sudo ninja install
+    - cd ../..
+
+cache:
+  directories:
+  - "$HOME/.cache/sccache"
+
+after_script:
+- sccache -s
+
+jobs:
+  include:
+      - name: "Build & Coveralls"
+        script:
+         - cargo install cargo-kcov
+         - kcov --version
+         - RUSTFLAGS="-C link-dead-code" cargo build --features=decode_test,quick_test --tests --verbose
+         - travis_wait cargo kcov -v --coveralls --no-clean-rebuild -- --verify --exclude-pattern=$HOME/.cargo,aom_build,.h,test
+      - name: "Tests"
+        script: cargo test --verbose --release --features=decode_test -- --ignored
+      - name: "Bench"
+        script: cargo bench --features=bench --verbose
+      - name: "Doc & Clippy (linter): verifying code quality"
+        script:
+         - cargo doc --verbose --no-deps
+         - rustup component add clippy
+         - cargo clippy --version
+         - cargo clippy -- -D warnings -A clippy::cast_lossless -A clippy::cast_ptr_alignment -A clippy::cognitive_complexity -A clippy::needless_range_loop -A clippy::too_many_arguments -A clippy::verbose_bit_mask -A clippy::unreadable_literal --verbose
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -2,48 +2,72 @@
 name = "rav1e"
 version = "0.1.0"
 authors = ["Thomas Daede <tdaede@xiph.org>"]
+edition = "2018"
 build = "build.rs"
-include = ["/src/**", "/aom_build/**", "/Cargo.toml"]
+include = ["/src/**", "/Cargo.toml", "/build.rs"]
+license = "BSD-2-Clause"
+description = "The fastest and safest AV1 encoder"
+repository = "https://github.com/xiph/rav1e/"
 autobenches = false
 autobins = false

 [features]
-repl = ["rustyline", "binaries"]
-comparative_bench = []
-decode_test = ["bindgen"]
-binaries = ["y4m", "clap"]
-default = ["binaries"]
+decode_test = ["aom-sys"]
+decode_test_dav1d = ["dav1d-sys"]
+binaries = ["ivf", "y4m", "clap", "scan_fmt", "serde_json"]
+default = ["binaries", "nasm", "signal_support"]
+nasm = ["nasm-rs"]
+signal_support = ["signal-hook"]
+dump_ivf = ["ivf"]
+quick_test = []
+desync_finder = []
+bench = []

 [dependencies]
+arg_enum_proc_macro = "0.1.1"
 bitstream-io = "0.8"
-clap = { version = "2", optional = true }
+clap = { version = "2", optional = true, default-features = false }
 libc = "0.2"
-rand = "0.5"
-rustyline = { version = "1", optional = true }
-y4m = { version = "0.3", optional = true }
+y4m = { version = "0.3.2", optional = true }
 backtrace = "0.3"
+syn = "^0.15.20"
+quote = "^0.6.10" # hack for proc-macro-hack
 num-traits = "0.2"
+num-derive = "0.2"
+paste = "0.1"
+serde = "1.0"
+serde_derive = "1.0"
+serde_json = { version = "1.0", optional = true }
+dav1d-sys = { version = "0.2", optional = true }
+aom-sys = { version = "0.1.2", optional = true }
+scan_fmt = { version = "0.2", optional = true }
+ivf = { version = "0.1", path = "ivf/", optional = true }
+avformat-sys = { version = "0.1", path = "crates/avformat-sys/", optional = true }
+rayon = "1.0"
+bincode = "1.1"
+arrayvec = "0.4.10"

 [build-dependencies]
-cmake = "0.1.32"
-
-[target.'cfg(target_arch = "x86_64")'.build-dependencies]
-nasm-rs = { git = "https://github.com/tdaede/nasm-rs.git" }
+nasm-rs = { version = "0.1", path = "crates/nasm_rs/", optional = true }
+vergen = "3"

 [target.'cfg(unix)'.build-dependencies]
 pkg-config = "0.3.12"
-bindgen = { version = "0.37", optional = true }
+
+[target.'cfg(unix)'.dependencies]
+signal-hook = { version = "0.1.9", optional = true }

 [dev-dependencies]
 criterion = "0.2"
+pretty_assertions = "0.6"
+interpolate_name = "0.2.2"
+rand = "0.6"
+rand_chacha = "0.1"
+semver = "0.9"

 [[bin]]
 name = "rav1e"
-bench = false
-
-[[bin]]
-name = "rav1repl"
-required-features = ["repl"]
+required-features = ["binaries"]
 bench = false

 [lib]
@@ -53,10 +77,18 @@ bench = false
 name = "bench"
 harness = false

+[profile.dev]
+opt-level = 2
+
 [profile.release]
 codegen-units = 1  # if > 1 enables parallel code generation which improves
                   # compile times, but prevents some optimizations.
                   # Passes `-C codegen-units`. Ignored when `lto = true`.
+lto = true

 [profile.bench]
 codegen-units = 1
+lto = true
+
+[workspace]
+members = [".", "ivf", "crates/nasm_rs", "crates/avformat-sys"]
--- a/LICENSE
+++ b/LICENSE
 BSD 2-Clause License

-Copyright (c) 2017-2018, the rav1e contributors
+Copyright (c) 2017-2019, the rav1e contributors
 All rights reserved.

 Redistribution and use in source and binary forms, with or without

--- a/README.md
+++ b/README.md
 The fastest and safest AV1 encoder.

-[![Build Status](https://travis-ci.org/xiph/rav1e.svg?branch=master)](https://travis-ci.org/xiph/rav1e)
+[![Travis Build Status](https://travis-ci.org/xiph/rav1e.svg?branch=master)](https://travis-ci.org/xiph/rav1e)
+[![AppVeyor Build Status](https://ci.appveyor.com/api/projects/status/github/xiph/rav1e?branch=master&svg=true)](https://ci.appveyor.com/project/tdaede/rav1e/history)
+[![Coverage Status](https://coveralls.io/repos/github/xiph/rav1e/badge.svg?branch=master)](https://coveralls.io/github/xiph/rav1e?branch=master)

 # Overview

 rav1e is an experimental AV1 video encoder. It is designed to eventually cover all use cases, though in its current form it is most suitable for cases where libaom (the reference encoder) is too slow.

-rav1e temporarily uses libaom's transforms and CDF initialization tables, but is otherwise an independent implementation.
-
 # Features

 * Intra and inter frames
 * 64x64 superblocks
-* 4x4 to 32x32 RDO-selected square blocks
-* DC, H, V, Paeth, and smooth prediction modes
-* 4x4 DCT and ADST transforms
+* 4x4 to 64x64 RDO-selected square and 2:1/1:2 rectangular blocks
+* DC, H, V, Paeth, smooth, and a subset of directional prediction modes
+* DCT, ADST and identity transforms (up to 64x64, 16x16 and 32x32 respectively)
 * 8-, 10- and 12-bit depth color
+* 4:2:0 (full support), 4:2:2 and 4:4:4 (limited) chroma sampling
 * Variable speed settings
-* ~10 fps encoding @ 480p
+* Near real-time encoding at high speed levels
+
+# Releases
+
+For the foreseeable future, a weekly pre-release of rav1e will be [published](https://github.com/xiph/rav1e/releases) every Tuesday.

 # Windows builds

@@ -25,21 +30,32 @@ Automated AppVeyor builds can be found [here](https://ci.appveyor.com/project/td

 # Building

-This repository uses a git submodule. To initialize it, run:
+**rav1e** can optionally use either `libaom` (default) or a `dav1d` installation to run some extended tests.
+Some `x86_64`-specific optimizations require a recent version of NASM.
+
+In order to build, test and link to the codec on UNIX, you need Perl, NASM, CMake, Clang and pkg-config. To install these on Ubuntu or Linux Mint, run:

 ```
-git submodule update --init
+sudo apt install perl nasm cmake clang pkg-config
 ```

-This is also required every time you switch branches or pull a submodule change.
+On Windows, pkg-config is not required. A Perl distribution such as Strawberry Perl, CMake, and a NASM binary in your system PATH are required.

-In order to build, test and link to the codec on UNIX, you need Perl, NASM, CMake, Clang and pkg-config. To install this on Ubuntu or Linux Mint, run:
+To build the release binary in `target/release/rav1e`, run:

 ```
-sudo apt install perl nasm cmake clang pkg-config
+cargo build --release
 ```

-On Windows, pkg-config is not required. A Perl distribution such as Strawberry Perl, CMake, and a NASM binary in your system PATH are required.
+## Building the C-API
+
+**rav1e** provides a C-compatible library, header and pkg-config file.
+
+To build and install it you can use [cargo-c](https://crates.io/crates/cargo-c):
+```
+cargo install cargo-c
+cargo cinstall --release
+```

 # Compressing video

@@ -50,16 +66,22 @@ cargo run --release --bin rav1e -- input.y4m -o output.ivf
 ```
 # Decompressing video

-Encoder output should be compatible with any AV1 decoder compliant with the v1.0.0 specification. You can also build the included compatible aomdec using the following:
+Encoder output should be compatible with any AV1 decoder compliant with the v1.0.0 specification. You can build a compatible aomdec using the following:

 ```
 mkdir aom_test
 cd aom_test
-cmake ../aom_build/aom -DAOM_TARGET_CPU=generic -DCONFIG_AV1_ENCODER=0 -DENABLE_TESTS=0 -DENABLE_DOCS=0 -DCONFIG_LOWBITDEPTH=1
+cmake /path/to/aom -DAOM_TARGET_CPU=generic -DCONFIG_AV1_ENCODER=0 -DENABLE_TESTS=0 -DENABLE_DOCS=0 -DCONFIG_LOWBITDEPTH=1
 make -j8
 ./aomdec ../output.ivf -o output.y4m
 ```

+# Configuring
+
+rav1e has several optional features that can be enabled by passing --features to cargo test. Passing --all-features is discouraged.
+
+* nasm - enabled by default. When enabled, assembly is built for x86_64.
+
 # Using the AOMAnalyzer

 ## Local Analyzer
@@ -80,7 +102,7 @@ https://arewecompressedyet.com/analyzer/?d=https://people.xiph.org/~mbebenita/an

 * src/context.rs - High-level functions that write symbols to the bitstream, and maintain context.
 * src/ec.rs - Low-level implementation of the entropy coder, which directly writes the bitstream.
-* src/lib.rs - The top level library, contains code to write headers, manage buffers, and iterate throught each superblock.
+* src/lib.rs - The top level library, contains code to write headers, manage buffers, and iterate through each superblock.
 * src/partition.rs - Functions and enums to manage partitions (subdivisions of a superblock).
 * src/predict.rs - Intra prediction implementations.
 * src/quantize.rs - Quantization and dequantization functions for coefficients.
@@ -89,46 +111,47 @@ https://arewecompressedyet.com/analyzer/?d=https://people.xiph.org/~mbebenita/an
 * src/util.rs - Misc utility code.
 * src/bin/rav1e.rs - rav1e command line tool.
 * src/bin/rav1erepl.rs - Command line tool for debugging.
-* aom_build/ - Local submodule of libaom. Some C functions and constants are used directly. Also used for benchmarking and testing.

 # Contributing

-## Coding style
-Check code formatting with [rustfmt](https://github.com/rust-lang-nursery/rustfmt) before submitting a PR.
-rav1e currently uses the nightly version of rustfmt.
-
-To install nightly:
+## Toolchain
+rav1e uses the stable version of Rust (the stable toolchain).

+To install the toolchain:
 ```
-rustup install nightly
+rustup install stable
 ```

-To install the nightly version of rustfmt:
+
+## Coding style
+Check code formatting with [rustfmt](https://github.com/rust-lang-nursery/rustfmt) before submitting a PR.
+
+To install rustfmt:

 ```
-rustup component add rustfmt-preview --toolchain nightly
+rustup component add rustfmt
 ```

 then

 ```
-cargo +nightly fmt -- --check
+cargo fmt -- --check
 ```

-You should also try [clippy](https://github.com/rust-lang-nursery/rust-clippy).
-Rust also uses nightly for clippy.
+
+## Code Analysis
+[clippy](https://github.com/rust-lang-nursery/rust-clippy) helps catch common mistakes and improve your Rust code.
+
+We recommend you use it before submitting a PR.

 To install clippy:

 ```
-rustup component add clippy-preview --toolchain nightly
+rustup component add clippy
 ```

-then
+Then search for "cargo clippy" in [.travis.yml](https://github.com/xiph/rav1e/blob/master/.travis.yml) to find the exact command, and run it.

-```
-cargo +nightly clippy
-```

 ## Testing
 Run unit tests with:
@@ -138,17 +161,17 @@ cargo test

 Run encode-decode integration tests with:
 ```
-cargo test --release --features=decode_test -- --ignored
+cargo test --release --features=decode_test
 ```

-Run regular benchmarks with:
+Run the encode-decode tests against `dav1d` with:
 ```
-cargo bench
+cargo test --release --features=decode_test_dav1d
 ```

-Run comparative benchmarks with:
+Run regular benchmarks with:
 ```
-cargo bench --features=comparative_bench
+cargo bench --features=bench
 ```

 # Getting in Touch

--- a/aom @ a6ea77d1
+++ b/aom @ a6ea77d1
-Subproject commit a6ea77d15da5f0c2f74e75147452c382d802565d
--- a/appveyor.yml
+++ b/appveyor.yml
-os: Visual Studio 2017
+image: Visual Studio 2019

 environment:
-    matrix:   
-    - channel: stable
+  host: x86_64-pc-windows-msvc
+  matrix:
+    - platform: x86_64
      target: x86_64-pc-windows-msvc
+      channel: stable
+    - platform: arm64
+      target: aarch64-pc-windows-msvc
+      channel: nightly
+matrix:
+  allow_failures:
+    - platform: arm64

 install:
-    - call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvars64.bat"
+    - call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat"
    - appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe
-    - appveyor DownloadFile https://www.nasm.us/pub/nasm/releasebuilds/2.13.03/win64/nasm-2.13.03-win64.zip -FileName nasm.zip
+    - appveyor DownloadFile https://people.xiph.org/~tdaede/nasm-2.14.02-win64.zip -FileName nasm.zip
+    - appveyor DownloadFile https://github.com/mozilla/sccache/releases/download/0.2.8/sccache-0.2.8-x86_64-pc-windows-msvc.tar.gz
+    - tar xzf sccache-0.2.8-x86_64-pc-windows-msvc.tar.gz
    - 7z e -y nasm.zip
-    - rustup-init -yv --default-toolchain %channel% --default-host %target%
-    - set PATH=%PATH%;%USERPROFILE%\.cargo\bin;%APPVEYOR_BUILD_FOLDER%
+    - rustup-init -yv --default-toolchain %channel% --default-host %host%
+    - set PATH=%PATH%;%USERPROFILE%\.cargo\bin;%APPVEYOR_BUILD_FOLDER%;nasm-2.14.02;sccache-0.2.8-x86_64-pc-windows-msvc
+    - set RUSTC_WRAPPER=sccache
+    - set SCCACHE_CACHE_SIZE=500M
+    - set SCCACHE_DIR=%LOCALAPPDATA%\Mozilla\sccache
+    - ps: $Env:PKG_CONFIG_ALLOW_CROSS=1
    - rustc -vV
    - cargo -vV
+    - rustup target add %target%
+
+on_success:
+    - sccache -s
+
+cache:
+    - '%LOCALAPPDATA%\Mozilla\sccache'

 build_script:
-    - git submodule update --init
-    - cargo build --release
+    - cargo build --release --target=%target%

 test_script:
-    - git submodule update --init
-    - cargo test --verbose
+    - cargo test --target=%target% --verbose

 artifacts:
-    - path: target\release\rav1e.exe
-      name: rav1e
+    - path: target\$(target)\release\rav1e.exe
+      name: rav1e-$(platform)
+      
+deploy:
+  - provider: GitHub
+    artifact: target\$(target)\release\rav1e.exe
+    auth_token:
+      secure: 'LPBjNyFOg+vBkVR4w+89YVNhByaXBGNwtN6UwkFkWTfPow5oeCbFMtJavU9ZLs+c'
+    prerelease: true
+    on:
+      appveyor_repo_tag: true
--- a/benches/bench.rs
+++ b/benches/bench.rs
@@ -7,28 +7,24 @@
 // Media Patent License 1.0 was not distributed with this source code in the
 // PATENTS file, you can obtain it at www.aomedia.org/license/patent.

-#[macro_use]
-extern crate criterion;
-extern crate rand;
-extern crate rav1e;
-
 mod predict;
 mod transform;
-mod me;
+mod dist;

-use criterion::*;
-use rav1e::cdef::cdef_filter_frame;
-use rav1e::context::*;
-use rav1e::ec;
-use rav1e::partition::*;
-use rav1e::predict::*;
-use rav1e::rdo::rdo_cfl_alpha;
-use rav1e::*;
+use rav1e::bench::api::*;
+use rav1e::bench::encoder::*;
+use rav1e::bench::cdef::*;
+use rav1e::bench::context::*;
+use rav1e::bench::ec::*;
+use rav1e::bench::partition::*;
+use rav1e::bench::predict::*;
+use rav1e::bench::transform::*;
+use rav1e::bench::rdo::*;

-use transform::transform;
+use crate::transform::transform;

-#[cfg(feature = "comparative_bench")]
-mod comparative;
+use criterion::*;
+use std::time::Duration;

 fn write_b(c: &mut Criterion) {
  for &tx_size in &[TxSize::TX_4X4, TxSize::TX_8X8] {
@@ -40,18 +36,24 @@ fn write_b(c: &mut Criterion) {
 }

 fn write_b_bench(b: &mut Bencher, tx_size: TxSize, qindex: usize) {
-  unsafe {
-    av1_rtcd();
-    aom_dsp_rtcd();
-  }
-  let config =
-    EncoderConfig { quantizer: qindex, speed: 10, ..Default::default() };
-  let mut fi = FrameInvariants::new(1024, 1024, config);
-  let mut w = ec::WriterEncoder::new();
-  let fc = CDFContext::new(fi.base_q_idx);
-  let bc = BlockContext::new(fi.sb_width * 16, fi.sb_height * 16);
+  let config = EncoderConfig {
+    width: 1024,
+    height: 1024,
+    quantizer: qindex,
+    speed_settings: SpeedSettings::from_preset(10),
+    ..Default::default()
+  };
+  let sequence = Sequence::new(&Default::default());
+  let mut fi = FrameInvariants::<u16>::new(config, sequence);
+  let mut w = WriterEncoder::new();
+  let mut fc = CDFContext::new(fi.base_q_idx);
+  let mut fb = FrameBlocks::new(fi.sb_width * 16, fi.sb_height * 16);
+  let mut tb = fb.as_tile_blocks_mut();
+  let bc = BlockContext::new(&mut tb);
  let mut fs = FrameState::new(&fi);
-  let mut cw = ContextWriter::new(fc, bc);
+  let mut ts = fs.as_tile_state_mut();
+  // For now, restoration unit size is locked to superblock size.
+  let mut cw = ContextWriter::new(&mut fc, bc);

  let tx_type = TxType::DCT_DCT;

@@ -62,8 +64,8 @@ fn write_b_bench(b: &mut Bencher, tx_size: TxSize, qindex: usize) {
  b.iter(|| {
    for &mode in RAV1E_INTRA_MODES {
      let sbo = SuperBlockOffset { x: sbx, y: sby };
-      fs.qc.update(fi.base_q_idx, tx_size, mode.is_intra(), 8);
      for p in 1..3 {
+        ts.qc.update(fi.base_q_idx, tx_size, mode.is_intra(), 8, fi.dc_delta_q[p], fi.ac_delta_q[p]);
        for by in 0..8 {
          for bx in 0..8 {
            // For ex, 8x8 tx should be applied to even numbered (bx,by)
@@ -74,23 +76,24 @@ fn write_b_bench(b: &mut Bencher, tx_size: TxSize, qindex: usize) {
            };
            let bo = sbo.block_offset(bx, by);
            let tx_bo = BlockOffset { x: bo.x + bx, y: bo.y + by };
-            let po = tx_bo.plane_offset(&fs.input.planes[p].cfg);
+            let po = tx_bo.plane_offset(&ts.input.planes[p].cfg);
            encode_tx_block(
              &mut fi,
-              &mut fs,
+              &mut ts,
              &mut cw,
              &mut w,
              p,
-              &bo,
+              bo,
              mode,
              tx_size,
              tx_type,
              tx_size.block_size(),
-              &po,
+              po,
              false,
-              8,
              ac,
-              0
+              0,
+              RDOType::PixelDistRealRate,
+              true
            );
          }
        }
@@ -106,14 +109,20 @@ fn cdef_frame(c: &mut Criterion) {
  c.bench_function(&n, move |b| cdef_frame_bench(b, w, h));
 }

-fn cdef_frame_bench(b: &mut Bencher, w: usize, h: usize) {
-  let config =
-    EncoderConfig { quantizer: 100, speed: 10, ..Default::default() };
-  let fi = FrameInvariants::new(w, h, config);
-  let mut bc = BlockContext::new(fi.sb_width * 16, fi.sb_height * 16);
+fn cdef_frame_bench(b: &mut Bencher, width: usize, height: usize) {
+  let config = EncoderConfig {
+    width,
+    height,
+    quantizer: 100,
+    speed_settings: SpeedSettings::from_preset(10),
+    ..Default::default()
+  };
+  let sequence = Sequence::new(&Default::default());
+  let fi = FrameInvariants::<u16>::new(config, sequence);
+  let fb = FrameBlocks::new(fi.sb_width * 16, fi.sb_height * 16);
  let mut fs = FrameState::new(&fi);

-  b.iter(|| cdef_filter_frame(&fi, &mut fs.rec, &mut bc, 8));
+  b.iter(|| cdef_filter_frame(&fi, &mut fs.rec, &fb));
 }

 fn cfl_rdo(c: &mut Criterion) {
@@ -129,12 +138,44 @@ fn cfl_rdo(c: &mut Criterion) {
 }

 fn cfl_rdo_bench(b: &mut Bencher, bsize: BlockSize) {
-  let config =
-    EncoderConfig { quantizer: 100, speed: 10, ..Default::default() };
-  let fi = FrameInvariants::new(1024, 1024, config);
+  let config = EncoderConfig {
+    width: 1024,
+    height: 1024,
+    quantizer: 100,
+    speed_settings: SpeedSettings::from_preset(10),
+    ..Default::default()
+  };
+  let sequence = Sequence::new(&Default::default());
+  let fi = FrameInvariants::<u16>::new(config, sequence);
  let mut fs = FrameState::new(&fi);
+  let mut ts = fs.as_tile_state_mut();
  let offset = BlockOffset { x: 1, y: 1 };
-  b.iter(|| rdo_cfl_alpha(&mut fs, &offset, bsize, 8))
+  b.iter(|| rdo_cfl_alpha(&mut ts, offset, bsize, fi.sequence.bit_depth))
+}
+
+fn ec_bench(c: &mut Criterion) {
+    c.bench_function("update_cdf_4_native", update_cdf_4_native);
+    c.bench_function("update_cdf_4_sse2", update_cdf_4_sse2);
+}
+
+fn update_cdf_4_native(b: &mut Bencher) {
+    let mut cdf = [7296, 3819, 1616, 0, 0];
+    b.iter(|| {
+        for i in 0..1000 {
+            WriterBase::<WriterRecorder>::update_cdf(&mut cdf, i & 3);
+            black_box(cdf);
+        }
+    });
+}
+
+fn update_cdf_4_sse2(b: &mut Bencher) {
+    let mut cdf = [7296, 3819, 1616, 0, 0];
+    b.iter(|| {
+        for i in 0..1000 {
+            WriterBase::<WriterRecorder>::update_cdf_4_sse2(&mut cdf, i & 3);
+            black_box(cdf);
+        }
+    });
 }

 criterion_group!(intra_prediction, predict::pred_bench,);
@@ -142,10 +183,12 @@ criterion_group!(intra_prediction, predict::pred_bench,);
 criterion_group!(cfl, cfl_rdo);
 criterion_group!(cdef, cdef_frame);
 criterion_group!(write_block, write_b);
-criterion_group!(me, me::get_sad);
+criterion_group!{ name = me;
+                  config = Criterion::default().warm_up_time(Duration::new(1,0));
+                  targets = dist::get_sad
+}
+
+criterion_group!(ec, ec_bench);

-#[cfg(feature = "comparative_bench")]
-criterion_main!(comparative::intra_prediction);
+criterion_main!(write_block, intra_prediction, cdef, cfl, me, transform, ec);

-#[cfg(not(feature = "comparative_bench"))]
-criterion_main!(write_block, intra_prediction, cdef, cfl, me, transform);
--- a/benches/comparative/predict.rs
+++ b/benches/comparative/predict.rs
-// Copyright (c) 2017-2018, The rav1e contributors. All rights reserved
-//
-// This source code is subject to the terms of the BSD 2 Clause License and
-// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-// was not distributed with this source code in the LICENSE file, you can
-// obtain it at www.aomedia.org/license/software. If the Alliance for Open
-// Media Patent License 1.0 was not distributed with this source code in the
-// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-
-use comparative::libc;
-use criterion::*;
-use predict as predict_native;
-use predict::*;
-use rand::{ChaChaRng, Rng, SeedableRng};
-
-extern {
-  fn highbd_dc_predictor(
-    dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int, bh: libc::c_int,
-    above: *const u16, left: *const u16, bd: libc::c_int
-  );
-
-  fn highbd_h_predictor(
-    dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int, bh: libc::c_int,
-    above: *const u16, left: *const u16, bd: libc::c_int
-  );
-
-  fn highbd_v_predictor(
-    dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int, bh: libc::c_int,
-    above: *const u16, left: *const u16, bd: libc::c_int
-  );
-
-  fn highbd_paeth_predictor(
-    dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int, bh: libc::c_int,
-    above: *const u16, left: *const u16, bd: libc::c_int
-  );
-
-  fn highbd_smooth_predictor(
-    dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int, bh: libc::c_int,
-    above: *const u16, left: *const u16, bd: libc::c_int
-  );
-
-  fn highbd_smooth_h_predictor(
-    dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int, bh: libc::c_int,
-    above: *const u16, left: *const u16, bd: libc::c_int
-  );
-
-  fn highbd_smooth_v_predictor(
-    dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int, bh: libc::c_int,
-    above: *const u16, left: *const u16, bd: libc::c_int
-  );
-
-  fn cfl_predict_hbd_c(
-    ac_buf_q3: *const i16, dst: *mut u16, stride: libc::ptrdiff_t,
-    alpha_q3: libc::c_int, bd: libc::c_int, bw: libc::c_int, bh: libc::c_int
-  );
-}
-
-fn predict_intra_4x4_aom(
-  b: &mut Bencher,
-  predictor: unsafe extern fn(
-    *mut u16,
-    libc::ptrdiff_t,
-    libc::c_int,
-    libc::c_int,
-    *const u16,
-    *const u16,
-    libc::c_int
-  )
-) {
-  let mut rng = ChaChaRng::from_seed([0; 32]);
-  let (mut block, above_context, left_context) = generate_block(&mut rng);
-
-  b.iter(|| {
-    for _ in 0..MAX_ITER {
-      unsafe {
-        predictor(
-          block.as_mut_ptr(),
-          BLOCK_SIZE.width() as libc::ptrdiff_t,
-          4,
-          4,
-          above_context.as_ptr(),
-          left_context.as_ptr(),
-          8
-        );
-      }
-    }
-  })
-}
-
-pub fn intra_bench(c: &mut Criterion) {
-  c.bench_functions(
-    "intra_dc_4x4",
-    vec![
-      Fun::new("native", |b, _: &Option<usize>| {
-        predict_native::intra_dc_4x4(b)
-      }),
-      Fun::new("aom", |b, _: &Option<usize>| {
-        predict_intra_4x4_aom(b, highbd_dc_predictor)
-      }),
-    ],
-    None
-  );
-  c.bench_functions(
-    "intra_h_4x4",
-    vec![
-      Fun::new("native", |b, _: &Option<usize>| {
-        predict_native::intra_h_4x4(b)
-      }),
-      Fun::new("aom", |b, _: &Option<usize>| {
-        predict_intra_4x4_aom(b, highbd_h_predictor)
-      }),
-    ],
-    None
-  );
-  c.bench_functions(
-    "intra_v_4x4",
-    vec![
-      Fun::new("native", |b, _: &Option<usize>| {
-        predict_native::intra_v_4x4(b)
-      }),
-      Fun::new("aom", |b, _: &Option<usize>| {
-        predict_intra_4x4_aom(b, highbd_v_predictor)
-      }),
-    ],
-    None
-  );
-  c.bench_functions(
-    "intra_paeth_4x4",
-    vec![
-      Fun::new("native", |b, _: &Option<usize>| {
-        predict_native::intra_paeth_4x4(b)
-      }),
-      Fun::new("aom", |b, _: &Option<usize>| {
-        predict_intra_4x4_aom(b, highbd_paeth_predictor)
-      }),
-    ],
-    None
-  );
-  c.bench_functions(
-    "intra_smooth_4x4",
-    vec![
-      Fun::new("native", |b, _: &Option<usize>| {
-        predict_native::intra_smooth_4x4(b)
-      }),
-      Fun::new("aom", |b, _: &Option<usize>| {
-        predict_intra_4x4_aom(b, highbd_smooth_predictor)
-      }),
-    ],
-    None
-  );
-  c.bench_functions(
-    "intra_smooth_h_4x4",
-    vec![
-      Fun::new("native", |b, _: &Option<usize>| {
-        predict_native::intra_smooth_h_4x4(b)
-      }),
-      Fun::new("aom", |b, _: &Option<usize>| {
-        predict_intra_4x4_aom(b, highbd_smooth_h_predictor)
-      }),
-    ],
-    None
-  );
-  c.bench_functions(
-    "intra_smooth_v_4x4",
-    vec![
-      Fun::new("native", |b, _: &Option<usize>| {
-        predict_native::intra_smooth_v_4x4(b)
-      }),
-      Fun::new("aom", |b, _: &Option<usize>| {
-        predict_intra_4x4_aom(b, highbd_smooth_v_predictor)
-      }),
-    ],
-    None
-  );
-  c.bench_functions(
-    "intra_cfl_4x4",
-    vec![
-      Fun::new("native", |b, _: &Option<usize>| {
-        predict_native::intra_cfl_4x4(b)
-      }),
-      Fun::new("aom", |b, _: &Option<usize>| intra_cfl_4x4_aom(b)),
-    ],
-    None
-  );
-}
-
-pub fn intra_cfl_4x4_aom(b: &mut Bencher) {
-  let mut rng = ChaChaRng::from_seed([0; 32]);
-  let (mut block, _above_context, _left_context) = generate_block(&mut rng);
-  let ac: Vec<i16> = (0..(32 * 32)).map(|_| rng.gen()).collect();
-  let alpha = -1 as i16;
-
-  b.iter(|| {
-    for _ in 0..MAX_ITER {
-      unsafe {
-        cfl_predict_hbd_c(
-          ac.as_ptr(),
-          block.as_mut_ptr(),
-          BLOCK_SIZE.width() as libc::ptrdiff_t,
-          alpha as libc::c_int,
-          8,
-          4,
-          4
-        );
-      }
-    }
-  })
-}
--- a/benches/me.rs
+++ b/benches/me.rs
@@ -8,22 +8,25 @@
 // PATENTS file, you can obtain it at www.aomedia.org/license/patent.

 use criterion::*;
-use partition::*;
-use plane::*;
-use rand::{ChaChaRng, Rng, SeedableRng};
-use rav1e::me;
+use rav1e::bench::dist;
+use rav1e::bench::partition::*;
+use rav1e::bench::partition::BlockSize::*;
+use rav1e::bench::frame::*;
+use rand::{Rng, SeedableRng};
+use rand_chacha::ChaChaRng;
+use rav1e::Pixel;

-fn fill_plane(ra: &mut ChaChaRng, plane: &mut Plane) {
+fn fill_plane<T: Pixel>(ra: &mut ChaChaRng, plane: &mut Plane<T>) {
  let stride = plane.cfg.stride;
  for row in plane.data.chunks_mut(stride) {
-    for mut pixel in row {
+    for pixel in row {
      let v: u8 = ra.gen();
-      *pixel = v as u16;
+      *pixel = T::cast_from(v);
    }
  }
 }

-fn new_plane(ra: &mut ChaChaRng, width: usize, height: usize) -> Plane {
+fn new_plane<T: Pixel>(ra: &mut ChaChaRng, width: usize, height: usize) -> Plane<T> {
  let mut p = Plane::new(width, height, 0, 0, 128 + 8, 128 + 8);

  fill_plane(ra, &mut p);
@@ -31,49 +34,80 @@ fn new_plane(ra: &mut ChaChaRng, width: usize, height: usize) -> Plane {
  p
 }

-fn bench_get_sad(b: &mut Bencher, bs: &BlockSize) {
+fn run_sad_bench<T: Pixel>(b: &mut Bencher, &(bs, bit_depth): &(BlockSize, usize)) {
  let mut ra = ChaChaRng::from_seed([0; 32]);
  let bsw = bs.width();
  let bsh = bs.height();
  let w = 640;
  let h = 480;
-  let input_plane = new_plane(&mut ra, w, h);
-  let rec_plane = new_plane(&mut ra, w, h);
-  let po = PlaneOffset { x: 0, y: 0 };
+  let input_plane = new_plane::<T>(&mut ra, w, h);
+  let rec_plane = new_plane::<T>(&mut ra, w, h);

-  let plane_org = input_plane.slice(&po);
-  let plane_ref = rec_plane.slice(&po);
+  let plane_org = input_plane.as_region();
+  let plane_ref = rec_plane.as_region();

  b.iter(|| {
-      let _ = me::get_sad(&plane_org, &plane_ref, bsw, bsh);
+    let _ =
+      black_box(dist::get_sad(&plane_org, &plane_ref, bsw, bsh, bit_depth));
  })
 }

+fn bench_get_sad(b: &mut Bencher, &(bs, bit_depth): &(BlockSize, usize)) {
+  // Bit depths up to 8 are benchmarked on u8 planes, deeper ones on u16.
+  let params = (bs, bit_depth);
+  match bit_depth {
+    0..=8 => run_sad_bench::<u8>(b, &params),
+    _ => run_sad_bench::<u16>(b, &params),
+  }
+}
+
 pub fn get_sad(c: &mut Criterion) {
-  use partition::BlockSize::*;
  let blocks = vec![
-    BLOCK_4X4,
-    BLOCK_4X8,
-    BLOCK_8X4,
-    BLOCK_8X8,
-    BLOCK_8X16,
-    BLOCK_16X8,
-    BLOCK_16X16,
-    BLOCK_16X32,
-    BLOCK_32X16,
-    BLOCK_32X32,
-    BLOCK_32X64,
-    BLOCK_64X32,
-    BLOCK_64X64,
-    BLOCK_64X128,
-    BLOCK_128X64,
-    BLOCK_128X128,
-    BLOCK_4X16,
-    BLOCK_16X4,
-    BLOCK_8X32,
-    BLOCK_32X8,
-    BLOCK_16X64,
-    BLOCK_64X16,
+    (BLOCK_4X4, 8),
+    (BLOCK_4X8, 8),
+    (BLOCK_8X4, 8),
+    (BLOCK_8X8, 8),
+    (BLOCK_8X16, 8),
+    (BLOCK_16X8, 8),
+    (BLOCK_16X16, 8),
+    (BLOCK_16X32, 8),
+    (BLOCK_32X16, 8),
+    (BLOCK_32X32, 8),
+    (BLOCK_32X64, 8),
+    (BLOCK_64X32, 8),
+    (BLOCK_64X64, 8),
+    (BLOCK_64X128, 8),
+    (BLOCK_128X64, 8),
+    (BLOCK_128X128, 8),
+    (BLOCK_4X16, 8),
+    (BLOCK_16X4, 8),
+    (BLOCK_8X32, 8),
+    (BLOCK_32X8, 8),
+    (BLOCK_16X64, 8),
+    (BLOCK_64X16, 8),
+
+    (BLOCK_4X4, 10),
+    (BLOCK_4X8, 10),
+    (BLOCK_8X4, 10),
+    (BLOCK_8X8, 10),
+    (BLOCK_8X16, 10),
+    (BLOCK_16X8, 10),
+    (BLOCK_16X16, 10),
+    (BLOCK_16X32, 10),
+    (BLOCK_32X16, 10),
+    (BLOCK_32X32, 10),
+    (BLOCK_32X64, 10),
+    (BLOCK_64X32, 10),
+    (BLOCK_64X64, 10),
+    (BLOCK_64X128, 10),
+    (BLOCK_128X64, 10),
+    (BLOCK_128X128, 10),
+    (BLOCK_4X16, 10),
+    (BLOCK_16X4, 10),
+    (BLOCK_8X32, 10),
+    (BLOCK_32X8, 10),
+    (BLOCK_16X64, 10),
+    (BLOCK_64X16, 10)
  ];

  c.bench_function_over_inputs("get_sad", bench_get_sad, blocks);

--- a/benches/predict.rs
+++ b/benches/predict.rs
@@ -8,15 +8,20 @@
 // PATENTS file, you can obtain it at www.aomedia.org/license/patent.

 use criterion::*;
-use rand::{ChaChaRng, Rng, SeedableRng};
-use rav1e::partition::BlockSize;
-use rav1e::predict::{Block4x4, Intra};
+use rand::{Rng, RngCore, SeedableRng};
+use rand_chacha::ChaChaRng;
+use rav1e::bench::partition::BlockSize;
+use rav1e::bench::predict::{Block4x4, Intra};
+use rav1e::bench::frame::*;
+use rav1e::bench::util::*;

-pub const MAX_ITER: usize = 50000;
 pub const BLOCK_SIZE: BlockSize = BlockSize::BLOCK_32X32;

-pub fn generate_block(rng: &mut ChaChaRng) -> (Vec<u16>, Vec<u16>, Vec<u16>) {
-  let block = vec![0u16; BLOCK_SIZE.width() * BLOCK_SIZE.height()];
+pub fn generate_block(rng: &mut ChaChaRng) -> (Plane<u16>, Vec<u16>, Vec<u16>) {
+  let block = Plane::wrap(
+    vec![0u16; BLOCK_SIZE.width() * BLOCK_SIZE.height()],
+    BLOCK_SIZE.width(),
+  );
  let above_context: Vec<u16> =
    (0..BLOCK_SIZE.height()).map(|_| rng.gen()).collect();
  let left_context: Vec<u16> =
@@ -25,64 +30,92 @@ pub fn generate_block(rng: &mut ChaChaRng) -> (Vec<u16>, Vec<u16>, Vec<u16>) {
  (block, above_context, left_context)
 }

+/// Builds the inputs for the `u8` prediction benchmarks: a zero-filled
+/// `Plane<u8>` holding `BLOCK_SIZE` samples plus two edge slices borrowed
+/// from `edge_buf`, which is first overwritten with random bytes.
+///
+/// Layout of `edge_buf.array`: bytes `0..32` are returned as the left
+/// context and bytes `33..65` as the above context. Byte 32 lies between
+/// the two and belongs to neither slice.
+pub fn generate_block_u8<'a>(
+  rng: &mut ChaChaRng, edge_buf: &'a mut AlignedArray<[u8; 65]>
+) -> (Plane<u8>, &'a [u8], &'a [u8]) {
+  rng.fill_bytes(&mut edge_buf.array);
+  let (left_context, rest) = edge_buf.array.split_at(32);
+  // Skip the single byte that separates the two edges.
+  let above_context = &rest[1..];
+
+  let samples = vec![0u8; BLOCK_SIZE.width() * BLOCK_SIZE.height()];
+  let block = Plane::wrap(samples, BLOCK_SIZE.width());
+
+  (block, above_context, left_context)
+}
+
+/// Registers `f` with criterion as the benchmark `id` (also used as the
+/// group name). Ids ending in `_4x4_u8` report a throughput of 16 bytes,
+/// other ids ending in `_4x4` report 32 bytes, and everything else is
+/// registered without a throughput.
+pub fn bench_pred_fn<F>(c: &mut Criterion, id: &str, f: F)
+where
+  F: FnMut(&mut Bencher) + 'static
+{
+  // The `_u8` suffix must be tested first: those ids do not end in `_4x4`.
+  let bytes = if id.ends_with("_4x4_u8") {
+    Some(16)
+  } else if id.ends_with("_4x4") {
+    Some(32)
+  } else {
+    None
+  };
+
+  let bench = Benchmark::new(id, f);
+  let bench = match bytes {
+    Some(n) => bench.throughput(Throughput::Bytes(n)),
+    None => bench
+  };
+  c.bench(id, bench);
+}
+
 pub fn pred_bench(c: &mut Criterion) {
-  c.bench_function("intra_dc_4x4", |b| intra_dc_4x4(b));
-  c.bench_function("intra_dc_left_4x4", |b| intra_dc_left_4x4(b));
-  c.bench_function("intra_dc_top_4x4", |b| intra_dc_top_4x4(b));
-  c.bench_function("intra_h_4x4", |b| intra_h_4x4(b));
-  c.bench_function("intra_v_4x4", |b| intra_v_4x4(b));
-  c.bench_function("intra_paeth_4x4", |b| intra_paeth_4x4(b));
-  c.bench_function("intra_smooth_4x4", |b| intra_smooth_4x4(b));
-  c.bench_function("intra_smooth_h_4x4", |b| intra_smooth_h_4x4(b));
-  c.bench_function("intra_smooth_v_4x4", |b| intra_smooth_v_4x4(b));
-  c.bench_function("intra_cfl_4x4", |b| intra_cfl_4x4(b));
+  bench_pred_fn(c, "intra_dc_4x4", intra_dc_4x4);
+  bench_pred_fn(c, "intra_dc_left_4x4", intra_dc_left_4x4);
+  bench_pred_fn(c, "intra_dc_top_4x4", intra_dc_top_4x4);
+  bench_pred_fn(c, "intra_h_4x4", intra_h_4x4);
+  bench_pred_fn(c, "intra_v_4x4", intra_v_4x4);
+  bench_pred_fn(c, "intra_paeth_4x4", intra_paeth_4x4);
+  bench_pred_fn(c, "intra_smooth_4x4", intra_smooth_4x4);
+  bench_pred_fn(c, "intra_smooth_h_4x4", intra_smooth_h_4x4);
+  bench_pred_fn(c, "intra_smooth_v_4x4", intra_smooth_v_4x4);
+  bench_pred_fn(c, "intra_cfl_4x4", intra_cfl_4x4);
+  bench_pred_fn(c, "intra_dc_4x4_u8", intra_dc_4x4_u8);
+  bench_pred_fn(c, "intra_dc_128_4x4_u8", intra_dc_128_4x4_u8);
+  bench_pred_fn(c, "intra_dc_left_4x4_u8", intra_dc_left_4x4_u8);
+  bench_pred_fn(c, "intra_dc_top_4x4_u8", intra_dc_top_4x4_u8);
+  bench_pred_fn(c, "intra_h_4x4_u8", intra_h_4x4_u8);
+  bench_pred_fn(c, "intra_v_4x4_u8", intra_v_4x4_u8);
+  bench_pred_fn(c, "intra_paeth_4x4_u8", intra_paeth_4x4_u8);
+  bench_pred_fn(c, "intra_smooth_4x4_u8", intra_smooth_4x4_u8);
+  bench_pred_fn(c, "intra_smooth_h_4x4_u8", intra_smooth_h_4x4_u8);
+  bench_pred_fn(c, "intra_smooth_v_4x4_u8", intra_smooth_v_4x4_u8);
 }

 pub fn intra_dc_4x4(b: &mut Bencher) {
-  let mut ra = ChaChaRng::from_seed([0; 32]);
-  let (mut block, above, left) = generate_block(&mut ra);
+  let mut rng = ChaChaRng::from_seed([0; 32]);
+  let (mut block, above, left) = generate_block(&mut rng);

  b.iter(|| {
-    for _ in 0..MAX_ITER {
-      Block4x4::pred_dc(
-        &mut block,
-        BLOCK_SIZE.width(),
-        &above[..4],
-        &left[..4]
-      );
-    }
+    Block4x4::pred_dc(&mut block.as_region_mut(), &above[..4], &left[..4]);
  })
 }

 pub fn intra_dc_left_4x4(b: &mut Bencher) {
-  let mut ra = ChaChaRng::from_seed([0; 32]);
-  let (mut block, above, left) = generate_block(&mut ra);
+  let mut rng = ChaChaRng::from_seed([0; 32]);
+  let (mut block, above, left) = generate_block(&mut rng);

  b.iter(|| {
-    for _ in 0..MAX_ITER {
-      Block4x4::pred_dc_left(
-        &mut block,
-        BLOCK_SIZE.width(),
-        &above[..4],
-        &left[..4]
-      );
-    }
+    Block4x4::pred_dc_left(
+      &mut block.as_region_mut(),
+      &above[..4],
+      &left[..4]
+    );
  })
 }

 pub fn intra_dc_top_4x4(b: &mut Bencher) {
-  let mut ra = ChaChaRng::from_seed([0; 32]);
-  let (mut block, above, left) = generate_block(&mut ra);
+  let mut rng = ChaChaRng::from_seed([0; 32]);
+  let (mut block, above, left) = generate_block(&mut rng);

  b.iter(|| {
-    for _ in 0..MAX_ITER {
-      Block4x4::pred_dc_top(
-        &mut block,
-        BLOCK_SIZE.width(),
-        &above[..4],
-        &left[..4]
-      );
-    }
+    Block4x4::pred_dc_top(
+      &mut block.as_region_mut(),
+      &above[..4],
+      &left[..4]
+    );
  })
 }

@@ -91,9 +124,7 @@ pub fn intra_h_4x4(b: &mut Bencher) {
  let (mut block, _above, left) = generate_block(&mut rng);

  b.iter(|| {
-    for _ in 0..MAX_ITER {
-      Block4x4::pred_h(&mut block, BLOCK_SIZE.width(), &left[..4]);
-    }
+    Block4x4::pred_h(&mut block.as_region_mut(), &left[..4]);
  })
 }

@@ -102,9 +133,7 @@ pub fn intra_v_4x4(b: &mut Bencher) {
  let (mut block, above, _left) = generate_block(&mut rng);

  b.iter(|| {
-    for _ in 0..MAX_ITER {
-      Block4x4::pred_v(&mut block, BLOCK_SIZE.width(), &above[..4]);
-    }
+    Block4x4::pred_v(&mut block.as_region_mut(), &above[..4]);
  })
 }

@@ -114,15 +143,12 @@ pub fn intra_paeth_4x4(b: &mut Bencher) {
  let above_left = unsafe { *above.as_ptr().offset(-1) };

  b.iter(|| {
-    for _ in 0..MAX_ITER {
-      Block4x4::pred_paeth(
-        &mut block,
-        BLOCK_SIZE.width(),
-        &above[..4],
-        &left[..4],
-        above_left
-      );
-    }
+    Block4x4::pred_paeth(
+      &mut block.as_region_mut(),
+      &above[..4],
+      &left[..4],
+      above_left
+    );
  })
 }

@@ -131,14 +157,11 @@ pub fn intra_smooth_4x4(b: &mut Bencher) {
  let (mut block, above, left) = generate_block(&mut rng);

  b.iter(|| {
-    for _ in 0..MAX_ITER {
-      Block4x4::pred_smooth(
-        &mut block,
-        BLOCK_SIZE.width(),
-        &above[..4],
-        &left[..4]
-      );
-    }
+    Block4x4::pred_smooth(
+      &mut block.as_region_mut(),
+      &above[..4],
+      &left[..4]
+    );
  })
 }

@@ -147,14 +170,11 @@ pub fn intra_smooth_h_4x4(b: &mut Bencher) {
  let (mut block, above, left) = generate_block(&mut rng);

  b.iter(|| {
-    for _ in 0..MAX_ITER {
-      Block4x4::pred_smooth_h(
-        &mut block,
-        BLOCK_SIZE.width(),
-        &above[..4],
-        &left[..4]
-      );
-    }
+    Block4x4::pred_smooth_h(
+      &mut block.as_region_mut(),
+      &above[..4],
+      &left[..4]
+    );
  })
 }

@@ -163,26 +183,158 @@ pub fn intra_smooth_v_4x4(b: &mut Bencher) {
  let (mut block, above, left) = generate_block(&mut rng);

  b.iter(|| {
-    for _ in 0..MAX_ITER {
-      Block4x4::pred_smooth_v(
-        &mut block,
-        BLOCK_SIZE.width(),
-        &above[..4],
-        &left[..4]
-      );
-    }
+    Block4x4::pred_smooth_v(
+      &mut block.as_region_mut(),
+      &above[..4],
+      &left[..4]
+    );
  })
 }

 pub fn intra_cfl_4x4(b: &mut Bencher) {
  let mut rng = ChaChaRng::from_seed([0; 32]);
-  let (mut block, _above, _left) = generate_block(&mut rng);
+  let (mut block, above, left) = generate_block(&mut rng);
  let ac: Vec<i16> = (0..(32 * 32)).map(|_| rng.gen()).collect();
  let alpha = -1 as i16;

  b.iter(|| {
-    for _ in 0..MAX_ITER {
-      Block4x4::pred_cfl(&mut block, BLOCK_SIZE.width(), &ac, alpha, 8);
-    }
+    Block4x4::pred_cfl(
+      &mut block.as_region_mut(),
+      &ac,
+      alpha,
+      8,
+      &above,
+      &left
+    );
+  })
+}
+
+/// Benchmarks `Block4x4::pred_dc` writing into a `Plane<u8>` block, fed the
+/// first four above-edge bytes and the last four left-edge bytes produced by
+/// `generate_block_u8` from a fixed all-zero RNG seed.
+pub fn intra_dc_4x4_u8(b: &mut Bencher) {
+  let mut rng = ChaChaRng::from_seed([0; 32]);
+  let mut edge_buf = UninitializedAlignedArray();
+  let (mut block, above, left) = generate_block_u8(&mut rng, &mut edge_buf);
+
+  b.iter(|| {
+    Block4x4::pred_dc(
+      &mut block.as_region_mut(),
+      &above[..4],
+      &left[32 - 4..]
+    );
+  })
+}
+
+/// Benchmarks `Block4x4::pred_dc_128` writing into a `Plane<u8>` block. The
+/// edge slices are generated but not used by this predictor call.
+pub fn intra_dc_128_4x4_u8(b: &mut Bencher) {
+  let mut rng = ChaChaRng::from_seed([0; 32]);
+  let mut edge_buf = UninitializedAlignedArray();
+  let (mut block, _above, _left) = generate_block_u8(&mut rng, &mut edge_buf);
+
+  b.iter(|| {
+    // NOTE(review): the literal 8 is presumably the bit depth -- confirm
+    // against the `pred_dc_128` signature.
+    Block4x4::pred_dc_128(&mut block.as_region_mut(), 8);
+  })
+}
+
+/// Benchmarks `Block4x4::pred_dc_left` writing into a `Plane<u8>` block, fed
+/// the first four above-edge bytes and the last four left-edge bytes.
+pub fn intra_dc_left_4x4_u8(b: &mut Bencher) {
+  let mut rng = ChaChaRng::from_seed([0; 32]);
+  let mut edge_buf = UninitializedAlignedArray();
+  let (mut block, above, left) = generate_block_u8(&mut rng, &mut edge_buf);
+
+  b.iter(|| {
+    Block4x4::pred_dc_left(
+      &mut block.as_region_mut(),
+      &above[..4],
+      &left[32 - 4..]
+    );
+  })
+}
+
+/// Benchmarks `Block4x4::pred_dc_top` writing into a `Plane<u8>` block, fed
+/// the first four above-edge bytes and the last four left-edge bytes.
+pub fn intra_dc_top_4x4_u8(b: &mut Bencher) {
+  let mut rng = ChaChaRng::from_seed([0; 32]);
+  let mut edge_buf = UninitializedAlignedArray();
+  let (mut block, above, left) = generate_block_u8(&mut rng, &mut edge_buf);
+
+  b.iter(|| {
+    Block4x4::pred_dc_top(
+      &mut block.as_region_mut(),
+      &above[..4],
+      &left[32 - 4..]
+    );
+  })
+}
+
+/// Benchmarks `Block4x4::pred_h` writing into a `Plane<u8>` block, fed only
+/// the last four left-edge bytes.
+pub fn intra_h_4x4_u8(b: &mut Bencher) {
+  let mut rng = ChaChaRng::from_seed([0; 32]);
+  let mut edge_buf = UninitializedAlignedArray();
+  let (mut block, _above, left) = generate_block_u8(&mut rng, &mut edge_buf);
+
+  b.iter(|| {
+    Block4x4::pred_h(&mut block.as_region_mut(), &left[32 - 4..]);
+  })
+}
+
+/// Benchmarks `Block4x4::pred_v` writing into a `Plane<u8>` block, fed only
+/// the first four above-edge bytes.
+pub fn intra_v_4x4_u8(b: &mut Bencher) {
+  let mut rng = ChaChaRng::from_seed([0; 32]);
+  let mut edge_buf = UninitializedAlignedArray();
+  let (mut block, above, _left) = generate_block_u8(&mut rng, &mut edge_buf);
+
+  b.iter(|| {
+    Block4x4::pred_v(&mut block.as_region_mut(), &above[..4]);
+  })
+}
+
+/// Benchmarks `Block4x4::pred_paeth` writing into a `Plane<u8>` block, fed
+/// the first four above-edge bytes, the last four left-edge bytes and the
+/// byte stored immediately before the above edge.
+pub fn intra_paeth_4x4_u8(b: &mut Bencher) {
+  let mut rng = ChaChaRng::from_seed([0; 32]);
+  let mut edge_buf = UninitializedAlignedArray();
+  let (mut block, above, left) = generate_block_u8(&mut rng, &mut edge_buf);
+  // `above` starts at byte 33 of `edge_buf.array` (see `generate_block_u8`),
+  // so this reads byte 32 of the same buffer.
+  // NOTE(review): the read goes through a pointer derived from the `above`
+  // slice but lands outside that slice; consider indexing the buffer
+  // directly instead of relying on the layout.
+  let above_left = unsafe { *above.as_ptr().offset(-1) };
+
+  b.iter(|| {
+    Block4x4::pred_paeth(
+      &mut block.as_region_mut(),
+      &above[..4],
+      &left[32 - 4..],
+      above_left
+    );
+  })
+}
+
+/// Benchmarks `Block4x4::pred_smooth` writing into a `Plane<u8>` block, fed
+/// the first four above-edge bytes and the last four left-edge bytes.
+pub fn intra_smooth_4x4_u8(b: &mut Bencher) {
+  let mut rng = ChaChaRng::from_seed([0; 32]);
+  let mut edge_buf = UninitializedAlignedArray();
+  let (mut block, above, left) = generate_block_u8(&mut rng, &mut edge_buf);
+
+  b.iter(|| {
+    Block4x4::pred_smooth(
+      &mut block.as_region_mut(),
+      &above[..4],
+      &left[32 - 4..]
+    );
+  })
+}
+
+/// Benchmarks `Block4x4::pred_smooth_h` writing into a `Plane<u8>` block, fed
+/// the first four above-edge bytes and the last four left-edge bytes.
+pub fn intra_smooth_h_4x4_u8(b: &mut Bencher) {
+  let mut rng = ChaChaRng::from_seed([0; 32]);
+  let mut edge_buf = UninitializedAlignedArray();
+  let (mut block, above, left) = generate_block_u8(&mut rng, &mut edge_buf);
+
+  b.iter(|| {
+    Block4x4::pred_smooth_h(
+      &mut block.as_region_mut(),
+      &above[..4],
+      &left[32 - 4..]
+    );
+  })
+}
+
+/// Benchmarks `Block4x4::pred_smooth_v` writing into a `Plane<u8>` block, fed
+/// the first four above-edge bytes and the last four left-edge bytes.
+pub fn intra_smooth_v_4x4_u8(b: &mut Bencher) {
+  let mut rng = ChaChaRng::from_seed([0; 32]);
+  let mut edge_buf = UninitializedAlignedArray();
+  let (mut block, above, left) = generate_block_u8(&mut rng, &mut edge_buf);
+
+  b.iter(|| {
+    Block4x4::pred_smooth_v(
+      &mut block.as_region_mut(),
+      &above[..4],
+      &left[32 - 4..]
+    );
   })
 }
--- a/benches/transform.rs
+++ b/benches/transform.rs
@@ -8,8 +8,9 @@
 // PATENTS file, you can obtain it at www.aomedia.org/license/patent.

 use criterion::*;
-use rand::{ChaChaRng, Rng, SeedableRng};
-use rav1e::transform;
+use rand::{Rng, SeedableRng};
+use rand_chacha::ChaChaRng;
+use rav1e::bench::transform;

 fn bench_idct4(b: &mut Bencher, bit_depth: &usize) {
  let mut ra = ChaChaRng::from_seed([0; 32]);

--- a/build.rs
+++ b/build.rs
 // build.rs

-extern crate cmake;
-#[cfg(unix)]
-extern crate pkg_config;
-#[cfg(unix)]
-#[cfg(feature = "decode_test")]
-extern crate bindgen;
-#[cfg(target_arch = "x86_64")]
-extern crate nasm_rs;
-
+#[allow(unused_imports)]
 use std::env;
 use std::fs;
 use std::path::Path;

-fn main() {
-    #[cfg(target_arch = "x86_64")] {
-        use std::fs::File;
-        use std::io::Write;
-        let out_dir = env::var("OUT_DIR").unwrap();
-        {
-            let dest_path = Path::new(&out_dir).join("config.asm");
-            let mut config_file = File::create(dest_path).unwrap();
-            config_file.write(b"	%define ARCH_X86_32 0\n").unwrap();
-            config_file.write(b" %define ARCH_X86_64 1\n").unwrap();
-            config_file.write(b"	%define PIC 1\n").unwrap();
-            config_file.write(b" %define STACK_ALIGNMENT 32\n").unwrap();
-        }
-        let mut config_include_arg = String::from("-I");
-        config_include_arg.push_str(&out_dir);
-        config_include_arg.push('/');
-        nasm_rs::compile_library_args("rav1easm", &["src/x86/mc.asm"], &[&config_include_arg, "-Isrc/"]);
-    }
-
-    if cfg!(windows) && cfg!(feature = "decode_test") {
-        panic!("Unsupported feature on this platform!");
-    }
+#[allow(dead_code)]
+fn rerun_dir<P: AsRef<Path>>(dir: P) {
+    for entry in fs::read_dir(dir).unwrap() {
+        let entry = entry.unwrap();
+        let path = entry.path();
+        println!("cargo:rerun-if-changed={}", path.to_string_lossy());

-    let cargo_dir = env::var("CARGO_MANIFEST_DIR").unwrap();
-    let build_path = Path::new(&cargo_dir).join("aom_build/aom");
-    let debug = if let Some(v) = env::var("PROFILE").ok() {
-        match v.as_str() {
-            "bench" | "release" => false,
-            _ => true,
+        if path.is_dir() {
+            rerun_dir(path);
        }
-    } else {
-        false
-    };
-
-    let dst = cmake::Config::new(build_path)
-        .define("CONFIG_DEBUG", (debug as u8).to_string())
-        .define("CONFIG_ANALYZER", "0")
-        .define("ENABLE_DOCS", "0")
-        .define("ENABLE_NASM", "1")
-        .define("ENABLE_TESTS", "0")
-        .no_build_target(cfg!(windows))
-        .build();
-
-    // Dirty hack to force a rebuild whenever the defaults are changed upstream
-    let _ = fs::remove_file(dst.join("build/CMakeCache.txt"));
-
-    #[cfg(windows)] {
-        println!("cargo:rustc-link-search=native={}", dst.join("build").to_str().unwrap());
-        println!("cargo:rustc-link-search=native={}", dst.join("build/Debug").to_str().unwrap());
-        println!("cargo:rustc-link-search=native={}", dst.join("build/Release").to_str().unwrap());
-        println!("cargo:rustc-link-lib=static=aom");
    }
+}

-    #[cfg(unix)] {
-        env::set_var("PKG_CONFIG_PATH", dst.join("lib/pkgconfig"));
-        let _libs = pkg_config::Config::new().statik(true).probe("aom").unwrap();
-
-        #[cfg(feature = "decode_test")] {
-            use std::io::Write;
-
-            let out_dir = env::var("OUT_DIR").unwrap();
-
-            let headers = _libs.include_paths.clone();
-
-            let mut builder = bindgen::builder()
-                .blacklist_type("max_align_t")
-                .rustfmt_bindings(false)
-                .header("data/aom.h");
-
-            for header in headers {
-                builder = builder.clang_arg("-I").clang_arg(header.to_str().unwrap());
-            }
-
-            // Manually fix the comment so rustdoc won't try to pick them
-            let s = builder
-                .generate()
-                .unwrap()
-                .to_string()
-                .replace("/**", "/*")
-                .replace("/*!", "/*");
-
-            let dest_path = Path::new(&out_dir).join("aom.rs");
-
-            let mut file = fs::File::create(dest_path).unwrap();
+/// Writes `config.asm` into `OUT_DIR`, assembles the x86 sources into the
+/// static library `rav1easm`, tells cargo to link it, and asks cargo to rerun
+/// this script when anything under `src/x86` or `src/ext/x86` changes.
+///
+/// Panics if `OUT_DIR` is unset or if `config.asm` cannot be created or
+/// written.
+#[cfg(feature = "nasm")]
+fn build_nasm_files() {
+    use std::fs::File;
+    use std::io::Write;
+    let out_dir = env::var("OUT_DIR").unwrap();
+    {
+        let dest_path = Path::new(&out_dir).join("config.asm");
+        let mut config_file = File::create(dest_path).unwrap();
+        // `write_all` instead of `write`: a bare `write` may stop after a
+        // partial write and only reports the byte count, which was ignored.
+        config_file.write_all(b"	%define private_prefix rav1e\n").unwrap();
+        config_file.write_all(b"	%define ARCH_X86_32 0\n").unwrap();
+        config_file.write_all(b" %define ARCH_X86_64 1\n").unwrap();
+        config_file.write_all(b"	%define PIC 1\n").unwrap();
+        config_file.write_all(b" %define STACK_ALIGNMENT 16\n").unwrap();
+        if cfg!(target_os="macos") {
+            config_file.write_all(b" %define PREFIX 1\n").unwrap();
+        }
+    }
+    // Pass OUT_DIR (with a trailing slash) as an include path so the
+    // assembler can find the generated config.asm.
+    let mut config_include_arg = String::from("-I");
+    config_include_arg.push_str(&out_dir);
+    config_include_arg.push('/');
+    nasm_rs::compile_library_args(
+        "rav1easm",
+        &[
+            "src/x86/data.asm",
+            "src/x86/ipred.asm",
+            "src/x86/itx.asm",
+            "src/x86/mc.asm",
+            "src/x86/me.asm",
+            "src/x86/sad_sse2.asm",
+            "src/x86/sad_avx.asm"
+        ],
+        &[&config_include_arg, "-Isrc/"]
+    );
+    println!("cargo:rustc-link-lib=static=rav1easm");
+    rerun_dir("src/x86");
+    rerun_dir("src/ext/x86");
+}

-            let _ = file.write(s.as_bytes());
-        }
+#[allow(unused_variables)]
+fn main() {
+    let arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap();
+    let os = env::var("CARGO_CFG_TARGET_OS").unwrap();
+    // let env = env::var("CARGO_CFG_TARGET_ENV").unwrap();
+
+    #[cfg(feature = "nasm")] {
+      if arch == "x86_64" {
+        build_nasm_files()
+      }
    }

-    fn rerun_dir<P: AsRef<Path>>(dir: P) {
-        for entry in fs::read_dir(dir).unwrap() {
-            let entry = entry.unwrap();
-            let path = entry.path();
-            println!("cargo:rerun-if-changed={}", path.to_string_lossy());
-
-            if path.is_dir() {
-                rerun_dir(path);
-            }
-        }
+    if os == "windows" && cfg!(feature = "decode_test") {
+        panic!("Unsupported feature on this platform!");
    }

-    rerun_dir("aom_build");
+    vergen::generate_cargo_keys(vergen::ConstantsFlags::all()).expect("Unable to generate the cargo keys!");
 }
--- a/build.sh
+++ b/build.sh
@@ -5,11 +5,21 @@ set -e

 #SEQ=!!!!! ENTER YOUR FAVORITE Y4M HERE !!!!!

+# Build in release mode unless --debug is given on the command line.
+IS_RELEASE=1
+
+# Walk the positional parameters once: each argument is shifted off the
+# front and re-appended at the back unless it is --debug, which is consumed
+# here. After the loop "$@" holds the remaining arguments in their
+# original order.
+for arg in "$@"; do
+  shift
+  case "$arg" in
+    "--debug") IS_RELEASE=0 ;;
+    *)        set -- "$@" "$arg"
+  esac
+done
+
 if [[ -z "${SEQ}" ]]; then
  SEQ=nyan.y4m
  SEQ10=nyan10.y4m
  SEQ12=nyan12.y4m
-  
+
  wget -nc https://mf4.xiph.org/~ltrudeau/videos/nyan.y4m
  #wget -nc https://people.xiph.org/~tdaede/nyan10.y4m
  #wget -nc https://people.xiph.org/~tdaede/nyan12.y4m
@@ -22,48 +32,6 @@ if [ ! -f $SEQ ]; then
  exit 1 # terminate and indicate error
 fi

-# Hide githash to detect version changes
-GITHASH=".git/rav1e.githash"
-
-# Get previous version
-EXPECTED_VERSION="42"
-if [ -f $GITHASH ]; then
-  EXPECTED_VERSION=$(cat $GITHASH)
-fi
-
-# Get current version
-ACTUAL_VERSION=$(git submodule status | xargs)
-
-AOM_TEST="aom_test"
-if [[ "$ACTUAL_VERSION" != "$EXPECTED_VERSION" ]] || [[ ! -f ./${AOM_TEST}/aomdec ]]; then
-
-# Store current version to file
-echo $ACTUAL_VERSION > $GITHASH
-
-# Update aombuild
-git submodule update --init
-
-# Clean project files
-cargo clean
-
-# Get configure command from readme
-CONFIGURE_CMD=$(fgrep "cmake ../aom" README.md)
-
-# Wipe and create aom_test folder
-rm -fR $AOM_TEST
-mkdir -p $AOM_TEST
-pushd $AOM_TEST
-
-echo CONFIGURE COMMAND
-echo $CONFIGURE_CMD
-eval $CONFIGURE_CMD
-
-# auto detect the number of cores and parallel build
-make -j$(nproc --all)
-popd
-
-fi
-
 # File containing the encoded sequence
 ENC_FILE="enc_file.ivf"
 # File containing the reconstructed sequence
@@ -75,13 +43,20 @@ DEC_FILE="dec_file.y4m"
 export RUST_BACKTRACE=1

 # Build and run encoder
-cargo run --bin rav1e --release -- $SEQ -o $ENC_FILE -s 3 -r $REC_FILE
+BUILD_TYPE=""
+if [ $IS_RELEASE == 1 ]; then
+  BUILD_TYPE="--release"
+fi
+
+cargo run --bin rav1e $BUILD_TYPE -- $SEQ -o $ENC_FILE -s 3 -r $REC_FILE

 # Decode
-${AOM_TEST}/aomdec $ENC_FILE -o $DEC_FILE
+aomdec $ENC_FILE -o $DEC_FILE

 # Input/Output compare
-cmp <(tail -n+2 $DEC_FILE) <(tail -n+2 $REC_FILE)
+tail -n+2 $DEC_FILE > /tmp/dec_file
+tail -n+2 $REC_FILE > /tmp/rec_file
+cmp /tmp/dec_file /tmp/rec_file || (printf '\e[1;31m%-6s\e[m\n\n' 'Desync detected!!!' && exit 1)

 # Daala tools support coming soon
 #DAALA_TOOLS="../daala/tools/"
@@ -100,11 +75,11 @@ mpv --loop $DEC_FILE

 # Repeat for high bit depth clips
 #cargo run --bin rav1e --release -- $SEQ10 -o $ENC_FILE -s 3 -r $REC_FILE
-#${AOM_TEST}/aomdec $ENC_FILE -o $DEC_FILE
+#aomdec $ENC_FILE -o $DEC_FILE
 #cmp <(tail -n+2 $DEC_FILE) <(tail -n+2 $REC_FILE)
 #mpv --loop $DEC_FILE

 #cargo run --bin rav1e --release -- $SEQ12 -o $ENC_FILE -s 3 -r $REC_FILE
-#${AOM_TEST}/aomdec $ENC_FILE -o $DEC_FILE
+#aomdec $ENC_FILE -o $DEC_FILE
 #cmp <(tail -n+2 $DEC_FILE) <(tail -n+2 $REC_FILE)
 #mpv --loop $DEC_FILE
--- a/cbindgen.toml
+++ b/cbindgen.toml
+header = "// SPDX-License-Identifier: MIT"
+sys_includes = ["stddef.h", "stdint.h", "stdlib.h"]
+no_includes = true
+include_guard = "RAV1E_H"
+tab_width = 4
+style = "Type"
+language = "C"
+
+[parse]
+parse_deps = true
+include = ['rav1e']
+
+[export]
+prefix = "Ra"
+item_types = ["enums", "structs", "unions", "typedefs", "opaque", "functions"]
+
+[enum]
+rename_variants = "ScreamingSnakeCase"
+prefix_with_name = true
--- a/clippy.toml
+++ b/clippy.toml
 single-char-binding-names-threshold = 10
 too-many-arguments-threshold = 16
-cyclomatic-complexity-threshold = 40
+cognitive-complexity-threshold = 40
+trivial-copy-size-limit = 16 # 128-bits = 2 64-bit registers
--- a/crates/avformat-sys/Cargo.toml
+++ b/crates/avformat-sys/Cargo.toml
+[package]
+name = "avformat-sys"
+version = "0.1.0"
+authors = ["Luca Barbato <lu_zero@gentoo.org>"]
+license = "MIT"
+description = "FFI bindings to ffmpeg"
+edition = "2018"
+build = "build.rs"
+
+[package.metadata.pkg-config]
+libavformat = "58.18.102"
+libavcodec = "58.0.0"
+libavutil = "56.0.0"
+
+[features]
+build_sources = []
+
+[build-dependencies]
+bindgen = "0.49"
+metadeps = "1.1.2"
+
+[dependencies]
No results found