Compare revisions

6c76fbd0 · e4674776 · bf537275 · c2d7b6af · 353ba7aa · afe4f2fc
--- a/.editorconfig
+++ b/.editorconfig
+root = true
+[*]
+charset = utf-8
+end_of_line = lf
+indent_size = 2
+indent_style = space
+# Some tools that work with text may not work correctly without the newline
+# control character at the end of the last line. This character helps them to
+# understand that the line is complete and can be considered as a line.
+insert_final_newline = true
+trim_trailing_whitespace = true
+[*.md]
+# Trailing whitespace may have a special meaning. For example, two spaces at the
+# end of a line mean a line break.
+trim_trailing_whitespace = false
+[*.py]
+indent_size = 4
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,4 @@ target
 Cargo.lock
 *.y4m
 *.ivf
+secret_key
--- a/.gitmodules
+++ b/.gitmodules
-[submodule "aom"]
-	path = aom_build/aom
-	url = https://gitlab.xiph.org/xiph/aom-rav1e.git
-	branch = rav1e_16b
--- a/.mailmap
+++ b/.mailmap
+Luca Barbato <lu_zero@gentoo.org> <luca.barbato@gmail.com>
+Frank Bossen <fbossen@gmail.com> <frank@bossentech.com>
+Yushin Cho <ycho@mozilla.com> <cho.yushin@gmail.com>
+Yushin Cho <ycho@mozilla.com> <ycho@users.noreply.github.com>
+Thomas Daede <tdaede@xiph.org> <daede003@umn.edu>
+Nathan E. Egge <negge@xiph.org> <negge@dgql.org>
+Josh Holmer <jholmer.in@gmail.com>
+Josh Holmer <jholmer.in@gmail.com> <jholmer@dminc.com>
+Monty Montgomery <monty@xiph.org> <xiphmont@gmail.com>
+Thomas Szymczak <11669680+tszymczak@users.noreply.github.com> Thomas Szymczak <you@example.com>
+Raphaël Zumer <rzumer@tebako.net>
+Raphaël Zumer <rzumer@tebako.net> <rzumer@gmail.com>
--- a/.travis.yml
+++ b/.travis.yml
 language: rust
+rust:
+  - 1.35.0
+env:
+  - RUST_BACKTRACE=1
 addons:
  apt:
-    packages:
+    packages: binutils-dev libcurl4-openssl-dev zlib1g-dev libdw-dev libiberty-dev ninja-build
-      - yasm
-      - nasm
 before_install:
-    - wget -O cmake.sh https://cmake.org/files/v3.10/cmake-3.10.2-Linux-x86_64.sh
+    - wget -O cmake.sh https://cmake.org/files/v3.13/cmake-3.13.3-Linux-x86_64.sh
    - sudo sh cmake.sh --skip-license --exclude-subdir --prefix=/usr
    - sudo rm -fR /usr/local/cmake*
    - hash -r
    - which cmake
    - cmake --version
-script:
+    - curl -L https://github.com/mozilla/sccache/releases/download/0.2.8/sccache-0.2.8-x86_64-unknown-linux-musl.tar.gz | tar xvz
-    - |
+    - export PATH=$PATH:`pwd`/sccache-0.2.8-x86_64-unknown-linux-musl
-        cargo build --verbose &&
+    - export RUSTC_WRAPPER=sccache
-        cargo test --verbose &&
+    - export SCCACHE_CACHE_SIZE=500M
-        cargo test --verbose --release --features=decode_test -- --ignored &&
+    - export SCCACHE_DIR=~/.cache/sccache
-        cargo bench --verbose &&
+    - curl -L https://download.videolan.org/contrib/nasm/nasm-2.14.tar.gz | tar xvz
-        cargo doc --verbose
+    - cd nasm-2.14
+    - ./configure CC='sccache gcc' && make -j2 && sudo make install
+    - nasm --version
+    - cd ..
+    - curl -L https://github.com/SimonKagstrom/kcov/archive/v36.tar.gz | tar xvz
+    - cd kcov-36
+    - mkdir .build && cd .build
+    - cmake -GNinja -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache .. && ninja && sudo ninja install
+    - cd ../..
+    - git clone --depth 1 -b v1.0.0-errata1 https://aomedia.googlesource.com/aom
+    - cd aom
+    - rm -rf CMakeCache.txt CMakeFiles
+    - mkdir -p .build
+    - cd .build
+    - cmake -GNinja .. -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE=Release -DENABLE_TESTS=0 -DENABLE_DOCS=0 -DCONFIG_LOWBITDEPTH=1 -DCMAKE_INSTALL_PREFIX=/usr -DCONFIG_PIC=1
+    - ninja && sudo ninja install
+    - cd ../..
+cache:
+  directories:
+  - "$HOME/.cache/sccache"
+after_script:
+- sccache -s
+jobs:
+  include:
+      - name: "Build & Coveralls"
+        script:
+         - cargo install cargo-kcov
+         - kcov --version
+         - RUSTFLAGS="-C link-dead-code" cargo build --features=decode_test,quick_test --tests --verbose
+         - travis_wait cargo kcov -v --coveralls --no-clean-rebuild -- --verify --exclude-pattern=$HOME/.cargo,aom_build,.h,test
+      - name: "Tests"
+        script: cargo test --verbose --release --features=decode_test -- --ignored
+      - name: "Bench"
+        script: cargo bench --features=bench --verbose
+      - name: "Doc & Clippy (linter): verifying code quality"
+        script:
+         - cargo doc --verbose --no-deps
+         - rustup component add clippy
+         - cargo clippy --version
+         - cargo clippy -- -D warnings -A clippy::cast_lossless -A clippy::cast_ptr_alignment -A clippy::cognitive_complexity -A clippy::needless_range_loop -A clippy::too_many_arguments -A clippy::verbose_bit_mask -A clippy::unreadable_literal --verbose
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -2,53 +2,93 @@
 name = "rav1e"
 version = "0.1.0"
 authors = ["Thomas Daede <tdaede@xiph.org>"]
+edition = "2018"
 build = "build.rs"
-include = ["/src/**", "/aom_build/**", "/Cargo.toml"]
+include = ["/src/**", "/Cargo.toml", "/build.rs"]
+license = "BSD-2-Clause"
+description = "The fastest and safest AV1 encoder"
+repository = "https://github.com/xiph/rav1e/"
 autobenches = false
 autobins = false
 [features]
-repl = ["rustyline", "binaries"]
+decode_test = ["aom-sys"]
-comparative_bench = []
+decode_test_dav1d = ["dav1d-sys"]
-decode_test = ["bindgen"]
+binaries = ["ivf", "y4m", "clap", "scan_fmt", "serde_json"]
-binaries = ["y4m", "clap"]
+default = ["binaries", "nasm", "signal_support"]
-default = ["binaries"]
+nasm = ["nasm-rs"]
+signal_support = ["signal-hook"]
+dump_ivf = ["ivf"]
+quick_test = []
+desync_finder = []
+bench = []
 [dependencies]
-bitstream-io = "0.6"
+arg_enum_proc_macro = "0.1.1"
-clap = { version = "2", optional = true }
+bitstream-io = "0.8"
+clap = { version = "2", optional = true, default-features = false }
 libc = "0.2"
-rand = "0.5"
+y4m = { version = "0.3.2", optional = true }
-rustyline = { version = "1", optional = true }
-y4m = { version = "0.3", optional = true }
 backtrace = "0.3"
+syn = "^0.15.20"
+quote = "^0.6.10" # hack for proc-macro-hack
 num-traits = "0.2"
+num-derive = "0.2"
+paste = "0.1"
+serde = "1.0"
+serde_derive = "1.0"
+serde_json = { version = "1.0", optional = true }
+dav1d-sys = { version = "0.2", optional = true }
+aom-sys = { version = "0.1.2", optional = true }
+scan_fmt = { version = "0.2", optional = true }
+ivf = { version = "0.1", path = "ivf/", optional = true }
+avformat-sys = { version = "0.1", path = "crates/avformat-sys/", optional = true }
+rayon = "1.0"
+bincode = "1.1"
+arrayvec = "0.4.10"
 [build-dependencies]
-cmake = "0.1.32"
+nasm-rs = { version = "0.1", path = "crates/nasm_rs/", optional = true }
-pkg-config = "0.3.12"
+vergen = "3"
 [target.'cfg(unix)'.build-dependencies]
-bindgen = { version = "0.37", optional = true }
+pkg-config = "0.3.12"
+[target.'cfg(unix)'.dependencies]
+signal-hook = { version = "0.1.9", optional = true }
 [dev-dependencies]
-bencher = "0.1.5"
+criterion = "0.2"
+pretty_assertions = "0.6"
+interpolate_name = "0.2.2"
+rand = "0.6"
+rand_chacha = "0.1"
+semver = "0.9"
 [[bin]]
 name = "rav1e"
+required-features = ["binaries"]
+bench = false
-[[bin]]
+[lib]
-name = "rav1repl"
+bench = false
-required-features = ["repl"]
 [[bench]]
 name = "bench"
 harness = false
+[profile.dev]
+opt-level = 2
 [profile.release]
 codegen-units = 1  # if > 1 enables parallel code generation which improves
                   # compile times, but prevents some optimizations.
                   # Passes `-C codegen-units`. Ignored when `lto = true`.
+lto = true
 [profile.bench]
 codegen-units = 1
+lto = true
+[workspace]
+members = [".", "ivf", "crates/nasm_rs", "crates/avformat-sys"]
--- a/LICENSE
+++ b/LICENSE
-Copyright (c) 2017, Thomas Daede. All rights reserved.
+BSD 2-Clause License
-Redistribution and use in source and binary forms, with or without
+Copyright (c) 2017-2019, the rav1e contributors
-modification, are permitted provided that the following conditions
+All rights reserved.
-are met:
-1. Redistributions of source code must retain the above copyright
+Redistribution and use in source and binary forms, with or without
-   notice, this list of conditions and the following disclaimer.
+modification, are permitted provided that the following conditions are met:
-2. Redistributions in binary form must reproduce the above copyright
+* Redistributions of source code must retain the above copyright notice, this
-   notice, this list of conditions and the following disclaimer in
+  list of conditions and the following disclaimer.
-   the documentation and/or other materials provided with the
-   distribution.
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* Redistributions in binary form must reproduce the above copyright notice,
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  this list of conditions and the following disclaimer in the documentation
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+  and/or other materials provided with the distribution.
-FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
-BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/README.md
+++ b/README.md
 The fastest and safest AV1 encoder.
-[![Build Status](https://travis-ci.org/xiph/rav1e.svg?branch=master)](https://travis-ci.org/xiph/rav1e)
+[![Travis Build Status](https://travis-ci.org/xiph/rav1e.svg?branch=master)](https://travis-ci.org/xiph/rav1e)
+[![AppVeyor Build Status](https://ci.appveyor.com/api/projects/status/github/xiph/rav1e?branch=master&svg=true)](https://ci.appveyor.com/project/tdaede/rav1e/history)
+[![Coverage Status](https://coveralls.io/repos/github/xiph/rav1e/badge.svg?branch=master)](https://coveralls.io/github/xiph/rav1e?branch=master)
 # Overview
 rav1e is an experimental AV1 video encoder. It is designed to eventually cover all use cases, though in its current form it is most suitable for cases where libaom (the reference encoder) is too slow.
-rav1e temporarily uses libaom's transforms and CDF initialization tables, but is otherwise an independent implementation.
 # Features
-* Intra frames
+* Intra and inter frames
 * 64x64 superblocks
-* 4x4 to 32x32 RDO-selected square blocks
+* 4x4 to 64x64 RDO-selected square and 2:1/1:2 rectangular blocks
-* DC, H, V, Paeth, and smooth prediction modes
+* DC, H, V, Paeth, smooth, and a subset of directional prediction modes
-* 4x4 DCT and ADST transforms
+* DCT, ADST and identity transforms (up to 64x64, 16x16 and 32x32 respectively)
 * 8-, 10- and 12-bit depth color
+* 4:2:0 (full support), 4:2:2 and 4:4:4 (limited) chroma sampling
 * Variable speed settings
-* ~10 fps encoding @ 480p
+* Near real-time encoding at high speed levels
+# Releases
+For the foreseeable future, a weekly pre-release of rav1e will be [published](https://github.com/xiph/rav1e/releases) every Tuesday.
+# Windows builds
+Automated AppVeyor builds can be found [here](https://ci.appveyor.com/project/tdaede/rav1e/history). Click on a build (it is recommended you select a build based on "master"), then click ARTIFACTS to reveal the rav1e.exe download link.
 # Building
-This repository uses a git submodule. To initialize it, run:
+**rav1e** can optionally use either `libaom` (default) or a `dav1d` installation to run some extended tests.
+Some `x86_64`-specific optimizations require a recent version of NASM.
+In order to build, test and link to the codec on UNIX, you need Perl, NASM, CMake, Clang and pkg-config. To install these on Ubuntu or Linux Mint, run:
 ```
-git submodule update --init
+sudo apt install perl nasm cmake clang pkg-config
 ```
-This is also required every time you switch branches or pull a submodule change.
+On Windows, pkg-config is not required. A Perl distribution such as Strawberry Perl, CMake, and a NASM binary in your system PATH are required.
-In order to build, test and link to the codec on UNIX, you need Perl, Yasm, CMake, Clang and pkg-config. To install this on Ubuntu or Linux Mint, run:
+To build the release binary in `target/release/rav1e`, run:
 ```
-sudo apt install perl yasm cmake clang pkg-config
+cargo build --release
 ```
-On Windows, pkg-config is not required. A Perl distribution such as Strawberry Perl, CMake, and a Yasm binary in your system PATH are required.
+## Building the C-API
+**rav1e** provides a C-compatible library, header and pkg-config file.
+To build and install it you can use [cargo-c](https://crates.io/crates/cargo-c):
+```
+cargo install cargo-c
+cargo cinstall --release
+```
 # Compressing video
@@ -46,56 +66,93 @@ cargo run --release --bin rav1e -- input.y4m -o output.ivf
 ```
 # Decompressing video
+Encoder output should be compatible with any AV1 decoder compliant with the v1.0.0 specification. You can build a compatible aomdec using the following:
 ```
 mkdir aom_test
 cd aom_test
-cmake ../aom_build/aom -DAOM_TARGET_CPU=generic -DCONFIG_AV1_ENCODER=0 -DCONFIG_UNIT_TESTS=0 -DENABLE_DOCS=0 -DCONFIG_LOWBITDEPTH=1
+cmake /path/to/aom -DAOM_TARGET_CPU=generic -DCONFIG_AV1_ENCODER=0 -DENABLE_TESTS=0 -DENABLE_DOCS=0 -DCONFIG_LOWBITDEPTH=1
 make -j8
 ./aomdec ../output.ivf -o output.y4m
 ```
+# Configuring
+rav1e has several optional features that can be enabled by passing --features to cargo test. Passing --all-features is discouraged.
+* nasm - enabled by default. When enabled, assembly is built for x86_64.
+# Using the AOMAnalyzer
+## Local Analyzer
+1. Download the [AOM Analyzer](http://aomanalyzer.org).
+2. Download [inspect.js](https://people.xiph.org/~mbebenita/analyzer/inspect.js) and [inspect.wasm](https://people.xiph.org/~mbebenita/analyzer/inspect.wasm) and save them in the same directory.
+3. Run the analyzer: `AOMAnalyzer path_to_inspect.js output.ivf`
+## Online Analyzer
+If your `.ivf` file is hosted somewhere (and CORS is enabled on your web server) you can use:
+```
+https://arewecompressedyet.com/analyzer/?d=https://people.xiph.org/~mbebenita/analyzer/inspect.js&f=path_to_output.ivf
+```
 # Design
 * src/context.rs - High-level functions that write symbols to the bitstream, and maintain context.
 * src/ec.rs - Low-level implementation of the entropy coder, which directly writes the bitstream.
-* src/lib.rs - The top level library, contains code to write headers, manage buffers, and iterate throught each superblock.
+* src/lib.rs - The top level library, contains code to write headers, manage buffers, and iterate through each superblock.
 * src/partition.rs - Functions and enums to manage partitions (subdivisions of a superblock).
 * src/predict.rs - Intra prediction implementations.
 * src/quantize.rs - Quantization and dequantization functions for coefficients.
 * src/rdo.rs - RDO-related structures and distortion computation functions.
-* src/transform.rs - Implementations of DCT and ADST transforms.
+* src/transform/*.rs - Implementations of DCT and ADST transforms.
 * src/util.rs - Misc utility code.
 * src/bin/rav1e.rs - rav1e command line tool.
 * src/bin/rav1erepl.rs - Command line tool for debugging.
-* aom_build/ - Local submodule of libaom. Some C functions and constants are used directly. Also used for benchmarking and testing.
 # Contributing
+## Toolchain
+rav1e uses the stable version of Rust (the stable toolchain).
+To install the toolchain:
+```
+rustup install stable
+```
 ## Coding style
 Check code formatting with [rustfmt](https://github.com/rust-lang-nursery/rustfmt) before submitting a PR.
-rav1e currently uses a [forked version](https://github.com/mbebenita/rustfmt) of rustfmt.
-To install rustfmt:
+To install rustfmt:
 ```
-git clone https://github.com/mbebenita/rustfmt
+rustup component add rustfmt
-cd rustfmt
-cargo +nightly build // Depends on the Rust nightly toolchain. 
-cargo +nightly install -f // Overwrite the installed rustfmt.
 ```
 then
 ```
-cd rav1e
+cargo fmt -- --check
-cargo +nightly fmt -- --check
 ```
-You should also try [clippy](https://github.com/rust-lang-nursery/rust-clippy).
+## Code Analysis
+[clippy](https://github.com/rust-lang-nursery/rust-clippy) will help catch common mistakes and improve your Rust code.
+We recommend you use it before submitting a PR.
+To install clippy:
 ```
-cargo +nightly clippy
+rustup component add clippy
 ```
+Then search for "cargo clippy" in [.travis.yml](https://github.com/xiph/rav1e/blob/master/.travis.yml) to find the exact command to run.
 ## Testing
 Run unit tests with:
 ```
@@ -104,17 +161,17 @@ cargo test
 Run encode-decode integration tests with:
 ```
-cargo test --release --features=decode_test -- --ignored
+cargo test --release --features=decode_test
 ```
-Run regular benchmarks with:
+Run the encode-decode tests against `dav1d` with:
 ```
-cargo bench
+cargo test --release --features=decode_test_dav1d
 ```
-Run comparative benchmarks with:
+Run regular benchmarks with:
 ```
-cargo bench --features=comparative_bench
+cargo bench --features=bench
 ```
 # Getting in Touch

--- a/aom @ 82e0d124
+++ b/aom @ 82e0d124
-Subproject commit 82e0d124dba5db07f73c52b3d93b0460b4a83cdf
--- a/appveyor.yml
+++ b/appveyor.yml
-os: Visual Studio 2017
+image: Visual Studio 2019
 environment:
-    matrix:   
+  host: x86_64-pc-windows-msvc
-    - channel: stable
+  matrix:
+    - platform: x86_64
      target: x86_64-pc-windows-msvc
+      channel: stable
+    - platform: arm64
+      target: aarch64-pc-windows-msvc
+      channel: nightly
+matrix:
+  allow_failures:
+    - platform: arm64
 install:
+    - call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat"
    - appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe
-    - appveyor DownloadFile http://www.tortall.net/projects/yasm/releases/yasm-1.3.0-win64.exe -FileName yasm.exe
+    - appveyor DownloadFile https://people.xiph.org/~tdaede/nasm-2.14.02-win64.zip -FileName nasm.zip
-    - rustup-init -yv --default-toolchain %channel% --default-host %target%
+    - appveyor DownloadFile https://github.com/mozilla/sccache/releases/download/0.2.8/sccache-0.2.8-x86_64-pc-windows-msvc.tar.gz
-    - set PATH=%PATH%;%USERPROFILE%\.cargo\bin;%APPVEYOR_BUILD_FOLDER%
+    - tar xzf sccache-0.2.8-x86_64-pc-windows-msvc.tar.gz
+    - 7z e -y nasm.zip
+    - rustup-init -yv --default-toolchain %channel% --default-host %host%
+    - set PATH=%PATH%;%USERPROFILE%\.cargo\bin;%APPVEYOR_BUILD_FOLDER%;nasm-2.14.02;sccache-0.2.8-x86_64-pc-windows-msvc
+    - set RUSTC_WRAPPER=sccache
+    - set SCCACHE_CACHE_SIZE=500M
+    - set SCCACHE_DIR=%LOCALAPPDATA%\Mozilla\sccache
+    - ps: $Env:PKG_CONFIG_ALLOW_CROSS=1
    - rustc -vV
    - cargo -vV
+    - rustup target add %target%
-build: false
+on_success:
+    - sccache -s
+cache:
+    - '%LOCALAPPDATA%\Mozilla\sccache'
+build_script:
+    - cargo build --release --target=%target%
 test_script:
-    - git submodule update --init
+    - cargo test --target=%target% --verbose
-    - cargo test --verbose
+artifacts:
+    - path: target\$(target)\release\rav1e.exe
+      name: rav1e-$(platform)
+deploy:
+  - provider: GitHub
+    artifact: target\$(target)\release\rav1e.exe
+    auth_token:
+      secure: 'LPBjNyFOg+vBkVR4w+89YVNhByaXBGNwtN6UwkFkWTfPow5oeCbFMtJavU9ZLs+c'
+    prerelease: true
+    on:
+      appveyor_repo_tag: true
--- a/benches/bench.rs
+++ b/benches/bench.rs
@@ -7,96 +7,93 @@
 // Media Patent License 1.0 was not distributed with this source code in the
 // PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-#[macro_use]
-extern crate bencher;
-extern crate rand;
-extern crate rav1e;
 mod predict;
+mod transform;
+mod dist;
-use bencher::*;
+use rav1e::bench::api::*;
-use rav1e::*;
+use rav1e::bench::encoder::*;
-use rav1e::context::*;
+use rav1e::bench::cdef::*;
-use rav1e::ec;
+use rav1e::bench::context::*;
-use rav1e::partition::*;
+use rav1e::bench::ec::*;
-use rav1e::predict::*;
+use rav1e::bench::partition::*;
+use rav1e::bench::predict::*;
-#[cfg(feature = "comparative_bench")]
+use rav1e::bench::transform::*;
-mod comparative;
+use rav1e::bench::rdo::*;
-struct WriteB {
+use crate::transform::transform;
-  tx_size: TxSize,
-  qi: usize
-}
-impl TDynBenchFn for WriteB {
+use criterion::*;
-  fn run(&self, b: &mut Bencher) {
+use std::time::Duration;
-    write_b_bench(b, self.tx_size, self.qi);
-  }
-}
-pub fn write_b() -> Vec<TestDescAndFn> {
+fn write_b(c: &mut Criterion) {
-  use std::borrow::Cow;
-  let mut benches = ::std::vec::Vec::new();
  for &tx_size in &[TxSize::TX_4X4, TxSize::TX_8X8] {
    for &qi in &[20, 55] {
-      let w = WriteB { tx_size, qi };
      let n = format!("write_b_bench({:?}, {})", tx_size, qi);
-      benches.push(TestDescAndFn {
+      c.bench_function(&n, move |b| write_b_bench(b, tx_size, qi));
-        desc: TestDesc { name: Cow::from(n), ignore: false },
-        testfn: TestFn::DynBenchFn(Box::new(w))
-      });
    }
  }
-  benches
 }
 fn write_b_bench(b: &mut Bencher, tx_size: TxSize, qindex: usize) {
-  unsafe {
+  let config = EncoderConfig {
-    av1_rtcd();
+    width: 1024,
-    aom_dsp_rtcd();
+    height: 1024,
-  }
+    quantizer: qindex,
-  let config =
+    speed_settings: SpeedSettings::from_preset(10),
-    EncoderConfig { quantizer: qindex, speed: 10, ..Default::default() };
+    ..Default::default()
-  let mut fi = FrameInvariants::new(1024, 1024, config);
+  };
-  let mut w = ec::WriterEncoder::new();
+  let sequence = Sequence::new(&Default::default());
-  let fc = CDFContext::new(fi.config.quantizer as u8);
+  let mut fi = FrameInvariants::<u16>::new(config, sequence);
-  let bc = BlockContext::new(fi.sb_width * 16, fi.sb_height * 16);
+  let mut w = WriterEncoder::new();
+  let mut fc = CDFContext::new(fi.base_q_idx);
+  let mut fb = FrameBlocks::new(fi.sb_width * 16, fi.sb_height * 16);
+  let mut tb = fb.as_tile_blocks_mut();
+  let bc = BlockContext::new(&mut tb);
  let mut fs = FrameState::new(&fi);
-  let mut cw = ContextWriter::new(fc, bc);
+  let mut ts = fs.as_tile_state_mut();
+  // For now, restoration unit size is locked to superblock size.
+  let mut cw = ContextWriter::new(&mut fc, bc);
  let tx_type = TxType::DCT_DCT;
  let sbx = 0;
  let sby = 0;
+  let ac = &[0i16; 32 * 32];
  b.iter(|| {
    for &mode in RAV1E_INTRA_MODES {
      let sbo = SuperBlockOffset { x: sbx, y: sby };
-      fs.qc.update(fi.config.quantizer, tx_size, mode.is_intra(), 8);
      for p in 1..3 {
+        ts.qc.update(fi.base_q_idx, tx_size, mode.is_intra(), 8, fi.dc_delta_q[p], fi.ac_delta_q[p]);
        for by in 0..8 {
          for bx in 0..8 {
            // For ex, 8x8 tx should be applied to even numbered (bx,by)
-            if (tx_size.width_mi() >> 1) & bx != 0 ||
+            if (tx_size.width_mi() >> 1) & bx != 0
-              (tx_size.height_mi() >> 1) & by != 0 { continue; };
+              || (tx_size.height_mi() >> 1) & by != 0
+            {
+              continue;
+            };
            let bo = sbo.block_offset(bx, by);
            let tx_bo = BlockOffset { x: bo.x + bx, y: bo.y + by };
-            let po = tx_bo.plane_offset(&fs.input.planes[p].cfg);
+            let po = tx_bo.plane_offset(&ts.input.planes[p].cfg);
            encode_tx_block(
              &mut fi,
-              &mut fs,
+              &mut ts,
              &mut cw,
              &mut w,
              p,
-              &bo,
+              bo,
              mode,
              tx_size,
              tx_type,
              tx_size.block_size(),
-              &po,
+              po,
              false,
-              8
+              ac,
+              0,
+              RDOType::PixelDistRealRate,
+              true
            );
          }
        }
@@ -105,19 +102,93 @@ fn write_b_bench(b: &mut Bencher, tx_size: TxSize, qindex: usize) {
  });
 }
-benchmark_group!(
+fn cdef_frame(c: &mut Criterion) {
-  intra_prediction,
+  let w = 128;
-  predict::intra_dc_4x4,
+  let h = 128;
-  predict::intra_h_4x4,
+  let n = format!("cdef_frame({}, {})", w, h);
-  predict::intra_v_4x4,
+  c.bench_function(&n, move |b| cdef_frame_bench(b, w, h));
-  predict::intra_paeth_4x4,
+}
-  predict::intra_smooth_4x4,
-  predict::intra_smooth_h_4x4,
+fn cdef_frame_bench(b: &mut Bencher, width: usize, height: usize) {
-  predict::intra_smooth_v_4x4
+  let config = EncoderConfig {
-);
+    width,
+    height,
-#[cfg(feature = "comparative_bench")]
+    quantizer: 100,
-benchmark_main!(comparative::intra_prediction);
+    speed_settings: SpeedSettings::from_preset(10),
+    ..Default::default()
-#[cfg(not(feature = "comparative_bench"))]
+  };
-benchmark_main!(write_b, intra_prediction);
+  let sequence = Sequence::new(&Default::default());
+  let fi = FrameInvariants::<u16>::new(config, sequence);
+  let fb = FrameBlocks::new(fi.sb_width * 16, fi.sb_height * 16);
+  let mut fs = FrameState::new(&fi);
+  b.iter(|| cdef_filter_frame(&fi, &mut fs.rec, &fb));
+}
+fn cfl_rdo(c: &mut Criterion) {
+  for &bsize in &[
+    BlockSize::BLOCK_4X4,
+    BlockSize::BLOCK_8X8,
+    BlockSize::BLOCK_16X16,
+    BlockSize::BLOCK_32X32
+  ] {
+    let n = format!("cfl_rdo({:?})", bsize);
+    c.bench_function(&n, move |b| cfl_rdo_bench(b, bsize));
+  }
+}
+fn cfl_rdo_bench(b: &mut Bencher, bsize: BlockSize) {
+  let config = EncoderConfig {
+    width: 1024,
+    height: 1024,
+    quantizer: 100,
+    speed_settings: SpeedSettings::from_preset(10),
+    ..Default::default()
+  };
+  let sequence = Sequence::new(&Default::default());
+  let fi = FrameInvariants::<u16>::new(config, sequence);
+  let mut fs = FrameState::new(&fi);
+  let mut ts = fs.as_tile_state_mut();
+  let offset = BlockOffset { x: 1, y: 1 };
+  b.iter(|| rdo_cfl_alpha(&mut ts, offset, bsize, fi.sequence.bit_depth))
+}
+fn ec_bench(c: &mut Criterion) {
+    c.bench_function("update_cdf_4_native", update_cdf_4_native);
+    c.bench_function("update_cdf_4_sse2", update_cdf_4_sse2);
+}
+fn update_cdf_4_native(b: &mut Bencher) {
+    let mut cdf = [7296, 3819, 1616, 0, 0];
+    b.iter(|| {
+        for i in 0..1000 {
+            WriterBase::<WriterRecorder>::update_cdf(&mut cdf, i & 3);
+            black_box(cdf);
+        }
+    });
+}
+fn update_cdf_4_sse2(b: &mut Bencher) {
+    let mut cdf = [7296, 3819, 1616, 0, 0];
+    b.iter(|| {
+        for i in 0..1000 {
+            WriterBase::<WriterRecorder>::update_cdf_4_sse2(&mut cdf, i & 3);
+            black_box(cdf);
+        }
+    });
+}
+criterion_group!(intra_prediction, predict::pred_bench,);
+criterion_group!(cfl, cfl_rdo);
+criterion_group!(cdef, cdef_frame);
+criterion_group!(write_block, write_b);
+criterion_group!{ name = me;
+                  config = Criterion::default().warm_up_time(Duration::new(1,0));
+                  targets = dist::get_sad
+}
+criterion_group!(ec, ec_bench);
+criterion_main!(write_block, intra_prediction, cdef, cfl, me, transform, ec);
--- a/benches/comparative/predict.rs
+++ b/benches/comparative/predict.rs
-// Copyright (c) 2017-2018, The rav1e contributors. All rights reserved
-//
-// This source code is subject to the terms of the BSD 2 Clause License and
-// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-// was not distributed with this source code in the LICENSE file, you can
-// obtain it at www.aomedia.org/license/software. If the Alliance for Open
-// Media Patent License 1.0 was not distributed with this source code in the
-// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-use bencher::*;
-use comparative::libc;
-use predict as predict_native;
-use predict::*;
-use rand::{ChaChaRng, SeedableRng};
-extern {
-  fn highbd_dc_predictor(
-    dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int, bh: libc::c_int,
-    above: *const u16, left: *const u16, bd: libc::c_int
-  );
-  fn highbd_h_predictor(
-    dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int, bh: libc::c_int,
-    above: *const u16, left: *const u16, bd: libc::c_int
-  );
-  fn highbd_v_predictor(
-    dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int, bh: libc::c_int,
-    above: *const u16, left: *const u16, bd: libc::c_int
-  );
-  fn highbd_paeth_predictor(
-    dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int, bh: libc::c_int,
-    above: *const u16, left: *const u16, bd: libc::c_int
-  );
-  fn highbd_smooth_predictor(
-    dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int, bh: libc::c_int,
-    above: *const u16, left: *const u16, bd: libc::c_int
-  );
-  fn highbd_smooth_h_predictor(
-    dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int, bh: libc::c_int,
-    above: *const u16, left: *const u16, bd: libc::c_int
-  );
-  fn highbd_smooth_v_predictor(
-    dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int, bh: libc::c_int,
-    above: *const u16, left: *const u16, bd: libc::c_int
-  );
-}
-fn predict_intra_4x4_aom(
-  b: &mut Bencher, 
-  predictor: unsafe extern "C" fn(*mut u16, libc::ptrdiff_t, libc::c_int, libc::c_int, *const u16, *const u16, libc::c_int)) 
-{
-  let mut rng = ChaChaRng::from_seed([0; 32]);
-  let (mut block, above_context, left_context) = generate_block(&mut rng);
-  b.iter(|| {
-    for _ in 0..MAX_ITER {
-      unsafe {
-        predictor(block.as_mut_ptr(), BLOCK_SIZE.width() as libc::ptrdiff_t, 
-          4, 4, above_context.as_ptr(), left_context.as_ptr(), 8);
-      }
-    }
-  })
-}
-pub fn intra_dc_4x4_native(b: &mut Bencher) {
-  predict_native::intra_dc_4x4(b);
-}
-pub fn intra_dc_4x4_aom(b: &mut Bencher) {
-  predict_intra_4x4_aom(b, highbd_dc_predictor);
-}
-pub fn intra_h_4x4_native(b: &mut Bencher) {
-  predict_native::intra_h_4x4(b);
-}
-pub fn intra_h_4x4_aom(b: &mut Bencher) {
-  predict_intra_4x4_aom(b, highbd_h_predictor);
-}
-pub fn intra_v_4x4_native(b: &mut Bencher) {
-  predict_native::intra_v_4x4(b);
-}
-pub fn intra_v_4x4_aom(b: &mut Bencher) {
-  predict_intra_4x4_aom(b, highbd_v_predictor);
-}
-pub fn intra_paeth_4x4_native(b: &mut Bencher) {
-  predict_native::intra_paeth_4x4(b);
-}
-pub fn intra_paeth_4x4_aom(b: &mut Bencher) {
-  predict_intra_4x4_aom(b, highbd_paeth_predictor);
-}
-pub fn intra_smooth_4x4_native(b: &mut Bencher) {
-  predict_native::intra_smooth_4x4(b);
-}
-pub fn intra_smooth_4x4_aom(b: &mut Bencher) {
-  predict_intra_4x4_aom(b, highbd_smooth_predictor);
-}
-pub fn intra_smooth_h_4x4_native(b: &mut Bencher) {
-  predict_native::intra_smooth_h_4x4(b);
-}
-pub fn intra_smooth_h_4x4_aom(b: &mut Bencher) {
-  predict_intra_4x4_aom(b, highbd_smooth_h_predictor);
-}
-pub fn intra_smooth_v_4x4_native(b: &mut Bencher) {
-  predict_native::intra_smooth_v_4x4(b);
-}
-pub fn intra_smooth_v_4x4_aom(b: &mut Bencher) {
-  predict_intra_4x4_aom(b, highbd_smooth_v_predictor);
-}
--- a/benches/dist.rs
+++ b/benches/dist.rs
+// Copyright (c) 2017-2018, The rav1e contributors. All rights reserved
+//
+// This source code is subject to the terms of the BSD 2 Clause License and
+// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+// was not distributed with this source code in the LICENSE file, you can
+// obtain it at www.aomedia.org/license/software. If the Alliance for Open
+// Media Patent License 1.0 was not distributed with this source code in the
+// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+use criterion::*;
+use rav1e::bench::dist;
+use rav1e::bench::partition::*;
+use rav1e::bench::partition::BlockSize::*;
+use rav1e::bench::frame::*;
+use rand::{Rng, SeedableRng};
+use rand_chacha::ChaChaRng;
+use rav1e::Pixel;
+/// Overwrites every sample of `plane` with a pseudo-random 8-bit value drawn
+/// from `ra`, walking the backing data in `stride`-sized rows.
+fn fill_plane<T: Pixel>(ra: &mut ChaChaRng, plane: &mut Plane<T>) {
+  let row_len = plane.cfg.stride;
+  plane.data.chunks_mut(row_len).flatten().for_each(|sample| {
+    let byte: u8 = ra.gen();
+    *sample = T::cast_from(byte);
+  });
+}
+/// Creates a `width` x `height` plane with `Plane::new` and fills it with
+/// pseudo-random samples from `ra`.
+fn new_plane<T: Pixel>(ra: &mut ChaChaRng, width: usize, height: usize) -> Plane<T> {
+  // NOTE(review): presumably the horizontal/vertical padding expected by
+  // `Plane::new` -- confirm against its signature.
+  let extra = 128 + 8;
+  let mut plane = Plane::new(width, height, 0, 0, extra, extra);
+  fill_plane(ra, &mut plane);
+  plane
+}
+/// Benchmarks `dist::get_sad` for block size `bs` at `bit_depth`, using two
+/// pseudo-random 640x480 planes of sample type `T`.
+fn run_sad_bench<T: Pixel>(b: &mut Bencher, &(bs, bit_depth): &(BlockSize, usize)) {
+  const WIDTH: usize = 640;
+  const HEIGHT: usize = 480;
+  // Fixed seed: both planes hold the same contents on every run.
+  let mut rng = ChaChaRng::from_seed([0; 32]);
+  let (block_w, block_h) = (bs.width(), bs.height());
+  let source = new_plane::<T>(&mut rng, WIDTH, HEIGHT);
+  let reference = new_plane::<T>(&mut rng, WIDTH, HEIGHT);
+  let source_region = source.as_region();
+  let reference_region = reference.as_region();
+  b.iter(|| {
+    // `black_box` keeps the result from being optimised away.
+    let _ = black_box(dist::get_sad(
+      &source_region,
+      &reference_region,
+      block_w,
+      block_h,
+      bit_depth
+    ));
+  })
+}
+/// Runs the SAD benchmark with `u16` samples when `bit_depth` exceeds 8 and
+/// with `u8` samples otherwise.
+fn bench_get_sad(b: &mut Bencher, &(bs, bit_depth): &(BlockSize, usize)) {
+  let params = (bs, bit_depth);
+  if bit_depth > 8 {
+    run_sad_bench::<u16>(b, &params)
+  } else {
+    run_sad_bench::<u8>(b, &params)
+  }
+}
+/// Registers the `get_sad` benchmark for every listed block size, first at
+/// bit depth 8 and then at bit depth 10.
+pub fn get_sad(c: &mut Criterion) {
+  let sizes = [
+    BLOCK_4X4,
+    BLOCK_4X8,
+    BLOCK_8X4,
+    BLOCK_8X8,
+    BLOCK_8X16,
+    BLOCK_16X8,
+    BLOCK_16X16,
+    BLOCK_16X32,
+    BLOCK_32X16,
+    BLOCK_32X32,
+    BLOCK_32X64,
+    BLOCK_64X32,
+    BLOCK_64X64,
+    BLOCK_64X128,
+    BLOCK_128X64,
+    BLOCK_128X128,
+    BLOCK_4X16,
+    BLOCK_16X4,
+    BLOCK_8X32,
+    BLOCK_32X8,
+    BLOCK_16X64,
+    BLOCK_64X16
+  ];
+  // Same ordering as a hand-written list: all sizes at 8 bits, then all
+  // sizes at 10 bits.
+  let mut blocks: Vec<(BlockSize, usize)> =
+    Vec::with_capacity(2 * sizes.len());
+  for &bit_depth in &[8, 10] {
+    for &bs in &sizes {
+      blocks.push((bs, bit_depth));
+    }
+  }
+  c.bench_function_over_inputs("get_sad", bench_get_sad, blocks);
+}
--- a/benches/predict.rs
+++ b/benches/predict.rs
 // Copyright (c) 2017-2018, The rav1e contributors. All rights reserved
 //
 // This source code is subject to the terms of the BSD 2 Clause License and
 // the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 // was not distributed with this source code in the LICENSE file, you can
 // obtain it at www.aomedia.org/license/software. If the Alliance for Open
 // Media Patent License 1.0 was not distributed with this source code in the
 // PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-use bencher::*;
+use criterion::*;
-use rand::{ChaChaRng, Rng, SeedableRng};
+use rand::{Rng, RngCore, SeedableRng};
-use rav1e::partition::BlockSize;
+use rand_chacha::ChaChaRng;
-use rav1e::predict::{Block4x4, Intra};
+use rav1e::bench::partition::BlockSize;
+use rav1e::bench::predict::{Block4x4, Intra};
-pub const MAX_ITER: usize = 50000;
+use rav1e::bench::frame::*;
-pub const BLOCK_SIZE: BlockSize = BlockSize::BLOCK_32X32;
+use rav1e::bench::util::*;
-pub fn generate_block(rng: &mut ChaChaRng) -> (Vec<u16>, Vec<u16>, Vec<u16>) {
+pub const BLOCK_SIZE: BlockSize = BlockSize::BLOCK_32X32;
-  let block = vec![0u16; BLOCK_SIZE.width() * BLOCK_SIZE.height()];
-  let above_context: Vec<u16> = (0..BLOCK_SIZE.height()).map(|_| rng.gen()).collect();
+pub fn generate_block(rng: &mut ChaChaRng) -> (Plane<u16>, Vec<u16>, Vec<u16>) {
-  let left_context: Vec<u16> = (0..BLOCK_SIZE.width()).map(|_| rng.gen()).collect();
+  let block = Plane::wrap(
+    vec![0u16; BLOCK_SIZE.width() * BLOCK_SIZE.height()],
-  (block, above_context, left_context)
+    BLOCK_SIZE.width(),
-}
+  );
+  let above_context: Vec<u16> =
-pub fn intra_dc_4x4(b: &mut Bencher) {
+    (0..BLOCK_SIZE.height()).map(|_| rng.gen()).collect();
-  let mut ra = ChaChaRng::from_seed([0; 32]);
+  let left_context: Vec<u16> =
-  let (mut block, above, left) = generate_block(&mut ra);
+    (0..BLOCK_SIZE.width()).map(|_| rng.gen()).collect();
-  b.iter(|| {
+  (block, above_context, left_context)
-    for _ in 0..MAX_ITER {
+}
-      Block4x4::pred_dc(&mut block, BLOCK_SIZE.width(), &above[..4], &left[..4]);
-    }
+pub fn generate_block_u8<'a>(
-  })
+  rng: &mut ChaChaRng, edge_buf: &'a mut AlignedArray<[u8; 65]>
-}
+) -> (Plane<u8>, &'a [u8], &'a [u8]) {
+  let block = Plane::wrap(
-pub fn intra_h_4x4(b: &mut Bencher) {
+    vec![0u8; BLOCK_SIZE.width() * BLOCK_SIZE.height()],
-  let mut rng = ChaChaRng::from_seed([0; 32]);
+    BLOCK_SIZE.width(),
-  let (mut block, _above, left) = generate_block(&mut rng);
+  );
+  rng.fill_bytes(&mut edge_buf.array);
-  b.iter(|| {
+  let above_context = &edge_buf.array[33..];
-    for _ in 0..MAX_ITER {
+  let left_context = &edge_buf.array[..32];
-      Block4x4::pred_h(&mut block, BLOCK_SIZE.width(), &left[..4]);
-    }
+  (block, above_context, left_context)
-  })
+}
-}
+pub fn bench_pred_fn<F>(c: &mut Criterion, id: &str, f: F)
-pub fn intra_v_4x4(b: &mut Bencher) {
+where
-  let mut rng = ChaChaRng::from_seed([0; 32]);
+  F: FnMut(&mut Bencher) + 'static
-  let (mut block, above, _left) = generate_block(&mut rng);
+{
+  let b = Benchmark::new(id, f);
-  b.iter(|| {
+  c.bench(
-    for _ in 0..MAX_ITER {
+    id,
-      Block4x4::pred_v(&mut block, BLOCK_SIZE.width(), &above[..4]);
+    if id.ends_with("_4x4_u8") {
-    }
+      b.throughput(Throughput::Bytes(16))
-  })
+    } else if id.ends_with("_4x4") {
-}
+      b.throughput(Throughput::Bytes(32))
+    } else {
-pub fn intra_paeth_4x4(b: &mut Bencher) {
+      b
-  let mut rng = ChaChaRng::from_seed([0; 32]);
+    }
-  let (mut block, above, left) = generate_block(&mut rng);
+  );
-  let above_left = unsafe { *above.as_ptr().offset(-1) };
+}
-  b.iter(|| {
+pub fn pred_bench(c: &mut Criterion) {
-    for _ in 0..MAX_ITER {
+  bench_pred_fn(c, "intra_dc_4x4", intra_dc_4x4);
-      Block4x4::pred_paeth(&mut block, BLOCK_SIZE.width(), &above[..4], &left[..4], above_left);
+  bench_pred_fn(c, "intra_dc_left_4x4", intra_dc_left_4x4);
-    }
+  bench_pred_fn(c, "intra_dc_top_4x4", intra_dc_top_4x4);
-  })
+  bench_pred_fn(c, "intra_h_4x4", intra_h_4x4);
-}
+  bench_pred_fn(c, "intra_v_4x4", intra_v_4x4);
+  bench_pred_fn(c, "intra_paeth_4x4", intra_paeth_4x4);
-pub fn intra_smooth_4x4(b: &mut Bencher) {
+  bench_pred_fn(c, "intra_smooth_4x4", intra_smooth_4x4);
-  let mut rng = ChaChaRng::from_seed([0; 32]);
+  bench_pred_fn(c, "intra_smooth_h_4x4", intra_smooth_h_4x4);
-  let (mut block, above, left) = generate_block(&mut rng);
+  bench_pred_fn(c, "intra_smooth_v_4x4", intra_smooth_v_4x4);
+  bench_pred_fn(c, "intra_cfl_4x4", intra_cfl_4x4);
-  b.iter(|| {
+  bench_pred_fn(c, "intra_dc_4x4_u8", intra_dc_4x4_u8);
-    for _ in 0..MAX_ITER {
+  bench_pred_fn(c, "intra_dc_128_4x4_u8", intra_dc_128_4x4_u8);
-      Block4x4::pred_smooth(&mut block, BLOCK_SIZE.width(), &above[..4], &left[..4]);
+  bench_pred_fn(c, "intra_dc_left_4x4_u8", intra_dc_left_4x4_u8);
-    }
+  bench_pred_fn(c, "intra_dc_top_4x4_u8", intra_dc_top_4x4_u8);
-  })
+  bench_pred_fn(c, "intra_h_4x4_u8", intra_h_4x4_u8);
-}
+  bench_pred_fn(c, "intra_v_4x4_u8", intra_v_4x4_u8);
+  bench_pred_fn(c, "intra_paeth_4x4_u8", intra_paeth_4x4_u8);
-pub fn intra_smooth_h_4x4(b: &mut Bencher) {
+  bench_pred_fn(c, "intra_smooth_4x4_u8", intra_smooth_4x4_u8);
-  let mut rng = ChaChaRng::from_seed([0; 32]);
+  bench_pred_fn(c, "intra_smooth_h_4x4_u8", intra_smooth_h_4x4_u8);
-  let (mut block, above, left) = generate_block(&mut rng);
+  bench_pred_fn(c, "intra_smooth_v_4x4_u8", intra_smooth_v_4x4_u8);
+}
-  b.iter(|| {
-    for _ in 0..MAX_ITER {
+pub fn intra_dc_4x4(b: &mut Bencher) {
-      Block4x4::pred_smooth_h(&mut block, BLOCK_SIZE.width(), &above[..4], &left[..4]);
+  let mut rng = ChaChaRng::from_seed([0; 32]);
-    }
+  let (mut block, above, left) = generate_block(&mut rng);
-  })
-}
+  b.iter(|| {
+    Block4x4::pred_dc(&mut block.as_region_mut(), &above[..4], &left[..4]);
-pub fn intra_smooth_v_4x4(b: &mut Bencher) {
+  })
-  let mut rng = ChaChaRng::from_seed([0; 32]);
+}
-  let (mut block, above, left) = generate_block(&mut rng);
+pub fn intra_dc_left_4x4(b: &mut Bencher) {
-  b.iter(|| {
+  let mut rng = ChaChaRng::from_seed([0; 32]);
-    for _ in 0..MAX_ITER {
+  let (mut block, above, left) = generate_block(&mut rng);
-      Block4x4::pred_smooth_v(&mut block, BLOCK_SIZE.width(), &above[..4], &left[..4]);
-    }
+  b.iter(|| {
-  })
+    Block4x4::pred_dc_left(
-}
+      &mut block.as_region_mut(),
+      &above[..4],
+      &left[..4]
+    );
+  })
+}
+/// Benchmarks `Block4x4::pred_dc_top` on the `u16` block from
+/// `generate_block`.
+pub fn intra_dc_top_4x4(b: &mut Bencher) {
+  // Fixed seed: identical edge samples on every run.
+  let mut ra = ChaChaRng::from_seed([0; 32]);
+  let (mut plane, top, side) = generate_block(&mut ra);
+  b.iter(|| {
+    let mut region = plane.as_region_mut();
+    Block4x4::pred_dc_top(&mut region, &top[..4], &side[..4]);
+  })
+}
+/// Benchmarks `Block4x4::pred_h` on the `u16` block from `generate_block`;
+/// only the left edge is used.
+pub fn intra_h_4x4(b: &mut Bencher) {
+  let mut ra = ChaChaRng::from_seed([0; 32]);
+  let (mut plane, _top, side) = generate_block(&mut ra);
+  b.iter(|| {
+    let mut region = plane.as_region_mut();
+    Block4x4::pred_h(&mut region, &side[..4]);
+  })
+}
+/// Benchmarks `Block4x4::pred_v` on the `u16` block from `generate_block`;
+/// only the above edge is used.
+pub fn intra_v_4x4(b: &mut Bencher) {
+  let mut ra = ChaChaRng::from_seed([0; 32]);
+  let (mut plane, top, _side) = generate_block(&mut ra);
+  b.iter(|| {
+    let mut region = plane.as_region_mut();
+    Block4x4::pred_v(&mut region, &top[..4]);
+  })
+}
+/// Benchmarks `Block4x4::pred_paeth` on the `u16` block from
+/// `generate_block`.
+pub fn intra_paeth_4x4(b: &mut Bencher) {
+  // Fixed seed: identical edge samples on every run.
+  let mut rng = ChaChaRng::from_seed([0; 32]);
+  let (mut block, above, left) = generate_block(&mut rng);
+  // `above` is a standalone `Vec`, so nothing precedes its first element:
+  // reading `above.as_ptr().offset(-1)` is an out-of-bounds read and
+  // undefined behaviour. Draw the top-left sample from the seeded RNG
+  // instead, which keeps the input deterministic.
+  let above_left: u16 = rng.gen();
+  b.iter(|| {
+    Block4x4::pred_paeth(
+      &mut block.as_region_mut(),
+      &above[..4],
+      &left[..4],
+      above_left
+    );
+  })
+}
+/// Benchmarks `Block4x4::pred_smooth` on the `u16` block from
+/// `generate_block`.
+pub fn intra_smooth_4x4(b: &mut Bencher) {
+  let mut ra = ChaChaRng::from_seed([0; 32]);
+  let (mut plane, top, side) = generate_block(&mut ra);
+  b.iter(|| {
+    let mut region = plane.as_region_mut();
+    Block4x4::pred_smooth(&mut region, &top[..4], &side[..4]);
+  })
+}
+/// Benchmarks `Block4x4::pred_smooth_h` on the `u16` block from
+/// `generate_block`.
+pub fn intra_smooth_h_4x4(b: &mut Bencher) {
+  let mut ra = ChaChaRng::from_seed([0; 32]);
+  let (mut plane, top, side) = generate_block(&mut ra);
+  b.iter(|| {
+    let mut region = plane.as_region_mut();
+    Block4x4::pred_smooth_h(&mut region, &top[..4], &side[..4]);
+  })
+}
+/// Benchmarks `Block4x4::pred_smooth_v` on the `u16` block from
+/// `generate_block`.
+pub fn intra_smooth_v_4x4(b: &mut Bencher) {
+  let mut ra = ChaChaRng::from_seed([0; 32]);
+  let (mut plane, top, side) = generate_block(&mut ra);
+  b.iter(|| {
+    let mut region = plane.as_region_mut();
+    Block4x4::pred_smooth_v(&mut region, &top[..4], &side[..4]);
+  })
+}
+/// Benchmarks `Block4x4::pred_cfl` on the `u16` block from `generate_block`,
+/// with 32 * 32 pseudo-random `i16` AC values and `alpha` fixed at -1.
+pub fn intra_cfl_4x4(b: &mut Bencher) {
+  let mut ra = ChaChaRng::from_seed([0; 32]);
+  let (mut plane, top, side) = generate_block(&mut ra);
+  // Drawn after the edges, from the same seeded generator.
+  let mut ac_values: Vec<i16> = Vec::with_capacity(32 * 32);
+  for _ in 0..(32 * 32) {
+    ac_values.push(ra.gen());
+  }
+  let alpha: i16 = -1;
+  b.iter(|| {
+    let mut region = plane.as_region_mut();
+    Block4x4::pred_cfl(&mut region, &ac_values, alpha, 8, &top, &side);
+  })
+}
+/// Benchmarks `Block4x4::pred_dc` on the `u8` block from
+/// `generate_block_u8`, using the first 4 above samples and the last 4 left
+/// samples.
+pub fn intra_dc_4x4_u8(b: &mut Bencher) {
+  let mut ra = ChaChaRng::from_seed([0; 32]);
+  let mut edges = UninitializedAlignedArray();
+  let (mut plane, top, side) = generate_block_u8(&mut ra, &mut edges);
+  b.iter(|| {
+    let mut region = plane.as_region_mut();
+    Block4x4::pred_dc(&mut region, &top[..4], &side[32 - 4..]);
+  })
+}
+/// Benchmarks `Block4x4::pred_dc_128` (called with the argument 8) on the
+/// `u8` block from `generate_block_u8`; the edge slices are not used.
+pub fn intra_dc_128_4x4_u8(b: &mut Bencher) {
+  let mut ra = ChaChaRng::from_seed([0; 32]);
+  let mut edges = UninitializedAlignedArray();
+  let (mut plane, _top, _side) = generate_block_u8(&mut ra, &mut edges);
+  b.iter(|| {
+    let mut region = plane.as_region_mut();
+    Block4x4::pred_dc_128(&mut region, 8);
+  })
+}
+/// Benchmarks `Block4x4::pred_dc_left` on the `u8` block from
+/// `generate_block_u8`.
+pub fn intra_dc_left_4x4_u8(b: &mut Bencher) {
+  let mut ra = ChaChaRng::from_seed([0; 32]);
+  let mut edges = UninitializedAlignedArray();
+  let (mut plane, top, side) = generate_block_u8(&mut ra, &mut edges);
+  b.iter(|| {
+    let mut region = plane.as_region_mut();
+    Block4x4::pred_dc_left(&mut region, &top[..4], &side[32 - 4..]);
+  })
+}
+/// Benchmarks `Block4x4::pred_dc_top` on the `u8` block from
+/// `generate_block_u8`.
+pub fn intra_dc_top_4x4_u8(b: &mut Bencher) {
+  let mut ra = ChaChaRng::from_seed([0; 32]);
+  let mut edges = UninitializedAlignedArray();
+  let (mut plane, top, side) = generate_block_u8(&mut ra, &mut edges);
+  b.iter(|| {
+    let mut region = plane.as_region_mut();
+    Block4x4::pred_dc_top(&mut region, &top[..4], &side[32 - 4..]);
+  })
+}
+/// Benchmarks `Block4x4::pred_h` on the `u8` block from `generate_block_u8`;
+/// only the last 4 left samples are used.
+pub fn intra_h_4x4_u8(b: &mut Bencher) {
+  let mut ra = ChaChaRng::from_seed([0; 32]);
+  let mut edges = UninitializedAlignedArray();
+  let (mut plane, _top, side) = generate_block_u8(&mut ra, &mut edges);
+  b.iter(|| {
+    let mut region = plane.as_region_mut();
+    Block4x4::pred_h(&mut region, &side[32 - 4..]);
+  })
+}
+/// Benchmarks `Block4x4::pred_v` on the `u8` block from `generate_block_u8`;
+/// only the first 4 above samples are used.
+pub fn intra_v_4x4_u8(b: &mut Bencher) {
+  let mut ra = ChaChaRng::from_seed([0; 32]);
+  let mut edges = UninitializedAlignedArray();
+  let (mut plane, top, _side) = generate_block_u8(&mut ra, &mut edges);
+  b.iter(|| {
+    let mut region = plane.as_region_mut();
+    Block4x4::pred_v(&mut region, &top[..4]);
+  })
+}
+/// Benchmarks `Block4x4::pred_paeth` on the `u8` block from
+/// `generate_block_u8`.
+pub fn intra_paeth_4x4_u8(b: &mut Bencher) {
+  let mut rng = ChaChaRng::from_seed([0; 32]);
+  let mut edge_buf = UninitializedAlignedArray();
+  let (mut block, above, left) = generate_block_u8(&mut rng, &mut edge_buf);
+  // `generate_block_u8` returns `above` as `&edge_buf.array[33..]` and `left`
+  // as `&edge_buf.array[..32]`, so the element one before `above` is
+  // `edge_buf.array[32]`, which sits between the two ranges.
+  // NOTE(review): the read is inside the backing array but outside the
+  // `above` slice itself -- confirm this is acceptable for the pointer's
+  // provenance.
+  let above_left = unsafe { *above.as_ptr().offset(-1) };
+  b.iter(|| {
+    Block4x4::pred_paeth(
+      &mut block.as_region_mut(),
+      &above[..4],
+      &left[32 - 4..],
+      above_left
+    );
+  })
+}
+/// Benchmarks `Block4x4::pred_smooth` on the `u8` block from
+/// `generate_block_u8`.
+pub fn intra_smooth_4x4_u8(b: &mut Bencher) {
+  let mut ra = ChaChaRng::from_seed([0; 32]);
+  let mut edges = UninitializedAlignedArray();
+  let (mut plane, top, side) = generate_block_u8(&mut ra, &mut edges);
+  b.iter(|| {
+    let mut region = plane.as_region_mut();
+    Block4x4::pred_smooth(&mut region, &top[..4], &side[32 - 4..]);
+  })
+}
+/// Benchmarks `Block4x4::pred_smooth_h` on the `u8` block from
+/// `generate_block_u8`.
+pub fn intra_smooth_h_4x4_u8(b: &mut Bencher) {
+  let mut ra = ChaChaRng::from_seed([0; 32]);
+  let mut edges = UninitializedAlignedArray();
+  let (mut plane, top, side) = generate_block_u8(&mut ra, &mut edges);
+  b.iter(|| {
+    let mut region = plane.as_region_mut();
+    Block4x4::pred_smooth_h(&mut region, &top[..4], &side[32 - 4..]);
+  })
+}
+/// Benchmarks `Block4x4::pred_smooth_v` on the `u8` block from
+/// `generate_block_u8`.
+pub fn intra_smooth_v_4x4_u8(b: &mut Bencher) {
+  let mut ra = ChaChaRng::from_seed([0; 32]);
+  let mut edges = UninitializedAlignedArray();
+  let (mut plane, top, side) = generate_block_u8(&mut ra, &mut edges);
+  b.iter(|| {
+    let mut region = plane.as_region_mut();
+    Block4x4::pred_smooth_v(&mut region, &top[..4], &side[32 - 4..]);
+  })
+}
--- a/benches/transform.rs
+++ b/benches/transform.rs
+// Copyright (c) 2017-2018, The rav1e contributors. All rights reserved
+//
+// This source code is subject to the terms of the BSD 2 Clause License and
+// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+// was not distributed with this source code in the LICENSE file, you can
+// obtain it at www.aomedia.org/license/software. If the Alliance for Open
+// Media Patent License 1.0 was not distributed with this source code in the
+// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+use criterion::*;
+use rand::{Rng, SeedableRng};
+use rand_chacha::ChaChaRng;
+use rav1e::bench::transform;
+/// Benchmarks `transform::av1_idct4` on four pseudo-random `i32` inputs,
+/// passing `bit_depth + 8` as the range argument.
+fn bench_idct4(b: &mut Bencher, bit_depth: &usize) {
+  // Fixed seed: the same coefficients are transformed on every run.
+  let mut rng = ChaChaRng::from_seed([0; 32]);
+  let coeffs: [i32; 4] = rng.gen();
+  let mut result = [0i32; 4];
+  let range = *bit_depth + 8;
+  b.iter(|| {
+    transform::av1_idct4(&coeffs[..], &mut result[..], range);
+  });
+}
+/// Registers the "av1_idct4_8" benchmark group with a single "plain"
+/// function, run with the input value 8.
+pub fn av1_idct4(c: &mut Criterion) {
+  let variants = vec![Fun::new("plain", bench_idct4)];
+  c.bench_functions("av1_idct4_8", variants, 8);
+}
+/// Benchmarks `transform::av1_idct8` on eight pseudo-random `i32` inputs,
+/// passing `bit_depth + 8` as the range argument.
+fn bench_idct8(b: &mut Bencher, bit_depth: &usize) {
+  // Fixed seed: the same coefficients are transformed on every run.
+  let mut rng = ChaChaRng::from_seed([0; 32]);
+  let coeffs: [i32; 8] = rng.gen();
+  let mut result = [0i32; 8];
+  let range = *bit_depth + 8;
+  b.iter(|| {
+    transform::av1_idct8(&coeffs[..], &mut result[..], range);
+  });
+}
+/// Registers the "av1_idct8_8" benchmark group with a single "plain"
+/// function, run with the input value 8.
+pub fn av1_idct8(c: &mut Criterion) {
+  let variants = vec![Fun::new("plain", bench_idct8)];
+  c.bench_functions("av1_idct8_8", variants, 8);
+}
+criterion_group!(transform, av1_idct4, av1_idct8);
--- a/build.rs
+++ b/build.rs
 // build.rs
-extern crate cmake;
+#[allow(unused_imports)]
-extern crate pkg_config;
-#[cfg(unix)]
-#[cfg(feature = "decode_test")]
-extern crate bindgen;
 use std::env;
 use std::fs;
 use std::path::Path;
-fn main() {
+#[allow(dead_code)]
-    if cfg!(windows) && cfg!(feature = "decode_test") {
+fn rerun_dir<P: AsRef<Path>>(dir: P) {
-        panic!("Unsupported feature on this platform!");
+    for entry in fs::read_dir(dir).unwrap() {
-    }
+        let entry = entry.unwrap();
+        let path = entry.path();
+        println!("cargo:rerun-if-changed={}", path.to_string_lossy());
-    let cargo_dir = env::var("CARGO_MANIFEST_DIR").unwrap();
+        if path.is_dir() {
-    let build_path = Path::new(&cargo_dir).join("aom_build/aom");
+            rerun_dir(path);
-    let debug = if let Some(v) = env::var("PROFILE").ok() {
-        match v.as_str() {
-            "bench" | "release" => false,
-            _ => true,
-        }
-    } else {
-        false
-    };
-    let dst = cmake::Config::new(build_path)
-        .define("CONFIG_DEBUG", (debug as u8).to_string())
-        .define("CONFIG_ANALYZER", "0")
-        .define("ENABLE_DOCS", "0")
-        .define("ENABLE_TESTS", "0")
-        .no_build_target(cfg!(windows))
-        .build();
-    // Dirty hack to force a rebuild whenever the defaults are changed upstream
-    let _ = fs::remove_file(dst.join("build/CMakeCache.txt"));
-    #[cfg(windows)] {
-        if dst.join("lib/pkgconfig").join("aom.pc").exists() {
-            env::set_var("PKG_CONFIG_PATH", dst.join("lib/pkgconfig"));
-            pkg_config::Config::new().statik(true).probe("aom").unwrap();
-        } else { // MSVC
-            let bin_dir = if debug {
-                "Debug"
-            } else {
-                "Release"
-            };
-            println!("cargo:rustc-link-search=native={}", dst.join("build").join(bin_dir).to_str().unwrap());
-            println!("cargo:rustc-link-lib=static=aom");
        }
    }
+}
-    #[cfg(unix)] {
-        env::set_var("PKG_CONFIG_PATH", dst.join("lib/pkgconfig"));
-        let _libs = pkg_config::Config::new().statik(true).probe("aom").unwrap();
-        #[cfg(feature = "decode_test")] {
-            use std::io::Write;
-            let out_dir = env::var("OUT_DIR").unwrap();
-            let headers = _libs.include_paths.clone();
-            let mut builder = bindgen::builder()
-                .blacklist_type("max_align_t")
-                .rustfmt_bindings(false)
-                .header("data/aom.h");
-            for header in headers {
-                builder = builder.clang_arg("-I").clang_arg(header.to_str().unwrap());
-            }
-            // Manually fix the comment so rustdoc won't try to pick them
-            let s = builder
-                .generate()
-                .unwrap()
-                .to_string()
-                .replace("/**", "/*")
-                .replace("/*!", "/*");
-            let dest_path = Path::new(&out_dir).join("aom.rs");
-            let mut file = fs::File::create(dest_path).unwrap();
+/// Writes `config.asm` into `OUT_DIR`, assembles the x86 NASM sources into
+/// the static library `rav1easm`, tells cargo to link it, and registers the
+/// assembly directories for rebuild tracking.
+///
+/// Panics if `OUT_DIR` is unset or if `config.asm` cannot be created or
+/// written.
+#[cfg(feature = "nasm")]
+fn build_nasm_files() {
+  use std::fs::File;
+  use std::io::Write;
+  let out_dir = env::var("OUT_DIR").unwrap();
+  {
+    // Scoped so the file is closed before the assembler runs.
+    let dest_path = Path::new(&out_dir).join("config.asm");
+    let mut config_file = File::create(dest_path).unwrap();
+    // `write_all` rather than `write`: a single `write` call is allowed to
+    // write only part of the buffer, which would silently truncate a define.
+    config_file.write_all(b"	%define private_prefix rav1e\n").unwrap();
+    config_file.write_all(b"	%define ARCH_X86_32 0\n").unwrap();
+    config_file.write_all(b" %define ARCH_X86_64 1\n").unwrap();
+    config_file.write_all(b"	%define PIC 1\n").unwrap();
+    config_file.write_all(b" %define STACK_ALIGNMENT 16\n").unwrap();
+    if cfg!(target_os="macos") {
+      config_file.write_all(b" %define PREFIX 1\n").unwrap();
+    }
+  }
+  // "-I<OUT_DIR>/" lets the assembly sources include the generated
+  // config.asm.
+  let mut config_include_arg = String::from("-I");
+  config_include_arg.push_str(&out_dir);
+  config_include_arg.push('/');
+  nasm_rs::compile_library_args(
+    "rav1easm",
+    &[
+      "src/x86/data.asm",
+      "src/x86/ipred.asm",
+      "src/x86/itx.asm",
+      "src/x86/mc.asm",
+      "src/x86/me.asm",
+      "src/x86/sad_sse2.asm",
+      "src/x86/sad_avx.asm"
+    ],
+    &[&config_include_arg, "-Isrc/"]
+  );
+  println!("cargo:rustc-link-lib=static=rav1easm");
+  rerun_dir("src/x86");
+  rerun_dir("src/ext/x86");
+}
-            let _ = file.write(s.as_bytes());
+#[allow(unused_variables)]
-        }
+fn main() {
+    let arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap();
+    let os = env::var("CARGO_CFG_TARGET_OS").unwrap();
+    // let env = env::var("CARGO_CFG_TARGET_ENV").unwrap();
+    #[cfg(feature = "nasm")] {
+      if arch == "x86_64" {
+        build_nasm_files()
+      }
    }
-    fn rerun_dir<P: AsRef<Path>>(dir: P) {
+    if os == "windows" && cfg!(feature = "decode_test") {
-        for entry in fs::read_dir(dir).unwrap() {
+        panic!("Unsupported feature on this platform!");
-            let entry = entry.unwrap();
-            let path = entry.path();
-            println!("cargo:rerun-if-changed={}", path.to_string_lossy());
-            if path.is_dir() {
-                rerun_dir(path);
-            }
-        }
    }
-    rerun_dir("aom_build");
+    vergen::generate_cargo_keys(vergen::ConstantsFlags::all()).expect("Unable to generate the cargo keys!");
 }
--- a/build.sh
+++ b/build.sh
@@ -5,11 +5,21 @@ set -e
 #SEQ=!!!!! ENTER YOUR FAVORITE Y4M HERE !!!!!
+IS_RELEASE=1
+for arg in "$@"; do
+  shift
+  case "$arg" in
+    "--debug") IS_RELEASE=0 ;;
+    *)        set -- "$@" "$arg"
+  esac
+done
 if [[ -z "${SEQ}" ]]; then
  SEQ=nyan.y4m
  SEQ10=nyan10.y4m
  SEQ12=nyan12.y4m
  wget -nc https://mf4.xiph.org/~ltrudeau/videos/nyan.y4m
  #wget -nc https://people.xiph.org/~tdaede/nyan10.y4m
  #wget -nc https://people.xiph.org/~tdaede/nyan12.y4m
@@ -22,48 +32,6 @@ if [ ! -f $SEQ ]; then
  exit 1 # terminate and indicate error
 fi
-# Hide githash to detect version changes
-GITHASH=".git/rav1e.githash"
-# Get previous version
-EXPECTED_VERSION="42"
-if [ -f $GITHASH ]; then
-  EXPECTED_VERSION=$(cat $GITHASH)
-fi
-# Get current version
-ACTUAL_VERSION=$(git submodule status | xargs)
-AOM_TEST="aom_test"
-if [[ "$ACTUAL_VERSION" != "$EXPECTED_VERSION" ]] || [[ ! -f ./${AOM_TEST}/aomdec ]]; then
-# Store current version to file
-echo $ACTUAL_VERSION > $GITHASH
-# Update aombuild
-git submodule update --init
-# Clean project files
-cargo clean
-# Get configure command from readme
-CONFIGURE_CMD=$(fgrep "cmake ../aom" README.md)
-# Wipe and create aom_test folder
-rm -fR $AOM_TEST
-mkdir -p $AOM_TEST
-pushd $AOM_TEST
-echo CONFIGURE COMMAND
-echo $CONFIGURE_CMD
-eval $CONFIGURE_CMD
-# auto detect the number of cores and parallel build
-make -j$(nproc --all)
-popd
-fi
 # File containing the encoded sequence
 ENC_FILE="enc_file.ivf"
 # File containing the reconstructed sequence
@@ -75,13 +43,20 @@ DEC_FILE="dec_file.y4m"
 export RUST_BACKTRACE=1
 # Build and run encoder
-cargo run --bin rav1e --release -- $SEQ -o $ENC_FILE -s 3 -r $REC_FILE
+BUILD_TYPE=""
+if [ $IS_RELEASE == 1 ]; then
+  BUILD_TYPE="--release"
+fi
+cargo run --bin rav1e $BUILD_TYPE -- $SEQ -o $ENC_FILE -s 3 -r $REC_FILE
 # Decode
-${AOM_TEST}/aomdec $ENC_FILE -o $DEC_FILE
+aomdec $ENC_FILE -o $DEC_FILE
 # Input/Output compare
-cmp <(tail -n+2 $DEC_FILE) <(tail -n+2 $REC_FILE)
+tail -n+2 $DEC_FILE > /tmp/dec_file
+tail -n+2 $REC_FILE > /tmp/rec_file
+cmp /tmp/dec_file /tmp/rec_file || (printf '\e[1;31m%-6s\e[m\n\n' 'Desync detected!!!' && exit 1)
 # Daala tools support coming soon
 #DAALA_TOOLS="../daala/tools/"
@@ -100,11 +75,11 @@ mpv --loop $DEC_FILE
 # Repeat for high bit depth clips
 #cargo run --bin rav1e --release -- $SEQ10 -o $ENC_FILE -s 3 -r $REC_FILE
-#${AOM_TEST}/aomdec $ENC_FILE -o $DEC_FILE
+#aomdec $ENC_FILE -o $DEC_FILE
 #cmp <(tail -n+2 $DEC_FILE) <(tail -n+2 $REC_FILE)
 #mpv --loop $DEC_FILE
 #cargo run --bin rav1e --release -- $SEQ12 -o $ENC_FILE -s 3 -r $REC_FILE
-#${AOM_TEST}/aomdec $ENC_FILE -o $DEC_FILE
+#aomdec $ENC_FILE -o $DEC_FILE
 #cmp <(tail -n+2 $DEC_FILE) <(tail -n+2 $REC_FILE)
 #mpv --loop $DEC_FILE
--- a/cbindgen.toml
+++ b/cbindgen.toml
+# Text emitted verbatim at the top of the generated header.
+header = "// SPDX-License-Identifier: MIT"
+# Only the system headers listed here are included.
+sys_includes = ["stddef.h", "stdint.h", "stdlib.h"]
+no_includes = true
+include_guard = "RAV1E_H"
+tab_width = 4
+style = "Type"
+language = "C"
+[parse]
+parse_deps = true
+include = ["rav1e"]
+[export]
+prefix = "Ra"
+item_types = ["enums", "structs", "unions", "typedefs", "opaque", "functions"]
+[enum]
+rename_variants = "ScreamingSnakeCase"
+prefix_with_name = true
--- a/clippy.toml
+++ b/clippy.toml
 single-char-binding-names-threshold = 10
 too-many-arguments-threshold = 16
-cyclomatic-complexity-threshold = 40
+cognitive-complexity-threshold = 40
+trivial-copy-size-limit = 16 # 128-bits = 2 64-bit registers
--- a/crates/avformat-sys/Cargo.toml
+++ b/crates/avformat-sys/Cargo.toml
+[package]
+name = "avformat-sys"
+version = "0.1.0"
+authors = ["Luca Barbato <lu_zero@gentoo.org>"]
+build = "build.rs"
+edition = "2018"
+license = "MIT"
+description = "FFI bindings to ffmpeg"
+[package.metadata.pkg-config]
+libavformat = "58.18.102"
+libavcodec = "58.0.0"
+libavutil = "56.0.0"
+[features]
+build_sources = []
+[build-dependencies]
+bindgen = "0.49"
+metadeps = "1.1.2"
+[dependencies]
No results found