Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • xiph/rav1e
  • tterribe/rav1e
2 results
Show changes
Commits on Source (868)
root = true
[*]
charset = utf-8
end_of_line = lf
indent_size = 2
indent_style = space
# Some tools that work with text may not work correctly without the newline
# control character at the end of the last line. This character helps them to
# understand that the line is complete and can be considered as a line.
insert_final_newline = true
trim_trailing_whitespace = true
[*.md]
# Trailing whitespace may have a special meaning. For example, two spaces at the
# end of a line means a line break.
trim_trailing_whitespace = false
[*.py]
indent_size = 4
......@@ -3,3 +3,4 @@ target
Cargo.lock
*.y4m
*.ivf
secret_key
[submodule "aom"]
path = aom_build/aom
url = https://gitlab.xiph.org/xiph/aom-rav1e.git
branch = rav1e_16b
Luca Barbato <lu_zero@gentoo.org> <luca.barbato@gmail.com>
Frank Bossen <fbossen@gmail.com> <frank@bossentech.com>
Yushin Cho <ycho@mozilla.com> <cho.yushin@gmail.com>
Yushin Cho <ycho@mozilla.com> <ycho@users.noreply.github.com>
Thomas Daede <tdaede@xiph.org> <daede003@umn.edu>
Nathan E. Egge <negge@xiph.org> <negge@dgql.org>
Josh Holmer <jholmer.in@gmail.com>
Josh Holmer <jholmer.in@gmail.com> <jholmer@dminc.com>
Monty Montgomery <monty@xiph.org> <xiphmont@gmail.com>
Thomas Szymczak <11669680+tszymczak@users.noreply.github.com> Thomas Szymczak <you@example.com>
Raphaël Zumer <rzumer@tebako.net>
Raphaël Zumer <rzumer@tebako.net> <rzumer@gmail.com>
language: rust
rust:
- 1.35.0
env:
- RUST_BACKTRACE=1
addons:
apt:
packages:
- nasm
packages: binutils-dev libcurl4-openssl-dev zlib1g-dev libdw-dev libiberty-dev ninja-build
before_install:
- wget -O cmake.sh https://cmake.org/files/v3.10/cmake-3.10.2-Linux-x86_64.sh
- wget -O cmake.sh https://cmake.org/files/v3.13/cmake-3.13.3-Linux-x86_64.sh
- sudo sh cmake.sh --skip-license --exclude-subdir --prefix=/usr
- sudo rm -fR /usr/local/cmake*
- hash -r
- which cmake
- cmake --version
- wget https://www.nasm.us/pub/nasm/releasebuilds/2.13.03/nasm-2.13.03.tar.xz
- tar -xvf nasm-2.13.03.tar.xz
- cd nasm-2.13.03
- ./configure
- make
- sudo make install
- curl -L https://github.com/mozilla/sccache/releases/download/0.2.8/sccache-0.2.8-x86_64-unknown-linux-musl.tar.gz | tar xvz
- export PATH=$PATH:`pwd`/sccache-0.2.8-x86_64-unknown-linux-musl
- export RUSTC_WRAPPER=sccache
- export SCCACHE_CACHE_SIZE=500M
- export SCCACHE_DIR=~/.cache/sccache
- curl -L https://download.videolan.org/contrib/nasm/nasm-2.14.tar.gz | tar xvz
- cd nasm-2.14
- ./configure CC='sccache gcc' && make -j2 && sudo make install
- nasm --version
script:
- |
cargo build --verbose &&
cargo test --verbose &&
cargo test --verbose --release --features=decode_test -- --ignored &&
cargo bench --verbose &&
cargo doc --verbose
- cd ..
- curl -L https://github.com/SimonKagstrom/kcov/archive/v36.tar.gz | tar xvz
- cd kcov-36
- mkdir .build && cd .build
- cmake -GNinja -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache .. && ninja && sudo ninja install
- cd ../..
- git clone --depth 1 -b v1.0.0-errata1 https://aomedia.googlesource.com/aom
- cd aom
- rm -rf CMakeCache.txt CMakeFiles
- mkdir -p .build
- cd .build
- cmake -GNinja .. -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE=Release -DENABLE_TESTS=0 -DENABLE_DOCS=0 -DCONFIG_LOWBITDEPTH=1 -DCMAKE_INSTALL_PREFIX=/usr -DCONFIG_PIC=1
- ninja && sudo ninja install
- cd ../..
cache:
directories:
- "$HOME/.cache/sccache"
after_script:
- sccache -s
jobs:
include:
- name: "Build & Coveralls"
script:
- cargo install cargo-kcov
- kcov --version
- RUSTFLAGS="-C link-dead-code" cargo build --features=decode_test,quick_test --tests --verbose
- travis_wait cargo kcov -v --coveralls --no-clean-rebuild -- --verify --exclude-pattern=$HOME/.cargo,aom_build,.h,test
- name: "Tests"
script: cargo test --verbose --release --features=decode_test -- --ignored
- name: "Bench"
script: cargo bench --features=bench --verbose
- name: "Doc & Clippy (linter): verifying code quality"
script:
- cargo doc --verbose --no-deps
- rustup component add clippy
- cargo clippy --version
- cargo clippy -- -D warnings -A clippy::cast_lossless -A clippy::cast_ptr_alignment -A clippy::cognitive_complexity -A clippy::needless_range_loop -A clippy::too_many_arguments -A clippy::verbose_bit_mask -A clippy::unreadable_literal --verbose
......@@ -2,48 +2,72 @@
name = "rav1e"
version = "0.1.0"
authors = ["Thomas Daede <tdaede@xiph.org>"]
edition = "2018"
build = "build.rs"
include = ["/src/**", "/aom_build/**", "/Cargo.toml"]
include = ["/src/**", "/Cargo.toml", "/build.rs"]
license = "BSD-2-Clause"
description = "The fastest and safest AV1 encoder"
repository = "https://github.com/xiph/rav1e/"
autobenches = false
autobins = false
[features]
repl = ["rustyline", "binaries"]
comparative_bench = []
decode_test = ["bindgen"]
binaries = ["y4m", "clap"]
default = ["binaries"]
decode_test = ["aom-sys"]
decode_test_dav1d = ["dav1d-sys"]
binaries = ["ivf", "y4m", "clap", "scan_fmt", "serde_json"]
default = ["binaries", "nasm", "signal_support"]
nasm = ["nasm-rs"]
signal_support = ["signal-hook"]
dump_ivf = ["ivf"]
quick_test = []
desync_finder = []
bench = []
[dependencies]
arg_enum_proc_macro = "0.1.1"
bitstream-io = "0.8"
clap = { version = "2", optional = true }
clap = { version = "2", optional = true, default-features = false }
libc = "0.2"
rand = "0.5"
rustyline = { version = "1", optional = true }
y4m = { version = "0.3", optional = true }
y4m = { version = "0.3.2", optional = true }
backtrace = "0.3"
syn = "^0.15.20"
quote = "^0.6.10" # hack for proc-macro-hack
num-traits = "0.2"
num-derive = "0.2"
paste = "0.1"
serde = "1.0"
serde_derive = "1.0"
serde_json = { version = "1.0", optional = true }
dav1d-sys = { version = "0.2", optional = true }
aom-sys = { version = "0.1.2", optional = true }
scan_fmt = { version = "0.2", optional = true }
ivf = { version = "0.1", path = "ivf/", optional = true }
avformat-sys = { version = "0.1", path = "crates/avformat-sys/", optional = true }
rayon = "1.0"
bincode = "1.1"
arrayvec = "0.4.10"
[build-dependencies]
cmake = "0.1.32"
[target.'cfg(target_arch = "x86_64")'.build-dependencies]
nasm-rs = { git = "https://github.com/tdaede/nasm-rs.git" }
nasm-rs = { version = "0.1", path = "crates/nasm_rs/", optional = true }
vergen = "3"
[target.'cfg(unix)'.build-dependencies]
pkg-config = "0.3.12"
bindgen = { version = "0.37", optional = true }
[target.'cfg(unix)'.dependencies]
signal-hook = { version = "0.1.9", optional = true }
[dev-dependencies]
criterion = "0.2"
pretty_assertions = "0.6"
interpolate_name = "0.2.2"
rand = "0.6"
rand_chacha = "0.1"
semver = "0.9"
[[bin]]
name = "rav1e"
bench = false
[[bin]]
name = "rav1repl"
required-features = ["repl"]
required-features = ["binaries"]
bench = false
[lib]
......@@ -53,10 +77,18 @@ bench = false
name = "bench"
harness = false
[profile.dev]
opt-level = 2
[profile.release]
codegen-units = 1 # if > 1 enables parallel code generation which improves
# compile times, but prevents some optimizations.
# Passes `-C codegen-units`. Ignored when `lto = true`.
lto = true
[profile.bench]
codegen-units = 1
lto = true
[workspace]
members = [".", "ivf", "crates/nasm_rs", "crates/avformat-sys"]
BSD 2-Clause License
Copyright (c) 2017-2018, the rav1e contributors
Copyright (c) 2017-2019, the rav1e contributors
All rights reserved.
Redistribution and use in source and binary forms, with or without
......
The fastest and safest AV1 encoder.
[![Build Status](https://travis-ci.org/xiph/rav1e.svg?branch=master)](https://travis-ci.org/xiph/rav1e)
[![Travis Build Status](https://travis-ci.org/xiph/rav1e.svg?branch=master)](https://travis-ci.org/xiph/rav1e)
[![AppVeyor Build Status](https://ci.appveyor.com/api/projects/status/github/xiph/rav1e?branch=master&svg=true)](https://ci.appveyor.com/project/tdaede/rav1e/history)
[![Coverage Status](https://coveralls.io/repos/github/xiph/rav1e/badge.svg?branch=master)](https://coveralls.io/github/xiph/rav1e?branch=master)
# Overview
rav1e is an experimental AV1 video encoder. It is designed to eventually cover all use cases, though in its current form it is most suitable for cases where libaom (the reference encoder) is too slow.
rav1e temporarily uses libaom's transforms and CDF initialization tables, but is otherwise an independent implementation.
# Features
* Intra and inter frames
* 64x64 superblocks
* 4x4 to 32x32 RDO-selected square blocks
* DC, H, V, Paeth, and smooth prediction modes
* 4x4 DCT and ADST transforms
* 4x4 to 64x64 RDO-selected square and 2:1/1:2 rectangular blocks
* DC, H, V, Paeth, smooth, and a subset of directional prediction modes
* DCT, ADST and identity transforms (up to 64x64, 16x16 and 32x32 respectively)
* 8-, 10- and 12-bit depth color
* 4:2:0 (full support), 4:2:2 and 4:4:4 (limited) chroma sampling
* Variable speed settings
* ~10 fps encoding @ 480p
* Near real-time encoding at high speed levels
# Releases
For the foreseeable future, a weekly pre-release of rav1e will be [published](https://github.com/xiph/rav1e/releases) every Tuesday.
# Windows builds
......@@ -25,21 +30,32 @@ Automated AppVeyor builds can be found [here](https://ci.appveyor.com/project/td
# Building
This repository uses a git submodule. To initialize it, run:
**rav1e** can optionally use either `libaom` (default) or a `dav1d` installation to run some extended tests.
Some `x86_64`-specific optimizations require a recent version of NASM.
In order to build, test and link to the codec on UNIX, you need Perl, NASM, CMake, Clang and pkg-config. To install this on Ubuntu or Linux Mint, run:
```
git submodule update --init
sudo apt install perl nasm cmake clang pkg-config
```
This is also required every time you switch branches or pull a submodule change.
On Windows, pkg-config is not required. A Perl distribution such as Strawberry Perl, CMake, and a NASM binary in your system PATH are required.
In order to build, test and link to the codec on UNIX, you need Perl, NASM, CMake, Clang and pkg-config. To install this on Ubuntu or Linux Mint, run:
To build the release binary in `target/release/rav1e`, run:
```
sudo apt install perl nasm cmake clang pkg-config
cargo build --release
```
On Windows, pkg-config is not required. A Perl distribution such as Strawberry Perl, CMake, and a NASM binary in your system PATH are required.
## Building the C-API
**rav1e** provides a C-compatible library, header and pkg-config file.
To build and install it you can use [cargo-c](https://crates.io/crates/cargo-c):
```
cargo install cargo-c
cargo cinstall --release
```
# Compressing video
......@@ -50,16 +66,22 @@ cargo run --release --bin rav1e -- input.y4m -o output.ivf
```
# Decompressing video
Encoder output should be compatible with any AV1 decoder compliant with the v1.0.0 specification. You can also build the included compatible aomdec using the following:
Encoder output should be compatible with any AV1 decoder compliant with the v1.0.0 specification. You can build a compatible aomdec using the following:
```
mkdir aom_test
cd aom_test
cmake ../aom_build/aom -DAOM_TARGET_CPU=generic -DCONFIG_AV1_ENCODER=0 -DENABLE_TESTS=0 -DENABLE_DOCS=0 -DCONFIG_LOWBITDEPTH=1
cmake /path/to/aom -DAOM_TARGET_CPU=generic -DCONFIG_AV1_ENCODER=0 -DENABLE_TESTS=0 -DENABLE_DOCS=0 -DCONFIG_LOWBITDEPTH=1
make -j8
./aomdec ../output.ivf -o output.y4m
```
# Configuring
rav1e has several optional features that can be enabled by passing --features to cargo test. Passing --all-features is discouraged.
* nasm - enabled by default. When enabled, assembly is built for x86_64.
# Using the AOMAnalyzer
## Local Analyzer
......@@ -80,7 +102,7 @@ https://arewecompressedyet.com/analyzer/?d=https://people.xiph.org/~mbebenita/an
* src/context.rs - High-level functions that write symbols to the bitstream, and maintain context.
* src/ec.rs - Low-level implementation of the entropy coder, which directly writes the bitstream.
* src/lib.rs - The top level library, contains code to write headers, manage buffers, and iterate throught each superblock.
* src/lib.rs - The top level library, contains code to write headers, manage buffers, and iterate through each superblock.
* src/partition.rs - Functions and enums to manage partitions (subdivisions of a superblock).
* src/predict.rs - Intra prediction implementations.
* src/quantize.rs - Quantization and dequantization functions for coefficients.
......@@ -89,46 +111,47 @@ https://arewecompressedyet.com/analyzer/?d=https://people.xiph.org/~mbebenita/an
* src/util.rs - Misc utility code.
* src/bin/rav1e.rs - rav1e command line tool.
* src/bin/rav1erepl.rs - Command line tool for debugging.
* aom_build/ - Local submodule of libaom. Some C functions and constants are used directly. Also used for benchmarking and testing.
# Contributing
## Coding style
Check code formatting with [rustfmt](https://github.com/rust-lang-nursery/rustfmt) before submitting a PR.
rav1e currently uses the nightly version of rustfmt.
To install nightly:
## Toolchain
rav1e uses the stable version of Rust (the stable toolchain).
To install the toolchain:
```
rustup install nightly
rustup install stable
```
To install the nightly version of rustfmt:
## Coding style
Check code formatting with [rustfmt](https://github.com/rust-lang-nursery/rustfmt) before submitting a PR.
To install rustfmt:
```
rustup component add rustfmt-preview --toolchain nightly
rustup component add rustfmt
```
then
```
cargo +nightly fmt -- --check
cargo fmt -- --check
```
You should also try [clippy](https://github.com/rust-lang-nursery/rust-clippy).
Rust also uses nightly for clippy.
## Code Analysis
[clippy](https://github.com/rust-lang-nursery/rust-clippy) will help catch common mistakes and improve your Rust code.
We recommend you use it before submitting a PR.
To install clippy:
```
rustup component add clippy-preview --toolchain nightly
rustup component add clippy
```
then
Then search for "cargo clippy" in [.travis.yml](https://github.com/xiph/rav1e/blob/master/.travis.yml) to find the exact command, and run it.
```
cargo +nightly clippy
```
## Testing
Run unit tests with:
......@@ -138,17 +161,17 @@ cargo test
Run encode-decode integration tests with:
```
cargo test --release --features=decode_test -- --ignored
cargo test --release --features=decode_test
```
Run regular benchmarks with:
Run the encode-decode tests against `dav1d` with:
```
cargo bench
cargo test --release --features=decode_test_dav1d
```
Run comparative benchmarks with:
Run regular benchmarks with:
```
cargo bench --features=comparative_bench
cargo bench --features=bench
```
# Getting in Touch
......
Subproject commit a6ea77d15da5f0c2f74e75147452c382d802565d
os: Visual Studio 2017
image: Visual Studio 2019
environment:
matrix:
- channel: stable
host: x86_64-pc-windows-msvc
matrix:
- platform: x86_64
target: x86_64-pc-windows-msvc
channel: stable
- platform: arm64
target: aarch64-pc-windows-msvc
channel: nightly
matrix:
allow_failures:
- platform: arm64
install:
- call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvars64.bat"
- call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat"
- appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe
- appveyor DownloadFile https://www.nasm.us/pub/nasm/releasebuilds/2.13.03/win64/nasm-2.13.03-win64.zip -FileName nasm.zip
- appveyor DownloadFile https://people.xiph.org/~tdaede/nasm-2.14.02-win64.zip -FileName nasm.zip
- appveyor DownloadFile https://github.com/mozilla/sccache/releases/download/0.2.8/sccache-0.2.8-x86_64-pc-windows-msvc.tar.gz
- tar xzf sccache-0.2.8-x86_64-pc-windows-msvc.tar.gz
- 7z e -y nasm.zip
- rustup-init -yv --default-toolchain %channel% --default-host %target%
- set PATH=%PATH%;%USERPROFILE%\.cargo\bin;%APPVEYOR_BUILD_FOLDER%
- rustup-init -yv --default-toolchain %channel% --default-host %host%
- set PATH=%PATH%;%USERPROFILE%\.cargo\bin;%APPVEYOR_BUILD_FOLDER%;nasm-2.14.02;sccache-0.2.8-x86_64-pc-windows-msvc
- set RUSTC_WRAPPER=sccache
- set SCCACHE_CACHE_SIZE=500M
- set SCCACHE_DIR=%LOCALAPPDATA%\Mozilla\sccache
- ps: $Env:PKG_CONFIG_ALLOW_CROSS=1
- rustc -vV
- cargo -vV
- rustup target add %target%
on_success:
- sccache -s
cache:
- '%LOCALAPPDATA%\Mozilla\sccache'
build_script:
- git submodule update --init
- cargo build --release
- cargo build --release --target=%target%
test_script:
- git submodule update --init
- cargo test --verbose
- cargo test --target=%target% --verbose
artifacts:
- path: target\release\rav1e.exe
name: rav1e
- path: target\$(target)\release\rav1e.exe
name: rav1e-$(platform)
deploy:
- provider: GitHub
artifact: target\$(target)\release\rav1e.exe
auth_token:
secure: 'LPBjNyFOg+vBkVR4w+89YVNhByaXBGNwtN6UwkFkWTfPow5oeCbFMtJavU9ZLs+c'
prerelease: true
on:
appveyor_repo_tag: true
......@@ -7,28 +7,24 @@
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
#[macro_use]
extern crate criterion;
extern crate rand;
extern crate rav1e;
mod predict;
mod transform;
mod me;
mod dist;
use criterion::*;
use rav1e::cdef::cdef_filter_frame;
use rav1e::context::*;
use rav1e::ec;
use rav1e::partition::*;
use rav1e::predict::*;
use rav1e::rdo::rdo_cfl_alpha;
use rav1e::*;
use rav1e::bench::api::*;
use rav1e::bench::encoder::*;
use rav1e::bench::cdef::*;
use rav1e::bench::context::*;
use rav1e::bench::ec::*;
use rav1e::bench::partition::*;
use rav1e::bench::predict::*;
use rav1e::bench::transform::*;
use rav1e::bench::rdo::*;
use transform::transform;
use crate::transform::transform;
#[cfg(feature = "comparative_bench")]
mod comparative;
use criterion::*;
use std::time::Duration;
fn write_b(c: &mut Criterion) {
for &tx_size in &[TxSize::TX_4X4, TxSize::TX_8X8] {
......@@ -40,18 +36,24 @@ fn write_b(c: &mut Criterion) {
}
fn write_b_bench(b: &mut Bencher, tx_size: TxSize, qindex: usize) {
unsafe {
av1_rtcd();
aom_dsp_rtcd();
}
let config =
EncoderConfig { quantizer: qindex, speed: 10, ..Default::default() };
let mut fi = FrameInvariants::new(1024, 1024, config);
let mut w = ec::WriterEncoder::new();
let fc = CDFContext::new(fi.base_q_idx);
let bc = BlockContext::new(fi.sb_width * 16, fi.sb_height * 16);
let config = EncoderConfig {
width: 1024,
height: 1024,
quantizer: qindex,
speed_settings: SpeedSettings::from_preset(10),
..Default::default()
};
let sequence = Sequence::new(&Default::default());
let mut fi = FrameInvariants::<u16>::new(config, sequence);
let mut w = WriterEncoder::new();
let mut fc = CDFContext::new(fi.base_q_idx);
let mut fb = FrameBlocks::new(fi.sb_width * 16, fi.sb_height * 16);
let mut tb = fb.as_tile_blocks_mut();
let bc = BlockContext::new(&mut tb);
let mut fs = FrameState::new(&fi);
let mut cw = ContextWriter::new(fc, bc);
let mut ts = fs.as_tile_state_mut();
// For now, restoration unit size is locked to superblock size.
let mut cw = ContextWriter::new(&mut fc, bc);
let tx_type = TxType::DCT_DCT;
......@@ -62,8 +64,8 @@ fn write_b_bench(b: &mut Bencher, tx_size: TxSize, qindex: usize) {
b.iter(|| {
for &mode in RAV1E_INTRA_MODES {
let sbo = SuperBlockOffset { x: sbx, y: sby };
fs.qc.update(fi.base_q_idx, tx_size, mode.is_intra(), 8);
for p in 1..3 {
ts.qc.update(fi.base_q_idx, tx_size, mode.is_intra(), 8, fi.dc_delta_q[p], fi.ac_delta_q[p]);
for by in 0..8 {
for bx in 0..8 {
// For ex, 8x8 tx should be applied to even numbered (bx,by)
......@@ -74,23 +76,24 @@ fn write_b_bench(b: &mut Bencher, tx_size: TxSize, qindex: usize) {
};
let bo = sbo.block_offset(bx, by);
let tx_bo = BlockOffset { x: bo.x + bx, y: bo.y + by };
let po = tx_bo.plane_offset(&fs.input.planes[p].cfg);
let po = tx_bo.plane_offset(&ts.input.planes[p].cfg);
encode_tx_block(
&mut fi,
&mut fs,
&mut ts,
&mut cw,
&mut w,
p,
&bo,
bo,
mode,
tx_size,
tx_type,
tx_size.block_size(),
&po,
po,
false,
8,
ac,
0
0,
RDOType::PixelDistRealRate,
true
);
}
}
......@@ -106,14 +109,20 @@ fn cdef_frame(c: &mut Criterion) {
c.bench_function(&n, move |b| cdef_frame_bench(b, w, h));
}
fn cdef_frame_bench(b: &mut Bencher, w: usize, h: usize) {
let config =
EncoderConfig { quantizer: 100, speed: 10, ..Default::default() };
let fi = FrameInvariants::new(w, h, config);
let mut bc = BlockContext::new(fi.sb_width * 16, fi.sb_height * 16);
fn cdef_frame_bench(b: &mut Bencher, width: usize, height: usize) {
let config = EncoderConfig {
width,
height,
quantizer: 100,
speed_settings: SpeedSettings::from_preset(10),
..Default::default()
};
let sequence = Sequence::new(&Default::default());
let fi = FrameInvariants::<u16>::new(config, sequence);
let fb = FrameBlocks::new(fi.sb_width * 16, fi.sb_height * 16);
let mut fs = FrameState::new(&fi);
b.iter(|| cdef_filter_frame(&fi, &mut fs.rec, &mut bc, 8));
b.iter(|| cdef_filter_frame(&fi, &mut fs.rec, &fb));
}
fn cfl_rdo(c: &mut Criterion) {
......@@ -129,12 +138,44 @@ fn cfl_rdo(c: &mut Criterion) {
}
fn cfl_rdo_bench(b: &mut Bencher, bsize: BlockSize) {
let config =
EncoderConfig { quantizer: 100, speed: 10, ..Default::default() };
let fi = FrameInvariants::new(1024, 1024, config);
let config = EncoderConfig {
width: 1024,
height: 1024,
quantizer: 100,
speed_settings: SpeedSettings::from_preset(10),
..Default::default()
};
let sequence = Sequence::new(&Default::default());
let fi = FrameInvariants::<u16>::new(config, sequence);
let mut fs = FrameState::new(&fi);
let mut ts = fs.as_tile_state_mut();
let offset = BlockOffset { x: 1, y: 1 };
b.iter(|| rdo_cfl_alpha(&mut fs, &offset, bsize, 8))
b.iter(|| rdo_cfl_alpha(&mut ts, offset, bsize, fi.sequence.bit_depth))
}
fn ec_bench(c: &mut Criterion) {
c.bench_function("update_cdf_4_native", update_cdf_4_native);
c.bench_function("update_cdf_4_sse2", update_cdf_4_sse2);
}
/// Benchmarks `WriterBase::<WriterRecorder>::update_cdf` on a 5-entry CDF
/// array.
///
/// Every timed iteration performs 1000 updates, cycling the symbol through
/// 0, 1, 2, 3, and passes the array to `black_box` after each update.
fn update_cdf_4_native(b: &mut Bencher) {
  let mut cdf = [7296, 3819, 1616, 0, 0];
  b.iter(|| {
    for step in 0..1000 {
      // Only the two low bits are kept, so the symbol stays in 0..=3.
      let symbol = step & 3;
      WriterBase::<WriterRecorder>::update_cdf(&mut cdf, symbol);
      black_box(cdf);
    }
  });
}
/// Benchmarks `WriterBase::<WriterRecorder>::update_cdf_4_sse2` on a 5-entry
/// CDF array.
///
/// Uses the same workload as `update_cdf_4_native`: 1000 updates per timed
/// iteration with the symbol cycling through 0, 1, 2, 3, and a `black_box`
/// of the array after each update.
fn update_cdf_4_sse2(b: &mut Bencher) {
  let mut cdf = [7296, 3819, 1616, 0, 0];
  b.iter(|| {
    for step in 0..1000 {
      // Only the two low bits are kept, so the symbol stays in 0..=3.
      let symbol = step & 3;
      WriterBase::<WriterRecorder>::update_cdf_4_sse2(&mut cdf, symbol);
      black_box(cdf);
    }
  });
}
criterion_group!(intra_prediction, predict::pred_bench,);
......@@ -142,10 +183,12 @@ criterion_group!(intra_prediction, predict::pred_bench,);
criterion_group!(cfl, cfl_rdo);
criterion_group!(cdef, cdef_frame);
criterion_group!(write_block, write_b);
criterion_group!(me, me::get_sad);
criterion_group!{ name = me;
config = Criterion::default().warm_up_time(Duration::new(1,0));
targets = dist::get_sad
}
criterion_group!(ec, ec_bench);
#[cfg(feature = "comparative_bench")]
criterion_main!(comparative::intra_prediction);
criterion_main!(write_block, intra_prediction, cdef, cfl, me, transform, ec);
#[cfg(not(feature = "comparative_bench"))]
criterion_main!(write_block, intra_prediction, cdef, cfl, me, transform);
// Copyright (c) 2017-2018, The rav1e contributors. All rights reserved
//
// This source code is subject to the terms of the BSD 2 Clause License and
// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
// was not distributed with this source code in the LICENSE file, you can
// obtain it at www.aomedia.org/license/software. If the Alliance for Open
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
use comparative::libc;
use criterion::*;
use predict as predict_native;
use predict::*;
use rand::{ChaChaRng, Rng, SeedableRng};
// Foreign predictor entry points used as the "aom" side of the comparative
// benchmarks in this file. No ABI string is given, so the default extern ABI
// applies; the symbols must be supplied at link time.
// NOTE(review): presumably these are provided by the libaom build — confirm.
//
// All `highbd_*_predictor` functions share one signature:
// (dst, stride, bw, bh, above, left, bd).
extern {
fn highbd_dc_predictor(
dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int, bh: libc::c_int,
above: *const u16, left: *const u16, bd: libc::c_int
);
fn highbd_h_predictor(
dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int, bh: libc::c_int,
above: *const u16, left: *const u16, bd: libc::c_int
);
fn highbd_v_predictor(
dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int, bh: libc::c_int,
above: *const u16, left: *const u16, bd: libc::c_int
);
fn highbd_paeth_predictor(
dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int, bh: libc::c_int,
above: *const u16, left: *const u16, bd: libc::c_int
);
fn highbd_smooth_predictor(
dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int, bh: libc::c_int,
above: *const u16, left: *const u16, bd: libc::c_int
);
fn highbd_smooth_h_predictor(
dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int, bh: libc::c_int,
above: *const u16, left: *const u16, bd: libc::c_int
);
fn highbd_smooth_v_predictor(
dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int, bh: libc::c_int,
above: *const u16, left: *const u16, bd: libc::c_int
);
// Chroma-from-luma variant: takes an AC buffer and an alpha value instead of
// the above/left edge pointers, and orders `bd` before `bw`/`bh`.
fn cfl_predict_hbd_c(
ac_buf_q3: *const i16, dst: *mut u16, stride: libc::ptrdiff_t,
alpha_q3: libc::c_int, bd: libc::c_int, bw: libc::c_int, bh: libc::c_int
);
}
/// Benchmarks one foreign intra predictor on a 4x4 block.
///
/// `predictor` is any function with the shared `highbd_*_predictor`
/// signature declared in the `extern` block of this file. Each timed
/// iteration calls it `MAX_ITER` times with the same buffers.
fn predict_intra_4x4_aom(
b: &mut Bencher,
predictor: unsafe extern fn(
*mut u16,
libc::ptrdiff_t,
libc::c_int,
libc::c_int,
*const u16,
*const u16,
libc::c_int
)
) {
// The seed is a fixed all-zero array, so the generator starts from the same
// state on every run.
let mut rng = ChaChaRng::from_seed([0; 32]);
let (mut block, above_context, left_context) = generate_block(&mut rng);
b.iter(|| {
for _ in 0..MAX_ITER {
unsafe {
// Arguments follow the extern declarations: dst, stride, bw, bh, above,
// left, bd. The stride is the width of BLOCK_SIZE, while bw/bh are 4.
predictor(
block.as_mut_ptr(),
BLOCK_SIZE.width() as libc::ptrdiff_t,
4,
4,
above_context.as_ptr(),
left_context.as_ptr(),
8
);
}
}
})
}
/// Registers the comparative intra-prediction benchmarks.
///
/// For every prediction mode, one `bench_functions` group is created whose id
/// is the mode name and which holds two functions: "native" (the
/// `predict_native` benchmark for that mode) and "aom" (the matching foreign
/// predictor driven through `predict_intra_4x4_aom`, or `intra_cfl_4x4_aom`
/// for CFL). The group input is `None` and is ignored by every closure.
pub fn intra_bench(c: &mut Criterion) {
c.bench_functions(
"intra_dc_4x4",
vec![
Fun::new("native", |b, _: &Option<usize>| {
predict_native::intra_dc_4x4(b)
}),
Fun::new("aom", |b, _: &Option<usize>| {
predict_intra_4x4_aom(b, highbd_dc_predictor)
}),
],
None
);
c.bench_functions(
"intra_h_4x4",
vec![
Fun::new("native", |b, _: &Option<usize>| {
predict_native::intra_h_4x4(b)
}),
Fun::new("aom", |b, _: &Option<usize>| {
predict_intra_4x4_aom(b, highbd_h_predictor)
}),
],
None
);
c.bench_functions(
"intra_v_4x4",
vec![
Fun::new("native", |b, _: &Option<usize>| {
predict_native::intra_v_4x4(b)
}),
Fun::new("aom", |b, _: &Option<usize>| {
predict_intra_4x4_aom(b, highbd_v_predictor)
}),
],
None
);
c.bench_functions(
"intra_paeth_4x4",
vec![
Fun::new("native", |b, _: &Option<usize>| {
predict_native::intra_paeth_4x4(b)
}),
Fun::new("aom", |b, _: &Option<usize>| {
predict_intra_4x4_aom(b, highbd_paeth_predictor)
}),
],
None
);
c.bench_functions(
"intra_smooth_4x4",
vec![
Fun::new("native", |b, _: &Option<usize>| {
predict_native::intra_smooth_4x4(b)
}),
Fun::new("aom", |b, _: &Option<usize>| {
predict_intra_4x4_aom(b, highbd_smooth_predictor)
}),
],
None
);
c.bench_functions(
"intra_smooth_h_4x4",
vec![
Fun::new("native", |b, _: &Option<usize>| {
predict_native::intra_smooth_h_4x4(b)
}),
Fun::new("aom", |b, _: &Option<usize>| {
predict_intra_4x4_aom(b, highbd_smooth_h_predictor)
}),
],
None
);
c.bench_functions(
"intra_smooth_v_4x4",
vec![
Fun::new("native", |b, _: &Option<usize>| {
predict_native::intra_smooth_v_4x4(b)
}),
Fun::new("aom", |b, _: &Option<usize>| {
predict_intra_4x4_aom(b, highbd_smooth_v_predictor)
}),
],
None
);
// CFL has a different foreign signature, so it uses its own driver rather
// than `predict_intra_4x4_aom`.
c.bench_functions(
"intra_cfl_4x4",
vec![
Fun::new("native", |b, _: &Option<usize>| {
predict_native::intra_cfl_4x4(b)
}),
Fun::new("aom", |b, _: &Option<usize>| intra_cfl_4x4_aom(b)),
],
None
);
}
/// Benchmarks the foreign `cfl_predict_hbd_c` function on a 4x4 block.
///
/// Each timed iteration calls it `MAX_ITER` times with the same destination
/// block, the same 32*32-entry AC buffer and an alpha of -1.
pub fn intra_cfl_4x4_aom(b: &mut Bencher) {
// The seed is a fixed all-zero array, so the generator starts from the same
// state on every run.
let mut rng = ChaChaRng::from_seed([0; 32]);
// Only the destination block is used; the edge contexts are not passed to
// the CFL predictor.
let (mut block, _above_context, _left_context) = generate_block(&mut rng);
// The AC buffer is drawn from the same generator, after the block.
let ac: Vec<i16> = (0..(32 * 32)).map(|_| rng.gen()).collect();
let alpha = -1 as i16;
b.iter(|| {
for _ in 0..MAX_ITER {
unsafe {
// Arguments follow the extern declaration: ac_buf_q3, dst, stride,
// alpha_q3, bd, bw, bh.
cfl_predict_hbd_c(
ac.as_ptr(),
block.as_mut_ptr(),
BLOCK_SIZE.width() as libc::ptrdiff_t,
alpha as libc::c_int,
8,
4,
4
);
}
}
})
}
......@@ -8,22 +8,25 @@
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
use criterion::*;
use partition::*;
use plane::*;
use rand::{ChaChaRng, Rng, SeedableRng};
use rav1e::me;
use rav1e::bench::dist;
use rav1e::bench::partition::*;
use rav1e::bench::partition::BlockSize::*;
use rav1e::bench::frame::*;
use rand::{Rng, SeedableRng};
use rand_chacha::ChaChaRng;
use rav1e::Pixel;
fn fill_plane(ra: &mut ChaChaRng, plane: &mut Plane) {
fn fill_plane<T: Pixel>(ra: &mut ChaChaRng, plane: &mut Plane<T>) {
let stride = plane.cfg.stride;
for row in plane.data.chunks_mut(stride) {
for mut pixel in row {
for pixel in row {
let v: u8 = ra.gen();
*pixel = v as u16;
*pixel = T::cast_from(v);
}
}
}
fn new_plane(ra: &mut ChaChaRng, width: usize, height: usize) -> Plane {
fn new_plane<T: Pixel>(ra: &mut ChaChaRng, width: usize, height: usize) -> Plane<T> {
let mut p = Plane::new(width, height, 0, 0, 128 + 8, 128 + 8);
fill_plane(ra, &mut p);
......@@ -31,49 +34,80 @@ fn new_plane(ra: &mut ChaChaRng, width: usize, height: usize) -> Plane {
p
}
fn bench_get_sad(b: &mut Bencher, bs: &BlockSize) {
fn run_sad_bench<T: Pixel>(b: &mut Bencher, &(bs, bit_depth): &(BlockSize, usize)) {
let mut ra = ChaChaRng::from_seed([0; 32]);
let bsw = bs.width();
let bsh = bs.height();
let w = 640;
let h = 480;
let input_plane = new_plane(&mut ra, w, h);
let rec_plane = new_plane(&mut ra, w, h);
let po = PlaneOffset { x: 0, y: 0 };
let input_plane = new_plane::<T>(&mut ra, w, h);
let rec_plane = new_plane::<T>(&mut ra, w, h);
let plane_org = input_plane.slice(&po);
let plane_ref = rec_plane.slice(&po);
let plane_org = input_plane.as_region();
let plane_ref = rec_plane.as_region();
b.iter(|| {
let _ = me::get_sad(&plane_org, &plane_ref, bsw, bsh);
let _ =
black_box(dist::get_sad(&plane_org, &plane_ref, bsw, bsh, bit_depth));
})
}
/// Runs the SAD benchmark with the pixel type selected by bit depth.
///
/// `input` is a `(block size, bit depth)` pair. A bit depth of 8 or less
/// runs the benchmark with `u8` pixels; anything larger uses `u16`. The pair
/// is forwarded unchanged to `run_sad_bench`.
fn bench_get_sad(b: &mut Bencher, input: &(BlockSize, usize)) {
  match input.1 {
    0..=8 => run_sad_bench::<u8>(b, input),
    _ => run_sad_bench::<u16>(b, input),
  }
}
pub fn get_sad(c: &mut Criterion) {
use partition::BlockSize::*;
let blocks = vec![
BLOCK_4X4,
BLOCK_4X8,
BLOCK_8X4,
BLOCK_8X8,
BLOCK_8X16,
BLOCK_16X8,
BLOCK_16X16,
BLOCK_16X32,
BLOCK_32X16,
BLOCK_32X32,
BLOCK_32X64,
BLOCK_64X32,
BLOCK_64X64,
BLOCK_64X128,
BLOCK_128X64,
BLOCK_128X128,
BLOCK_4X16,
BLOCK_16X4,
BLOCK_8X32,
BLOCK_32X8,
BLOCK_16X64,
BLOCK_64X16,
(BLOCK_4X4, 8),
(BLOCK_4X8, 8),
(BLOCK_8X4, 8),
(BLOCK_8X8, 8),
(BLOCK_8X16, 8),
(BLOCK_16X8, 8),
(BLOCK_16X16, 8),
(BLOCK_16X32, 8),
(BLOCK_32X16, 8),
(BLOCK_32X32, 8),
(BLOCK_32X64, 8),
(BLOCK_64X32, 8),
(BLOCK_64X64, 8),
(BLOCK_64X128, 8),
(BLOCK_128X64, 8),
(BLOCK_128X128, 8),
(BLOCK_4X16, 8),
(BLOCK_16X4, 8),
(BLOCK_8X32, 8),
(BLOCK_32X8, 8),
(BLOCK_16X64, 8),
(BLOCK_64X16, 8),
(BLOCK_4X4, 10),
(BLOCK_4X8, 10),
(BLOCK_8X4, 10),
(BLOCK_8X8, 10),
(BLOCK_8X16, 10),
(BLOCK_16X8, 10),
(BLOCK_16X16, 10),
(BLOCK_16X32, 10),
(BLOCK_32X16, 10),
(BLOCK_32X32, 10),
(BLOCK_32X64, 10),
(BLOCK_64X32, 10),
(BLOCK_64X64, 10),
(BLOCK_64X128, 10),
(BLOCK_128X64, 10),
(BLOCK_128X128, 10),
(BLOCK_4X16, 10),
(BLOCK_16X4, 10),
(BLOCK_8X32, 10),
(BLOCK_32X8, 10),
(BLOCK_16X64, 10),
(BLOCK_64X16, 10)
];
c.bench_function_over_inputs("get_sad", bench_get_sad, blocks);
......
......@@ -8,15 +8,20 @@
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
use criterion::*;
use rand::{ChaChaRng, Rng, SeedableRng};
use rav1e::partition::BlockSize;
use rav1e::predict::{Block4x4, Intra};
use rand::{Rng, RngCore, SeedableRng};
use rand_chacha::ChaChaRng;
use rav1e::bench::partition::BlockSize;
use rav1e::bench::predict::{Block4x4, Intra};
use rav1e::bench::frame::*;
use rav1e::bench::util::*;
pub const MAX_ITER: usize = 50000;
pub const BLOCK_SIZE: BlockSize = BlockSize::BLOCK_32X32;
pub fn generate_block(rng: &mut ChaChaRng) -> (Vec<u16>, Vec<u16>, Vec<u16>) {
let block = vec![0u16; BLOCK_SIZE.width() * BLOCK_SIZE.height()];
pub fn generate_block(rng: &mut ChaChaRng) -> (Plane<u16>, Vec<u16>, Vec<u16>) {
let block = Plane::wrap(
vec![0u16; BLOCK_SIZE.width() * BLOCK_SIZE.height()],
BLOCK_SIZE.width(),
);
let above_context: Vec<u16> =
(0..BLOCK_SIZE.height()).map(|_| rng.gen()).collect();
let left_context: Vec<u16> =
......@@ -25,64 +30,92 @@ pub fn generate_block(rng: &mut ChaChaRng) -> (Vec<u16>, Vec<u16>, Vec<u16>) {
(block, above_context, left_context)
}
/// Builds the inputs shared by the 8-bit prediction benchmarks.
///
/// `edge_buf` is filled with random bytes from `rng`. The returned tuple is:
/// a zero-filled `Plane<u8>` holding `BLOCK_SIZE.width() * BLOCK_SIZE.height()`
/// samples (wrapped with `BLOCK_SIZE.width()` as the second argument), the
/// above edge (bytes `33..` of `edge_buf`) and the left edge (bytes `..32`).
/// Byte 32 lies between the two views and is not part of either slice.
pub fn generate_block_u8<'a>(
  rng: &mut ChaChaRng, edge_buf: &'a mut AlignedArray<[u8; 65]>
) -> (Plane<u8>, &'a [u8], &'a [u8]) {
  let width = BLOCK_SIZE.width();
  let samples = vec![0u8; width * BLOCK_SIZE.height()];
  let plane = Plane::wrap(samples, width);

  // Randomise the whole edge buffer before handing out views into it.
  rng.fill_bytes(&mut edge_buf.array);
  let edges = &edge_buf.array;
  let left_edge = &edges[..32];
  let above_edge = &edges[33..];

  (plane, above_edge, left_edge)
}
/// Registers `f` with Criterion, passing `id` both as the benchmark name and
/// as the first argument to `Criterion::bench`.
///
/// A throughput annotation is attached based on the id suffix: ids ending in
/// `_4x4_u8` report 16 bytes, other ids ending in `_4x4` report 32 bytes, and
/// any other id is benchmarked without a throughput setting.
pub fn bench_pred_fn<F>(c: &mut Criterion, id: &str, f: F)
where
  F: FnMut(&mut Bencher) + 'static
{
  // The `_u8` suffix must be tested first; its ids do not end in `_4x4`.
  let bytes_per_call = if id.ends_with("_4x4_u8") {
    Some(16)
  } else if id.ends_with("_4x4") {
    Some(32)
  } else {
    None
  };

  let benchmark = Benchmark::new(id, f);
  let benchmark = match bytes_per_call {
    Some(bytes) => benchmark.throughput(Throughput::Bytes(bytes)),
    None => benchmark
  };
  c.bench(id, benchmark);
}
/// Registers the 4x4 intra-prediction benchmarks defined in this file with
/// the given Criterion instance.
// NOTE(review): the `c.bench_function` calls and the `bench_pred_fn` calls
// below register the same ids ("intra_dc_4x4", ...). This looks like both
// sides of a diff shown together; confirm which set is current.
pub fn pred_bench(c: &mut Criterion) {
c.bench_function("intra_dc_4x4", |b| intra_dc_4x4(b));
c.bench_function("intra_dc_left_4x4", |b| intra_dc_left_4x4(b));
c.bench_function("intra_dc_top_4x4", |b| intra_dc_top_4x4(b));
c.bench_function("intra_h_4x4", |b| intra_h_4x4(b));
c.bench_function("intra_v_4x4", |b| intra_v_4x4(b));
c.bench_function("intra_paeth_4x4", |b| intra_paeth_4x4(b));
c.bench_function("intra_smooth_4x4", |b| intra_smooth_4x4(b));
c.bench_function("intra_smooth_h_4x4", |b| intra_smooth_h_4x4(b));
c.bench_function("intra_smooth_v_4x4", |b| intra_smooth_v_4x4(b));
c.bench_function("intra_cfl_4x4", |b| intra_cfl_4x4(b));
bench_pred_fn(c, "intra_dc_4x4", intra_dc_4x4);
bench_pred_fn(c, "intra_dc_left_4x4", intra_dc_left_4x4);
bench_pred_fn(c, "intra_dc_top_4x4", intra_dc_top_4x4);
bench_pred_fn(c, "intra_h_4x4", intra_h_4x4);
bench_pred_fn(c, "intra_v_4x4", intra_v_4x4);
bench_pred_fn(c, "intra_paeth_4x4", intra_paeth_4x4);
bench_pred_fn(c, "intra_smooth_4x4", intra_smooth_4x4);
bench_pred_fn(c, "intra_smooth_h_4x4", intra_smooth_h_4x4);
bench_pred_fn(c, "intra_smooth_v_4x4", intra_smooth_v_4x4);
bench_pred_fn(c, "intra_cfl_4x4", intra_cfl_4x4);
// 8-bit (`u8`) variants; `bench_pred_fn` keys its throughput on the `_u8` suffix.
bench_pred_fn(c, "intra_dc_4x4_u8", intra_dc_4x4_u8);
bench_pred_fn(c, "intra_dc_128_4x4_u8", intra_dc_128_4x4_u8);
bench_pred_fn(c, "intra_dc_left_4x4_u8", intra_dc_left_4x4_u8);
bench_pred_fn(c, "intra_dc_top_4x4_u8", intra_dc_top_4x4_u8);
bench_pred_fn(c, "intra_h_4x4_u8", intra_h_4x4_u8);
bench_pred_fn(c, "intra_v_4x4_u8", intra_v_4x4_u8);
bench_pred_fn(c, "intra_paeth_4x4_u8", intra_paeth_4x4_u8);
bench_pred_fn(c, "intra_smooth_4x4_u8", intra_smooth_4x4_u8);
bench_pred_fn(c, "intra_smooth_h_4x4_u8", intra_smooth_h_4x4_u8);
bench_pred_fn(c, "intra_smooth_v_4x4_u8", intra_smooth_v_4x4_u8);
}
/// Benchmarks `Block4x4::pred_dc` on the block returned by `generate_block`,
/// using the first four above and left samples.
// NOTE(review): this span appears to hold both sides of a diff (two RNG/block
// setups, a `MAX_ITER` loop with the stride-taking call, then a single
// `as_region_mut` call) — confirm which lines are current.
pub fn intra_dc_4x4(b: &mut Bencher) {
let mut ra = ChaChaRng::from_seed([0; 32]);
let (mut block, above, left) = generate_block(&mut ra);
let mut rng = ChaChaRng::from_seed([0; 32]);
let (mut block, above, left) = generate_block(&mut rng);
b.iter(|| {
for _ in 0..MAX_ITER {
Block4x4::pred_dc(
&mut block,
BLOCK_SIZE.width(),
&above[..4],
&left[..4]
);
}
Block4x4::pred_dc(&mut block.as_region_mut(), &above[..4], &left[..4]);
})
}
/// Benchmarks `Block4x4::pred_dc_left` on the block returned by
/// `generate_block`, using the first four above and left samples.
// NOTE(review): this span appears to hold both sides of a diff (two RNG/block
// setups, a `MAX_ITER` loop with the stride-taking call, then a single
// `as_region_mut` call) — confirm which lines are current.
pub fn intra_dc_left_4x4(b: &mut Bencher) {
let mut ra = ChaChaRng::from_seed([0; 32]);
let (mut block, above, left) = generate_block(&mut ra);
let mut rng = ChaChaRng::from_seed([0; 32]);
let (mut block, above, left) = generate_block(&mut rng);
b.iter(|| {
for _ in 0..MAX_ITER {
Block4x4::pred_dc_left(
&mut block,
BLOCK_SIZE.width(),
&above[..4],
&left[..4]
);
}
Block4x4::pred_dc_left(
&mut block.as_region_mut(),
&above[..4],
&left[..4]
);
})
}
/// Benchmarks `Block4x4::pred_dc_top` on the block returned by
/// `generate_block`, using the first four above and left samples.
// NOTE(review): this span appears to hold both sides of a diff (two RNG/block
// setups, a `MAX_ITER` loop with the stride-taking call, then a single
// `as_region_mut` call) — confirm which lines are current.
pub fn intra_dc_top_4x4(b: &mut Bencher) {
let mut ra = ChaChaRng::from_seed([0; 32]);
let (mut block, above, left) = generate_block(&mut ra);
let mut rng = ChaChaRng::from_seed([0; 32]);
let (mut block, above, left) = generate_block(&mut rng);
b.iter(|| {
for _ in 0..MAX_ITER {
Block4x4::pred_dc_top(
&mut block,
BLOCK_SIZE.width(),
&above[..4],
&left[..4]
);
}
Block4x4::pred_dc_top(
&mut block.as_region_mut(),
&above[..4],
&left[..4]
);
})
}
......@@ -91,9 +124,7 @@ pub fn intra_h_4x4(b: &mut Bencher) {
let (mut block, _above, left) = generate_block(&mut rng);
b.iter(|| {
for _ in 0..MAX_ITER {
Block4x4::pred_h(&mut block, BLOCK_SIZE.width(), &left[..4]);
}
Block4x4::pred_h(&mut block.as_region_mut(), &left[..4]);
})
}
......@@ -102,9 +133,7 @@ pub fn intra_v_4x4(b: &mut Bencher) {
let (mut block, above, _left) = generate_block(&mut rng);
b.iter(|| {
for _ in 0..MAX_ITER {
Block4x4::pred_v(&mut block, BLOCK_SIZE.width(), &above[..4]);
}
Block4x4::pred_v(&mut block.as_region_mut(), &above[..4]);
})
}
......@@ -114,15 +143,12 @@ pub fn intra_paeth_4x4(b: &mut Bencher) {
let above_left = unsafe { *above.as_ptr().offset(-1) };
b.iter(|| {
for _ in 0..MAX_ITER {
Block4x4::pred_paeth(
&mut block,
BLOCK_SIZE.width(),
&above[..4],
&left[..4],
above_left
);
}
Block4x4::pred_paeth(
&mut block.as_region_mut(),
&above[..4],
&left[..4],
above_left
);
})
}
......@@ -131,14 +157,11 @@ pub fn intra_smooth_4x4(b: &mut Bencher) {
let (mut block, above, left) = generate_block(&mut rng);
b.iter(|| {
for _ in 0..MAX_ITER {
Block4x4::pred_smooth(
&mut block,
BLOCK_SIZE.width(),
&above[..4],
&left[..4]
);
}
Block4x4::pred_smooth(
&mut block.as_region_mut(),
&above[..4],
&left[..4]
);
})
}
......@@ -147,14 +170,11 @@ pub fn intra_smooth_h_4x4(b: &mut Bencher) {
let (mut block, above, left) = generate_block(&mut rng);
b.iter(|| {
for _ in 0..MAX_ITER {
Block4x4::pred_smooth_h(
&mut block,
BLOCK_SIZE.width(),
&above[..4],
&left[..4]
);
}
Block4x4::pred_smooth_h(
&mut block.as_region_mut(),
&above[..4],
&left[..4]
);
})
}
......@@ -163,26 +183,158 @@ pub fn intra_smooth_v_4x4(b: &mut Bencher) {
let (mut block, above, left) = generate_block(&mut rng);
b.iter(|| {
for _ in 0..MAX_ITER {
Block4x4::pred_smooth_v(
&mut block,
BLOCK_SIZE.width(),
&above[..4],
&left[..4]
);
}
Block4x4::pred_smooth_v(
&mut block.as_region_mut(),
&above[..4],
&left[..4]
);
})
}
/// Benchmarks `Block4x4::pred_cfl` with 32 * 32 random `i16` AC values, an
/// alpha of -1 and a literal `8` argument (presumably the bit depth — TODO
/// confirm against `pred_cfl`'s signature).
// NOTE(review): this span appears to hold both sides of a diff (two
// `generate_block` destructurings, a `MAX_ITER` loop with the stride-taking
// call, then a single `as_region_mut` call) — confirm which lines are current.
pub fn intra_cfl_4x4(b: &mut Bencher) {
let mut rng = ChaChaRng::from_seed([0; 32]);
let (mut block, _above, _left) = generate_block(&mut rng);
let (mut block, above, left) = generate_block(&mut rng);
let ac: Vec<i16> = (0..(32 * 32)).map(|_| rng.gen()).collect();
let alpha = -1 as i16;
b.iter(|| {
for _ in 0..MAX_ITER {
Block4x4::pred_cfl(&mut block, BLOCK_SIZE.width(), &ac, alpha, 8);
}
Block4x4::pred_cfl(
&mut block.as_region_mut(),
&ac,
alpha,
8,
&above,
&left
);
})
}
/// Times `Block4x4::pred_dc` on an 8-bit plane, feeding it the first four
/// above samples and the last four left samples from `generate_block_u8`.
pub fn intra_dc_4x4_u8(b: &mut Bencher) {
  let mut seeded = ChaChaRng::from_seed([0; 32]);
  let mut edges = UninitializedAlignedArray();
  let (mut plane, top, side) = generate_block_u8(&mut seeded, &mut edges);

  b.iter(|| {
    let mut region = plane.as_region_mut();
    Block4x4::pred_dc(&mut region, &top[..4], &side[32 - 4..]);
  })
}
/// Times `Block4x4::pred_dc_128` on an 8-bit plane with a literal `8` as its
/// second argument; the edge slices from `generate_block_u8` are unused.
pub fn intra_dc_128_4x4_u8(b: &mut Bencher) {
  let mut seeded = ChaChaRng::from_seed([0; 32]);
  let mut edges = UninitializedAlignedArray();
  let (mut plane, _top, _side) = generate_block_u8(&mut seeded, &mut edges);

  b.iter(|| {
    let mut region = plane.as_region_mut();
    Block4x4::pred_dc_128(&mut region, 8);
  })
}
/// Times `Block4x4::pred_dc_left` on an 8-bit plane, feeding it the first
/// four above samples and the last four left samples from `generate_block_u8`.
pub fn intra_dc_left_4x4_u8(b: &mut Bencher) {
  let mut seeded = ChaChaRng::from_seed([0; 32]);
  let mut edges = UninitializedAlignedArray();
  let (mut plane, top, side) = generate_block_u8(&mut seeded, &mut edges);

  b.iter(|| {
    let mut region = plane.as_region_mut();
    Block4x4::pred_dc_left(&mut region, &top[..4], &side[32 - 4..]);
  })
}
/// Times `Block4x4::pred_dc_top` on an 8-bit plane, feeding it the first
/// four above samples and the last four left samples from `generate_block_u8`.
pub fn intra_dc_top_4x4_u8(b: &mut Bencher) {
  let mut seeded = ChaChaRng::from_seed([0; 32]);
  let mut edges = UninitializedAlignedArray();
  let (mut plane, top, side) = generate_block_u8(&mut seeded, &mut edges);

  b.iter(|| {
    let mut region = plane.as_region_mut();
    Block4x4::pred_dc_top(&mut region, &top[..4], &side[32 - 4..]);
  })
}
/// Times `Block4x4::pred_h` on an 8-bit plane, feeding it the last four
/// left samples from `generate_block_u8`; the above edge is unused.
pub fn intra_h_4x4_u8(b: &mut Bencher) {
  let mut seeded = ChaChaRng::from_seed([0; 32]);
  let mut edges = UninitializedAlignedArray();
  let (mut plane, _top, side) = generate_block_u8(&mut seeded, &mut edges);

  b.iter(|| {
    let mut region = plane.as_region_mut();
    Block4x4::pred_h(&mut region, &side[32 - 4..]);
  })
}
/// Times `Block4x4::pred_v` on an 8-bit plane, feeding it the first four
/// above samples from `generate_block_u8`; the left edge is unused.
pub fn intra_v_4x4_u8(b: &mut Bencher) {
  let mut seeded = ChaChaRng::from_seed([0; 32]);
  let mut edges = UninitializedAlignedArray();
  let (mut plane, top, _side) = generate_block_u8(&mut seeded, &mut edges);

  b.iter(|| {
    let mut region = plane.as_region_mut();
    Block4x4::pred_v(&mut region, &top[..4]);
  })
}
/// Times `Block4x4::pred_paeth` on an 8-bit plane, feeding it the first four
/// above samples, the last four left samples and the above-left sample.
pub fn intra_paeth_4x4_u8(b: &mut Bencher) {
  let mut seeded = ChaChaRng::from_seed([0; 32]);
  let mut edges = UninitializedAlignedArray();
  let (mut plane, top, side) = generate_block_u8(&mut seeded, &mut edges);

  // Reads the byte immediately before the `top` slice. `generate_block_u8`
  // slices `top` from index 33 of the 65-byte edge buffer, so this is
  // index 32 of that buffer.
  let corner = unsafe { *top.as_ptr().sub(1) };

  b.iter(|| {
    let mut region = plane.as_region_mut();
    Block4x4::pred_paeth(&mut region, &top[..4], &side[32 - 4..], corner);
  })
}
/// Times `Block4x4::pred_smooth` on an 8-bit plane, feeding it the first
/// four above samples and the last four left samples from `generate_block_u8`.
pub fn intra_smooth_4x4_u8(b: &mut Bencher) {
  let mut seeded = ChaChaRng::from_seed([0; 32]);
  let mut edges = UninitializedAlignedArray();
  let (mut plane, top, side) = generate_block_u8(&mut seeded, &mut edges);

  b.iter(|| {
    let mut region = plane.as_region_mut();
    Block4x4::pred_smooth(&mut region, &top[..4], &side[32 - 4..]);
  })
}
/// Times `Block4x4::pred_smooth_h` on an 8-bit plane, feeding it the first
/// four above samples and the last four left samples from `generate_block_u8`.
pub fn intra_smooth_h_4x4_u8(b: &mut Bencher) {
  let mut seeded = ChaChaRng::from_seed([0; 32]);
  let mut edges = UninitializedAlignedArray();
  let (mut plane, top, side) = generate_block_u8(&mut seeded, &mut edges);

  b.iter(|| {
    let mut region = plane.as_region_mut();
    Block4x4::pred_smooth_h(&mut region, &top[..4], &side[32 - 4..]);
  })
}
/// Times `Block4x4::pred_smooth_v` on an 8-bit plane, feeding it the first
/// four above samples and the last four left samples from `generate_block_u8`.
pub fn intra_smooth_v_4x4_u8(b: &mut Bencher) {
  let mut seeded = ChaChaRng::from_seed([0; 32]);
  let mut edges = UninitializedAlignedArray();
  let (mut plane, top, side) = generate_block_u8(&mut seeded, &mut edges);

  b.iter(|| {
    let mut region = plane.as_region_mut();
    Block4x4::pred_smooth_v(&mut region, &top[..4], &side[32 - 4..]);
  })
}
......@@ -8,8 +8,9 @@
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
use criterion::*;
use rand::{ChaChaRng, Rng, SeedableRng};
use rav1e::transform;
use rand::{Rng, SeedableRng};
use rand_chacha::ChaChaRng;
use rav1e::bench::transform;
fn bench_idct4(b: &mut Bencher, bit_depth: &usize) {
let mut ra = ChaChaRng::from_seed([0; 32]);
......
// build.rs
extern crate cmake;
#[cfg(unix)]
extern crate pkg_config;
#[cfg(unix)]
#[cfg(feature = "decode_test")]
extern crate bindgen;
#[cfg(target_arch = "x86_64")]
extern crate nasm_rs;
#[allow(unused_imports)]
use std::env;
use std::fs;
use std::path::Path;
fn main() {
#[cfg(target_arch = "x86_64")] {
use std::fs::File;
use std::io::Write;
let out_dir = env::var("OUT_DIR").unwrap();
{
let dest_path = Path::new(&out_dir).join("config.asm");
let mut config_file = File::create(dest_path).unwrap();
config_file.write(b" %define ARCH_X86_32 0\n").unwrap();
config_file.write(b" %define ARCH_X86_64 1\n").unwrap();
config_file.write(b" %define PIC 1\n").unwrap();
config_file.write(b" %define STACK_ALIGNMENT 32\n").unwrap();
}
let mut config_include_arg = String::from("-I");
config_include_arg.push_str(&out_dir);
config_include_arg.push('/');
nasm_rs::compile_library_args("rav1easm", &["src/x86/mc.asm"], &[&config_include_arg, "-Isrc/"]);
}
if cfg!(windows) && cfg!(feature = "decode_test") {
panic!("Unsupported feature on this platform!");
}
#[allow(dead_code)]
fn rerun_dir<P: AsRef<Path>>(dir: P) {
for entry in fs::read_dir(dir).unwrap() {
let entry = entry.unwrap();
let path = entry.path();
println!("cargo:rerun-if-changed={}", path.to_string_lossy());
let cargo_dir = env::var("CARGO_MANIFEST_DIR").unwrap();
let build_path = Path::new(&cargo_dir).join("aom_build/aom");
let debug = if let Some(v) = env::var("PROFILE").ok() {
match v.as_str() {
"bench" | "release" => false,
_ => true,
if path.is_dir() {
rerun_dir(path);
}
} else {
false
};
let dst = cmake::Config::new(build_path)
.define("CONFIG_DEBUG", (debug as u8).to_string())
.define("CONFIG_ANALYZER", "0")
.define("ENABLE_DOCS", "0")
.define("ENABLE_NASM", "1")
.define("ENABLE_TESTS", "0")
.no_build_target(cfg!(windows))
.build();
// Dirty hack to force a rebuild whenever the defaults are changed upstream
let _ = fs::remove_file(dst.join("build/CMakeCache.txt"));
#[cfg(windows)] {
println!("cargo:rustc-link-search=native={}", dst.join("build").to_str().unwrap());
println!("cargo:rustc-link-search=native={}", dst.join("build/Debug").to_str().unwrap());
println!("cargo:rustc-link-search=native={}", dst.join("build/Release").to_str().unwrap());
println!("cargo:rustc-link-lib=static=aom");
}
}
#[cfg(unix)] {
env::set_var("PKG_CONFIG_PATH", dst.join("lib/pkgconfig"));
let _libs = pkg_config::Config::new().statik(true).probe("aom").unwrap();
#[cfg(feature = "decode_test")] {
use std::io::Write;
let out_dir = env::var("OUT_DIR").unwrap();
let headers = _libs.include_paths.clone();
let mut builder = bindgen::builder()
.blacklist_type("max_align_t")
.rustfmt_bindings(false)
.header("data/aom.h");
for header in headers {
builder = builder.clang_arg("-I").clang_arg(header.to_str().unwrap());
}
// Manually fix the comment so rustdoc won't try to pick them
let s = builder
.generate()
.unwrap()
.to_string()
.replace("/**", "/*")
.replace("/*!", "/*");
let dest_path = Path::new(&out_dir).join("aom.rs");
let mut file = fs::File::create(dest_path).unwrap();
/// Writes `config.asm` into `OUT_DIR`, assembles the x86 NASM sources into
/// the static library `rav1easm`, and emits the Cargo directives to link it
/// and to rerun the build script when the assembly directories change.
///
/// Panics if `OUT_DIR` is unset or if the config file cannot be created or
/// written.
#[cfg(feature = "nasm")]
fn build_nasm_files() {
  use std::fs::File;
  use std::io::Write;
  let out_dir = env::var("OUT_DIR").unwrap();
  // A build script is compiled for the host, so `cfg!(target_os = ...)`
  // describes the machine running the build. Cargo exports the real target
  // in CARGO_CFG_TARGET_OS; fall back to the host value only if it is unset.
  let target_is_macos = match env::var("CARGO_CFG_TARGET_OS") {
    Ok(os) => os == "macos",
    Err(_) => cfg!(target_os = "macos")
  };
  {
    // Scoped so the file is closed before the assembler reads it.
    let dest_path = Path::new(&out_dir).join("config.asm");
    let mut config_file = File::create(dest_path).unwrap();
    // `write_all` instead of `write`: `write` may stop after a partial write
    // and only reports the byte count, which was being discarded.
    config_file.write_all(b" %define private_prefix rav1e\n").unwrap();
    config_file.write_all(b" %define ARCH_X86_32 0\n").unwrap();
    config_file.write_all(b" %define ARCH_X86_64 1\n").unwrap();
    config_file.write_all(b" %define PIC 1\n").unwrap();
    config_file.write_all(b" %define STACK_ALIGNMENT 16\n").unwrap();
    if target_is_macos {
      config_file.write_all(b" %define PREFIX 1\n").unwrap();
    }
  }
  // Include path pointing at OUT_DIR (with trailing slash) so the sources
  // can find the generated config.asm.
  let mut config_include_arg = String::from("-I");
  config_include_arg.push_str(&out_dir);
  config_include_arg.push('/');
  nasm_rs::compile_library_args(
    "rav1easm",
    &[
      "src/x86/data.asm",
      "src/x86/ipred.asm",
      "src/x86/itx.asm",
      "src/x86/mc.asm",
      "src/x86/me.asm",
      "src/x86/sad_sse2.asm",
      "src/x86/sad_avx.asm"
    ],
    &[&config_include_arg, "-Isrc/"]
  );
  println!("cargo:rustc-link-lib=static=rav1easm");
  rerun_dir("src/x86");
  rerun_dir("src/ext/x86");
}
let _ = file.write(s.as_bytes());
}
#[allow(unused_variables)]
fn main() {
let arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap();
let os = env::var("CARGO_CFG_TARGET_OS").unwrap();
// let env = env::var("CARGO_CFG_TARGET_ENV").unwrap();
#[cfg(feature = "nasm")] {
if arch == "x86_64" {
build_nasm_files()
}
}
fn rerun_dir<P: AsRef<Path>>(dir: P) {
for entry in fs::read_dir(dir).unwrap() {
let entry = entry.unwrap();
let path = entry.path();
println!("cargo:rerun-if-changed={}", path.to_string_lossy());
if path.is_dir() {
rerun_dir(path);
}
}
if os == "windows" && cfg!(feature = "decode_test") {
panic!("Unsupported feature on this platform!");
}
rerun_dir("aom_build");
vergen::generate_cargo_keys(vergen::ConstantsFlags::all()).expect("Unable to generate the cargo keys!");
}
......@@ -5,11 +5,21 @@ set -e
#SEQ=!!!!! ENTER YOUR FAVORITE Y4M HERE !!!!!
# Build in release mode by default; --debug switches this off.
IS_RELEASE=1
# Strip --debug from the positional parameters. Each argument is shifted off
# the front; anything other than --debug is appended back to the end, so
# after the loop "$@" holds the remaining arguments in their original order.
for arg in "$@"; do
shift
case "$arg" in
"--debug") IS_RELEASE=0 ;;
*) set -- "$@" "$arg"
esac
done
if [[ -z "${SEQ}" ]]; then
SEQ=nyan.y4m
SEQ10=nyan10.y4m
SEQ12=nyan12.y4m
wget -nc https://mf4.xiph.org/~ltrudeau/videos/nyan.y4m
#wget -nc https://people.xiph.org/~tdaede/nyan10.y4m
#wget -nc https://people.xiph.org/~tdaede/nyan12.y4m
......@@ -22,48 +32,6 @@ if [ ! -f $SEQ ]; then
exit 1 # terminate and indicate error
fi
# Hide githash to detect version changes
GITHASH=".git/rav1e.githash"
# Get previous version
EXPECTED_VERSION="42"
if [ -f $GITHASH ]; then
EXPECTED_VERSION=$(cat $GITHASH)
fi
# Get current version
ACTUAL_VERSION=$(git submodule status | xargs)
AOM_TEST="aom_test"
if [[ "$ACTUAL_VERSION" != "$EXPECTED_VERSION" ]] || [[ ! -f ./${AOM_TEST}/aomdec ]]; then
# Store current version to file
echo $ACTUAL_VERSION > $GITHASH
# Update aombuild
git submodule update --init
# Clean project files
cargo clean
# Get configure command from readme
CONFIGURE_CMD=$(fgrep "cmake ../aom" README.md)
# Wipe and create aom_test folder
rm -fR $AOM_TEST
mkdir -p $AOM_TEST
pushd $AOM_TEST
echo CONFIGURE COMMAND
echo $CONFIGURE_CMD
eval $CONFIGURE_CMD
# auto detect the number of cores and parallel build
make -j$(nproc --all)
popd
fi
# File containing the encoded sequence
ENC_FILE="enc_file.ivf"
# File containing the reconstructed sequence
......@@ -75,13 +43,20 @@ DEC_FILE="dec_file.y4m"
export RUST_BACKTRACE=1
# Build and run encoder
cargo run --bin rav1e --release -- $SEQ -o $ENC_FILE -s 3 -r $REC_FILE
BUILD_TYPE=""
if [ $IS_RELEASE == 1 ]; then
BUILD_TYPE="--release"
fi
cargo run --bin rav1e $BUILD_TYPE -- $SEQ -o $ENC_FILE -s 3 -r $REC_FILE
# Decode
${AOM_TEST}/aomdec $ENC_FILE -o $DEC_FILE
aomdec $ENC_FILE -o $DEC_FILE
# Input/Output compare
cmp <(tail -n+2 $DEC_FILE) <(tail -n+2 $REC_FILE)
tail -n+2 $DEC_FILE > /tmp/dec_file
tail -n+2 $REC_FILE > /tmp/rec_file
cmp /tmp/dec_file /tmp/rec_file || (printf '\e[1;31m%-6s\e[m\n\n' 'Desync detected!!!' && exit 1)
# Daala tools support coming soon
#DAALA_TOOLS="../daala/tools/"
......@@ -100,11 +75,11 @@ mpv --loop $DEC_FILE
# Repeat for high bit depth clips
#cargo run --bin rav1e --release -- $SEQ10 -o $ENC_FILE -s 3 -r $REC_FILE
#${AOM_TEST}/aomdec $ENC_FILE -o $DEC_FILE
#aomdec $ENC_FILE -o $DEC_FILE
#cmp <(tail -n+2 $DEC_FILE) <(tail -n+2 $REC_FILE)
#mpv --loop $DEC_FILE
#cargo run --bin rav1e --release -- $SEQ12 -o $ENC_FILE -s 3 -r $REC_FILE
#${AOM_TEST}/aomdec $ENC_FILE -o $DEC_FILE
#aomdec $ENC_FILE -o $DEC_FILE
#cmp <(tail -n+2 $DEC_FILE) <(tail -n+2 $REC_FILE)
#mpv --loop $DEC_FILE
# NOTE(review): these keys match cbindgen's configuration schema for
# generating a C header; confirm each option against the cbindgen docs.
header = "// SPDX-License-Identifier: MIT"
sys_includes = ["stddef.h", "stdint.h", "stdlib.h"]
no_includes = true
include_guard = "RAV1E_H"
tab_width = 4
style = "Type"
language = "C"
[parse]
# Dependency parsing is enabled, with `rav1e` as the only crate listed.
parse_deps = true
include = ['rav1e']
[export]
# Presumably prepended to the names of exported items — TODO confirm.
prefix = "Ra"
item_types = ["enums", "structs", "unions", "typedefs", "opaque", "functions"]
[enum]
# Presumably yields variants named like ENUM_NAME_VARIANT — TODO confirm.
rename_variants = "ScreamingSnakeCase"
prefix_with_name = true
single-char-binding-names-threshold = 10
too-many-arguments-threshold = 16
cyclomatic-complexity-threshold = 40
cognitive-complexity-threshold = 40
trivial-copy-size-limit = 16 # 128-bits = 2 64-bit registers
\ No newline at end of file
[package]
name = "avformat-sys"
version = "0.1.0"
authors = ["Luca Barbato <lu_zero@gentoo.org>"]
license = "MIT"
description = "FFI bindings to ffmpeg"
edition = "2018"
build = "build.rs"
[package.metadata.pkg-config]
libavformat = "58.18.102"
libavcodec = "58.0.0"
libavutil = "56.0.0"
[features]
build_sources = []
[build-dependencies]
bindgen = "0.49"
metadeps = "1.1.2"
[dependencies]