Commit 5e9d1b07 authored by David Michael Barr

Integrate dav1d ipred.asm

As a simple example, call ipred_dc_128.

Support linking asm on macOS; the linker trips over a dependency on
src/tables.c in mc.asm if we include it.

Avoid linking asm with MSVC for now, as the static library is not found.
parent db224723
......@@ -25,8 +25,19 @@ pub fn generate_block(rng: &mut ChaChaRng) -> (Vec<u16>, Vec<u16>, Vec<u16>) {
(block, above_context, left_context)
}
/// Builds the buffers used by the 8-bit intra prediction benchmarks.
///
/// Returns `(block, above_context, left_context)`: a zero-filled block of
/// `BLOCK_SIZE.width() * BLOCK_SIZE.height()` samples, followed by two
/// vectors filled with values drawn from `rng`.
///
/// NOTE(review): the above context is sized by the block *height* and the
/// left context by the block *width*. This is harmless for square blocks,
/// but looks swapped -- confirm before benchmarking non-square sizes.
pub fn generate_block_u8(rng: &mut ChaChaRng) -> (Vec<u8>, Vec<u8>, Vec<u8>) {
  let width = BLOCK_SIZE.width();
  let height = BLOCK_SIZE.height();

  let zeroed_block = vec![0u8; width * height];

  // The draw order (above first, then left) determines which random values
  // land in which vector for a given seed, so it is kept as is.
  let above = (0..height).map(|_| rng.gen::<u8>()).collect::<Vec<_>>();
  let left = (0..width).map(|_| rng.gen::<u8>()).collect::<Vec<_>>();

  (zeroed_block, above, left)
}
pub fn pred_bench(c: &mut Criterion) {
c.bench_function("intra_dc_4x4", |b| intra_dc_4x4(b));
c.bench_function("intra_dc_128_4x4_u8", |b| intra_dc_128_4x4_u8(b));
c.bench_function("intra_dc_left_4x4", |b| intra_dc_left_4x4(b));
c.bench_function("intra_dc_top_4x4", |b| intra_dc_top_4x4(b));
c.bench_function("intra_h_4x4", |b| intra_h_4x4(b));
......@@ -54,6 +65,21 @@ pub fn intra_dc_4x4(b: &mut Bencher) {
})
}
/// Benchmarks `Block4x4::pred_dc_128` writing into an 8-bit block.
///
/// Each timed iteration calls the predictor `MAX_ITER` times on the same
/// output buffer, using `BLOCK_SIZE.width()` as the stride and a bit depth
/// of 8.
pub fn intra_dc_128_4x4_u8(b: &mut Bencher) {
  let mut ra = ChaChaRng::from_seed([0; 32]);
  // `pred_dc_128` is called with the output block only, so the edge contexts
  // are discarded; the underscore prefix silences the unused-variable lint.
  let (mut block, _above, _left) = generate_block_u8(&mut ra);

  b.iter(|| {
    for _ in 0..MAX_ITER {
      Block4x4::pred_dc_128(&mut block, BLOCK_SIZE.width(), 8);
    }
  })
}
pub fn intra_dc_left_4x4(b: &mut Bencher) {
let mut ra = ChaChaRng::from_seed([0; 32]);
let (mut block, above, left) = generate_block(&mut ra);
......
......@@ -14,22 +14,27 @@ use std::fs;
use std::path::Path;
fn main() {
#[cfg(target_arch = "x86_64")] {
#[cfg(all(target_arch = "x86_64", not(windows)))] {
use std::fs::File;
use std::io::Write;
let out_dir = env::var("OUT_DIR").unwrap();
{
let dest_path = Path::new(&out_dir).join("config.asm");
let mut config_file = File::create(dest_path).unwrap();
config_file.write(b" %define private_prefix rav1e\n").unwrap();
config_file.write(b" %define ARCH_X86_32 0\n").unwrap();
config_file.write(b" %define ARCH_X86_64 1\n").unwrap();
config_file.write(b" %define PIC 1\n").unwrap();
config_file.write(b" %define STACK_ALIGNMENT 32\n").unwrap();
if cfg!(target_os="macos") {
config_file.write(b" %define PREFIX 1\n").unwrap();
}
}
let mut config_include_arg = String::from("-I");
config_include_arg.push_str(&out_dir);
config_include_arg.push('/');
nasm_rs::compile_library_args("rav1easm", &["src/x86/mc.asm"], &[&config_include_arg, "-Isrc/"]);
nasm_rs::compile_library_args("rav1easm", &["src/x86/ipred.asm"], &[&config_include_arg, "-Isrc/"]);
println!("cargo:rustc-link-lib=static=rav1easm");
}
if cfg!(windows) && cfg!(feature = "decode_test") {
......
......@@ -11,7 +11,6 @@
#![cfg_attr(feature = "cargo-clippy", allow(cast_lossless))]
#![cfg_attr(feature = "cargo-clippy", allow(needless_range_loop))]
#[cfg(test)]
use libc;
use num_traits::*;
......@@ -217,6 +216,14 @@ fn get_scaled_luma_q0(alpha_q3: i16, ac_pred_q3: i16) -> i32 {
}
}
#[cfg(all(target_arch = "x86_64", not(windows)))]
extern {
fn rav1e_ipred_dc_128_avx2(
dst: *mut u8, stride: libc::ptrdiff_t, topleft: *const u8,
width: libc::c_int, height: libc::c_int, angle: libc::c_int
);
}
// TODO: rename the type bounds later
pub trait Intra<T>: Dim
where
......@@ -241,6 +248,22 @@ where
#[cfg_attr(feature = "comparative_bench", inline(never))]
fn pred_dc_128(output: &mut [T], stride: usize, bit_depth: usize) {
#[cfg(all(target_arch = "x86_64", not(windows)))]
{
use std::ptr;
if size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
return unsafe {
rav1e_ipred_dc_128_avx2(
output.as_mut_ptr() as *mut _,
stride as libc::ptrdiff_t,
ptr::null(),
Self::W as libc::c_int,
Self::H as libc::c_int,
0
)
};
}
}
for y in 0..Self::H {
for x in 0..Self::W {
output[y * stride + x] = (128u32 << (bit_depth - 8)).as_();
......@@ -874,6 +897,18 @@ pub mod test {
}
}
#[test]
fn pred_matches_u8() {
  // Predict a 4x4 block into the top-left corner of a 32-sample-wide buffer
  // at bit depth 8 and check that every predicted sample equals 128.
  let stride = 32;
  let expected_row = [128u8; 32];
  let mut output = vec![0u8; 32 * 32];

  Block4x4::pred_dc_128(&mut output, stride, 8);

  // Only the first four rows, and the first four samples of each, belong to
  // the predicted block.
  for row in output.chunks(stride).take(4) {
    assert_eq!(row[..4], expected_row[..4]);
  }
}
#[test]
fn pred_same() {
let mut ra = ChaChaRng::from_seed([0; 32]);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment