predict.rs 34.6 KB
Newer Older
1 2 3 4 5 6 7 8 9
// Copyright (c) 2017-2018, The rav1e contributors. All rights reserved
//
// This source code is subject to the terms of the BSD 2 Clause License and
// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
// was not distributed with this source code in the LICENSE file, you can
// obtain it at www.aomedia.org/license/software. If the Alliance for Open
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.

Raphael Zumer's avatar
Raphael Zumer committed
10
#![allow(non_upper_case_globals)]
Michael Bebenita's avatar
Michael Bebenita committed
11 12
#![cfg_attr(feature = "cargo-clippy", allow(cast_lossless))]
#![cfg_attr(feature = "cargo-clippy", allow(needless_range_loop))]
Raphael Zumer's avatar
Raphael Zumer committed
13

Raphaël Zumer's avatar
Raphaël Zumer committed
14
use context::{INTRA_MODES, MAX_TX_SIZE};
Michael Bebenita's avatar
Michael Bebenita committed
15
use partition::*;
16
use util::*;
17

Raphaël Zumer's avatar
Raphaël Zumer committed
18 19 20
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
use libc;
use num_traits::*;
21 22 23 24
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
Raphaël Zumer's avatar
Raphaël Zumer committed
25 26 27
use std::mem::*;
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
use std::ptr;
28

29
pub static RAV1E_INTRA_MODES: &'static [PredictionMode] = &[
Michael Bebenita's avatar
Michael Bebenita committed
30 31 32 33 34 35 36
  PredictionMode::DC_PRED,
  PredictionMode::H_PRED,
  PredictionMode::V_PRED,
  PredictionMode::SMOOTH_PRED,
  PredictionMode::SMOOTH_H_PRED,
  PredictionMode::SMOOTH_V_PRED,
  PredictionMode::PAETH_PRED
37 38
];

39
// Intra prediction modes tested at high speed levels
Michael Bebenita's avatar
Michael Bebenita committed
40
#[cfg_attr(rustfmt, rustfmt_skip)]
41 42 43 44 45 46
pub static RAV1E_INTRA_MODES_MINIMAL: &'static [PredictionMode] = &[
    PredictionMode::DC_PRED,
    PredictionMode::H_PRED,
    PredictionMode::V_PRED
];

47
pub static RAV1E_INTER_MODES_MINIMAL: &'static [PredictionMode] = &[
48
  PredictionMode::NEARESTMV
49 50
];

fbossen's avatar
fbossen committed
51 52 53 54 55 56 57 58
/// List of compound inter prediction modes (each variant name pairs two
/// motion-vector modes).
pub static RAV1E_INTER_COMPOUND_MODES: &'static [PredictionMode] = &[
  PredictionMode::GLOBAL_GLOBALMV,
  PredictionMode::NEAREST_NEARESTMV,
  PredictionMode::NEW_NEWMV,
  PredictionMode::NEAREST_NEWMV,
  PredictionMode::NEW_NEARESTMV
];

Raphael Zumer's avatar
Raphael Zumer committed
59 60 61 62
// Weights are quadratic from '1' to '1 / block_size', scaled by 2^sm_weight_log2_scale.
// With a scale of 8 the weights fit in a `u8` and a full weight equals 256.
const sm_weight_log2_scale: u8 = 8;

// Smooth predictor weights
Michael Bebenita's avatar
Michael Bebenita committed
63
#[cfg_attr(rustfmt, rustfmt_skip)]
Raphael Zumer's avatar
Raphael Zumer committed
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
static sm_weight_arrays: [u8; 2 * MAX_TX_SIZE] = [
    // Unused, because we always offset by bs, which is at least 2.
    0, 0,
    // bs = 2
    255, 128,
    // bs = 4
    255, 149, 85, 64,
    // bs = 8
    255, 197, 146, 105, 73, 50, 37, 32,
    // bs = 16
    255, 225, 196, 170, 145, 123, 102, 84, 68, 54, 43, 33, 26, 20, 17, 16,
    // bs = 32
    255, 240, 225, 210, 196, 182, 169, 157, 145, 133, 122, 111, 101, 92, 83, 74,
    66, 59, 52, 45, 39, 34, 29, 25, 21, 17, 14, 12, 10, 9, 8, 8,
    // bs = 64
79
    255, 248, 240, 233, 225, 218, 210, 203, 196, 189, 182, 176, 169, 163, 156,
Raphael Zumer's avatar
Raphael Zumer committed
80 81
    150, 144, 138, 133, 127, 121, 116, 111, 106, 101, 96, 91, 86, 82, 77, 73, 69,
    65, 61, 57, 54, 50, 47, 44, 41, 38, 35, 32, 29, 27, 25, 22, 20, 18, 16, 15,
82
    13, 12, 10, 9, 8, 7, 6, 6, 5, 5, 4, 4, 4,
Raphael Zumer's avatar
Raphael Zumer committed
83 84
];

Yushin Cho's avatar
Yushin Cho committed
85 86 87 88 89 90 91 92 93 94 95
// Bit flags naming the neighbouring edges a prediction mode reads; they are
// OR-ed together in `extend_modes`. Bit 0 is not used.
const NEED_LEFT: u8 = 1 << 1;
const NEED_ABOVE: u8 = 1 << 2;
const NEED_ABOVERIGHT: u8 = 1 << 3;
const NEED_ABOVELEFT: u8 = 1 << 4;
const NEED_BOTTOMLEFT: u8 = 1 << 5;

/*const INTRA_EDGE_FILT: usize = 3;
const INTRA_EDGE_TAPS: usize = 5;
const MAX_UPSAMPLE_SZ: usize = 16;*/

pub static extend_modes: [u8; INTRA_MODES] = [
96 97 98 99 100 101 102 103 104 105 106 107 108
  NEED_ABOVE | NEED_LEFT,                  // DC
  NEED_ABOVE,                              // V
  NEED_LEFT,                               // H
  NEED_ABOVE | NEED_ABOVERIGHT,            // D45
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // D135
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // D113
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // D157
  NEED_LEFT | NEED_BOTTOMLEFT,             // D203
  NEED_ABOVE | NEED_ABOVERIGHT,            // D67
  NEED_LEFT | NEED_ABOVE,                  // SMOOTH
  NEED_LEFT | NEED_ABOVE,                  // SMOOTH_V
  NEED_LEFT | NEED_ABOVE,                  // SMOOTH_H
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT  // PAETH
Yushin Cho's avatar
Yushin Cho committed
109 110
];

111
/// Compile-time block dimensions.
pub trait Dim {
  /// Block width.
  const W: usize;
  /// Block height.
  const H: usize;
}

116 117 118 119 120 121 122 123 124
// Declares a unit struct named `Block<W>x<H>` (the name is assembled with
// `paste`), implements `Dim` for it with the given width and height, and
// gives it the default `Intra` methods for both `u8` and `u16`.
macro_rules! block_dimension {
  ($W:expr, $H:expr) => {
    paste::item! {
      pub struct [<Block $W x $H>];

      impl Dim for [<Block $W x $H>] {
        const W: usize = $W;
        const H: usize = $H;
      }

      impl Intra<u8> for [<Block $W x $H>] {}
      impl Intra<u16> for [<Block $W x $H>] {}
    }
  };
}

// Expands `block_dimension!` once for every `(W, H)` pair in a
// comma-separated list; at least one pair is required.
macro_rules! blocks_dimension {
  ($(($W:expr, $H:expr)),+) => {
    $(
      block_dimension! { $W, $H }
    )*
  }
}

// Block types, grouped by aspect ratio (W:H): 1:1, 1:2, 2:1, 1:4, 4:1.
blocks_dimension! { (4, 4), (8, 8), (16, 16), (32, 32), (64, 64) }
blocks_dimension! { (4, 8), (8, 16), (16, 32), (32, 64) }
blocks_dimension! { (8, 4), (16, 8), (32, 16), (64, 32) }
blocks_dimension! { (4, 16), (8, 32), (16, 64) }
blocks_dimension! { (16, 4), (32, 8), (64, 16) }
145

146 147 148 149 150 151 152 153 154 155 156
/// Multiplies a Q3 `alpha` by a Q3 AC value and returns the product rounded
/// to an integer (Q0). Ties are rounded away from zero, so positive and
/// negative products round symmetrically.
#[inline(always)]
fn get_scaled_luma_q0(alpha_q3: i16, ac_pred_q3: i16) -> i32 {
  // Q3 * Q3 yields Q6; widening first keeps the product inside `i32`.
  let product_q6 = i32::from(alpha_q3) * i32::from(ac_pred_q3);
  // Round the magnitude, then put the sign back.
  let magnitude_q0 = (product_q6.abs() + 32) >> 6;
  product_q6.signum() * magnitude_q0
}

Luca Barbato's avatar
Luca Barbato committed
157
// Declares the external symbol `$f` with the argument list shared by the
// AVX2 intra predictors called from this file: destination pointer and
// stride, a pointer passed as `topleft`, block width and height, and an
// angle (every caller in this file passes 0 for the angle).
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
macro_rules! decl_angular_ipred_fn {
  ($f:ident) => {
    extern {
      fn $f(
        dst: *mut u8, stride: libc::ptrdiff_t, topleft: *const u8,
        width: libc::c_int, height: libc::c_int, angle: libc::c_int
      );
    }
  };
}

#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
decl_angular_ipred_fn!(rav1e_ipred_dc_avx2);
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
decl_angular_ipred_fn!(rav1e_ipred_dc_128_avx2);
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
decl_angular_ipred_fn!(rav1e_ipred_dc_left_avx2);
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
decl_angular_ipred_fn!(rav1e_ipred_dc_top_avx2);
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
decl_angular_ipred_fn!(rav1e_ipred_h_avx2);
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
decl_angular_ipred_fn!(rav1e_ipred_v_avx2);
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
decl_angular_ipred_fn!(rav1e_ipred_paeth_avx2);
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
decl_angular_ipred_fn!(rav1e_ipred_smooth_avx2);
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
decl_angular_ipred_fn!(rav1e_ipred_smooth_h_avx2);
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
decl_angular_ipred_fn!(rav1e_ipred_smooth_v_avx2);
189

190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211
// Declares the external symbol `$f` with the argument list used by the AVX2
// chroma-from-luma predictors: the same leading arguments as the other
// intra predictors, followed by a pointer to the AC buffer and `alpha`.
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
macro_rules! decl_cfl_pred_fn {
  ($f:ident) => {
    extern {
      fn $f(
        dst: *mut u8, stride: libc::ptrdiff_t, topleft: *const u8,
        width: libc::c_int, height: libc::c_int, ac: *const u8,
        alpha: libc::c_int
      );
    }
  };
}

// One declaration per DC variant the CfL predictors build on.
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
decl_cfl_pred_fn!(rav1e_ipred_cfl_avx2);
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
decl_cfl_pred_fn!(rav1e_ipred_cfl_128_avx2);
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
decl_cfl_pred_fn!(rav1e_ipred_cfl_left_avx2);
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
decl_cfl_pred_fn!(rav1e_ipred_cfl_top_avx2);

212 213
pub trait Intra<T>: Dim
where
214
  T: Pixel,
215 216 217 218
  i32: AsPrimitive<T>,
  u32: AsPrimitive<T>,
  usize: AsPrimitive<T>
{
rzumer's avatar
rzumer committed
219
  #[cfg_attr(feature = "comparative_bench", inline(never))]
220
  fn pred_dc(output: &mut [T], stride: usize, above: &[T], left: &[T]) {
Luca Barbato's avatar
Luca Barbato committed
221
    #[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
222 223 224 225 226 227 228 229 230 231 232 233 234 235
    {
      if size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
        return unsafe {
          rav1e_ipred_dc_avx2(
            output.as_mut_ptr() as *mut _,
            stride as libc::ptrdiff_t,
            above.as_ptr().offset(-1) as *const _,
            Self::W as libc::c_int,
            Self::H as libc::c_int,
            0
          )
        };
      }
    }
Michael Bebenita's avatar
Michael Bebenita committed
236 237 238
    let edges = left[..Self::H].iter().chain(above[..Self::W].iter());
    let len = (Self::W + Self::H) as u32;
    let avg =
239
      ((edges.fold(0u32, |acc, &v| { let v: u32 = v.into(); v + acc }) + (len >> 1)) / len).as_();
Michael Bebenita's avatar
Michael Bebenita committed
240 241 242 243 244

    for line in output.chunks_mut(stride).take(Self::H) {
      for v in &mut line[..Self::W] {
        *v = avg;
      }
Luca Barbato's avatar
Luca Barbato committed
245
    }
Michael Bebenita's avatar
Michael Bebenita committed
246
  }
Luca Barbato's avatar
Luca Barbato committed
247

rzumer's avatar
rzumer committed
248
  #[cfg_attr(feature = "comparative_bench", inline(never))]
249
  fn pred_dc_128(output: &mut [T], stride: usize, bit_depth: usize) {
Luca Barbato's avatar
Luca Barbato committed
250
    #[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
David Michael Barr's avatar
David Michael Barr committed
251 252 253 254 255 256 257 258 259 260 261 262 263 264
    {
      if size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
        return unsafe {
          rav1e_ipred_dc_128_avx2(
            output.as_mut_ptr() as *mut _,
            stride as libc::ptrdiff_t,
            ptr::null(),
            Self::W as libc::c_int,
            Self::H as libc::c_int,
            0
          )
        };
      }
    }
Michael Bebenita's avatar
Michael Bebenita committed
265 266
    for y in 0..Self::H {
      for x in 0..Self::W {
267
        output[y * stride + x] = (128u32 << (bit_depth - 8)).as_();
Michael Bebenita's avatar
Michael Bebenita committed
268
      }
269
    }
Michael Bebenita's avatar
Michael Bebenita committed
270 271
  }

rzumer's avatar
rzumer committed
272
  #[cfg_attr(feature = "comparative_bench", inline(never))]
273
  fn pred_dc_left(output: &mut [T], stride: usize, _above: &[T], left: &[T]) {
Luca Barbato's avatar
Luca Barbato committed
274
    #[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
275 276 277 278 279 280
    {
      if size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
        return unsafe {
          rav1e_ipred_dc_left_avx2(
            output.as_mut_ptr() as *mut _,
            stride as libc::ptrdiff_t,
281
            left.as_ptr().add(Self::H) as *const _,
282 283 284 285 286 287 288
            Self::W as libc::c_int,
            Self::H as libc::c_int,
            0
          )
        };
      }
    }
289 290
    let sum = left[..Self::H].iter().fold(0u32, |acc, &v| { let v: u32 = v.into(); v + acc });
    let avg = ((sum + (Self::H >> 1) as u32) / Self::H as u32).as_();
291 292
    for line in output.chunks_mut(stride).take(Self::H) {
      line[..Self::W].iter_mut().for_each(|v| *v = avg);
293
    }
Michael Bebenita's avatar
Michael Bebenita committed
294 295
  }

rzumer's avatar
rzumer committed
296
  #[cfg_attr(feature = "comparative_bench", inline(never))]
297
  fn pred_dc_top(output: &mut [T], stride: usize, above: &[T], _left: &[T]) {
Luca Barbato's avatar
Luca Barbato committed
298
    #[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
299 300 301 302 303 304 305 306 307 308 309 310 311 312
    {
      if size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
        return unsafe {
          rav1e_ipred_dc_top_avx2(
            output.as_mut_ptr() as *mut _,
            stride as libc::ptrdiff_t,
            above.as_ptr().offset(-1) as *const _,
            Self::W as libc::c_int,
            Self::H as libc::c_int,
            0
          )
        };
      }
    }
313 314
    let sum = above[..Self::W].iter().fold(0u32, |acc, &v| { let v: u32 = v.into(); v + acc });
    let avg = ((sum + (Self::W >> 1) as u32) / Self::W as u32).as_();
315 316
    for line in output.chunks_mut(stride).take(Self::H) {
      line[..Self::W].iter_mut().for_each(|v| *v = avg);
317
    }
Michael Bebenita's avatar
Michael Bebenita committed
318
  }
319

rzumer's avatar
rzumer committed
320
  #[cfg_attr(feature = "comparative_bench", inline(never))]
321
  fn pred_h(output: &mut [T], stride: usize, left: &[T]) {
Luca Barbato's avatar
Luca Barbato committed
322
    #[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
323 324 325 326 327 328
    {
      if size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
        return unsafe {
          rav1e_ipred_h_avx2(
            output.as_mut_ptr() as *mut _,
            stride as libc::ptrdiff_t,
329
            left.as_ptr().add(Self::H) as *const _,
330 331 332 333 334 335 336
            Self::W as libc::c_int,
            Self::H as libc::c_int,
            0
          )
        };
      }
    }
337 338 339
    for (line, l) in
      output.chunks_mut(stride).zip(left[..Self::H].iter().rev())
    {
Michael Bebenita's avatar
Michael Bebenita committed
340 341 342
      for v in &mut line[..Self::W] {
        *v = *l;
      }
343
    }
Michael Bebenita's avatar
Michael Bebenita committed
344
  }
345

rzumer's avatar
rzumer committed
346
  #[cfg_attr(feature = "comparative_bench", inline(never))]
347
  fn pred_v(output: &mut [T], stride: usize, above: &[T]) {
Luca Barbato's avatar
Luca Barbato committed
348
    #[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
349 350 351 352 353 354 355 356 357 358 359 360 361 362
    {
      if size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
        return unsafe {
          rav1e_ipred_v_avx2(
            output.as_mut_ptr() as *mut _,
            stride as libc::ptrdiff_t,
            above.as_ptr().offset(-1) as *const _,
            Self::W as libc::c_int,
            Self::H as libc::c_int,
            0
          )
        };
      }
    }
Michael Bebenita's avatar
Michael Bebenita committed
363 364
    for line in output.chunks_mut(stride).take(Self::H) {
      line[..Self::W].clone_from_slice(&above[..Self::W])
Luc Trudeau's avatar
Luc Trudeau committed
365
    }
Michael Bebenita's avatar
Michael Bebenita committed
366 367
  }

rzumer's avatar
rzumer committed
368
  #[cfg_attr(feature = "comparative_bench", inline(never))]
Michael Bebenita's avatar
Michael Bebenita committed
369
  fn pred_paeth(
370 371
    output: &mut [T], stride: usize, above: &[T], left: &[T],
    above_left: T
Michael Bebenita's avatar
Michael Bebenita committed
372
  ) {
Luca Barbato's avatar
Luca Barbato committed
373
    #[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
374 375 376 377 378 379 380 381 382 383 384 385 386 387
    {
      if size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
        return unsafe {
          rav1e_ipred_paeth_avx2(
            output.as_mut_ptr() as *mut _,
            stride as libc::ptrdiff_t,
            above.as_ptr().offset(-1) as *const _,
            Self::W as libc::c_int,
            Self::H as libc::c_int,
            0
          )
        };
      }
    }
Michael Bebenita's avatar
Michael Bebenita committed
388 389 390
    for r in 0..Self::H {
      for c in 0..Self::W {
        // Top-left pixel is fixed in libaom
391
        let raw_top_left: i32 = above_left.into();
392
        let raw_left: i32 = left[Self::H - 1 - r].into();
393
        let raw_top: i32 = above[c].into();
Michael Bebenita's avatar
Michael Bebenita committed
394 395 396 397 398 399 400 401 402 403

        let p_base = raw_top + raw_left - raw_top_left;
        let p_left = (p_base - raw_left).abs();
        let p_top = (p_base - raw_top).abs();
        let p_top_left = (p_base - raw_top_left).abs();

        let output_index = r * stride + c;

        // Return nearest to base of left, top and top_left
        if p_left <= p_top && p_left <= p_top_left {
404
          output[output_index] = raw_left.as_();
Michael Bebenita's avatar
Michael Bebenita committed
405
        } else if p_top <= p_top_left {
406
          output[output_index] = raw_top.as_();
Michael Bebenita's avatar
Michael Bebenita committed
407
        } else {
408
          output[output_index] = raw_top_left.as_();
409
        }
Michael Bebenita's avatar
Michael Bebenita committed
410
      }
411
    }
Michael Bebenita's avatar
Michael Bebenita committed
412 413
  }

rzumer's avatar
rzumer committed
414
  #[cfg_attr(feature = "comparative_bench", inline(never))]
Michael Bebenita's avatar
Michael Bebenita committed
415
  fn pred_smooth(
416
    output: &mut [T], stride: usize, above: &[T], left: &[T]
Michael Bebenita's avatar
Michael Bebenita committed
417
  ) {
Luca Barbato's avatar
Luca Barbato committed
418
    #[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
419 420 421 422 423 424 425 426 427 428 429 430 431 432
    {
      if size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
        return unsafe {
          rav1e_ipred_smooth_avx2(
            output.as_mut_ptr() as *mut _,
            stride as libc::ptrdiff_t,
            above.as_ptr().offset(-1) as *const _,
            Self::W as libc::c_int,
            Self::H as libc::c_int,
            0
          )
        };
      }
    }
433
    let below_pred = left[0]; // estimated by bottom-left pixel
Michael Bebenita's avatar
Michael Bebenita committed
434 435 436 437 438
    let right_pred = above[Self::W - 1]; // estimated by top-right pixel
    let sm_weights_w = &sm_weight_arrays[Self::W..];
    let sm_weights_h = &sm_weight_arrays[Self::H..];

    let log2_scale = 1 + sm_weight_log2_scale;
439
    let scale = 1_u16 << sm_weight_log2_scale;
Michael Bebenita's avatar
Michael Bebenita committed
440 441 442 443 444 445 446 447 448 449

    // Weights sanity checks
    assert!((sm_weights_w[0] as u16) < scale);
    assert!((sm_weights_h[0] as u16) < scale);
    assert!((scale - sm_weights_w[Self::W - 1] as u16) < scale);
    assert!((scale - sm_weights_h[Self::H - 1] as u16) < scale);
    assert!(log2_scale as usize + size_of_val(&output[0]) < 31); // ensures no overflow when calculating predictor

    for r in 0..Self::H {
      for c in 0..Self::W {
450
        let pixels = [above[c], below_pred, left[Self::H - 1 - r], right_pred];
Michael Bebenita's avatar
Michael Bebenita committed
451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467

        let weights = [
          sm_weights_h[r] as u16,
          scale - sm_weights_h[r] as u16,
          sm_weights_w[c] as u16,
          scale - sm_weights_w[c] as u16
        ];

        assert!(
          scale >= (sm_weights_h[r] as u16)
            && scale >= (sm_weights_w[c] as u16)
        );

        // Sum up weighted pixels
        let mut this_pred: u32 = weights
          .iter()
          .zip(pixels.iter())
468
          .map(|(w, p)| { let p: u32 = (*p).into(); (*w as u32) * p })
Michael Bebenita's avatar
Michael Bebenita committed
469 470 471 472 473
          .sum();
        this_pred = (this_pred + (1 << (log2_scale - 1))) >> log2_scale;

        let output_index = r * stride + c;

474
        output[output_index] = this_pred.as_();
Michael Bebenita's avatar
Michael Bebenita committed
475
      }
Raphael Zumer's avatar
Raphael Zumer committed
476
    }
Michael Bebenita's avatar
Michael Bebenita committed
477
  }
478

rzumer's avatar
rzumer committed
479
  #[cfg_attr(feature = "comparative_bench", inline(never))]
Michael Bebenita's avatar
Michael Bebenita committed
480
  fn pred_smooth_h(
481
    output: &mut [T], stride: usize, above: &[T], left: &[T]
Michael Bebenita's avatar
Michael Bebenita committed
482
  ) {
Luca Barbato's avatar
Luca Barbato committed
483
    #[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
484 485 486 487 488 489 490 491 492 493 494 495 496 497
    {
      if size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
        return unsafe {
          rav1e_ipred_smooth_h_avx2(
            output.as_mut_ptr() as *mut _,
            stride as libc::ptrdiff_t,
            above.as_ptr().offset(-1) as *const _,
            Self::W as libc::c_int,
            Self::H as libc::c_int,
            0
          )
        };
      }
    }
Michael Bebenita's avatar
Michael Bebenita committed
498 499
    let right_pred = above[Self::W - 1]; // estimated by top-right pixel
    let sm_weights = &sm_weight_arrays[Self::W..];
500

Michael Bebenita's avatar
Michael Bebenita committed
501 502
    let log2_scale = sm_weight_log2_scale;
    let scale = 1_u16 << sm_weight_log2_scale;
503

Michael Bebenita's avatar
Michael Bebenita committed
504 505 506 507
    // Weights sanity checks
    assert!((sm_weights[0] as u16) < scale);
    assert!((scale - sm_weights[Self::W - 1] as u16) < scale);
    assert!(log2_scale as usize + size_of_val(&output[0]) < 31); // ensures no overflow when calculating predictor
508

Michael Bebenita's avatar
Michael Bebenita committed
509 510
    for r in 0..Self::H {
      for c in 0..Self::W {
511
        let pixels = [left[Self::H - 1 - r], right_pred];
Michael Bebenita's avatar
Michael Bebenita committed
512
        let weights = [sm_weights[c] as u16, scale - sm_weights[c] as u16];
513

Michael Bebenita's avatar
Michael Bebenita committed
514
        assert!(scale >= sm_weights[c] as u16);
515

Michael Bebenita's avatar
Michael Bebenita committed
516 517 518
        let mut this_pred: u32 = weights
          .iter()
          .zip(pixels.iter())
519
          .map(|(w, p)| { let p: u32 = (*p).into(); (*w as u32) * p })
Michael Bebenita's avatar
Michael Bebenita committed
520 521
          .sum();
        this_pred = (this_pred + (1 << (log2_scale - 1))) >> log2_scale;
522

Michael Bebenita's avatar
Michael Bebenita committed
523
        let output_index = r * stride + c;
524

525
        output[output_index] = this_pred.as_();
Michael Bebenita's avatar
Michael Bebenita committed
526
      }
527
    }
Michael Bebenita's avatar
Michael Bebenita committed
528
  }
529

rzumer's avatar
rzumer committed
530
  #[cfg_attr(feature = "comparative_bench", inline(never))]
Michael Bebenita's avatar
Michael Bebenita committed
531
  fn pred_smooth_v(
532
    output: &mut [T], stride: usize, above: &[T], left: &[T]
Michael Bebenita's avatar
Michael Bebenita committed
533
  ) {
Luca Barbato's avatar
Luca Barbato committed
534
    #[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
535 536 537 538 539 540 541 542 543 544 545 546 547 548
    {
      if size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
        return unsafe {
          rav1e_ipred_smooth_v_avx2(
            output.as_mut_ptr() as *mut _,
            stride as libc::ptrdiff_t,
            above.as_ptr().offset(-1) as *const _,
            Self::W as libc::c_int,
            Self::H as libc::c_int,
            0
          )
        };
      }
    }
549
    let below_pred = left[0]; // estimated by bottom-left pixel
Michael Bebenita's avatar
Michael Bebenita committed
550
    let sm_weights = &sm_weight_arrays[Self::H..];
551

Michael Bebenita's avatar
Michael Bebenita committed
552 553
    let log2_scale = sm_weight_log2_scale;
    let scale = 1_u16 << sm_weight_log2_scale;
554

Michael Bebenita's avatar
Michael Bebenita committed
555 556 557 558
    // Weights sanity checks
    assert!((sm_weights[0] as u16) < scale);
    assert!((scale - sm_weights[Self::H - 1] as u16) < scale);
    assert!(log2_scale as usize + size_of_val(&output[0]) < 31); // ensures no overflow when calculating predictor
559

Michael Bebenita's avatar
Michael Bebenita committed
560 561 562 563
    for r in 0..Self::H {
      for c in 0..Self::W {
        let pixels = [above[c], below_pred];
        let weights = [sm_weights[r] as u16, scale - sm_weights[r] as u16];
564

Michael Bebenita's avatar
Michael Bebenita committed
565
        assert!(scale >= sm_weights[r] as u16);
566

Michael Bebenita's avatar
Michael Bebenita committed
567 568 569
        let mut this_pred: u32 = weights
          .iter()
          .zip(pixels.iter())
570
          .map(|(w, p)| { let p: u32 = (*p).into(); (*w as u32) * p })
Michael Bebenita's avatar
Michael Bebenita committed
571 572
          .sum();
        this_pred = (this_pred + (1 << (log2_scale - 1))) >> log2_scale;
573

Michael Bebenita's avatar
Michael Bebenita committed
574
        let output_index = r * stride + c;
575

576
        output[output_index] = this_pred.as_();
Michael Bebenita's avatar
Michael Bebenita committed
577
      }
578
    }
Michael Bebenita's avatar
Michael Bebenita committed
579
  }
580

581 582 583
  #[target_feature(enable = "ssse3")]
  #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
  unsafe fn pred_cfl_ssse3(
584
    output: &mut [T], stride: usize, ac: &[i16], alpha: i16,
585 586 587 588
    bit_depth: usize
  ) {
    let alpha_sign = _mm_set1_epi16(alpha);
    let alpha_q12 = _mm_slli_epi16(_mm_abs_epi16(alpha_sign), 9);
589 590
    let dc_scalar: u32 = (*output.as_ptr()).into();
    let dc_q0 = _mm_set1_epi16(dc_scalar as i16);
591 592 593
    let max = _mm_set1_epi16((1 << bit_depth) - 1);

    for j in 0..Self::H {
594
      let luma = ac.as_ptr().add(Self::W * j);
595
      let line = output.as_mut_ptr().add(stride * j);
596 597

      let mut i = 0isize;
598
      let mut last = _mm_setzero_si128();
599 600 601
      while (i as usize) < Self::W {
        let ac_q3 = _mm_loadu_si128(luma.offset(i) as *const _);
        let ac_sign = _mm_sign_epi16(alpha_sign, ac_q3);
602 603
        let abs_scaled_luma_q0 =
          _mm_mulhrs_epi16(_mm_abs_epi16(ac_q3), alpha_q12);
604 605
        let scaled_luma_q0 = _mm_sign_epi16(abs_scaled_luma_q0, ac_sign);
        let pred = _mm_add_epi16(scaled_luma_q0, dc_q0);
606 607 608 609 610 611 612 613 614 615 616 617 618 619
        if size_of::<T>() == 1 {
          if Self::W < 16 {
            let res = _mm_packus_epi16(pred, pred);
            if Self::W == 4 {
               *(line.offset(i) as *mut i32) = _mm_cvtsi128_si32(res);
            } else {
              _mm_storel_epi64(line.offset(i) as *mut _, res);
            }
          } else if (i & 15) == 0 {
            last = pred;
          } else {
            let res = _mm_packus_epi16(last, pred);
            _mm_storeu_si128(line.offset(i - 8) as *mut _, res);
          }
620
        } else {
621 622 623 624 625 626
          let res = _mm_min_epi16(max, _mm_max_epi16(pred, _mm_setzero_si128()));
          if Self::W == 4 {
            _mm_storel_epi64(line.offset(i) as *mut _, res);
          } else {
            _mm_storeu_si128(line.offset(i) as *mut _, res);
          }
627 628 629 630 631 632
        }
        i += 8;
      }
    }
  }

633 634
  fn pred_cfl_inner(
    output: &mut [T], stride: usize, ac: &[i16], alpha: i16, bit_depth: usize
635
  ) {
636 637 638
    if alpha == 0 {
      return;
    }
639 640 641 642 643
    assert!(32 >= Self::W);
    assert!(ac.len() >= 32 * (Self::H - 1) + Self::W);
    assert!(stride >= Self::W);
    assert!(output.len() >= stride * (Self::H - 1) + Self::W);
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
644 645 646 647 648 649
    {
      if is_x86_feature_detected!("ssse3") {
        return unsafe {
          Self::pred_cfl_ssse3(output, stride, ac, alpha, bit_depth)
        };
      }
650 651
    }

652
    let sample_max = (1 << bit_depth) - 1;
653
    let avg: i32 = output[0].into();
654 655

    for (line, luma) in
656
      output.chunks_mut(stride).zip(ac.chunks(Self::W)).take(Self::H)
657 658 659
    {
      for (v, &l) in line[..Self::W].iter_mut().zip(luma[..Self::W].iter()) {
        *v =
660
          (avg + get_scaled_luma_q0(alpha, l)).max(0).min(sample_max).as_();
661 662 663
      }
    }
  }
664 665 666 667 668 669

  #[cfg_attr(feature = "comparative_bench", inline(never))]
  fn pred_cfl(
    output: &mut [T], stride: usize, ac: &[i16], alpha: i16, bit_depth: usize,
    above: &[T], left: &[T]
  ) {
670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685
    #[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
    {
      if size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
        return unsafe {
          rav1e_ipred_cfl_avx2(
            output.as_mut_ptr() as *mut _,
            stride as libc::ptrdiff_t,
            above.as_ptr().offset(-1) as *const _,
            Self::W as libc::c_int,
            Self::H as libc::c_int,
            ac.as_ptr() as *const _,
            alpha as libc::c_int
          )
        }
      }
    }
686 687 688 689 690 691 692
    Self::pred_dc(output, stride, above, left);
    Self::pred_cfl_inner(output, stride, &ac, alpha, bit_depth);
  }

  fn pred_cfl_128(
    output: &mut [T], stride: usize, ac: &[i16], alpha: i16, bit_depth: usize
  ) {
693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708
    #[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
    {
      if size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
        return unsafe {
          rav1e_ipred_cfl_128_avx2(
            output.as_mut_ptr() as *mut _,
            stride as libc::ptrdiff_t,
            ptr::null(),
            Self::W as libc::c_int,
            Self::H as libc::c_int,
            ac.as_ptr() as *const _,
            alpha as libc::c_int
          )
        }
      }
    }
709 710 711 712 713 714 715 716
    Self::pred_dc_128(output, stride, bit_depth);
    Self::pred_cfl_inner(output, stride, &ac, alpha, bit_depth);
  }

  fn pred_cfl_left(
    output: &mut [T], stride: usize, ac: &[i16], alpha: i16, bit_depth: usize,
    above: &[T], left: &[T]
  ) {
717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732
    #[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
    {
      if size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
        return unsafe {
          rav1e_ipred_cfl_left_avx2(
            output.as_mut_ptr() as *mut _,
            stride as libc::ptrdiff_t,
            above.as_ptr().offset(-1) as *const _,
            Self::W as libc::c_int,
            Self::H as libc::c_int,
            ac.as_ptr() as *const _,
            alpha as libc::c_int
          )
        }
      }
    }
733 734 735 736 737 738 739 740
    Self::pred_dc_left(output, stride, above, left);
    Self::pred_cfl_inner(output, stride, &ac, alpha, bit_depth);
  }

  fn pred_cfl_top(
    output: &mut [T], stride: usize, ac: &[i16], alpha: i16, bit_depth: usize,
    above: &[T], left: &[T]
  ) {
741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756
    #[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
    {
      if size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
        return unsafe {
          rav1e_ipred_cfl_top_avx2(
            output.as_mut_ptr() as *mut _,
            stride as libc::ptrdiff_t,
            above.as_ptr().offset(-1) as *const _,
            Self::W as libc::c_int,
            Self::H as libc::c_int,
            ac.as_ptr() as *const _,
            alpha as libc::c_int
          )
        }
      }
    }
757 758 759
    Self::pred_dc_top(output, stride, above, left);
    Self::pred_cfl_inner(output, stride, &ac, alpha, bit_depth);
  }
Luc Trudeau's avatar
Luc Trudeau committed
760 761
}

762
/// Trait for block sizes usable with inter prediction. It requires `Dim`
/// and currently declares no items of its own.
pub trait Inter: Dim {}
763

764
#[cfg(all(test, feature = "aom"))]
765
pub mod test {
Michael Bebenita's avatar
Michael Bebenita committed
766
  use super::*;
Thomas Daede's avatar
Thomas Daede committed
767
  use rand::{ChaChaRng, Rng, SeedableRng};
Raphaël Zumer's avatar
Raphaël Zumer committed
768
  use util::*;
Michael Bebenita's avatar
Michael Bebenita committed
769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784

  // Number of iterations run by the randomized tests in this module.
  const MAX_ITER: usize = 50000;

  // Builds the inputs shared by the randomized predictor comparisons.
  //
  // Returns `(above, left, o1, o2)`: two edges of 32 random samples each
  // (drawn from `ra`, `above` first) and two zero-filled 32x32 output
  // buffers.
  fn setup_pred(
    ra: &mut ChaChaRng
  ) -> (Vec<u16>, Vec<u16>, Vec<u16>, Vec<u16>) {
    let above: Vec<u16> = (0..32).map(|_| ra.gen()).collect();
    let left: Vec<u16> = (0..32).map(|_| ra.gen()).collect();

    let blank = || vec![0u16; 32 * 32];

    (above, left, blank(), blank())
  }

785 786 787 788 789 790 791 792 793
  // Declares the external predictor `$aom_fn` and generates a safe wrapper
  // `$fn_4x4` around it. The wrapper always requests a 4x4 block with a bit
  // depth argument of 8, and hands `left` over in reversed order.
  macro_rules! wrap_aom_pred_fn {
    ($fn_4x4:ident, $aom_fn:ident) => {
      extern {
        fn $aom_fn(
          dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int,
          bh: libc::c_int, above: *const u16, left: *const u16,
          bd: libc::c_int
        );
      }

      fn $fn_4x4(
        output: &mut [u16], stride: usize, above: &[u16], left: &[u16]
      ) {
        // The external function receives the left edge in the opposite
        // order from the one used by the Rust predictors.
        let flipped: Vec<u16> = left.iter().rev().cloned().collect();
        let dst = output.as_mut_ptr();
        let pitch = stride as libc::ptrdiff_t;
        unsafe {
          $aom_fn(dst, pitch, 4, 4, above.as_ptr(), flipped.as_ptr(), 8);
        }
      }
    };
  }

815 816 817 818 819 820 821 822 823
  // One 4x4 wrapper per external high-bit-depth predictor; each wrapper is
  // compared against the matching `Block4x4` method by the `do_*_pred`
  // helpers.
  wrap_aom_pred_fn!(pred_dc_4x4, highbd_dc_predictor);
  wrap_aom_pred_fn!(pred_dc_left_4x4, highbd_dc_left_predictor);
  wrap_aom_pred_fn!(pred_dc_top_4x4, highbd_dc_top_predictor);
  wrap_aom_pred_fn!(pred_h_4x4, highbd_h_predictor);
  wrap_aom_pred_fn!(pred_v_4x4, highbd_v_predictor);
  wrap_aom_pred_fn!(pred_paeth_4x4, highbd_paeth_predictor);
  wrap_aom_pred_fn!(pred_smooth_4x4, highbd_smooth_predictor);
  wrap_aom_pred_fn!(pred_smooth_h_4x4, highbd_smooth_h_predictor);
  wrap_aom_pred_fn!(pred_smooth_v_4x4, highbd_smooth_v_predictor);
Michael Bebenita's avatar
Michael Bebenita committed
824

825 826 827 828 829 830
  // External chroma-from-luma predictor used as the reference in
  // `pred_cfl_4x4`.
  extern {
    fn cfl_predict_hbd_c(
      ac_buf_q3: *const i16,
      dst: *mut u16,
      stride: libc::ptrdiff_t,
      alpha_q3: libc::c_int,
      bd: libc::c_int,
      bw: libc::c_int,
      bh: libc::c_int
    );
  }
832

833 834 835
  pub fn pred_cfl_4x4(
    output: &mut [u16], stride: usize, ac: &[i16], alpha: i16, bd: i32
  ) {
836 837 838 839
    let mut ac32 = [0; 4*32];
    for (l32, l) in ac32.chunks_mut(32).zip(ac.chunks(4).take(4)) {
        l32[..4].copy_from_slice(&l);
    }
840 841
    unsafe {
      cfl_predict_hbd_c(
842
        ac32.as_ptr(),
843 844 845 846 847 848 849 850 851 852
        output.as_mut_ptr(),
        stride as libc::ptrdiff_t,
        alpha as libc::c_int,
        bd,
        4,
        4
      );
    }
  }

Michael Bebenita's avatar
Michael Bebenita committed
853 854
  fn do_dc_pred(ra: &mut ChaChaRng) -> (Vec<u16>, Vec<u16>) {
    let (above, left, mut o1, mut o2) = setup_pred(ra);
Luca Barbato's avatar
Luca Barbato committed
855

Michael Bebenita's avatar
Michael Bebenita committed
856 857
    pred_dc_4x4(&mut o1, 32, &above[..4], &left[..4]);
    Block4x4::pred_dc(&mut o2, 32, &above[..4], &left[..4]);
Luca Barbato's avatar
Luca Barbato committed
858

Michael Bebenita's avatar
Michael Bebenita committed
859 860
    (o1, o2)
  }
Luca Barbato's avatar
Luca Barbato committed
861

862 863 864 865 866 867 868 869 870
  // Runs left-only DC prediction on fresh random edges through both
  // implementations. Returns `(reference, actual)`: the buffer written by
  // the external wrapper `pred_dc_left_4x4` and the one written by
  // `Block4x4::pred_dc_left`.
  fn do_dc_left_pred(ra: &mut ChaChaRng) -> (Vec<u16>, Vec<u16>) {
    let (above, left, mut reference, mut actual) = setup_pred(ra);
    let (top, side) = (&above[..4], &left[..4]);

    pred_dc_left_4x4(&mut reference, 32, top, side);
    Block4x4::pred_dc_left(&mut actual, 32, top, side);

    (reference, actual)
  }

871 872 873 874 875 876 877 878 879
  // Runs top-only DC prediction on fresh random edges through both
  // implementations. Returns `(reference, actual)`: the buffer written by
  // the external wrapper `pred_dc_top_4x4` and the one written by
  // `Block4x4::pred_dc_top`.
  fn do_dc_top_pred(ra: &mut ChaChaRng) -> (Vec<u16>, Vec<u16>) {
    let (above, left, mut reference, mut actual) = setup_pred(ra);
    let (top, side) = (&above[..4], &left[..4]);

    pred_dc_top_4x4(&mut reference, 32, top, side);
    Block4x4::pred_dc_top(&mut actual, 32, top, side);

    (reference, actual)
  }

Michael Bebenita's avatar
Michael Bebenita committed
880 881
  fn do_h_pred(ra: &mut ChaChaRng) -> (Vec<u16>, Vec<u16>) {
    let (above, left, mut o1, mut o2) = setup_pred(ra);
882

Michael Bebenita's avatar
Michael Bebenita committed
883 884
    pred_h_4x4(&mut o1, 32, &above[..4], &left[..4]);
    Block4x4::pred_h(&mut o2, 32, &left[..4]);
885

Michael Bebenita's avatar
Michael Bebenita committed
886 887
    (o1, o2)
  }
888

Michael Bebenita's avatar
Michael Bebenita committed
889 890
  fn do_v_pred(ra: &mut ChaChaRng) -> (Vec<u16>, Vec<u16>) {
    let (above, left, mut o1, mut o2) = setup_pred(ra);
Luc Trudeau's avatar
Luc Trudeau committed
891

Michael Bebenita's avatar
Michael Bebenita committed
892 893
    pred_v_4x4(&mut o1, 32, &above[..4], &left[..4]);
    Block4x4::pred_v(&mut o2, 32, &above[..4]);
Luc Trudeau's avatar
Luc Trudeau committed
894

Michael Bebenita's avatar
Michael Bebenita committed
895 896
    (o1, o2)
  }
Luc Trudeau's avatar
Luc Trudeau committed
897

Michael Bebenita's avatar
Michael Bebenita committed
898 899 900
  fn do_paeth_pred(ra: &mut ChaChaRng) -> (Vec<u16>, Vec<u16>) {
    let (above, left, mut o1, mut o2) = setup_pred(ra);
    let above_left = unsafe { *above.as_ptr().offset(-1) };
901

Michael Bebenita's avatar
Michael Bebenita committed
902 903
    pred_paeth_4x4(&mut o1, 32, &above[..4], &left[..4]);
    Block4x4::pred_paeth(&mut o2, 32, &above[..4], &left[..4], above_left);
904

Michael Bebenita's avatar
Michael Bebenita committed
905 906
    (o1, o2)
  }
907

Michael Bebenita's avatar
Michael Bebenita committed
908 909
  fn do_smooth_pred(ra: &mut ChaChaRng) -> (Vec<u16>, Vec<u16>) {
    let (above, left, mut o1, mut o2) = setup_pred(ra);
Raphael Zumer's avatar
Raphael Zumer committed
910

Michael Bebenita's avatar
Michael Bebenita committed
911
    pred_smooth_4x4(&mut o1, 32, &above[..4], &left[..4]);
rzumer's avatar
rzumer committed
912
    Block4x4::pred_smooth(&mut o2, 32, &above[..4], &left[..4]);
Raphael Zumer's avatar
Raphael Zumer committed
913

Michael Bebenita's avatar
Michael Bebenita committed
914 915
    (o1, o2)
  }
Raphael Zumer's avatar
Raphael Zumer committed
916

Michael Bebenita's avatar
Michael Bebenita committed
917 918
  fn do_smooth_h_pred(ra: &mut ChaChaRng) -> (Vec<u16>, Vec<u16>) {
    let (above, left, mut o1, mut o2) = setup_pred(ra);
919

Michael Bebenita's avatar
Michael Bebenita committed
920
    pred_smooth_h_4x4(&mut o1, 32, &above[..4], &left[..4]);
rzumer's avatar
rzumer committed
921
    Block4x4::pred_smooth_h(&mut o2, 32, &above[..4], &left[..4]);
922

Michael Bebenita's avatar
Michael Bebenita committed
923 924
    (o1, o2)
  }
925

Michael Bebenita's avatar
Michael Bebenita committed
926 927
  fn do_smooth_v_pred(ra: &mut ChaChaRng) -> (Vec<u16>, Vec<u16>) {
    let (above, left, mut o1, mut o2) = setup_pred(ra);
928

Michael Bebenita's avatar
Michael Bebenita committed
929
    pred_smooth_v_4x4(&mut o1, 32, &above[..4], &left[..4]);
rzumer's avatar
rzumer committed
930
    Block4x4::pred_smooth_v(&mut o2, 32, &above[..4], &left[..4]);
931

Michael Bebenita's avatar
Michael Bebenita committed
932 933
    (o1, o2)
  }
934

935
  fn setup_cfl_pred(
936
    ra: &mut ChaChaRng, bit_depth: usize
937 938 939
  ) -> (Vec<u16>, Vec<u16>, Vec<i16>, i16, Vec<u16>, Vec<u16>) {
    let o1 = vec![0u16; 32 * 32];
    let o2 = vec![0u16; 32 * 32];
940
    let max: u16 = (1 << bit_depth) - 1;
941 942 943 944
    let above: Vec<u16> =
      (0..32).map(|_| ra.gen()).map(|v: u16| v & max).collect();
    let left: Vec<u16> =
      (0..32).map(|_| ra.gen()).map(|v: u16| v & max).collect();
945
    let luma_max: i16 = (1 << (bit_depth + 3)) - 1;
946 947 948 949
    let ac: Vec<i16> = (0..(32 * 32))
      .map(|_| ra.gen())
      .map(|v: i16| (v & luma_max) - (luma_max >> 1))
      .collect();
950 951 952 953 954 955
    let alpha = -1 as i16;

    (above, left, ac, alpha, o1, o2)
  }

  fn do_cfl_pred(ra: &mut ChaChaRng) -> (Vec<u16>, Vec<u16>) {
956
    let (above, left, ac, alpha, mut o1, mut o2) = setup_cfl_pred(ra, 8);
957 958 959

    pred_dc_4x4(&mut o1, 32, &above[..4], &left[..4]);
    pred_cfl_4x4(&mut o1, 32, &ac, alpha, 8);
960 961

    Block4x4::pred_cfl(&mut o2, 32, &ac, alpha, 8, &above[..4], &left[..4]);
962 963 964 965

    (o1, o2)
  }

Michael Bebenita's avatar
Michael Bebenita committed
966 967 968 969 970
  // Asserts that, within each of the first four 32-sample rows of `o2`,
  // the first four samples are equal to one another. Samples outside those
  // 4x4 positions are ignored. Panics on the first adjacent pair that
  // differs, or if one of those rows holds fewer than four samples.
  fn assert_same(o2: Vec<u16>) {
    o2.chunks(32).take(4).for_each(|row| {
      row[..4]
        .windows(2)
        .for_each(|pair| assert_eq!(pair[0], pair[1]));
    });
  }
973

Michael Bebenita's avatar
Michael Bebenita committed
974 975
  // Randomized comparison: in every iteration each predictor pair is run
  // once, in the order listed, and both output buffers must be identical.
  #[test]
  fn pred_matches() {
    let checks: &[fn(&mut ChaChaRng) -> (Vec<u16>, Vec<u16>)] = &[
      do_dc_pred,
      do_dc_left_pred,
      do_dc_top_pred,
      do_h_pred,
      do_v_pred,
      do_paeth_pred,
      do_smooth_pred,
      do_smooth_h_pred,
      do_smooth_v_pred,
      do_cfl_pred
    ];

    let mut ra = ChaChaRng::from_seed([0; 32]);
    for _ in 0..MAX_ITER {
      for check in checks {
        let (o1, o2) = check(&mut ra);
        assert_eq!(o1, o2);
      }
    }
  }
Luca Barbato's avatar
Luca Barbato committed
1009

David Michael Barr's avatar
David Michael Barr committed
1010 1011
  // Checks every 8-bit `Block4x4` predictor against fixed expected output.
  //
  // The edge buffer is filled so that entry `i` holds
  // `(i + 32).saturating_sub(MAX_TX_SIZE)`; `left` is the four entries just
  // before index `MAX_TX_SIZE`, `top_left` is the entry at that index and
  // `above` is the four entries right after it.
  #[test]
  fn pred_matches_u8() {
    let mut edge_buf: AlignedArray<[u8; 2 * MAX_TX_SIZE + 1]> =
      UninitializedAlignedArray();
    // Every entry is written here before anything is read back.
    for (i, sample) in edge_buf.array.iter_mut().enumerate() {
      *sample = (i + 32).saturating_sub(MAX_TX_SIZE).as_();
    }
    let left = &edge_buf.array[MAX_TX_SIZE - 4..MAX_TX_SIZE];
    let above = &edge_buf.array[MAX_TX_SIZE + 1..MAX_TX_SIZE + 5];
    let top_left = edge_buf.array[MAX_TX_SIZE];

    let stride = 4;
    let mut output = vec![0u8; 4 * 4];

    // DC family: every sample of the block carries the same value.
    Block4x4::pred_dc(&mut output, stride, above, left);
    assert_eq!(output, [32u8; 16]);

    Block4x4::pred_dc_top(&mut output, stride, above, left);
    assert_eq!(output, [35u8; 16]);

    Block4x4::pred_dc_left(&mut output, stride, above, left);
    assert_eq!(output, [30u8; 16]);

    Block4x4::pred_dc_128(&mut output, stride, 8);
    assert_eq!(output, [128u8; 16]);

    // Directional and smooth predictors: expected blocks, one row per line.
    Block4x4::pred_v(&mut output, stride, above);
    assert_eq!(
      output,
      [
        33, 34, 35, 36,
        33, 34, 35, 36,
        33, 34, 35, 36,
        33, 34, 35, 36
      ]
    );

    Block4x4::pred_h(&mut output, stride, left);
    assert_eq!(
      output,
      [
        31, 31, 31, 31,
        30, 30, 30, 30,
        29, 29, 29, 29,
        28, 28, 28, 28
      ]
    );

    Block4x4::pred_paeth(&mut output, stride, above, left, top_left);
    assert_eq!(
      output,
      [
        32, 34, 35, 36,
        30, 32, 32, 36,
        29, 32, 32, 32,
        28, 28, 32, 32
      ]
    );

    Block4x4::pred_smooth(&mut output, stride, above, left);
    assert_eq!(
      output,
      [
        32, 34, 35, 35,
        30, 32, 33, 34,
        29, 31, 32, 32,
        29, 30, 32, 32
      ]
    );

    Block4x4::pred_smooth_h(&mut output, stride, above, left);
    assert_eq!(
      output,
      [
        31, 33, 34, 35,
        30, 33, 34, 35,
        29, 32, 34, 34,
        28, 31, 33, 34
      ]
    );

    Block4x4::pred_smooth_v(&mut output, stride, above, left);
    assert_eq!(
      output,
      [
        33, 34, 35, 36,
        31, 31, 32, 33,
        30, 30, 30, 31,
        29, 30, 30, 30
      ]
    );
  }

Michael Bebenita's avatar
Michael Bebenita committed
1073 1074
  // Randomized check that the `Block4x4::pred_dc` output from `do_dc_pred`
  // passes `assert_same` for every generated input.
  #[test]
  fn pred_same() {
    let mut ra = ChaChaRng::from_seed([0; 32]);
    (0..MAX_ITER).for_each(|_| {
      let (_, actual) = do_dc_pred(&mut ra);
      assert_same(actual)
    });
  }
Luca Barbato's avatar
Luca Barbato committed
1082

Michael Bebenita's avatar
Michael Bebenita committed
1083 1084 1085 1086 1087
  #[test]
  fn pred_max() {
    let max12bit = 4096 - 1;
    let above = [max12bit; 32];
    let left = [max12bit; 32];
Luca Barbato's avatar
Luca Barbato committed
1088

Michael Bebenita's avatar
Michael Bebenita committed
1089
    let mut o = vec![0u16; 32 * 32];
Luca Barbato's avatar
Luca Barbato committed
1090

Michael Bebenita's avatar
Michael Bebenita committed
1091
    Block4x4::pred_dc(&mut o, 32, &above[..4], &left[..4]);
Luca Barbato's avatar
Luca Barbato committed
1092

Michael Bebenita's avatar
Michael Bebenita committed
1093 1094 1095 1096 1097
    for l in o.chunks(32).take(4) {
      for v in l[..4].iter() {
        assert_eq!(*v, max12bit);
      }
    }
1098

Michael Bebenita's avatar
Michael Bebenita committed
1099
    Block4x4::pred_h(&mut o, 32, &left[..4]);
1100

Michael Bebenita's avatar
Michael Bebenita committed
1101 1102 1103 1104 1105
    for l in o.chunks(32).take(4) {
      for v in l[..4].iter() {
        assert_eq!(*v, max12bit);
      }
    }
Luc Trudeau's avatar
Luc Trudeau committed
1106

Michael Bebenita's avatar
Michael Bebenita committed
1107
    Block4x4::pred_v(&mut o, 32, &above[..4]);
Luc Trudeau's avatar
Luc Trudeau committed
1108

Michael Bebenita's avatar
Michael Bebenita committed
1109 1110 1111 1112 1113
    for l in o.chunks(32).take(4) {
      for v in l[..4].iter() {
        assert_eq!(*v, max12bit);
      }
    }
1114

Michael Bebenita's avatar
Michael Bebenita committed
1115
    let above_left = unsafe { *above.as_ptr().offset(-1) };
1116

Michael Bebenita's avatar
Michael Bebenita committed
1117
    Block4x4::pred_paeth(&mut o, 32, &above[..4], &left[..4], above_left);
1118

Michael Bebenita's avatar
Michael Bebenita committed
1119 1120 1121 1122 1123
    for l in o.chunks(32).take(4) {
      for v in l[..4].iter() {
        assert_eq!(*v, max12bit);
      }
    }
Raphael Zumer's avatar
Raphael Zumer committed
1124