predict.rs 38.5 KB
Newer Older
1 2 3 4 5 6 7 8 9
// Copyright (c) 2017-2018, The rav1e contributors. All rights reserved
//
// This source code is subject to the terms of the BSD 2 Clause License and
// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
// was not distributed with this source code in the LICENSE file, you can
// obtain it at www.aomedia.org/license/software. If the Alliance for Open
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.

Raphael Zumer's avatar
Raphael Zumer committed
10
#![allow(non_upper_case_globals)]
Michael Bebenita's avatar
Michael Bebenita committed
11 12
#![cfg_attr(feature = "cargo-clippy", allow(cast_lossless))]
#![cfg_attr(feature = "cargo-clippy", allow(needless_range_loop))]
Raphael Zumer's avatar
Raphael Zumer committed
13

Raphaël Zumer's avatar
Raphaël Zumer committed
14
use context::{INTRA_MODES, MAX_TX_SIZE};
Michael Bebenita's avatar
Michael Bebenita committed
15
use partition::*;
16
use util::*;
17

Raphaël Zumer's avatar
Raphaël Zumer committed
18 19 20
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
use libc;
use num_traits::*;
21 22 23 24
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
Raphaël Zumer's avatar
Raphaël Zumer committed
25 26 27
use std::mem::*;
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
use std::ptr;
28

29
pub static RAV1E_INTRA_MODES: &'static [PredictionMode] = &[
Michael Bebenita's avatar
Michael Bebenita committed
30 31 32 33 34 35
  PredictionMode::DC_PRED,
  PredictionMode::H_PRED,
  PredictionMode::V_PRED,
  PredictionMode::SMOOTH_PRED,
  PredictionMode::SMOOTH_H_PRED,
  PredictionMode::SMOOTH_V_PRED,
Frank Bossen's avatar
Frank Bossen committed
36 37 38 39 40 41 42
  PredictionMode::PAETH_PRED,
  PredictionMode::D45_PRED,
  PredictionMode::D135_PRED,
  PredictionMode::D117_PRED,
  PredictionMode::D153_PRED,
  PredictionMode::D207_PRED,
  PredictionMode::D63_PRED,
43 44
];

45
// Intra prediction modes tested at high speed levels
Michael Bebenita's avatar
Michael Bebenita committed
46
#[cfg_attr(rustfmt, rustfmt_skip)]
47 48 49 50 51 52
pub static RAV1E_INTRA_MODES_MINIMAL: &'static [PredictionMode] = &[
    PredictionMode::DC_PRED,
    PredictionMode::H_PRED,
    PredictionMode::V_PRED
];

53
pub static RAV1E_INTER_MODES_MINIMAL: &'static [PredictionMode] = &[
54
  PredictionMode::NEARESTMV
55 56
];

fbossen's avatar
fbossen committed
57 58 59 60 61 62 63 64
/// Compound (two-reference) inter prediction modes listed by this module.
pub static RAV1E_INTER_COMPOUND_MODES: &[PredictionMode] = &[
  PredictionMode::GLOBAL_GLOBALMV,
  PredictionMode::NEAREST_NEARESTMV,
  PredictionMode::NEW_NEWMV,
  PredictionMode::NEAREST_NEWMV,
  PredictionMode::NEW_NEARESTMV,
];

Raphael Zumer's avatar
Raphael Zumer committed
65 66 67 68
// Weights are quadratic from '1' to '1 / block_size', scaled by 2^sm_weight_log2_scale.
// This constant is the log2 of that fixed-point scale: the smooth predictors
// build `scale = 1 << sm_weight_log2_scale` (256) and shift their weighted
// sums back down by it (plus one extra bit when two axes are blended).
const sm_weight_log2_scale: u8 = 8;

// Smooth predictor weights
Michael Bebenita's avatar
Michael Bebenita committed
69
#[cfg_attr(rustfmt, rustfmt_skip)]
Raphael Zumer's avatar
Raphael Zumer committed
70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
static sm_weight_arrays: [u8; 2 * MAX_TX_SIZE] = [
    // Unused, because we always offset by bs, which is at least 2.
    0, 0,
    // bs = 2
    255, 128,
    // bs = 4
    255, 149, 85, 64,
    // bs = 8
    255, 197, 146, 105, 73, 50, 37, 32,
    // bs = 16
    255, 225, 196, 170, 145, 123, 102, 84, 68, 54, 43, 33, 26, 20, 17, 16,
    // bs = 32
    255, 240, 225, 210, 196, 182, 169, 157, 145, 133, 122, 111, 101, 92, 83, 74,
    66, 59, 52, 45, 39, 34, 29, 25, 21, 17, 14, 12, 10, 9, 8, 8,
    // bs = 64
85
    255, 248, 240, 233, 225, 218, 210, 203, 196, 189, 182, 176, 169, 163, 156,
Raphael Zumer's avatar
Raphael Zumer committed
86 87
    150, 144, 138, 133, 127, 121, 116, 111, 106, 101, 96, 91, 86, 82, 77, 73, 69,
    65, 61, 57, 54, 50, 47, 44, 41, 38, 35, 32, 29, 27, 25, 22, 20, 18, 16, 15,
88
    13, 12, 10, 9, 8, 7, 6, 6, 5, 5, 4, 4, 4,
Raphael Zumer's avatar
Raphael Zumer committed
89 90
];

Yushin Cho's avatar
Yushin Cho committed
91 92 93 94 95 96 97 98 99 100 101
// Bit flags naming the neighbouring edges an intra mode reads.
// Bit 0 is not assigned to any flag.
const NEED_LEFT: u8 = 0x02;
const NEED_ABOVE: u8 = 0x04;
const NEED_ABOVERIGHT: u8 = 0x08;
const NEED_ABOVELEFT: u8 = 0x10;
const NEED_BOTTOMLEFT: u8 = 0x20;

/*const INTRA_EDGE_FILT: usize = 3;
const INTRA_EDGE_TAPS: usize = 5;
const MAX_UPSAMPLE_SZ: usize = 16;*/

pub static extend_modes: [u8; INTRA_MODES] = [
102 103 104 105 106 107 108 109 110 111 112 113 114
  NEED_ABOVE | NEED_LEFT,                  // DC
  NEED_ABOVE,                              // V
  NEED_LEFT,                               // H
  NEED_ABOVE | NEED_ABOVERIGHT,            // D45
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // D135
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // D113
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // D157
  NEED_LEFT | NEED_BOTTOMLEFT,             // D203
  NEED_ABOVE | NEED_ABOVERIGHT,            // D67
  NEED_LEFT | NEED_ABOVE,                  // SMOOTH
  NEED_LEFT | NEED_ABOVE,                  // SMOOTH_V
  NEED_LEFT | NEED_ABOVE,                  // SMOOTH_H
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT  // PAETH
Yushin Cho's avatar
Yushin Cho committed
115 116
];

117
/// Compile-time block dimensions, in pixels.
///
/// Implemented by the zero-sized `BlockWxH` marker types so the predictors
/// can read the block size as associated constants.
pub trait Dim {
  /// Block width in pixels.
  const W: usize;
  /// Block height in pixels.
  const H: usize;
}

122 123 124 125 126 127 128 129 130
// Declares the marker type `Block<W>x<H>` for one block size, gives it its
// `Dim` constants, and opts it into the `Intra` predictors for both `u8` and
// `u16` pixels. `paste::item!` is what glues the numbers into the type name.
macro_rules! block_dimension {
  ($width:expr, $height:expr) => {
    paste::item! {
      pub struct [<Block $width x $height>];

      impl Dim for [<Block $width x $height>] {
        const W: usize = $width;
        const H: usize = $height;
      }

      impl Intra<u8> for [<Block $width x $height>] {}
      impl Intra<u16> for [<Block $width x $height>] {}
    }
  };
}

// Expands a comma-separated list of `(width, height)` pairs into one
// `block_dimension!` invocation per pair. At least one pair is required.
macro_rules! blocks_dimension {
  ($(($width:expr, $height:expr)),+) => {
    $(
      block_dimension! { $width, $height }
    )+
  };
}

blocks_dimension! { (4, 4), (8, 8), (16, 16), (32, 32), (64, 64) }
blocks_dimension! { (4, 8), (8, 16), (16, 32), (32, 64) }
blocks_dimension! { (8, 4), (16, 8), (32, 16), (64, 32) }
blocks_dimension! { (4, 16), (8, 32), (16, 64) }
blocks_dimension! { (16, 4), (32, 8), (64, 16) }
151

152 153 154 155 156 157 158 159 160 161 162
/// Scales a Q3 luma AC value by a Q3 `alpha` and returns the product as a
/// Q0 integer.
///
/// The Q6 product is rounded to nearest on its magnitude (ties away from
/// zero) and the sign is re-applied afterwards, so the result is symmetric
/// around zero.
#[inline(always)]
fn get_scaled_luma_q0(alpha_q3: i16, ac_pred_q3: i16) -> i32 {
  let product_q6 = i32::from(alpha_q3) * i32::from(ac_pred_q3);
  // Adding 32 (half of 1 << 6) before shifting rounds the magnitude.
  let magnitude_q0 = (product_q6.abs() + 32) >> 6;
  // signum() is 0 for a zero product, where the magnitude is 0 as well.
  product_q6.signum() * magnitude_q0
}

Luca Barbato's avatar
Luca Barbato committed
163
// Declares one externally implemented intra predictor with the shared
// `(dst, stride, topleft, width, height, angle)` signature.
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
macro_rules! decl_angular_ipred_fn {
  ($name:ident) => {
    extern "C" {
      fn $name(
        dst: *mut u8,
        stride: libc::ptrdiff_t,
        topleft: *const u8,
        width: libc::c_int,
        height: libc::c_int,
        angle: libc::c_int
      );
    }
  };
}

Luca Barbato's avatar
Luca Barbato committed
175
// External AVX2 predictors, available only on non-Windows x86_64 builds
// with the `nasm` feature enabled.
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
decl_angular_ipred_fn!(rav1e_ipred_dc_avx2);
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
decl_angular_ipred_fn!(rav1e_ipred_dc_128_avx2);
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
decl_angular_ipred_fn!(rav1e_ipred_dc_left_avx2);
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
decl_angular_ipred_fn!(rav1e_ipred_dc_top_avx2);
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
decl_angular_ipred_fn!(rav1e_ipred_h_avx2);
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
decl_angular_ipred_fn!(rav1e_ipred_v_avx2);
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
decl_angular_ipred_fn!(rav1e_ipred_paeth_avx2);
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
decl_angular_ipred_fn!(rav1e_ipred_smooth_avx2);
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
decl_angular_ipred_fn!(rav1e_ipred_smooth_h_avx2);
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
decl_angular_ipred_fn!(rav1e_ipred_smooth_v_avx2);
195

196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217
// Declares one externally implemented chroma-from-luma predictor. Same leading
// arguments as the angular predictors, followed by the luma AC buffer and
// the `alpha` scale instead of an angle.
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
macro_rules! decl_cfl_pred_fn {
  ($name:ident) => {
    extern "C" {
      fn $name(
        dst: *mut u8,
        stride: libc::ptrdiff_t,
        topleft: *const u8,
        width: libc::c_int,
        height: libc::c_int,
        ac: *const u8,
        alpha: libc::c_int
      );
    }
  };
}

// External AVX2 chroma-from-luma predictors, one per DC variant
// (both edges, fixed mid-grey, left edge only, top edge only).
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
decl_cfl_pred_fn!(rav1e_ipred_cfl_avx2);
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
decl_cfl_pred_fn!(rav1e_ipred_cfl_128_avx2);
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
decl_cfl_pred_fn!(rav1e_ipred_cfl_left_avx2);
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
decl_cfl_pred_fn!(rav1e_ipred_cfl_top_avx2);

218 219
pub trait Intra<T>: Dim
where
220
  T: Pixel,
221 222 223 224
  i32: AsPrimitive<T>,
  u32: AsPrimitive<T>,
  usize: AsPrimitive<T>
{
rzumer's avatar
rzumer committed
225
  #[cfg_attr(feature = "comparative_bench", inline(never))]
226
  fn pred_dc(output: &mut [T], stride: usize, above: &[T], left: &[T]) {
Luca Barbato's avatar
Luca Barbato committed
227
    #[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
228 229 230 231 232 233 234 235 236 237 238 239 240 241
    {
      if size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
        return unsafe {
          rav1e_ipred_dc_avx2(
            output.as_mut_ptr() as *mut _,
            stride as libc::ptrdiff_t,
            above.as_ptr().offset(-1) as *const _,
            Self::W as libc::c_int,
            Self::H as libc::c_int,
            0
          )
        };
      }
    }
Michael Bebenita's avatar
Michael Bebenita committed
242 243 244
    let edges = left[..Self::H].iter().chain(above[..Self::W].iter());
    let len = (Self::W + Self::H) as u32;
    let avg =
245
      ((edges.fold(0u32, |acc, &v| { let v: u32 = v.into(); v + acc }) + (len >> 1)) / len).as_();
Michael Bebenita's avatar
Michael Bebenita committed
246 247 248 249 250

    for line in output.chunks_mut(stride).take(Self::H) {
      for v in &mut line[..Self::W] {
        *v = avg;
      }
Luca Barbato's avatar
Luca Barbato committed
251
    }
Michael Bebenita's avatar
Michael Bebenita committed
252
  }
Luca Barbato's avatar
Luca Barbato committed
253

rzumer's avatar
rzumer committed
254
  #[cfg_attr(feature = "comparative_bench", inline(never))]
255
  fn pred_dc_128(output: &mut [T], stride: usize, bit_depth: usize) {
Luca Barbato's avatar
Luca Barbato committed
256
    #[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
David Michael Barr's avatar
David Michael Barr committed
257 258 259 260 261 262 263 264 265 266 267 268 269 270
    {
      if size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
        return unsafe {
          rav1e_ipred_dc_128_avx2(
            output.as_mut_ptr() as *mut _,
            stride as libc::ptrdiff_t,
            ptr::null(),
            Self::W as libc::c_int,
            Self::H as libc::c_int,
            0
          )
        };
      }
    }
Michael Bebenita's avatar
Michael Bebenita committed
271 272
    for y in 0..Self::H {
      for x in 0..Self::W {
273
        output[y * stride + x] = (128u32 << (bit_depth - 8)).as_();
Michael Bebenita's avatar
Michael Bebenita committed
274
      }
275
    }
Michael Bebenita's avatar
Michael Bebenita committed
276 277
  }

rzumer's avatar
rzumer committed
278
  #[cfg_attr(feature = "comparative_bench", inline(never))]
279
  fn pred_dc_left(output: &mut [T], stride: usize, _above: &[T], left: &[T]) {
Luca Barbato's avatar
Luca Barbato committed
280
    #[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
281 282 283 284 285 286
    {
      if size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
        return unsafe {
          rav1e_ipred_dc_left_avx2(
            output.as_mut_ptr() as *mut _,
            stride as libc::ptrdiff_t,
287
            left.as_ptr().add(Self::H) as *const _,
288 289 290 291 292 293 294
            Self::W as libc::c_int,
            Self::H as libc::c_int,
            0
          )
        };
      }
    }
295 296
    let sum = left[..Self::H].iter().fold(0u32, |acc, &v| { let v: u32 = v.into(); v + acc });
    let avg = ((sum + (Self::H >> 1) as u32) / Self::H as u32).as_();
297 298
    for line in output.chunks_mut(stride).take(Self::H) {
      line[..Self::W].iter_mut().for_each(|v| *v = avg);
299
    }
Michael Bebenita's avatar
Michael Bebenita committed
300 301
  }

rzumer's avatar
rzumer committed
302
  #[cfg_attr(feature = "comparative_bench", inline(never))]
303
  fn pred_dc_top(output: &mut [T], stride: usize, above: &[T], _left: &[T]) {
Luca Barbato's avatar
Luca Barbato committed
304
    #[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
305 306 307 308 309 310 311 312 313 314 315 316 317 318
    {
      if size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
        return unsafe {
          rav1e_ipred_dc_top_avx2(
            output.as_mut_ptr() as *mut _,
            stride as libc::ptrdiff_t,
            above.as_ptr().offset(-1) as *const _,
            Self::W as libc::c_int,
            Self::H as libc::c_int,
            0
          )
        };
      }
    }
319 320
    let sum = above[..Self::W].iter().fold(0u32, |acc, &v| { let v: u32 = v.into(); v + acc });
    let avg = ((sum + (Self::W >> 1) as u32) / Self::W as u32).as_();
321 322
    for line in output.chunks_mut(stride).take(Self::H) {
      line[..Self::W].iter_mut().for_each(|v| *v = avg);
323
    }
Michael Bebenita's avatar
Michael Bebenita committed
324
  }
325

rzumer's avatar
rzumer committed
326
  #[cfg_attr(feature = "comparative_bench", inline(never))]
327
  fn pred_h(output: &mut [T], stride: usize, left: &[T]) {
Luca Barbato's avatar
Luca Barbato committed
328
    #[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
329 330 331 332 333 334
    {
      if size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
        return unsafe {
          rav1e_ipred_h_avx2(
            output.as_mut_ptr() as *mut _,
            stride as libc::ptrdiff_t,
335
            left.as_ptr().add(Self::H) as *const _,
336 337 338 339 340 341 342
            Self::W as libc::c_int,
            Self::H as libc::c_int,
            0
          )
        };
      }
    }
343 344 345
    for (line, l) in
      output.chunks_mut(stride).zip(left[..Self::H].iter().rev())
    {
Michael Bebenita's avatar
Michael Bebenita committed
346 347 348
      for v in &mut line[..Self::W] {
        *v = *l;
      }
349
    }
Michael Bebenita's avatar
Michael Bebenita committed
350
  }
351

rzumer's avatar
rzumer committed
352
  #[cfg_attr(feature = "comparative_bench", inline(never))]
353
  fn pred_v(output: &mut [T], stride: usize, above: &[T]) {
Luca Barbato's avatar
Luca Barbato committed
354
    #[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
355 356 357 358 359 360 361 362 363 364 365 366 367 368
    {
      if size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
        return unsafe {
          rav1e_ipred_v_avx2(
            output.as_mut_ptr() as *mut _,
            stride as libc::ptrdiff_t,
            above.as_ptr().offset(-1) as *const _,
            Self::W as libc::c_int,
            Self::H as libc::c_int,
            0
          )
        };
      }
    }
Michael Bebenita's avatar
Michael Bebenita committed
369 370
    for line in output.chunks_mut(stride).take(Self::H) {
      line[..Self::W].clone_from_slice(&above[..Self::W])
Luc Trudeau's avatar
Luc Trudeau committed
371
    }
Michael Bebenita's avatar
Michael Bebenita committed
372 373
  }

rzumer's avatar
rzumer committed
374
  #[cfg_attr(feature = "comparative_bench", inline(never))]
Michael Bebenita's avatar
Michael Bebenita committed
375
  fn pred_paeth(
376 377
    output: &mut [T], stride: usize, above: &[T], left: &[T],
    above_left: T
Michael Bebenita's avatar
Michael Bebenita committed
378
  ) {
Luca Barbato's avatar
Luca Barbato committed
379
    #[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
380 381 382 383 384 385 386 387 388 389 390 391 392 393
    {
      if size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
        return unsafe {
          rav1e_ipred_paeth_avx2(
            output.as_mut_ptr() as *mut _,
            stride as libc::ptrdiff_t,
            above.as_ptr().offset(-1) as *const _,
            Self::W as libc::c_int,
            Self::H as libc::c_int,
            0
          )
        };
      }
    }
Michael Bebenita's avatar
Michael Bebenita committed
394 395 396
    for r in 0..Self::H {
      for c in 0..Self::W {
        // Top-left pixel is fixed in libaom
397
        let raw_top_left: i32 = above_left.into();
398
        let raw_left: i32 = left[Self::H - 1 - r].into();
399
        let raw_top: i32 = above[c].into();
Michael Bebenita's avatar
Michael Bebenita committed
400 401 402 403 404 405 406 407 408 409

        let p_base = raw_top + raw_left - raw_top_left;
        let p_left = (p_base - raw_left).abs();
        let p_top = (p_base - raw_top).abs();
        let p_top_left = (p_base - raw_top_left).abs();

        let output_index = r * stride + c;

        // Return nearest to base of left, top and top_left
        if p_left <= p_top && p_left <= p_top_left {
410
          output[output_index] = raw_left.as_();
Michael Bebenita's avatar
Michael Bebenita committed
411
        } else if p_top <= p_top_left {
412
          output[output_index] = raw_top.as_();
Michael Bebenita's avatar
Michael Bebenita committed
413
        } else {
414
          output[output_index] = raw_top_left.as_();
415
        }
Michael Bebenita's avatar
Michael Bebenita committed
416
      }
417
    }
Michael Bebenita's avatar
Michael Bebenita committed
418 419
  }

rzumer's avatar
rzumer committed
420
  #[cfg_attr(feature = "comparative_bench", inline(never))]
Michael Bebenita's avatar
Michael Bebenita committed
421
  fn pred_smooth(
422
    output: &mut [T], stride: usize, above: &[T], left: &[T]
Michael Bebenita's avatar
Michael Bebenita committed
423
  ) {
Luca Barbato's avatar
Luca Barbato committed
424
    #[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
425 426 427 428 429 430 431 432 433 434 435 436 437 438
    {
      if size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
        return unsafe {
          rav1e_ipred_smooth_avx2(
            output.as_mut_ptr() as *mut _,
            stride as libc::ptrdiff_t,
            above.as_ptr().offset(-1) as *const _,
            Self::W as libc::c_int,
            Self::H as libc::c_int,
            0
          )
        };
      }
    }
439
    let below_pred = left[0]; // estimated by bottom-left pixel
Michael Bebenita's avatar
Michael Bebenita committed
440 441 442 443 444
    let right_pred = above[Self::W - 1]; // estimated by top-right pixel
    let sm_weights_w = &sm_weight_arrays[Self::W..];
    let sm_weights_h = &sm_weight_arrays[Self::H..];

    let log2_scale = 1 + sm_weight_log2_scale;
445
    let scale = 1_u16 << sm_weight_log2_scale;
Michael Bebenita's avatar
Michael Bebenita committed
446 447 448 449 450 451 452 453 454 455

    // Weights sanity checks
    assert!((sm_weights_w[0] as u16) < scale);
    assert!((sm_weights_h[0] as u16) < scale);
    assert!((scale - sm_weights_w[Self::W - 1] as u16) < scale);
    assert!((scale - sm_weights_h[Self::H - 1] as u16) < scale);
    assert!(log2_scale as usize + size_of_val(&output[0]) < 31); // ensures no overflow when calculating predictor

    for r in 0..Self::H {
      for c in 0..Self::W {
456
        let pixels = [above[c], below_pred, left[Self::H - 1 - r], right_pred];
Michael Bebenita's avatar
Michael Bebenita committed
457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473

        let weights = [
          sm_weights_h[r] as u16,
          scale - sm_weights_h[r] as u16,
          sm_weights_w[c] as u16,
          scale - sm_weights_w[c] as u16
        ];

        assert!(
          scale >= (sm_weights_h[r] as u16)
            && scale >= (sm_weights_w[c] as u16)
        );

        // Sum up weighted pixels
        let mut this_pred: u32 = weights
          .iter()
          .zip(pixels.iter())
474
          .map(|(w, p)| { let p: u32 = (*p).into(); (*w as u32) * p })
Michael Bebenita's avatar
Michael Bebenita committed
475 476 477 478 479
          .sum();
        this_pred = (this_pred + (1 << (log2_scale - 1))) >> log2_scale;

        let output_index = r * stride + c;

480
        output[output_index] = this_pred.as_();
Michael Bebenita's avatar
Michael Bebenita committed
481
      }
Raphael Zumer's avatar
Raphael Zumer committed
482
    }
Michael Bebenita's avatar
Michael Bebenita committed
483
  }
484

rzumer's avatar
rzumer committed
485
  #[cfg_attr(feature = "comparative_bench", inline(never))]
Michael Bebenita's avatar
Michael Bebenita committed
486
  fn pred_smooth_h(
487
    output: &mut [T], stride: usize, above: &[T], left: &[T]
Michael Bebenita's avatar
Michael Bebenita committed
488
  ) {
Luca Barbato's avatar
Luca Barbato committed
489
    #[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
490 491 492 493 494 495 496 497 498 499 500 501 502 503
    {
      if size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
        return unsafe {
          rav1e_ipred_smooth_h_avx2(
            output.as_mut_ptr() as *mut _,
            stride as libc::ptrdiff_t,
            above.as_ptr().offset(-1) as *const _,
            Self::W as libc::c_int,
            Self::H as libc::c_int,
            0
          )
        };
      }
    }
Michael Bebenita's avatar
Michael Bebenita committed
504 505
    let right_pred = above[Self::W - 1]; // estimated by top-right pixel
    let sm_weights = &sm_weight_arrays[Self::W..];
506

Michael Bebenita's avatar
Michael Bebenita committed
507 508
    let log2_scale = sm_weight_log2_scale;
    let scale = 1_u16 << sm_weight_log2_scale;
509

Michael Bebenita's avatar
Michael Bebenita committed
510 511 512 513
    // Weights sanity checks
    assert!((sm_weights[0] as u16) < scale);
    assert!((scale - sm_weights[Self::W - 1] as u16) < scale);
    assert!(log2_scale as usize + size_of_val(&output[0]) < 31); // ensures no overflow when calculating predictor
514

Michael Bebenita's avatar
Michael Bebenita committed
515 516
    for r in 0..Self::H {
      for c in 0..Self::W {
517
        let pixels = [left[Self::H - 1 - r], right_pred];
Michael Bebenita's avatar
Michael Bebenita committed
518
        let weights = [sm_weights[c] as u16, scale - sm_weights[c] as u16];
519

Michael Bebenita's avatar
Michael Bebenita committed
520
        assert!(scale >= sm_weights[c] as u16);
521

Michael Bebenita's avatar
Michael Bebenita committed
522 523 524
        let mut this_pred: u32 = weights
          .iter()
          .zip(pixels.iter())
525
          .map(|(w, p)| { let p: u32 = (*p).into(); (*w as u32) * p })
Michael Bebenita's avatar
Michael Bebenita committed
526 527
          .sum();
        this_pred = (this_pred + (1 << (log2_scale - 1))) >> log2_scale;
528

Michael Bebenita's avatar
Michael Bebenita committed
529
        let output_index = r * stride + c;
530

531
        output[output_index] = this_pred.as_();
Michael Bebenita's avatar
Michael Bebenita committed
532
      }
533
    }
Michael Bebenita's avatar
Michael Bebenita committed
534
  }
535

rzumer's avatar
rzumer committed
536
  #[cfg_attr(feature = "comparative_bench", inline(never))]
Michael Bebenita's avatar
Michael Bebenita committed
537
  fn pred_smooth_v(
538
    output: &mut [T], stride: usize, above: &[T], left: &[T]
Michael Bebenita's avatar
Michael Bebenita committed
539
  ) {
Luca Barbato's avatar
Luca Barbato committed
540
    #[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
541 542 543 544 545 546 547 548 549 550 551 552 553 554
    {
      if size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
        return unsafe {
          rav1e_ipred_smooth_v_avx2(
            output.as_mut_ptr() as *mut _,
            stride as libc::ptrdiff_t,
            above.as_ptr().offset(-1) as *const _,
            Self::W as libc::c_int,
            Self::H as libc::c_int,
            0
          )
        };
      }
    }
555
    let below_pred = left[0]; // estimated by bottom-left pixel
Michael Bebenita's avatar
Michael Bebenita committed
556
    let sm_weights = &sm_weight_arrays[Self::H..];
557

Michael Bebenita's avatar
Michael Bebenita committed
558 559
    let log2_scale = sm_weight_log2_scale;
    let scale = 1_u16 << sm_weight_log2_scale;
560

Michael Bebenita's avatar
Michael Bebenita committed
561 562 563 564
    // Weights sanity checks
    assert!((sm_weights[0] as u16) < scale);
    assert!((scale - sm_weights[Self::H - 1] as u16) < scale);
    assert!(log2_scale as usize + size_of_val(&output[0]) < 31); // ensures no overflow when calculating predictor
565

Michael Bebenita's avatar
Michael Bebenita committed
566 567 568 569
    for r in 0..Self::H {
      for c in 0..Self::W {
        let pixels = [above[c], below_pred];
        let weights = [sm_weights[r] as u16, scale - sm_weights[r] as u16];
570

Michael Bebenita's avatar
Michael Bebenita committed
571
        assert!(scale >= sm_weights[r] as u16);
572

Michael Bebenita's avatar
Michael Bebenita committed
573 574 575
        let mut this_pred: u32 = weights
          .iter()
          .zip(pixels.iter())
576
          .map(|(w, p)| { let p: u32 = (*p).into(); (*w as u32) * p })
Michael Bebenita's avatar
Michael Bebenita committed
577 578
          .sum();
        this_pred = (this_pred + (1 << (log2_scale - 1))) >> log2_scale;
579

Michael Bebenita's avatar
Michael Bebenita committed
580
        let output_index = r * stride + c;
581

582
        output[output_index] = this_pred.as_();
Michael Bebenita's avatar
Michael Bebenita committed
583
      }
584
    }
Michael Bebenita's avatar
Michael Bebenita committed
585
  }
586

587 588 589
  #[target_feature(enable = "ssse3")]
  #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
  unsafe fn pred_cfl_ssse3(
590
    output: &mut [T], stride: usize, ac: &[i16], alpha: i16,
591 592 593 594
    bit_depth: usize
  ) {
    let alpha_sign = _mm_set1_epi16(alpha);
    let alpha_q12 = _mm_slli_epi16(_mm_abs_epi16(alpha_sign), 9);
595 596
    let dc_scalar: u32 = (*output.as_ptr()).into();
    let dc_q0 = _mm_set1_epi16(dc_scalar as i16);
597 598 599
    let max = _mm_set1_epi16((1 << bit_depth) - 1);

    for j in 0..Self::H {
600
      let luma = ac.as_ptr().add(Self::W * j);
601
      let line = output.as_mut_ptr().add(stride * j);
602 603

      let mut i = 0isize;
604
      let mut last = _mm_setzero_si128();
605 606 607
      while (i as usize) < Self::W {
        let ac_q3 = _mm_loadu_si128(luma.offset(i) as *const _);
        let ac_sign = _mm_sign_epi16(alpha_sign, ac_q3);
608 609
        let abs_scaled_luma_q0 =
          _mm_mulhrs_epi16(_mm_abs_epi16(ac_q3), alpha_q12);
610 611
        let scaled_luma_q0 = _mm_sign_epi16(abs_scaled_luma_q0, ac_sign);
        let pred = _mm_add_epi16(scaled_luma_q0, dc_q0);
612 613 614 615 616 617 618 619 620 621 622 623 624 625
        if size_of::<T>() == 1 {
          if Self::W < 16 {
            let res = _mm_packus_epi16(pred, pred);
            if Self::W == 4 {
               *(line.offset(i) as *mut i32) = _mm_cvtsi128_si32(res);
            } else {
              _mm_storel_epi64(line.offset(i) as *mut _, res);
            }
          } else if (i & 15) == 0 {
            last = pred;
          } else {
            let res = _mm_packus_epi16(last, pred);
            _mm_storeu_si128(line.offset(i - 8) as *mut _, res);
          }
626
        } else {
627 628 629 630 631 632
          let res = _mm_min_epi16(max, _mm_max_epi16(pred, _mm_setzero_si128()));
          if Self::W == 4 {
            _mm_storel_epi64(line.offset(i) as *mut _, res);
          } else {
            _mm_storeu_si128(line.offset(i) as *mut _, res);
          }
633 634 635 636 637 638
        }
        i += 8;
      }
    }
  }

639 640
  fn pred_cfl_inner(
    output: &mut [T], stride: usize, ac: &[i16], alpha: i16, bit_depth: usize
641
  ) {
642 643 644
    if alpha == 0 {
      return;
    }
645 646 647 648 649
    assert!(32 >= Self::W);
    assert!(ac.len() >= 32 * (Self::H - 1) + Self::W);
    assert!(stride >= Self::W);
    assert!(output.len() >= stride * (Self::H - 1) + Self::W);
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
650 651 652 653 654 655
    {
      if is_x86_feature_detected!("ssse3") {
        return unsafe {
          Self::pred_cfl_ssse3(output, stride, ac, alpha, bit_depth)
        };
      }
656 657
    }

658
    let sample_max = (1 << bit_depth) - 1;
659
    let avg: i32 = output[0].into();
660 661

    for (line, luma) in
662
      output.chunks_mut(stride).zip(ac.chunks(Self::W)).take(Self::H)
663 664 665
    {
      for (v, &l) in line[..Self::W].iter_mut().zip(luma[..Self::W].iter()) {
        *v =
666
          (avg + get_scaled_luma_q0(alpha, l)).max(0).min(sample_max).as_();
667 668 669
      }
    }
  }
670 671 672 673 674 675

  #[cfg_attr(feature = "comparative_bench", inline(never))]
  fn pred_cfl(
    output: &mut [T], stride: usize, ac: &[i16], alpha: i16, bit_depth: usize,
    above: &[T], left: &[T]
  ) {
676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691
    #[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
    {
      if size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
        return unsafe {
          rav1e_ipred_cfl_avx2(
            output.as_mut_ptr() as *mut _,
            stride as libc::ptrdiff_t,
            above.as_ptr().offset(-1) as *const _,
            Self::W as libc::c_int,
            Self::H as libc::c_int,
            ac.as_ptr() as *const _,
            alpha as libc::c_int
          )
        }
      }
    }
692 693 694 695 696 697 698
    Self::pred_dc(output, stride, above, left);
    Self::pred_cfl_inner(output, stride, &ac, alpha, bit_depth);
  }

  fn pred_cfl_128(
    output: &mut [T], stride: usize, ac: &[i16], alpha: i16, bit_depth: usize
  ) {
699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714
    #[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
    {
      if size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
        return unsafe {
          rav1e_ipred_cfl_128_avx2(
            output.as_mut_ptr() as *mut _,
            stride as libc::ptrdiff_t,
            ptr::null(),
            Self::W as libc::c_int,
            Self::H as libc::c_int,
            ac.as_ptr() as *const _,
            alpha as libc::c_int
          )
        }
      }
    }
715 716 717 718 719 720 721 722
    Self::pred_dc_128(output, stride, bit_depth);
    Self::pred_cfl_inner(output, stride, &ac, alpha, bit_depth);
  }

  fn pred_cfl_left(
    output: &mut [T], stride: usize, ac: &[i16], alpha: i16, bit_depth: usize,
    above: &[T], left: &[T]
  ) {
723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738
    #[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
    {
      if size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
        return unsafe {
          rav1e_ipred_cfl_left_avx2(
            output.as_mut_ptr() as *mut _,
            stride as libc::ptrdiff_t,
            above.as_ptr().offset(-1) as *const _,
            Self::W as libc::c_int,
            Self::H as libc::c_int,
            ac.as_ptr() as *const _,
            alpha as libc::c_int
          )
        }
      }
    }
739 740 741 742 743 744 745 746
    Self::pred_dc_left(output, stride, above, left);
    Self::pred_cfl_inner(output, stride, &ac, alpha, bit_depth);
  }

  fn pred_cfl_top(
    output: &mut [T], stride: usize, ac: &[i16], alpha: i16, bit_depth: usize,
    above: &[T], left: &[T]
  ) {
747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762
    #[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
    {
      if size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
        return unsafe {
          rav1e_ipred_cfl_top_avx2(
            output.as_mut_ptr() as *mut _,
            stride as libc::ptrdiff_t,
            above.as_ptr().offset(-1) as *const _,
            Self::W as libc::c_int,
            Self::H as libc::c_int,
            ac.as_ptr() as *const _,
            alpha as libc::c_int
          )
        }
      }
    }
763 764 765
    Self::pred_dc_top(output, stride, above, left);
    Self::pred_cfl_inner(output, stride, &ac, alpha, bit_depth);
  }
Frank Bossen's avatar
Frank Bossen committed
766 767 768 769 770 771

  #[cfg_attr(feature = "comparative_bench", inline(never))]
  fn pred_directional(
    output: &mut [T], stride: usize, above: &[T], left: &[T], top_left: &[T], angle: usize, bit_depth: usize
  ) {
    let sample_max = ((1 << bit_depth) - 1) as i32;
772
    let _angle_delta = 0;
Frank Bossen's avatar
Frank Bossen committed
773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883

    let p_angle = angle; // TODO use Mode_to_Angle

    let upsample_above = 0;
    let upsample_left = 0;

    let enable_intra_edge_filter = false; // FIXME

    if enable_intra_edge_filter {
      // TODO
    }

    fn dr_intra_derivative(p_angle: usize) -> usize {
      match p_angle {
        4 => 1023,
        7 => 547,
        10 => 372,
        14 => 273,
        17 => 215,
        20 => 178,
        23 => 151,
        26 => 132,
        29 => 116,
        32 => 102,
        36 => 90,
        39 => 80,
        42 => 71,
        45 => 64,
        48 => 57,
        51 => 51,
        54 => 45,
        58 => 40,
        61 => 35,
        64 => 31,
        67 => 27,
        70 => 23,
        73 => 19,
        76 => 15,
        81 => 11,
        84 => 7,
        87 => 3,
        _ => 0
      }
    }

    let dx = if p_angle < 90 {
      dr_intra_derivative(p_angle)
    } else if p_angle > 90 && p_angle < 180 {
      dr_intra_derivative(180 - p_angle)
    } else {
      0 // undefined
    };

    let dy = if p_angle > 90 && p_angle < 180 {
      dr_intra_derivative(p_angle - 90)
    } else if p_angle > 180 {
      dr_intra_derivative(270 - p_angle)
    } else {
      0 // undefined
    };

    if p_angle < 90 {
      for i in 0..Self::H {
        for j in 0..Self::W {
          let idx = (i + 1) * dx;
          let base = (idx >> (6 - upsample_above)) + (j << upsample_above);
          let shift = (((idx << upsample_above) >> 1) & 31) as i32;
          let max_base_x = (Self::H + Self::W - 1) << upsample_above;
          output[i * stride + j] = if base < max_base_x {
            let a: i32 = above[base].into();
            let b: i32 = above[base + 1].into();
            (a * (32 - shift) + b * shift + 16) >> 5
          } else {
            let c: i32 = above[max_base_x].into();
            c
          }.max(0).min(sample_max).as_();
        }
      }
    } else if p_angle > 90 && p_angle < 180 {
      for i in 0..Self::H {
        for j in 0..Self::W {
          let idx = (j << 6) as isize - ((i + 1) * dx) as isize;
          let base = idx >> (6 - upsample_above);
          if base >= -(1 << upsample_above) {
            let shift = (((idx << upsample_above) >> 1) & 31) as i32;
            let a: i32 = if base < 0 { top_left[0] } else { above[base as usize] }.into();
            let b: i32 = above[(base + 1) as usize].into();
            output[i * stride + j] = ((a * (32 - shift) + b * shift + 16) >> 5).max(0).min(sample_max).as_();
          } else {
            let idx = (i << 6) as isize - ((j + 1) * dy) as isize;
            let base = idx >> (6 - upsample_left);
            let shift = (((idx << upsample_left) >> 1) & 31) as i32;
            let a: i32 = if base < 0 { top_left[0] } else { left[Self::W + Self::H - 1 - base as usize] }.into();
            let b: i32 = left[Self::W + Self::H - (2 + base) as usize].into();
            output[i * stride + j] = ((a * (32 - shift) + b * shift + 16) >> 5).max(0).min(sample_max).as_();
          }
        }
      }
    } else if p_angle > 180 {
      for i in 0..Self::H {
        for j in 0..Self::W {
          let idx = (j + 1) * dy;
          let base = (idx >> (6 - upsample_left)) + (i << upsample_left);
          let shift = (((idx << upsample_left) >> 1) & 31) as i32;
          let a: i32 = left[Self::W + Self::H - 1 - base].into();
          let b: i32 = left[Self::W + Self::H - 2 - base].into();
          output[i * stride + j] = ((a * (32 - shift) + b * shift + 16) >> 5).max(0).min(sample_max).as_();
        }
      }
    }
  }
Luc Trudeau's avatar
Luc Trudeau committed
884 885
}

Frank Bossen's avatar
Frank Bossen committed
886

887
/// Presumably the inter-prediction counterpart of the intra trait above;
/// it currently declares no items of its own beyond the `Dim` supertrait.
pub trait Inter: Dim {}
888

889
#[cfg(all(test, feature = "aom"))]
890
pub mod test {
Michael Bebenita's avatar
Michael Bebenita committed
891
  use super::*;
Thomas Daede's avatar
Thomas Daede committed
892
  use rand::{ChaChaRng, Rng, SeedableRng};
Raphaël Zumer's avatar
Raphaël Zumer committed
893
  use util::*;
Michael Bebenita's avatar
Michael Bebenita committed
894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909

  // Number of randomized trials run by the comparison loops in this module
  // (e.g. `pred_matches`).
  const MAX_ITER: usize = 50000;

  /// Builds the inputs for one randomized predictor comparison.
  ///
  /// Returns `(above, left, o1, o2)`: two edges of 32 random `u16` samples
  /// (`above` is drawn from `ra` before `left`) and two identical, zeroed
  /// 32x32 output buffers.
  fn setup_pred(
    ra: &mut ChaChaRng
  ) -> (Vec<u16>, Vec<u16>, Vec<u16>, Vec<u16>) {
    let mut random_edge =
      || -> Vec<u16> { (0..32).map(|_| ra.gen::<u16>()).collect() };
    let above = random_edge();
    let left = random_edge();

    let o1 = vec![0u16; 32 * 32];
    let o2 = o1.clone();

    (above, left, o1, o2)
  }

910 911 912 913 914 915 916 917 918
  // Declares the external predictor `$aom_fn` and generates `$fn_4x4`, a
  // safe wrapper that runs it on a 4x4 block with a bit depth of 8.
  macro_rules! wrap_aom_pred_fn {
    ($fn_4x4:ident, $aom_fn:ident) => {
      extern {
        fn $aom_fn(
          dst: *mut u16, stride: libc::ptrdiff_t, bw: libc::c_int,
          bh: libc::c_int, above: *const u16, left: *const u16,
          bd: libc::c_int
        );
      }

      fn $fn_4x4(
        output: &mut [u16], stride: usize, above: &[u16], left: &[u16]
      ) {
        // The external predictor receives the left edge in the opposite
        // order from the one used by the Rust predictors, so hand it a
        // reversed copy.
        let reversed_left: Vec<u16> = left.iter().rev().cloned().collect();
        let block_w: libc::c_int = 4;
        let block_h: libc::c_int = 4;
        let bit_depth: libc::c_int = 8;
        unsafe {
          $aom_fn(
            output.as_mut_ptr(),
            stride as libc::ptrdiff_t,
            block_w,
            block_h,
            above.as_ptr(),
            reversed_left.as_ptr(),
            bit_depth
          );
        }
      }
    };
  }

940 941 942 943 944 945 946 947 948
  // One 4x4 reference wrapper per external high-bit-depth predictor; each
  // is compared against the matching `Block4x4` predictor further below.
  wrap_aom_pred_fn!(pred_dc_4x4, highbd_dc_predictor);
  wrap_aom_pred_fn!(pred_dc_left_4x4, highbd_dc_left_predictor);
  wrap_aom_pred_fn!(pred_dc_top_4x4, highbd_dc_top_predictor);
  wrap_aom_pred_fn!(pred_h_4x4, highbd_h_predictor);
  wrap_aom_pred_fn!(pred_v_4x4, highbd_v_predictor);
  wrap_aom_pred_fn!(pred_paeth_4x4, highbd_paeth_predictor);
  wrap_aom_pred_fn!(pred_smooth_4x4, highbd_smooth_predictor);
  wrap_aom_pred_fn!(pred_smooth_h_4x4, highbd_smooth_h_predictor);
  wrap_aom_pred_fn!(pred_smooth_v_4x4, highbd_smooth_v_predictor);
Michael Bebenita's avatar
Michael Bebenita committed
949

950 951 952 953 954 955
  extern {
    // External reference CfL predictor for 16-bit samples. It is called by
    // `pred_cfl_4x4` with an AC buffer laid out in rows of 32 values.
    fn cfl_predict_hbd_c(
      ac_buf_q3: *const i16,
      dst: *mut u16,
      stride: libc::ptrdiff_t,
      alpha_q3: libc::c_int,
      bd: libc::c_int,
      bw: libc::c_int,
      bh: libc::c_int
    );
  }
957

958 959 960
  pub fn pred_cfl_4x4(
    output: &mut [u16], stride: usize, ac: &[i16], alpha: i16, bd: i32
  ) {
961 962 963 964
    let mut ac32 = [0; 4*32];
    for (l32, l) in ac32.chunks_mut(32).zip(ac.chunks(4).take(4)) {
        l32[..4].copy_from_slice(&l);
    }
965 966
    unsafe {
      cfl_predict_hbd_c(
967
        ac32.as_ptr(),
968 969 970 971 972 973 974 975 976 977
        output.as_mut_ptr(),
        stride as libc::ptrdiff_t,
        alpha as libc::c_int,
        bd,
        4,
        4
      );
    }
  }

Michael Bebenita's avatar
Michael Bebenita committed
978 979
  fn do_dc_pred(ra: &mut ChaChaRng) -> (Vec<u16>, Vec<u16>) {
    let (above, left, mut o1, mut o2) = setup_pred(ra);
Luca Barbato's avatar
Luca Barbato committed
980

Michael Bebenita's avatar
Michael Bebenita committed
981 982
    pred_dc_4x4(&mut o1, 32, &above[..4], &left[..4]);
    Block4x4::pred_dc(&mut o2, 32, &above[..4], &left[..4]);
Luca Barbato's avatar
Luca Barbato committed
983

Michael Bebenita's avatar
Michael Bebenita committed
984 985
    (o1, o2)
  }
Luca Barbato's avatar
Luca Barbato committed
986

987 988 989 990 991 992 993 994 995
  /// Runs the reference and the Rust left-edge DC predictors on the same
  /// random 4x4 edges and returns `(reference_output, rust_output)`.
  fn do_dc_left_pred(ra: &mut ChaChaRng) -> (Vec<u16>, Vec<u16>) {
    let (above, left, mut reference, mut ours) = setup_pred(ra);
    let (top, side) = (&above[..4], &left[..4]);

    pred_dc_left_4x4(&mut reference, 32, top, side);
    Block4x4::pred_dc_left(&mut ours, 32, top, side);

    (reference, ours)
  }

996 997 998 999 1000 1001 1002 1003 1004
  /// Runs the reference and the Rust top-edge DC predictors on the same
  /// random 4x4 edges and returns `(reference_output, rust_output)`.
  fn do_dc_top_pred(ra: &mut ChaChaRng) -> (Vec<u16>, Vec<u16>) {
    let (above, left, mut reference, mut ours) = setup_pred(ra);
    let (top, side) = (&above[..4], &left[..4]);

    pred_dc_top_4x4(&mut reference, 32, top, side);
    Block4x4::pred_dc_top(&mut ours, 32, top, side);

    (reference, ours)
  }

Michael Bebenita's avatar
Michael Bebenita committed
1005 1006
  fn do_h_pred(ra: &mut ChaChaRng) -> (Vec<u16>, Vec<u16>) {
    let (above, left, mut o1, mut o2) = setup_pred(ra);
1007

Michael Bebenita's avatar
Michael Bebenita committed
1008 1009
    pred_h_4x4(&mut o1, 32, &above[..4], &left[..4]);
    Block4x4::pred_h(&mut o2, 32, &left[..4]);
1010

Michael Bebenita's avatar
Michael Bebenita committed
1011 1012
    (o1, o2)
  }
1013

Michael Bebenita's avatar
Michael Bebenita committed
1014 1015
  fn do_v_pred(ra: &mut ChaChaRng) -> (Vec<u16>, Vec<u16>) {
    let (above, left, mut o1, mut o2) = setup_pred(ra);
Luc Trudeau's avatar
Luc Trudeau committed
1016

Michael Bebenita's avatar
Michael Bebenita committed
1017 1018
    pred_v_4x4(&mut o1, 32, &above[..4], &left[..4]);
    Block4x4::pred_v(&mut o2, 32, &above[..4]);
Luc Trudeau's avatar
Luc Trudeau committed
1019

Michael Bebenita's avatar
Michael Bebenita committed
1020 1021
    (o1, o2)
  }
Luc Trudeau's avatar
Luc Trudeau committed
1022

Michael Bebenita's avatar
Michael Bebenita committed
1023 1024 1025
  fn do_paeth_pred(ra: &mut ChaChaRng) -> (Vec<u16>, Vec<u16>) {
    let (above, left, mut o1, mut o2) = setup_pred(ra);
    let above_left = unsafe { *above.as_ptr().offset(-1) };
1026

Michael Bebenita's avatar
Michael Bebenita committed
1027 1028
    pred_paeth_4x4(&mut o1, 32, &above[..4], &left[..4]);
    Block4x4::pred_paeth(&mut o2, 32, &above[..4], &left[..4], above_left);
1029

Michael Bebenita's avatar
Michael Bebenita committed
1030 1031
    (o1, o2)
  }
1032

Michael Bebenita's avatar
Michael Bebenita committed
1033 1034
  fn do_smooth_pred(ra: &mut ChaChaRng) -> (Vec<u16>, Vec<u16>) {
    let (above, left, mut o1, mut o2) = setup_pred(ra);
Raphael Zumer's avatar
Raphael Zumer committed
1035

Michael Bebenita's avatar
Michael Bebenita committed
1036
    pred_smooth_4x4(&mut o1, 32, &above[..4], &left[..4]);
rzumer's avatar
rzumer committed
1037
    Block4x4::pred_smooth(&mut o2, 32, &above[..4], &left[..4]);
Raphael Zumer's avatar
Raphael Zumer committed
1038

Michael Bebenita's avatar
Michael Bebenita committed
1039 1040
    (o1, o2)
  }
Raphael Zumer's avatar
Raphael Zumer committed
1041

Michael Bebenita's avatar
Michael Bebenita committed
1042 1043
  fn do_smooth_h_pred(ra: &mut ChaChaRng) -> (Vec<u16>, Vec<u16>) {
    let (above, left, mut o1, mut o2) = setup_pred(ra);
1044

Michael Bebenita's avatar
Michael Bebenita committed
1045
    pred_smooth_h_4x4(&mut o1, 32, &above[..4], &left[..4]);
rzumer's avatar
rzumer committed
1046
    Block4x4::pred_smooth_h(&mut o2, 32, &above[..4], &left[..4]);
1047

Michael Bebenita's avatar
Michael Bebenita committed
1048 1049
    (o1, o2)
  }
1050

Michael Bebenita's avatar
Michael Bebenita committed
1051 1052
  fn do_smooth_v_pred(ra: &mut ChaChaRng) -> (Vec<u16>, Vec<u16>) {
    let (above, left, mut o1, mut o2) = setup_pred(ra);
1053

Michael Bebenita's avatar
Michael Bebenita committed
1054
    pred_smooth_v_4x4(&mut o1, 32, &above[..4], &left[..4]);
rzumer's avatar
rzumer committed
1055
    Block4x4::pred_smooth_v(&mut o2, 32, &above[..4], &left[..4]);
1056

Michael Bebenita's avatar
Michael Bebenita committed
1057 1058
    (o1, o2)
  }
1059

1060
  fn setup_cfl_pred(
1061
    ra: &mut ChaChaRng, bit_depth: usize
1062 1063 1064
  ) -> (Vec<u16>, Vec<u16>, Vec<i16>, i16, Vec<u16>, Vec<u16>) {
    let o1 = vec![0u16; 32 * 32];
    let o2 = vec![0u16; 32 * 32];
1065
    let max: u16 = (1 << bit_depth) - 1;
1066 1067 1068 1069
    let above: Vec<u16> =
      (0..32).map(|_| ra.gen()).map(|v: u16| v & max).collect();
    let left: Vec<u16> =
      (0..32).map(|_| ra.gen()).map(|v: u16| v & max).collect();
1070
    let luma_max: i16 = (1 << (bit_depth + 3)) - 1;
1071 1072 1073 1074
    let ac: Vec<i16> = (0..(32 * 32))
      .map(|_| ra.gen())
      .map(|v: i16| (v & luma_max) - (luma_max >> 1))
      .collect();
1075 1076 1077 1078 1079 1080
    let alpha = -1 as i16;

    (above, left, ac, alpha, o1, o2)
  }

  fn do_cfl_pred(ra: &mut ChaChaRng) -> (Vec<u16>, Vec<u16>) {
1081
    let (above, left, ac, alpha, mut o1, mut o2) = setup_cfl_pred(ra, 8);
1082 1083 1084

    pred_dc_4x4(&mut o1, 32, &above[..4], &left[..4]);
    pred_cfl_4x4(&mut o1, 32, &ac, alpha, 8);
1085 1086

    Block4x4::pred_cfl(&mut o2, 32, &ac, alpha, 8, &above[..4], &left[..4]);
1087 1088 1089 1090

    (o1, o2)
  }

Michael Bebenita's avatar
Michael Bebenita committed
1091 1092 1093 1094 1095
  /// Asserts that, in each of the first four rows of a buffer with rows of
  /// 32 samples, the first four samples are all equal to each other.
  ///
  /// Panics on the first pair of neighbouring samples that differ, or if
  /// one of the inspected rows holds fewer than four samples.
  fn assert_same(o2: Vec<u16>) {
    for row in o2.chunks(32).take(4) {
      let head = &row[..4];
      for (current, next) in head.iter().zip(&head[1..]) {
        assert_eq!(*current, *next);
      }
    }
  }
1098

Michael Bebenita's avatar
Michael Bebenita committed
1099 1100
  /// Checks, over `MAX_ITER` random trials, that every Rust 4x4 predictor
  /// produces exactly the same output as its external reference.
  #[test]
  fn pred_matches() {
    // Comparison drivers, listed in the order in which they consume random
    // numbers within one trial.
    let comparisons: [fn(&mut ChaChaRng) -> (Vec<u16>, Vec<u16>); 10] = [
      do_dc_pred,
      do_dc_left_pred,
      do_dc_top_pred,
      do_h_pred,
      do_v_pred,
      do_paeth_pred,
      do_smooth_pred,
      do_smooth_h_pred,
      do_smooth_v_pred,
      do_cfl_pred
    ];

    let mut ra = ChaChaRng::from_seed([0; 32]);
    for _ in 0..MAX_ITER {
      for compare in comparisons.iter() {
        let (o1, o2) = compare(&mut ra);
        assert_eq!(o1, o2);
      }
    }
  }
Luca Barbato's avatar
Luca Barbato committed
1134

David Michael Barr's avatar
David Michael Barr committed
1135 1136
  /// Checks the 8-bit 4x4 predictors against fixed expected outputs.
  ///
  /// The edge buffer holds `(i + 32).saturating_sub(MAX_TX_SIZE)` at index
  /// `i`; the four entries before index `MAX_TX_SIZE` form the left edge,
  /// the entry at `MAX_TX_SIZE` is the top-left sample and the four entries
  /// after it form the above edge.
  #[test]
  fn pred_matches_u8() {
    let mut edge_buf: AlignedArray<[u8; 2 * MAX_TX_SIZE + 1]> =
      UninitializedAlignedArray();
    for (i, sample) in edge_buf.array.iter_mut().enumerate() {
      *sample = (i + 32).saturating_sub(MAX_TX_SIZE).as_();
    }
    let top_left = edge_buf.array[MAX_TX_SIZE];
    let left = &edge_buf.array[MAX_TX_SIZE - 4..MAX_TX_SIZE];
    let above = &edge_buf.array[MAX_TX_SIZE + 1..MAX_TX_SIZE + 5];

    let stride = 4;
    let mut pred = vec![0u8; 4 * 4];

    // Flat (DC) predictors.
    Block4x4::pred_dc(&mut pred, stride, above, left);
    assert_eq!(pred, [32u8; 16]);

    Block4x4::pred_dc_top(&mut pred, stride, above, left);
    assert_eq!(pred, [35u8; 16]);

    Block4x4::pred_dc_left(&mut pred, stride, above, left);
    assert_eq!(pred, [30u8; 16]);

    Block4x4::pred_dc_128(&mut pred, stride, 8);
    assert_eq!(pred, [128u8; 16]);

    // Edge-copying predictors.
    Block4x4::pred_v(&mut pred, stride, above);
    assert_eq!(
      pred,
      [33, 34, 35, 36, 33, 34, 35, 36, 33, 34, 35, 36, 33, 34, 35, 36]
    );

    Block4x4::pred_h(&mut pred, stride, left);
    assert_eq!(
      pred,
      [31, 31, 31, 31, 30, 30, 30, 30, 29, 29, 29, 29, 28, 28, 28, 28]
    );

    // Paeth and smooth predictors.
    Block4x4::pred_paeth(&mut pred, stride, above, left, top_left);
    assert_eq!(
      pred,
      [32, 34, 35, 36, 30, 32, 32, 36, 29, 32, 32, 32, 28, 28, 32, 32]
    );

    Block4x4::pred_smooth(&mut pred, stride, above, left);
    assert_eq!(
      pred,
      [32, 34, 35, 35, 30, 32, 33, 34, 29, 31, 32, 32, 29, 30, 32, 32]
    );

    Block4x4::pred_smooth_h(&mut pred, stride, above, left);
    assert_eq!(
      pred,
      [31, 33, 34, 35, 30, 33, 34, 35, 29, 32, 34, 34, 28, 31, 33, 34]
    );

    Block4x4::pred_smooth_v(&mut pred, stride, above, left);
    assert_eq!(
      pred,
      [33, 34, 35, 36, 31, 31, 32, 33, 30, 30, 30, 31, 29, 30, 30, 30]
    );
  }

Michael Bebenita's avatar
Michael Bebenita committed
1198 1199
  #[test]
  fn pred_same() {
Thomas Daede's avatar
Thomas Daede committed
1200
    let mut ra