me.rs 33.6 KB
Newer Older
1 2 3 4 5 6 7 8 9
// Copyright (c) 2017-2018, The rav1e contributors. All rights reserved
//
// This source code is subject to the terms of the BSD 2 Clause License and
// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
// was not distributed with this source code in the LICENSE file, you can
// obtain it at www.aomedia.org/license/software. If the Alliance for Open
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.

10
#[cfg(all(target_arch = "x86_64", feature = "nasm"))]
Raphaël Zumer's avatar
Raphaël Zumer committed
11
pub use self::nasm::get_sad;
12
#[cfg(any(not(target_arch = "x86_64"), not(feature = "nasm")))]
Raphaël Zumer's avatar
Raphaël Zumer committed
13
pub use self::native::get_sad;
Raphaël Zumer's avatar
Raphaël Zumer committed
14
use crate::context::{BlockOffset, BLOCK_TO_PLANE_SHIFT, MI_SIZE};
15
use crate::encoder::ReferenceFrame;
Raphaël Zumer's avatar
Raphaël Zumer committed
16 17 18
use crate::FrameInvariants;
use crate::FrameState;
use crate::partition::*;
Thomas Daede's avatar
Thomas Daede committed
19
use crate::partition::RefType::*;
Raphaël Zumer's avatar
Raphaël Zumer committed
20
use crate::plane::*;
21
use crate::util::Pixel;
22

Romain Vimont's avatar
Romain Vimont committed
23
use std::ops::{Index, IndexMut};
24
use std::sync::Arc;
25

26
#[cfg(all(target_arch = "x86_64", feature = "nasm"))]
mod nasm {
  //! Sum-of-absolute-differences (SAD) kernels backed by external assembly
  //! routines, with run-time CPU feature dispatch in `get_sad`.

  use crate::plane::*;
  use crate::util::*;
  use std::mem;

  use libc;

  /// Declares one `extern` SAD routine per `(name, pixel type)` pair.
  ///
  /// Every routine takes a source pointer and stride followed by a
  /// destination pointer and stride, and returns the SAD as a `u32`.
  macro_rules! declare_asm_sad {
    ($(($name: ident, $T: ident)),+) => (
      $(
        extern { fn $name (
          src: *const $T, src_stride: libc::ptrdiff_t, dst: *const $T,
          dst_stride: libc::ptrdiff_t
        ) -> u32; }
      )+
    )
  }

  // High bit depth (u16) SSSE3 kernels, then 8-bit SSE2 kernels, then 8-bit
  // AVX2 kernels (which only exist for 16x16 and larger).
  declare_asm_sad![
    (rav1e_sad_4x4_hbd_ssse3, u16),
    (rav1e_sad_8x8_hbd10_ssse3, u16),
    (rav1e_sad_16x16_hbd_ssse3, u16),
    (rav1e_sad_32x32_hbd10_ssse3, u16),
    (rav1e_sad_64x64_hbd10_ssse3, u16),
    (rav1e_sad_128x128_hbd10_ssse3, u16),

    (rav1e_sad4x4_sse2, u8),
    (rav1e_sad8x8_sse2, u8),
    (rav1e_sad16x16_sse2, u8),
    (rav1e_sad32x32_sse2, u8),
    (rav1e_sad64x64_sse2, u8),
    (rav1e_sad128x128_sse2, u8),

    (rav1e_sad16x16_avx2, u8),
    (rav1e_sad32x32_avx2, u8),
    (rav1e_sad64x64_avx2, u8),
    (rav1e_sad128x128_avx2, u8)
  ];

  /// SAD of a `blk_w` x `blk_h` block of `u16` pixels, computed by tiling the
  /// block with square assembly kernels.
  ///
  /// The tile edge is the smaller block dimension, capped at 4 when
  /// `bit_depth` is above 10 (otherwise capped at 128).
  ///
  /// # Panics
  /// Panics if either block dimension is smaller than 4.
  ///
  /// # Safety
  /// The CPU must support SSSE3, and both slices must cover at least
  /// `blk_w` x `blk_h` readable pixels.
  #[target_feature(enable = "ssse3")]
  unsafe fn sad_hbd_ssse3(
    plane_org: &PlaneSlice<'_, u16>, plane_ref: &PlaneSlice<'_, u16>, blk_h: usize,
    blk_w: usize, bit_depth: usize
  ) -> u32 {
    let mut sum = 0 as u32;
    // The stride is doubled for u16 data — presumably the assembly expects
    // strides in bytes; confirm against the asm sources.
    let org_stride = (plane_org.plane.cfg.stride * 2) as libc::ptrdiff_t;
    let ref_stride = (plane_ref.plane.cfg.stride * 2) as libc::ptrdiff_t;
    assert!(blk_h >= 4 && blk_w >= 4);
    let step_size =
      blk_h.min(blk_w).min(if bit_depth <= 10 { 128 } else { 4 });
    // The arms map ilog() == 3 to the 4x4 kernel, 4 to 8x8, and so on.
    let func = match step_size.ilog() {
      3 => rav1e_sad_4x4_hbd_ssse3,
      4 => rav1e_sad_8x8_hbd10_ssse3,
      5 => rav1e_sad_16x16_hbd_ssse3,
      6 => rav1e_sad_32x32_hbd10_ssse3,
      7 => rav1e_sad_64x64_hbd10_ssse3,
      8 => rav1e_sad_128x128_hbd10_ssse3,
      _ => rav1e_sad_128x128_hbd10_ssse3
    };
    for r in (0..blk_h).step_by(step_size) {
      for c in (0..blk_w).step_by(step_size) {
        let org_slice = plane_org.subslice(c, r);
        let ref_slice = plane_ref.subslice(c, r);
        let org_ptr = org_slice.as_ptr() as *const u16;
        let ref_ptr = ref_slice.as_ptr() as *const u16;
        sum += func(org_ptr, org_stride, ref_ptr, ref_stride);
      }
    }
    sum
  }

  /// SAD of a `blk_w` x `blk_h` block of `u8` pixels using the SSE2 kernels.
  ///
  /// The tile edge is limited by the alignment of the source pointer, with a
  /// floor of 8 because the 8x8 kernel is the largest one that tolerates
  /// unaligned input.
  ///
  /// # Safety
  /// The CPU must support SSE2, and both slices must cover at least
  /// `blk_w` x `blk_h` readable pixels.
  #[target_feature(enable = "sse2")]
  unsafe fn sad_sse2(
    plane_org: &PlaneSlice<'_, u8>, plane_ref: &PlaneSlice<'_, u8>, blk_h: usize,
    blk_w: usize
  ) -> u32 {
    let org_ptr = plane_org.as_ptr();
    let ref_ptr = plane_ref.as_ptr();
    let org_stride = plane_org.plane.cfg.stride as libc::ptrdiff_t;
    let ref_stride = plane_ref.plane.cfg.stride as libc::ptrdiff_t;
    // Fast path: a 16-byte-aligned 16x16 block is a single kernel call.
    if blk_w == 16 && blk_h == 16 && (org_ptr as usize & 15) == 0 {
      return rav1e_sad16x16_sse2(org_ptr, org_stride, ref_ptr, ref_stride);
    }
    // Note: unaligned blocks come from hres/qres ME search
    let ptr_align_log2 = (org_ptr as usize).trailing_zeros() as usize;
    // The largest unaligned-safe function is for 8x8
    let ptr_align = 1 << ptr_align_log2.max(3);
    let step_size = blk_h.min(blk_w).min(ptr_align);
    let func = match step_size.ilog() {
      3 => rav1e_sad4x4_sse2,
      4 => rav1e_sad8x8_sse2,
      5 => rav1e_sad16x16_sse2,
      6 => rav1e_sad32x32_sse2,
      7 => rav1e_sad64x64_sse2,
      8 => rav1e_sad128x128_sse2,
      _ => rav1e_sad128x128_sse2
    };
    let mut sum = 0 as u32;
    for r in (0..blk_h as isize).step_by(step_size) {
      for c in (0..blk_w as isize).step_by(step_size) {
        let org_ptr = org_ptr.offset(r * org_stride + c);
        let ref_ptr = ref_ptr.offset(r * ref_stride + c);
        sum += func(org_ptr, org_stride, ref_ptr, ref_stride);
      }
    }
    sum
  }

  /// SAD of a `blk_w` x `blk_h` block of `u8` pixels using the AVX2 kernels
  /// for tiles of 16x16 and larger, and the SSE2 kernels for 4x4 and 8x8.
  ///
  /// Square blocks are handled with a single kernel call; other shapes are
  /// tiled with squares whose edge is the smaller block dimension.
  ///
  /// # Safety
  /// The CPU must support AVX2, and both slices must cover at least
  /// `blk_w` x `blk_h` readable pixels.
  #[target_feature(enable = "avx2")]
  unsafe fn sad_avx2(
    plane_org: &PlaneSlice<'_, u8>, plane_ref: &PlaneSlice<'_, u8>, blk_h: usize,
    blk_w: usize
  ) -> u32 {
    let org_ptr = plane_org.as_ptr();
    let ref_ptr = plane_ref.as_ptr();
    let org_stride = plane_org.plane.cfg.stride as libc::ptrdiff_t;
    let ref_stride = plane_ref.plane.cfg.stride as libc::ptrdiff_t;
    let step_size = blk_h.min(blk_w);
    let func = match step_size.ilog() {
      3 => rav1e_sad4x4_sse2,
      4 => rav1e_sad8x8_sse2,
      5 => rav1e_sad16x16_avx2,
      6 => rav1e_sad32x32_avx2,
      7 => rav1e_sad64x64_avx2,
      8 => rav1e_sad128x128_avx2,
      _ => rav1e_sad128x128_avx2
    };
    if blk_w == blk_h {
      return func(org_ptr, org_stride, ref_ptr, ref_stride);
    }
    let mut sum = 0 as u32;
    for r in (0..blk_h as isize).step_by(step_size) {
      for c in (0..blk_w as isize).step_by(step_size) {
        let org_ptr = org_ptr.offset(r * org_stride + c);
        let ref_ptr = ref_ptr.offset(r * ref_stride + c);
        sum += func(org_ptr, org_stride, ref_ptr, ref_stride);
      }
    }
    sum
  }

  /// Computes the SAD between two `blk_w` x `blk_h` blocks, picking the
  /// fastest kernel the running CPU supports.
  ///
  /// Dispatch order: SSSE3 for 2-byte pixels, then AVX2 and SSE2 for 1-byte
  /// pixels. Blocks with a dimension below 4, or CPUs without the required
  /// features, fall back to `super::native::get_sad`.
  #[inline(always)]
  pub fn get_sad<T: Pixel>(
    plane_org: &PlaneSlice<'_, T>, plane_ref: &PlaneSlice<'_, T>, blk_h: usize,
    blk_w: usize, bit_depth: usize
  ) -> u32 {
    if mem::size_of::<T>() == 2 && is_x86_feature_detected!("ssse3") && blk_h >= 4 && blk_w >= 4 {
      // SAFETY: SSSE3 was detected just above. The reinterpretation assumes a
      // 2-byte `Pixel` is always `u16` — TODO confirm against the `Pixel` impls.
      return unsafe {
        let plane_org = &*(plane_org as *const _ as *const PlaneSlice<'_, u16>);
        let plane_ref = &*(plane_ref as *const _ as *const PlaneSlice<'_, u16>);
        sad_hbd_ssse3(plane_org, plane_ref, blk_h, blk_w, bit_depth)
      };
    }
    // The detected feature must match the `target_feature` of the callee:
    // `sad_avx2` is compiled for AVX2, so testing for plain "avx" would let
    // AVX-only CPUs execute AVX2 instructions.
    if mem::size_of::<T>() == 1 && is_x86_feature_detected!("avx2") && blk_h >= 4 && blk_w >= 4 {
      // SAFETY: AVX2 was detected just above. The reinterpretation assumes a
      // 1-byte `Pixel` is always `u8` — TODO confirm against the `Pixel` impls.
      return unsafe {
        let plane_org = &*(plane_org as *const _ as *const PlaneSlice<'_, u8>);
        let plane_ref = &*(plane_ref as *const _ as *const PlaneSlice<'_, u8>);
        sad_avx2(plane_org, plane_ref, blk_h, blk_w)
      };
    }
    if mem::size_of::<T>() == 1 && is_x86_feature_detected!("sse2") && blk_h >= 4 && blk_w >= 4 {
      // SAFETY: SSE2 was detected just above; same 1-byte == `u8` assumption.
      return unsafe {
        let plane_org = &*(plane_org as *const _ as *const PlaneSlice<'_, u8>);
        let plane_ref = &*(plane_ref as *const _ as *const PlaneSlice<'_, u8>);
        sad_sse2(plane_org, plane_ref, blk_h, blk_w)
      };
    }
    super::native::get_sad(plane_org, plane_ref, blk_h, blk_w, bit_depth)
  }
}

mod native {
Raphaël Zumer's avatar
Raphaël Zumer committed
205
  use crate::plane::*;
206
  use crate::util::*;
207

Luca Barbato's avatar
Luca Barbato committed
208
  #[inline(always)]
209 210
  pub fn get_sad<T: Pixel>(
    plane_org: &PlaneSlice<'_, T>, plane_ref: &PlaneSlice<'_, T>, blk_h: usize,
Luca Barbato's avatar
Luca Barbato committed
211 212 213
    blk_w: usize, _bit_depth: usize
  ) -> u32 {
    let mut sum = 0 as u32;
214

Luca Barbato's avatar
Luca Barbato committed
215 216
    let org_iter = plane_org.iter_width(blk_w);
    let ref_iter = plane_ref.iter_width(blk_w);
217

Luca Barbato's avatar
Luca Barbato committed
218
    for (slice_org, slice_ref) in org_iter.take(blk_h).zip(ref_iter) {
219 220 221
      sum += slice_org
        .iter()
        .zip(slice_ref)
222
        .map(|(&a, &b)| (i32::cast_from(a) - i32::cast_from(b)).abs() as u32)
223
        .sum::<u32>();
Luca Barbato's avatar
Luca Barbato committed
224
    }
225

Luca Barbato's avatar
Luca Barbato committed
226 227
    sum
  }
228 229
}

Romain Vimont's avatar
Romain Vimont committed
230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261
/// Table of motion vectors for one frame, kept in a single flat boxed slice
/// of `cols * rows` entries. Indexing with a row number yields that row as a
/// slice of `cols` motion vectors.
#[derive(Debug, Clone)]
pub struct FrameMotionVectors {
  // Flat row-major storage: row `r` occupies `mvs[r * cols..(r + 1) * cols]`.
  mvs: Box<[MotionVector]>,
  /// Number of motion vectors in each row.
  pub cols: usize,
  /// Number of rows.
  pub rows: usize,
}

impl FrameMotionVectors {
  /// Creates a `cols` x `rows` table with every entry set to
  /// `MotionVector::default()`.
  pub fn new(cols: usize, rows: usize) -> Self {
    let mvs = vec![MotionVector::default(); cols * rows].into_boxed_slice();
    Self { mvs, cols, rows }
  }
}

impl Index<usize> for FrameMotionVectors {
  type Output = [MotionVector];

  /// Returns row `index` as a slice of `cols` motion vectors.
  #[inline]
  fn index(&self, index: usize) -> &Self::Output {
    let row_len = self.cols;
    let row = index * row_len..(index + 1) * row_len;
    &self.mvs[row]
  }
}

impl IndexMut<usize> for FrameMotionVectors {
  /// Returns row `index` as a mutable slice of `cols` motion vectors.
  #[inline]
  fn index_mut(&mut self, index: usize) -> &mut Self::Output {
    let row_len = self.cols;
    let row = index * row_len..(index + 1) * row_len;
    &mut self.mvs[row]
  }
}

262
fn get_mv_range(
Romain Vimont's avatar
Romain Vimont committed
263
  w_in_b: usize, h_in_b: usize, bo: BlockOffset, blk_w: usize, blk_h: usize
264
) -> (isize, isize, isize, isize) {
265 266 267
  let border_w = 128 + blk_w as isize * 8;
  let border_h = 128 + blk_h as isize * 8;
  let mvx_min = -(bo.x as isize) * (8 * MI_SIZE) as isize - border_w;
268
  let mvx_max = (w_in_b - bo.x - blk_w / MI_SIZE) as isize * (8 * MI_SIZE) as isize + border_w;
269
  let mvy_min = -(bo.y as isize) * (8 * MI_SIZE) as isize - border_h;
270
  let mvy_max = (h_in_b - bo.y - blk_h / MI_SIZE) as isize * (8 * MI_SIZE) as isize + border_h;
271 272 273 274

  (mvx_min, mvx_max, mvy_min, mvy_max)
}

275
pub fn get_subset_predictors<T: Pixel>(
Romain Vimont's avatar
Romain Vimont committed
276
  bo: BlockOffset, cmv: MotionVector,
277
  w_in_b: usize, h_in_b: usize,
278
  frame_mvs: &FrameMotionVectors, frame_ref_opt: Option<&ReferenceFrame<T>>,
279
  ref_frame_id: usize
280 281 282
) -> (Vec<MotionVector>) {
  let mut predictors = Vec::new();

283 284 285 286 287 288
  // Zero motion vector
  predictors.push(MotionVector::default());

  // Coarse motion estimation.
  predictors.push(cmv.quantize_to_fullpel());

289 290
  // EPZS subset A and B predictors.

291
  let mut median_preds = Vec::new();
292
  if bo.x > 0 {
Romain Vimont's avatar
Romain Vimont committed
293
    let left = frame_mvs[bo.y][bo.x - 1];
294 295
    median_preds.push(left);
    if !left.is_zero() { predictors.push(left); }
296 297
  }
  if bo.y > 0 {
Romain Vimont's avatar
Romain Vimont committed
298
    let top = frame_mvs[bo.y - 1][bo.x];
299 300
    median_preds.push(top);
    if !top.is_zero() { predictors.push(top); }
301

302
    if bo.x < w_in_b - 1 {
Romain Vimont's avatar
Romain Vimont committed
303
      let top_right = frame_mvs[bo.y - 1][bo.x + 1];
304 305
      median_preds.push(top_right);
      if !top_right.is_zero() { predictors.push(top_right); }
306 307 308
    }
  }

309
  if !median_preds.is_empty() {
Vladimir Kazakov's avatar
Vladimir Kazakov committed
310
    let mut median_mv = MotionVector::default();
311
    for mv in median_preds.iter() {
312 313
      median_mv = median_mv + *mv;
    }
314 315 316
    median_mv = median_mv / (median_preds.len() as i16);
    let median_mv_quant = median_mv.quantize_to_fullpel();
    if !median_mv_quant.is_zero() { predictors.push(median_mv_quant); }
317 318 319 320 321
  }

  // EPZS subset C predictors.

  if let Some(ref frame_ref) = frame_ref_opt {
322
    let prev_frame_mvs = &frame_ref.frame_mvs[ref_frame_id];
323 324

    if bo.x > 0 {
Romain Vimont's avatar
Romain Vimont committed
325
      let left = prev_frame_mvs[bo.y][bo.x - 1];
326
      if !left.is_zero() { predictors.push(left); }
327 328
    }
    if bo.y > 0 {
Romain Vimont's avatar
Romain Vimont committed
329
      let top = prev_frame_mvs[bo.y - 1][bo.x];
330
      if !top.is_zero() { predictors.push(top); }
331
    }
332
    if bo.x < w_in_b - 1 {
Romain Vimont's avatar
Romain Vimont committed
333
      let right = prev_frame_mvs[bo.y][bo.x + 1];
334
      if !right.is_zero() { predictors.push(right); }
335
    }
336
    if bo.y < h_in_b - 1 {
Romain Vimont's avatar
Romain Vimont committed
337
      let bottom = prev_frame_mvs[bo.y + 1][bo.x];
338
      if !bottom.is_zero() { predictors.push(bottom); }
339 340
    }

341 342
    let previous = prev_frame_mvs[bo.y][bo.x];
    if !previous.is_zero() { predictors.push(previous); }
343 344 345 346 347
  }

  predictors
}

348
pub trait MotionEstimation {
349
  fn full_pixel_me<T: Pixel>(
350
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &ReferenceFrame<T>,
Romain Vimont's avatar
Romain Vimont committed
351
    bo: BlockOffset, lambda: u32,
352
    cmv: MotionVector, pmv: [MotionVector; 2],
353 354
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize, best_mv: &mut MotionVector,
Thomas Daede's avatar
Thomas Daede committed
355
    lowest_cost: &mut u64, ref_frame: RefType
356
  );
357

358
  fn sub_pixel_me<T: Pixel>(
359
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &ReferenceFrame<T>,
Romain Vimont's avatar
Romain Vimont committed
360
    bo: BlockOffset, lambda: u32, pmv: [MotionVector; 2],
361 362
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize, best_mv: &mut MotionVector,
Thomas Daede's avatar
Thomas Daede committed
363
    lowest_cost: &mut u64, ref_frame: RefType
364 365
  );

366 367
  fn motion_estimation<T: Pixel> (
    fi: &FrameInvariants<T>, fs: &FrameState<T>, bsize: BlockSize,
Thomas Daede's avatar
Thomas Daede committed
368
    bo: BlockOffset, ref_frame: RefType, cmv: MotionVector,
369
    pmv: [MotionVector; 2]
370
  ) -> MotionVector {
Thomas Daede's avatar
Thomas Daede committed
371
    match fi.rec_buffer.frames[fi.ref_frames[ref_frame.to_index()] as usize]
372 373 374 375 376 377 378 379 380 381 382 383 384 385 386
    {
      Some(ref rec) => {
        let blk_w = bsize.width();
        let blk_h = bsize.height();
        let (mvx_min, mvx_max, mvy_min, mvy_max) =
          get_mv_range(fi.w_in_b, fi.h_in_b, bo, blk_w, blk_h);

        // 0.5 is a fudge factor
        let lambda = (fi.me_lambda * 256.0 * 0.5) as u32;

        // Full-pixel motion estimation

        let mut lowest_cost = std::u64::MAX;
        let mut best_mv = MotionVector::default();

387
        Self::full_pixel_me(fi, fs, rec, bo, lambda, cmv, pmv,
388 389
                           mvx_min, mvx_max, mvy_min, mvy_max, blk_w, blk_h,
                           &mut best_mv, &mut lowest_cost, ref_frame);
390

391
        Self::sub_pixel_me(fi, fs, rec, bo, lambda, pmv,
392
                           mvx_min, mvx_max, mvy_min, mvy_max, blk_w, blk_h,
393
                           &mut best_mv, &mut lowest_cost, ref_frame);
394 395

        best_mv
Frank Bossen's avatar
Frank Bossen committed
396 397
      }

398
      None => MotionVector::default()
399
    }
400
  }
401 402 403

  fn estimate_motion_ss2<T: Pixel>(
    fi: &FrameInvariants<T>, fs: &FrameState<T>, bsize: BlockSize, ref_idx: usize,
Romain Vimont's avatar
Romain Vimont committed
404
    bo: BlockOffset, pmvs: &[Option<MotionVector>; 3], ref_frame: usize
405 406 407 408 409
  ) -> Option<MotionVector> {
    if let Some(ref rec) = fi.rec_buffer.frames[ref_idx] {
      let blk_w = bsize.width();
      let blk_h = bsize.height();
      let bo_adj = adjust_bo(bo, fi, blk_w, blk_h);
Romain Vimont's avatar
Romain Vimont committed
410
      let (mvx_min, mvx_max, mvy_min, mvy_max) = get_mv_range(fi.w_in_b, fi.h_in_b, bo_adj, blk_w, blk_h);
411 412 413

      let global_mv = [MotionVector{row: 0, col: 0}; 2];
      let frame_mvs = &fs.frame_mvs[ref_frame];
414
      let frame_ref_opt = fi.rec_buffer.frames[fi.ref_frames[0] as usize].as_ref().map(Arc::as_ref);
415 416 417 418 419 420 421 422

      let mut lowest_cost = std::u64::MAX;
      let mut best_mv = MotionVector::default();

      // Divide by 4 to account for subsampling, 0.125 is a fudge factor
      let lambda = (fi.me_lambda * 256.0 / 4.0 * 0.125) as u32;

      Self::me_ss2(
423 424
        fi, fs, pmvs, bo_adj,
        frame_mvs, frame_ref_opt, rec, global_mv, lambda,
425 426 427 428 429 430 431 432 433 434 435 436 437
        mvx_min, mvx_max, mvy_min, mvy_max, blk_w, blk_h,
        &mut best_mv, &mut lowest_cost
      );

      Some(MotionVector { row: best_mv.row * 2, col: best_mv.col * 2 })
    } else {
      None
    }
  }

  fn me_ss2<T: Pixel>(
    fi: &FrameInvariants<T>, fs: &FrameState<T>,
    pmvs: &[Option<MotionVector>; 3], bo_adj_h: BlockOffset,
438
    frame_mvs: &FrameMotionVectors, frame_ref_opt: Option<&ReferenceFrame<T>>,
439
    rec: &ReferenceFrame<T>, global_mv: [MotionVector; 2], lambda: u32,
440 441 442 443
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize,
    best_mv: &mut MotionVector, lowest_cost: &mut u64
  );
444
}
445

446 447 448
/// Motion-estimation strategy whose `MotionEstimation` implementation runs
/// `diamond_me_search` for every stage.
pub struct DiamondSearch {}
/// Motion-estimation strategy whose `MotionEstimation` implementation uses
/// `full_search` for the full-pixel and half-resolution stages and
/// `telescopic_subpel_search` for sub-pixel refinement.
pub struct FullSearch {}

449 450
impl MotionEstimation for DiamondSearch {
  fn full_pixel_me<T: Pixel>(
451
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &ReferenceFrame<T>,
452
    bo: BlockOffset, lambda: u32,
453 454
    cmv: MotionVector, pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
455
    best_mv: &mut MotionVector, lowest_cost: &mut u64, ref_frame: RefType
456
  ) {
Thomas Daede's avatar
Thomas Daede committed
457
    let frame_mvs = &fs.frame_mvs[ref_frame.to_index()];
458
    let frame_ref = fi.rec_buffer.frames[fi.ref_frames[0] as usize].as_ref().map(Arc::as_ref);
459
    let predictors =
Thomas Daede's avatar
Thomas Daede committed
460
      get_subset_predictors(bo, cmv, fi.w_in_b, fi.h_in_b, frame_mvs, frame_ref, ref_frame.to_index());
461 462 463

    diamond_me_search(
      fi,
464
      bo.to_luma_plane_offset(),
465 466 467 468 469 470 471 472 473 474 475 476 477 478
      &fs.input.planes[0],
      &rec.frame.planes[0],
      &predictors,
      fi.sequence.bit_depth,
      pmv,
      lambda,
      mvx_min,
      mvx_max,
      mvy_min,
      mvy_max,
      blk_w,
      blk_h,
      best_mv,
      lowest_cost,
479
      false,
480 481 482
      ref_frame
    );
  }
483 484

  fn sub_pixel_me<T: Pixel>(
485
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &ReferenceFrame<T>,
486
    bo: BlockOffset, lambda: u32,
487 488
    pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
489
    best_mv: &mut MotionVector, lowest_cost: &mut u64, ref_frame: RefType,
490 491 492 493 494
  )
  {
    let predictors = vec![*best_mv];
    diamond_me_search(
      fi,
495
      bo.to_luma_plane_offset(),
496 497 498 499 500 501 502 503 504 505 506 507 508 509
      &fs.input.planes[0],
      &rec.frame.planes[0],
      &predictors,
      fi.sequence.bit_depth,
      pmv,
      lambda,
      mvx_min,
      mvx_max,
      mvy_min,
      mvy_max,
      blk_w,
      blk_h,
      best_mv,
      lowest_cost,
510
      true,
511 512 513
      ref_frame
    );
  }
514 515 516

  fn me_ss2<T: Pixel>(
    fi: &FrameInvariants<T>, fs: &FrameState<T>,
517
    pmvs: &[Option<MotionVector>; 3], bo_adj: BlockOffset,
518
    frame_mvs: &FrameMotionVectors, frame_ref_opt: Option<&ReferenceFrame<T>>,
519
    rec: &ReferenceFrame<T>, global_mv: [MotionVector; 2], lambda: u32,
520 521 522 523
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize,
    best_mv: &mut MotionVector, lowest_cost: &mut u64
  ) {
524 525 526 527
    let po = PlaneOffset {
      x: (bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
      y: (bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
    };
528 529 530
    for omv in pmvs.iter() {
      if let Some(pmv) = omv {
        let mut predictors = get_subset_predictors::<T>(
531
          bo_adj,
532 533 534 535 536 537 538 539 540 541 542
          MotionVector{row: pmv.row, col: pmv.col},
          fi.w_in_b, fi.h_in_b,
          &frame_mvs, frame_ref_opt, 0
        );

        for predictor in &mut predictors {
          predictor.row >>= 1;
          predictor.col >>= 1;
        }

        diamond_me_search(
Romain Vimont's avatar
Romain Vimont committed
543
          fi, po,
544 545 546 547 548 549
          &fs.input_hres, &rec.input_hres,
          &predictors, fi.sequence.bit_depth,
          global_mv, lambda,
          mvx_min >> 1, mvx_max >> 1, mvy_min >> 1, mvy_max >> 1,
          blk_w >> 1, blk_h >> 1,
          best_mv, lowest_cost,
Thomas Daede's avatar
Thomas Daede committed
550
          false, LAST_FRAME
551 552 553 554
        );
      }
    }
  }
555 556 557 558
}

impl MotionEstimation for FullSearch {
  fn full_pixel_me<T: Pixel>(
559
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &ReferenceFrame<T>,
560
    bo: BlockOffset, lambda: u32,
561 562
    cmv: MotionVector, pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
563
    best_mv: &mut MotionVector, lowest_cost: &mut u64, _ref_frame: RefType
564
  ) {
565
    let po = bo.to_luma_plane_offset();
566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586
    let range = 16;
    let x_lo = po.x
      + ((-range + (cmv.col / 8) as isize).max(mvx_min / 8).min(mvx_max / 8));
    let x_hi = po.x
      + ((range + (cmv.col / 8) as isize).max(mvx_min / 8).min(mvx_max / 8));
    let y_lo = po.y
      + ((-range + (cmv.row / 8) as isize).max(mvy_min / 8).min(mvy_max / 8));
    let y_hi = po.y
      + ((range + (cmv.row / 8) as isize).max(mvy_min / 8).min(mvy_max / 8));

    full_search(
      x_lo,
      x_hi,
      y_lo,
      y_hi,
      blk_h,
      blk_w,
      &fs.input.planes[0],
      &rec.frame.planes[0],
      best_mv,
      lowest_cost,
Romain Vimont's avatar
Romain Vimont committed
587
      po,
588 589 590 591 592 593 594
      2,
      fi.sequence.bit_depth,
      lambda,
      pmv,
      fi.allow_high_precision_mv
    );
  }
595 596

  fn sub_pixel_me<T: Pixel>(
597
    fi: &FrameInvariants<T>, fs: &FrameState<T>, _rec: &ReferenceFrame<T>,
598
    bo: BlockOffset, lambda: u32,
599
    pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
600
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
601
    best_mv: &mut MotionVector, lowest_cost: &mut u64, ref_frame: RefType,
602 603 604 605 606
  )
  {
    telescopic_subpel_search(
      fi,
      fs,
607
      bo.to_luma_plane_offset(),
608 609 610 611 612 613 614
      lambda,
      ref_frame,
      pmv,
      mvx_min,
      mvx_max,
      mvy_min,
      mvy_max,
615 616
      blk_w,
      blk_h,
617 618 619 620
      best_mv,
      lowest_cost
    );
  }
621 622 623

  fn me_ss2<T: Pixel>(
    fi: &FrameInvariants<T>, fs: &FrameState<T>,
624
    pmvs: &[Option<MotionVector>; 3], bo_adj: BlockOffset,
625
    _frame_mvs: &FrameMotionVectors, _frame_ref_opt: Option<&ReferenceFrame<T>>,
626
    rec: &ReferenceFrame<T>, _global_mv: [MotionVector; 2], lambda: u32,
627 628 629 630
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize,
    best_mv: &mut MotionVector, lowest_cost: &mut u64
  ) {
631 632 633 634
    let po = PlaneOffset {
      x: (bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
      y: (bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
    };
635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652
    let range = 16;
    for omv in pmvs.iter() {
      if let Some(pmv) = omv {
        let x_lo = po.x + (((pmv.col as isize / 8 - range).max(mvx_min / 8).min(mvx_max / 8)) >> 1);
        let x_hi = po.x + (((pmv.col as isize / 8 + range).max(mvx_min / 8).min(mvx_max / 8)) >> 1);
        let y_lo = po.y + (((pmv.row as isize / 8 - range).max(mvy_min / 8).min(mvy_max / 8)) >> 1);
        let y_hi = po.y + (((pmv.row as isize / 8 + range).max(mvy_min / 8).min(mvy_max / 8)) >> 1);
        full_search(
          x_lo,
          x_hi,
          y_lo,
          y_hi,
          blk_h >> 1,
          blk_w >> 1,
          &fs.input_hres,
          &rec.input_hres,
          best_mv,
          lowest_cost,
Romain Vimont's avatar
Romain Vimont committed
653
          po,
654 655 656 657 658 659 660 661 662
          1,
          fi.sequence.bit_depth,
          lambda,
          [MotionVector::default(); 2],
          fi.allow_high_precision_mv
        );
      }
    }
  }
663
}
664

665 666
fn get_best_predictor<T: Pixel>(
  fi: &FrameInvariants<T>,
Romain Vimont's avatar
Romain Vimont committed
667
  po: PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>,
668
  predictors: &[MotionVector],
669
  bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
670 671
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
672
  center_mv: &mut MotionVector, center_mv_cost: &mut u64,
Thomas Daede's avatar
Thomas Daede committed
673
  tmp_plane_opt: &mut Option<Plane<T>>, ref_frame: RefType) {
Vladimir Kazakov's avatar
Vladimir Kazakov committed
674
  *center_mv = MotionVector::default();
675 676 677 678 679 680
  *center_mv_cost = std::u64::MAX;

  for &init_mv in predictors.iter() {
    let cost = get_mv_rd_cost(
      fi, po, p_org, p_ref, bit_depth,
      pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
681
      blk_w, blk_h, init_mv, tmp_plane_opt, ref_frame);
682 683 684 685 686 687 688 689

    if cost < *center_mv_cost {
      *center_mv = init_mv;
      *center_mv_cost = cost;
    }
  }
}

690 691
fn diamond_me_search<T: Pixel>(
  fi: &FrameInvariants<T>,
Romain Vimont's avatar
Romain Vimont committed
692
  po: PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>,
693
  predictors: &[MotionVector],
694
  bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
695 696
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
697
  center_mv: &mut MotionVector, center_mv_cost: &mut u64,
Thomas Daede's avatar
Thomas Daede committed
698
  subpixel: bool, ref_frame: RefType)
699 700
{
  let diamond_pattern = [(1i16, 0i16), (0, 1), (-1, 0), (0, -1)];
701 702
  let (mut diamond_radius, diamond_radius_end, mut tmp_plane_opt) = {
    if subpixel {
703
      // Sub-pixel motion estimation
704 705 706 707 708
      (
        4i16,
        if fi.allow_high_precision_mv {1i16} else {2i16},
        Some(Plane::new(blk_w, blk_h, 0, 0, 0, 0)),
      )
709 710
    } else {
      // Full pixel motion estimation
711
      (16i16, 8i16, None)
712 713
    }
  };
714 715 716 717

  get_best_predictor(
    fi, po, p_org, p_ref, &predictors,
    bit_depth, pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
718
    blk_w, blk_h, center_mv, center_mv_cost,
719
    &mut tmp_plane_opt, ref_frame);
720 721 722

  loop {
    let mut best_diamond_rd_cost = std::u64::MAX;
Vladimir Kazakov's avatar
Vladimir Kazakov committed
723
    let mut best_diamond_mv = MotionVector::default();
724 725 726 727 728 729 730 731 732

    for p in diamond_pattern.iter() {

        let cand_mv = MotionVector {
          row: center_mv.row + diamond_radius * p.0,
          col: center_mv.col + diamond_radius * p.1
        };

        let rd_cost = get_mv_rd_cost(
Romain Vimont's avatar
Romain Vimont committed
733
          fi, po, p_org, p_ref, bit_depth,
734
          pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
735
          blk_w, blk_h, cand_mv, &mut tmp_plane_opt, ref_frame);
736 737 738 739 740 741 742 743

        if rd_cost < best_diamond_rd_cost {
          best_diamond_rd_cost = rd_cost;
          best_diamond_mv = cand_mv;
        }
    }

    if *center_mv_cost <= best_diamond_rd_cost {
744
      if diamond_radius == diamond_radius_end {
745 746 747 748 749 750 751 752 753 754 755 756 757 758
        break;
      } else {
        diamond_radius /= 2;
      }
    }
    else {
      *center_mv = best_diamond_mv;
      *center_mv_cost = best_diamond_rd_cost;
    }
  }

  assert!(*center_mv_cost < std::u64::MAX);
}

759 760
fn get_mv_rd_cost<T: Pixel>(
  fi: &FrameInvariants<T>,
Romain Vimont's avatar
Romain Vimont committed
761
  po: PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>, bit_depth: usize,
762
  pmv: [MotionVector; 2], lambda: u32,
763 764
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
765
  cand_mv: MotionVector, tmp_plane_opt: &mut Option<Plane<T>>,
Thomas Daede's avatar
Thomas Daede committed
766
  ref_frame: RefType) -> u64
767 768 769 770 771 772 773 774 775 776
{
  if (cand_mv.col as isize) < mvx_min || (cand_mv.col as isize) > mvx_max {
    return std::u64::MAX;
  }
  if (cand_mv.row as isize) < mvy_min || (cand_mv.row as isize) > mvy_max {
    return std::u64::MAX;
  }

  let plane_org = p_org.slice(po);

777
  if let Some(ref mut tmp_plane) = tmp_plane_opt {
Romain Vimont's avatar
Romain Vimont committed
778
    let mut tmp_slice = &mut tmp_plane.mut_slice(PlaneOffset { x: 0, y: 0 });
779 780 781
    PredictionMode::NEWMV.predict_inter(
      fi,
      0,
Romain Vimont's avatar
Romain Vimont committed
782
      po,
783 784 785 786 787 788
      &mut tmp_slice,
      blk_w,
      blk_h,
      [ref_frame, NONE_FRAME],
      [cand_mv, MotionVector { row: 0, col: 0 }]
    );
Romain Vimont's avatar
Romain Vimont committed
789
    let plane_ref = tmp_plane.slice(PlaneOffset { x: 0, y: 0 });
790 791 792 793 794 795
    compute_mv_rd_cost(
      fi, pmv, lambda, bit_depth, blk_w, blk_h, cand_mv,
      &plane_org, &plane_ref
    )
  } else {
    // Full pixel motion vector
Romain Vimont's avatar
Romain Vimont committed
796
    let plane_ref = p_ref.slice(PlaneOffset {
797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813
      x: po.x + (cand_mv.col / 8) as isize,
      y: po.y + (cand_mv.row / 8) as isize
    });
    compute_mv_rd_cost(
      fi, pmv, lambda, bit_depth, blk_w, blk_h, cand_mv,
      &plane_org, &plane_ref
    )
  }
}

fn compute_mv_rd_cost<T: Pixel>(
  fi: &FrameInvariants<T>,
  pmv: [MotionVector; 2], lambda: u32,
  bit_depth: usize, blk_w: usize, blk_h: usize, cand_mv: MotionVector,
  plane_org: &PlaneSlice<T>, plane_ref: &PlaneSlice<T>
) -> u64
{
814 815 816 817 818 819 820 821 822
  let sad = get_sad(&plane_org, &plane_ref, blk_h, blk_w, bit_depth);

  let rate1 = get_mv_rate(cand_mv, pmv[0], fi.allow_high_precision_mv);
  let rate2 = get_mv_rate(cand_mv, pmv[1], fi.allow_high_precision_mv);
  let rate = rate1.min(rate2 + 1);

  256 * sad as u64 + rate as u64 * lambda as u64
}

823
fn telescopic_subpel_search<T: Pixel>(
824
  fi: &FrameInvariants<T>, fs: &FrameState<T>, po: PlaneOffset,
Thomas Daede's avatar
Thomas Daede committed
825
  lambda: u32, ref_frame: RefType, pmv: [MotionVector; 2],
826
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
827
  blk_w: usize, blk_h: usize,
Romain Vimont's avatar
Romain Vimont committed
828
  best_mv: &mut MotionVector, lowest_cost: &mut u64
829 830 831 832 833 834 835 836
) {
  let mode = PredictionMode::NEWMV;

  let mut steps = vec![8, 4, 2];
  if fi.allow_high_precision_mv {
    steps.push(1);
  }

Romain Vimont's avatar
Romain Vimont committed
837 838
  let mut tmp_plane = Plane::new(blk_w, blk_h, 0, 0, 0, 0);

839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861
  for step in steps {
    let center_mv_h = *best_mv;
    for i in 0..3 {
      for j in 0..3 {
        // Skip the center point that was already tested
        if i == 1 && j == 1 {
          continue;
        }

        let cand_mv = MotionVector {
          row: center_mv_h.row + step * (i as i16 - 1),
          col: center_mv_h.col + step * (j as i16 - 1)
        };

        if (cand_mv.col as isize) < mvx_min || (cand_mv.col as isize) > mvx_max {
          continue;
        }
        if (cand_mv.row as isize) < mvy_min || (cand_mv.row as isize) > mvy_max {
          continue;
        }

        {
          let tmp_slice =
Romain Vimont's avatar
Romain Vimont committed
862
            &mut tmp_plane.mut_slice(PlaneOffset { x: 0, y: 0 });
863 864 865 866

          mode.predict_inter(
            fi,
            0,
Romain Vimont's avatar
Romain Vimont committed
867
            po,
868 869 870 871 872 873 874 875
            tmp_slice,
            blk_w,
            blk_h,
            [ref_frame, NONE_FRAME],
            [cand_mv, MotionVector { row: 0, col: 0 }]
          );
        }

Romain Vimont's avatar
Romain Vimont committed
876 877
        let plane_org = fs.input.planes[0].slice(po);
        let plane_ref = tmp_plane.slice(PlaneOffset { x: 0, y: 0 });
878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894

        let sad = get_sad(&plane_org, &plane_ref, blk_h, blk_w, fi.sequence.bit_depth);

        let rate1 = get_mv_rate(cand_mv, pmv[0], fi.allow_high_precision_mv);
        let rate2 = get_mv_rate(cand_mv, pmv[1], fi.allow_high_precision_mv);
        let rate = rate1.min(rate2 + 1);
        let cost = 256 * sad as u64 + rate as u64 * lambda as u64;

        if cost < *lowest_cost {
          *lowest_cost = cost;
          *best_mv = cand_mv;
        }
      }
    }
  }
}

895
fn full_search<T: Pixel>(
Kyle Siefring's avatar
Kyle Siefring committed
896
  x_lo: isize, x_hi: isize, y_lo: isize, y_hi: isize, blk_h: usize,
897
  blk_w: usize, p_org: &Plane<T>, p_ref: &Plane<T>, best_mv: &mut MotionVector,
Romain Vimont's avatar
Romain Vimont committed
898
  lowest_cost: &mut u64, po: PlaneOffset, step: usize, bit_depth: usize,
899
  lambda: u32, pmv: [MotionVector; 2], allow_high_precision_mv: bool
Kyle Siefring's avatar
Kyle Siefring committed
900
) {
Luca Barbato's avatar
Luca Barbato committed
901 902 903 904 905
    let search_range_y = (y_lo..=y_hi).step_by(step);
    let search_range_x = (x_lo..=x_hi).step_by(step);
    let search_area = search_range_y.flat_map(|y| { search_range_x.clone().map(move |x| (y, x)) });

    let (cost, mv) = search_area.map(|(y, x)| {
906
      let plane_org = p_org.slice(po);
Romain Vimont's avatar
Romain Vimont committed
907
      let plane_ref = p_ref.slice(PlaneOffset { x, y });
908

Kyle Siefring's avatar
Kyle Siefring committed
909
      let sad = get_sad(&plane_org, &plane_ref, blk_h, blk_w, bit_depth);
910

Frank Bossen's avatar
Frank Bossen committed
911 912 913 914 915 916 917 918
      let mv = MotionVector {
        row: 8 * (y as i16 - po.y as i16),
        col: 8 * (x as i16 - po.x as i16)
      };

      let rate1 = get_mv_rate(mv, pmv[0], allow_high_precision_mv);
      let rate2 = get_mv_rate(mv, pmv[1], allow_high_precision_mv);
      let rate = rate1.min(rate2 + 1);
919
      let cost = 256 * sad as u64 + rate as u64 * lambda as u64;
Frank Bossen's avatar
Frank Bossen committed
920

Luca Barbato's avatar
Luca Barbato committed
921 922 923 924 925
      (cost, mv)
  }).min_by_key(|(c, _)| *c).unwrap();

    *lowest_cost = cost;
    *best_mv = mv;
926 927 928
}

// Adjust block offset such that entire block lies within frame boundaries
Romain Vimont's avatar
Romain Vimont committed
929
fn adjust_bo<T: Pixel>(bo: BlockOffset, fi: &FrameInvariants<T>, blk_w: usize, blk_h: usize) -> BlockOffset {
930 931 932 933 934 935
  BlockOffset {
    x: (bo.x as isize).min(fi.w_in_b as isize - blk_w as isize / 4).max(0) as usize,
    y: (bo.y as isize).min(fi.h_in_b as isize - blk_h as isize / 4).max(0) as usize
  }
}

936
#[inline(always)]
Frank Bossen's avatar
Frank Bossen committed
937
fn get_mv_rate(a: MotionVector, b: MotionVector, allow_high_precision_mv: bool) -> u32 {
938
  #[inline(always)]
Frank Bossen's avatar
Frank Bossen committed
939 940 941 942 943 944 945 946 947 948 949 950
  fn diff_to_rate(diff: i16, allow_high_precision_mv: bool) -> u32 {
    let d = if allow_high_precision_mv { diff } else { diff >> 1 };
    if d == 0 {
      0
    } else {
      2 * (16 - d.abs().leading_zeros())
    }
  }

  diff_to_rate(a.row - b.row, allow_high_precision_mv) + diff_to_rate(a.col - b.col, allow_high_precision_mv)
}

951 952
pub fn estimate_motion_ss4<T: Pixel>(
  fi: &FrameInvariants<T>, fs: &FrameState<T>, bsize: BlockSize, ref_idx: usize,
Romain Vimont's avatar
Romain Vimont committed
953
  bo: BlockOffset
954 955 956 957 958 959 960 961 962
) -> Option<MotionVector> {
  if let Some(ref rec) = fi.rec_buffer.frames[ref_idx] {
    let blk_w = bsize.width();
    let blk_h = bsize.height();
    let bo_adj = adjust_bo(bo, fi, blk_w, blk_h);
    let po = PlaneOffset {
      x: (bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 2,
      y: (bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 2
    };
963

964 965
    let range_x = 192 * fi.me_range_scale as isize;
    let range_y = 64 * fi.me_range_scale as isize;
Romain Vimont's avatar
Romain Vimont committed
966
    let (mvx_min, mvx_max, mvy_min, mvy_max) = get_mv_range(fi.w_in_b, fi.h_in_b, bo_adj, blk_w, blk_h);
967 968 969 970
    let x_lo = po.x + (((-range_x).max(mvx_min / 8)) >> 2);
    let x_hi = po.x + (((range_x).min(mvx_max / 8)) >> 2);
    let y_lo = po.y + (((-range_y).max(mvy_min / 8)) >> 2);
    let y_hi = po.y + (((range_y).min(mvy_max / 8)) >> 2);
971

972
    let mut lowest_cost = std::u64::MAX;
Vladimir Kazakov's avatar
Vladimir Kazakov committed
973
    let mut best_mv = MotionVector::default();
974

Frank Bossen's avatar
Frank Bossen committed
975
    // Divide by 16 to account for subsampling, 0.125 is a fudge factor
976
    let lambda = (fi.me_lambda * 256.0 / 16.0 * 0.125) as u32;
Frank Bossen's avatar
Frank Bossen committed
977

978
    full_search(
Kyle Siefring's avatar
Kyle Siefring committed
979 980 981 982 983 984 985 986 987
      x_lo,
      x_hi,
      y_lo,
      y_hi,
      blk_h >> 2,
      blk_w >> 2,
      &fs.input_qres,
      &rec.input_qres,
      &mut best_mv,
Frank Bossen's avatar
Frank Bossen committed
988
      &mut lowest_cost,
Romain Vimont's avatar
Romain Vimont committed
989
      po,
Kyle Siefring's avatar
Kyle Siefring committed
990
      1,
991
      fi.sequence.bit_depth,
Frank Bossen's avatar
Frank Bossen committed
992
      lambda,
Vladimir Kazakov's avatar
Vladimir Kazakov committed
993
      [MotionVector::default(); 2],
Frank Bossen's avatar
Frank Bossen committed
994
      fi.allow_high_precision_mv
995 996 997 998 999 1000 1001 1002
    );

    Some(MotionVector { row: best_mv.row * 4, col: best_mv.col * 4 })
  } else {
    None
  }
}

1003 1004 1005
#[cfg(test)]
pub mod test {
  use super::*;
Raphaël Zumer's avatar
Raphaël Zumer committed
1006 1007
  use crate::partition::BlockSize;
  use crate::partition::BlockSize::*;
1008 1009

  // Generate plane data for get_sad_same()
1010
  fn setup_sad<T: Pixel>() -> (Plane<T>, Plane<T>) {
1011 1012
    let mut input_plane = Plane::new(640, 480, 0, 0, 128 + 8, 128 + 8);
    let mut rec_plane = input_plane.clone();
1013 1014
    // Make the test pattern robust to data alignment
    let xpad_off = (input_plane.cfg.xorigin - input_plane.cfg.xpad) as i32 - 8i32;
Luca Barbato's avatar
Luca Barbato committed
1015

1016
    for (i, row) in input_plane.data.chunks_mut(input_plane.cfg.stride).enumerate() {
1017
      for (j, pixel) in row.into_iter().enumerate() {
1018
        let val = (j + i) as i32 - xpad_off & 255i32;
Luca Barbato's avatar
Luca Barbato committed
1019
        assert!(val >= u8::min_value().into() &&
1020
            val <= u8::max_value().into());
1021
        *pixel = T::cast_from(val);
1022 1023 1024 1025
      }
    }

    for (i, row) in rec_plane.data.chunks_mut(rec_plane.cfg.stride).enumerate() {
1026
      for (j, pixel) in row.into_iter().enumerate() {
1027
        let val = j as i32 - i as i32 - xpad_off & 255i32;
Luca Barbato's avatar
Luca Barbato committed
1028
        assert!(val >= u8::min_value().into() &&
1029
            val <= u8::max_value().into());
1030
        *pixel = T::cast_from(val);
1031 1032 1033 1034 1035 1036 1037
      }
    }

    (input_plane, rec_plane)
  }

  // Regression and validation test for SAD computation
1038
  fn get_sad_same_inner<T: Pixel>() {
1039
    let blocks: Vec<(BlockSize, u32)> = vec![
1040
      (BLOCK_4X4, 1912),
1041 1042
      (BLOCK_4X8, 4296),
      (BLOCK_8X4, 3496),
1043
      (BLOCK_8X8, 7824),
1044 1045
      (BLOCK_8X16, 16592),
      (BLOCK_16X8, 14416),
1046
      (BLOCK_16X16, 31136),
1047 1048
      (BLOCK_16X32, 60064),
      (BLOCK_32X16, 59552),
1049
      (BLOCK_32X32, 120128),
1050 1051
      (BLOCK_32X64, 186688),
      (BLOCK_64X32, 250176),
1052
      (BLOCK_64X64, 438912),
1053 1054
      (BLOCK_64X128, 654272),
      (BLOCK_128X64, 1016768),
1055
      (BLOCK_128X128, 1689792),
1056 1057 1058 1059 1060 1061
      (BLOCK_4X16, 8680),
      (BLOCK_16X4, 6664),
      (BLOCK_8X32, 31056),
      (BLOCK_32X8, 27600),
      (BLOCK_16X64, 93344),
      (BLOCK_64X16, 116384),
1062 1063
    ];

Kyle Siefring's avatar
Kyle Siefring committed
1064
    let bit_depth: usize = 8;
1065
    let (input_plane, rec_plane) = setup_sad::<T>();
1066 1067

    for block in blocks {
Kyle Siefring's avatar
Kyle Siefring committed
1068 1069
      let bsw = block.0.width();
      let bsh = block.0.height();
1070
      let po = PlaneOffset { x: 32, y: 40 };
1071

Romain Vimont's avatar
Romain Vimont committed
1072 1073
      let mut input_slice = input_plane.slice(po);
      let mut rec_slice = rec_plane.slice(po);
1074

</