// me.rs
// Copyright (c) 2017-2018, The rav1e contributors. All rights reserved
//
// This source code is subject to the terms of the BSD 2 Clause License and
// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
// was not distributed with this source code in the LICENSE file, you can
// obtain it at www.aomedia.org/license/software. If the Alliance for Open
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.

#[cfg(all(target_arch = "x86_64", feature = "nasm"))]
pub use self::nasm::get_sad;
#[cfg(any(not(target_arch = "x86_64"), not(feature = "nasm")))]
pub use self::native::get_sad;
use crate::context::{BlockOffset, BLOCK_TO_PLANE_SHIFT, MI_SIZE};
use crate::encoder::ReferenceFrame;
use crate::FrameInvariants;
use crate::FrameState;
use crate::partition::*;
use crate::partition::RefType::*;
use crate::plane::*;
use crate::util::Pixel;

use std::ops::{Index, IndexMut};
use std::sync::Arc;

26
#[cfg(all(target_arch = "x86_64", feature = "nasm"))]
mod nasm {
  use crate::plane::*;
  use crate::util::*;
  use std::mem;

  use libc;

  // Declares one external SAD kernel per `(symbol, pixel type)` pair. Every
  // kernel shares the same signature: two pixel pointers, each followed by
  // its stride, returning the SAD as `u32`.
  macro_rules! declare_asm_sad {
    ($(($name: ident, $T: ident)),+) => (
      $(
        extern { fn $name (
          src: *const $T, src_stride: libc::ptrdiff_t, dst: *const $T,
          dst_stride: libc::ptrdiff_t
        ) -> u32; }
      )+
    )
  }

  declare_asm_sad![
    // SSSE3
    (rav1e_sad_4x4_hbd_ssse3, u16),
    (rav1e_sad_8x8_hbd10_ssse3, u16),
    (rav1e_sad_16x16_hbd_ssse3, u16),
    (rav1e_sad_32x32_hbd10_ssse3, u16),
    (rav1e_sad_64x64_hbd10_ssse3, u16),
    (rav1e_sad_128x128_hbd10_ssse3, u16),

    // SSE2
    (rav1e_sad4x4_sse2, u8),
    (rav1e_sad4x8_sse2, u8),
    (rav1e_sad4x16_sse2, u8),

    (rav1e_sad8x4_sse2, u8),
    (rav1e_sad8x8_sse2, u8),
    (rav1e_sad8x16_sse2, u8),
    (rav1e_sad8x32_sse2, u8),

    (rav1e_sad16x16_sse2, u8),

    (rav1e_sad32x32_sse2, u8),

    (rav1e_sad64x64_sse2, u8),

    (rav1e_sad128x128_sse2, u8),

    // AVX
    (rav1e_sad16x4_avx2, u8),
    (rav1e_sad16x8_avx2, u8),
    (rav1e_sad16x16_avx2, u8),
    (rav1e_sad16x32_avx2, u8),
    (rav1e_sad16x64_avx2, u8),

    (rav1e_sad32x8_avx2, u8),
    (rav1e_sad32x16_avx2, u8),
    (rav1e_sad32x32_avx2, u8),
    (rav1e_sad32x64_avx2, u8),

    (rav1e_sad64x16_avx2, u8),
    (rav1e_sad64x32_avx2, u8),
    (rav1e_sad64x64_avx2, u8),
    (rav1e_sad64x128_avx2, u8),

    (rav1e_sad128x64_avx2, u8),
    (rav1e_sad128x128_avx2, u8)
  ];

  /// Computes the SAD of a `blk_w` x `blk_h` block of 16-bit pixels by tiling
  /// it with square SSSE3 kernels and summing the per-tile results.
  ///
  /// The tile edge is `min(blk_h, blk_w)`, capped at 4 when `bit_depth` is
  /// above 10 so that only the `*_hbd_*` 4x4 kernel is used in that case.
  ///
  /// # Panics
  ///
  /// Panics if either block dimension is smaller than 4.
  ///
  /// # Safety
  ///
  /// The CPU must support SSSE3, and both slices must have at least
  /// `blk_w` x `blk_h` readable pixels starting at their origin.
  #[target_feature(enable = "ssse3")]
  unsafe fn sad_hbd_ssse3(
    plane_org: &PlaneSlice<'_, u16>, plane_ref: &PlaneSlice<'_, u16>, blk_h: usize,
    blk_w: usize, bit_depth: usize
  ) -> u32 {
    let mut sum = 0 as u32;
    // Strides are doubled for 16-bit pixels.
    // NOTE(review): presumably the kernels take byte strides — confirm
    // against the assembly.
    let org_stride = (plane_org.plane.cfg.stride * 2) as libc::ptrdiff_t;
    let ref_stride = (plane_ref.plane.cfg.stride * 2) as libc::ptrdiff_t;
    assert!(blk_h >= 4 && blk_w >= 4);
    let step_size =
      blk_h.min(blk_w).min(if bit_depth <= 10 { 128 } else { 4 });
    // `ilog()` of the tile edge selects the kernel: 3 -> 4x4 ... 8 -> 128x128.
    let func = match step_size.ilog() {
      3 => rav1e_sad_4x4_hbd_ssse3,
      4 => rav1e_sad_8x8_hbd10_ssse3,
      5 => rav1e_sad_16x16_hbd_ssse3,
      6 => rav1e_sad_32x32_hbd10_ssse3,
      7 => rav1e_sad_64x64_hbd10_ssse3,
      8 => rav1e_sad_128x128_hbd10_ssse3,
      _ => rav1e_sad_128x128_hbd10_ssse3
    };
    for r in (0..blk_h).step_by(step_size) {
      for c in (0..blk_w).step_by(step_size) {
        let org_slice = plane_org.subslice(c, r);
        let ref_slice = plane_ref.subslice(c, r);
        sum += func(
          org_slice.as_ptr(), org_stride, ref_slice.as_ptr(), ref_stride
        );
      }
    }
    sum
  }

  /// Computes the SAD of a `blk_w` x `blk_h` block of 8-bit pixels by tiling
  /// it with square SSE2 kernels.
  ///
  /// The tile edge is limited by the alignment of the source pointer, because
  /// only kernels up to 8x8 are safe on unaligned input.
  ///
  /// # Safety
  ///
  /// The CPU must support SSE2, and both slices must have at least
  /// `blk_w` x `blk_h` readable pixels starting at their origin.
  #[target_feature(enable = "sse2")]
  unsafe fn sad_sse2(
    plane_org: &PlaneSlice<'_, u8>, plane_ref: &PlaneSlice<'_, u8>, blk_h: usize,
    blk_w: usize
  ) -> u32 {
    let org_ptr = plane_org.as_ptr();
    let ref_ptr = plane_ref.as_ptr();
    let org_stride = plane_org.plane.cfg.stride as libc::ptrdiff_t;
    let ref_stride = plane_ref.plane.cfg.stride as libc::ptrdiff_t;
    // Fast path: a 16-byte-aligned 16x16 block needs a single kernel call.
    if blk_w == 16 && blk_h == 16 && (org_ptr as usize & 15) == 0 {
      return rav1e_sad16x16_sse2(org_ptr, org_stride, ref_ptr, ref_stride);
    }
    // Note: unaligned blocks come from hres/qres ME search
    let ptr_align_log2 = (org_ptr as usize).trailing_zeros() as usize;
    // The largest unaligned-safe function is for 8x8
    let ptr_align = 1 << ptr_align_log2.max(3);
    let step_size = blk_h.min(blk_w).min(ptr_align);
    // `ilog()` of the tile edge selects the kernel: 3 -> 4x4 ... 8 -> 128x128.
    let func = match step_size.ilog() {
      3 => rav1e_sad4x4_sse2,
      4 => rav1e_sad8x8_sse2,
      5 => rav1e_sad16x16_sse2,
      6 => rav1e_sad32x32_sse2,
      7 => rav1e_sad64x64_sse2,
      8 => rav1e_sad128x128_sse2,
      _ => rav1e_sad128x128_sse2
    };
    let mut sum = 0 as u32;
    for r in (0..blk_h as isize).step_by(step_size) {
      for c in (0..blk_w as isize).step_by(step_size) {
        let org_ptr = org_ptr.offset(r * org_stride + c);
        let ref_ptr = ref_ptr.offset(r * ref_stride + c);
        sum += func(org_ptr, org_stride, ref_ptr, ref_stride);
      }
    }
    sum
  }

  /// Computes the SAD of a `blk_w` x `blk_h` block of 8-bit pixels with a
  /// single kernel call chosen from the exact block size.
  ///
  /// Blocks that are 4 or 8 pixels wide use the SSE2 kernels; wider blocks
  /// use the AVX2 kernels.
  ///
  /// # Panics
  ///
  /// Panics (`unreachable!`) for any `(blk_w, blk_h)` pair not in the table.
  ///
  /// # Safety
  ///
  /// The CPU must support AVX2, and both slices must have at least
  /// `blk_w` x `blk_h` readable pixels starting at their origin.
  #[target_feature(enable = "avx2")]
  unsafe fn sad_avx2(
    plane_org: &PlaneSlice<'_, u8>, plane_ref: &PlaneSlice<'_, u8>, blk_h: usize,
    blk_w: usize
  ) -> u32 {
    let org_ptr = plane_org.as_ptr();
    let ref_ptr = plane_ref.as_ptr();
    let org_stride = plane_org.plane.cfg.stride as libc::ptrdiff_t;
    let ref_stride = plane_ref.plane.cfg.stride as libc::ptrdiff_t;

    let func = match (blk_w, blk_h) {
      (4, 4) => rav1e_sad4x4_sse2,
      (4, 8) => rav1e_sad4x8_sse2,
      (4, 16) => rav1e_sad4x16_sse2,

      (8, 4) => rav1e_sad8x4_sse2,
      (8, 8) => rav1e_sad8x8_sse2,
      (8, 16) => rav1e_sad8x16_sse2,
      (8, 32) => rav1e_sad8x32_sse2,

      (16, 4) => rav1e_sad16x4_avx2,
      (16, 8) => rav1e_sad16x8_avx2,
      (16, 16) => rav1e_sad16x16_avx2,
      (16, 32) => rav1e_sad16x32_avx2,
      (16, 64) => rav1e_sad16x64_avx2,

      (32, 8) => rav1e_sad32x8_avx2,
      (32, 16) => rav1e_sad32x16_avx2,
      (32, 32) => rav1e_sad32x32_avx2,
      (32, 64) => rav1e_sad32x64_avx2,

      (64, 16) => rav1e_sad64x16_avx2,
      (64, 32) => rav1e_sad64x32_avx2,
      (64, 64) => rav1e_sad64x64_avx2,
      (64, 128) => rav1e_sad64x128_avx2,

      (128, 64) => rav1e_sad128x64_avx2,
      (128, 128) => rav1e_sad128x128_avx2,

      _ => unreachable!()
    };
    func(org_ptr, org_stride, ref_ptr, ref_stride)
  }

  /// Returns the sum of absolute differences between two `blk_w` x `blk_h`
  /// blocks, using an assembly kernel when one applies.
  ///
  /// Dispatch order, for blocks at least 4x4:
  /// 1. 2-byte pixels with SSSE3 -> `sad_hbd_ssse3`
  /// 2. 1-byte pixels with AVX2 -> `sad_avx2`
  /// 3. 1-byte pixels with SSE2 -> `sad_sse2`
  ///
  /// Everything else falls back to `super::native::get_sad`.
  #[inline(always)]
  pub fn get_sad<T: Pixel>(
    plane_org: &PlaneSlice<'_, T>, plane_ref: &PlaneSlice<'_, T>, blk_h: usize,
    blk_w: usize, bit_depth: usize
  ) -> u32 {
    let pixel_bytes = mem::size_of::<T>();
    if blk_h >= 4 && blk_w >= 4 {
      if pixel_bytes == 2 && is_x86_feature_detected!("ssse3") {
        // SAFETY: SSSE3 was just detected. The cast only renames `T` to the
        // pixel type of identical size checked above.
        return unsafe {
          let plane_org = &*(plane_org as *const _ as *const PlaneSlice<'_, u16>);
          let plane_ref = &*(plane_ref as *const _ as *const PlaneSlice<'_, u16>);
          sad_hbd_ssse3(plane_org, plane_ref, blk_h, blk_w, bit_depth)
        };
      }
      // `sad_avx2` is compiled with `target_feature(enable = "avx2")` and
      // calls `*_avx2` kernels, so the check must be for AVX2. Testing only
      // "avx" would run AVX2 instructions on CPUs that lack them.
      if pixel_bytes == 1 && is_x86_feature_detected!("avx2") {
        // SAFETY: AVX2 was just detected. The cast only renames `T` to the
        // pixel type of identical size checked above.
        return unsafe {
          let plane_org = &*(plane_org as *const _ as *const PlaneSlice<'_, u8>);
          let plane_ref = &*(plane_ref as *const _ as *const PlaneSlice<'_, u8>);
          sad_avx2(plane_org, plane_ref, blk_h, blk_w)
        };
      }
      if pixel_bytes == 1 && is_x86_feature_detected!("sse2") {
        // SAFETY: SSE2 was just detected. The cast only renames `T` to the
        // pixel type of identical size checked above.
        return unsafe {
          let plane_org = &*(plane_org as *const _ as *const PlaneSlice<'_, u8>);
          let plane_ref = &*(plane_ref as *const _ as *const PlaneSlice<'_, u8>);
          sad_sse2(plane_org, plane_ref, blk_h, blk_w)
        };
      }
    }
    super::native::get_sad(plane_org, plane_ref, blk_h, blk_w, bit_depth)
  }
}

mod native {
Raphaël Zumer's avatar
Raphaël Zumer committed
244
  use crate::plane::*;
245
  use crate::util::*;
246

Luca Barbato's avatar
Luca Barbato committed
247
  #[inline(always)]
248 249
  pub fn get_sad<T: Pixel>(
    plane_org: &PlaneSlice<'_, T>, plane_ref: &PlaneSlice<'_, T>, blk_h: usize,
Luca Barbato's avatar
Luca Barbato committed
250 251 252
    blk_w: usize, _bit_depth: usize
  ) -> u32 {
    let mut sum = 0 as u32;
253

Luca Barbato's avatar
Luca Barbato committed
254 255
    let org_iter = plane_org.iter_width(blk_w);
    let ref_iter = plane_ref.iter_width(blk_w);
256

Luca Barbato's avatar
Luca Barbato committed
257
    for (slice_org, slice_ref) in org_iter.take(blk_h).zip(ref_iter) {
258 259 260
      sum += slice_org
        .iter()
        .zip(slice_ref)
261
        .map(|(&a, &b)| (i32::cast_from(a) - i32::cast_from(b)).abs() as u32)
262
        .sum::<u32>();
Luca Barbato's avatar
Luca Barbato committed
263
    }
264

Luca Barbato's avatar
Luca Barbato committed
265 266
    sum
  }
267 268
}

Romain Vimont's avatar
Romain Vimont committed
269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300
/// A `rows` x `cols` grid of motion vectors stored in one flat allocation.
///
/// Indexing with a row number (`frame_mvs[row]`) yields that row as a slice
/// of `cols` entries; the code in this file indexes it as
/// `frame_mvs[bo.y][bo.x]` with block offsets.
#[derive(Debug, Clone)]
pub struct FrameMotionVectors {
  // Row-major storage: row `r` occupies `mvs[r * cols..(r + 1) * cols]`.
  mvs: Box<[MotionVector]>,
  /// Number of entries per row.
  pub cols: usize,
  /// Number of rows.
  pub rows: usize,
}

impl FrameMotionVectors {
  /// Creates a `rows` x `cols` grid with every entry set to
  /// `MotionVector::default()`.
  pub fn new(cols: usize, rows: usize) -> Self {
    let cell_count = cols * rows;
    let mvs = vec![MotionVector::default(); cell_count].into_boxed_slice();
    Self { mvs, cols, rows }
  }
}

impl Index<usize> for FrameMotionVectors {
  type Output = [MotionVector];

  /// Returns row `index` as a slice of `cols` motion vectors.
  ///
  /// Panics if the row lies outside the underlying storage.
  #[inline]
  fn index(&self, index: usize) -> &Self::Output {
    let row_start = index * self.cols;
    let row_end = (index + 1) * self.cols;
    &self.mvs[row_start..row_end]
  }
}

impl IndexMut<usize> for FrameMotionVectors {
  /// Returns row `index` as a mutable slice of `cols` motion vectors.
  ///
  /// Panics if the row lies outside the underlying storage.
  #[inline]
  fn index_mut(&mut self, index: usize) -> &mut Self::Output {
    let row_start = index * self.cols;
    let row_end = (index + 1) * self.cols;
    &mut self.mvs[row_start..row_end]
  }
}

301
fn get_mv_range(
Romain Vimont's avatar
Romain Vimont committed
302
  w_in_b: usize, h_in_b: usize, bo: BlockOffset, blk_w: usize, blk_h: usize
303
) -> (isize, isize, isize, isize) {
304 305 306
  let border_w = 128 + blk_w as isize * 8;
  let border_h = 128 + blk_h as isize * 8;
  let mvx_min = -(bo.x as isize) * (8 * MI_SIZE) as isize - border_w;
307
  let mvx_max = (w_in_b - bo.x - blk_w / MI_SIZE) as isize * (8 * MI_SIZE) as isize + border_w;
308
  let mvy_min = -(bo.y as isize) * (8 * MI_SIZE) as isize - border_h;
309
  let mvy_max = (h_in_b - bo.y - blk_h / MI_SIZE) as isize * (8 * MI_SIZE) as isize + border_h;
310 311 312 313

  (mvx_min, mvx_max, mvy_min, mvy_max)
}

314
pub fn get_subset_predictors<T: Pixel>(
Romain Vimont's avatar
Romain Vimont committed
315
  bo: BlockOffset, cmv: MotionVector,
316
  w_in_b: usize, h_in_b: usize,
317
  frame_mvs: &FrameMotionVectors, frame_ref_opt: Option<&ReferenceFrame<T>>,
318
  ref_frame_id: usize
319 320 321
) -> (Vec<MotionVector>) {
  let mut predictors = Vec::new();

322 323 324 325 326 327
  // Zero motion vector
  predictors.push(MotionVector::default());

  // Coarse motion estimation.
  predictors.push(cmv.quantize_to_fullpel());

328 329
  // EPZS subset A and B predictors.

330
  let mut median_preds = Vec::new();
331
  if bo.x > 0 {
Romain Vimont's avatar
Romain Vimont committed
332
    let left = frame_mvs[bo.y][bo.x - 1];
333 334
    median_preds.push(left);
    if !left.is_zero() { predictors.push(left); }
335 336
  }
  if bo.y > 0 {
Romain Vimont's avatar
Romain Vimont committed
337
    let top = frame_mvs[bo.y - 1][bo.x];
338 339
    median_preds.push(top);
    if !top.is_zero() { predictors.push(top); }
340

341
    if bo.x < w_in_b - 1 {
Romain Vimont's avatar
Romain Vimont committed
342
      let top_right = frame_mvs[bo.y - 1][bo.x + 1];
343 344
      median_preds.push(top_right);
      if !top_right.is_zero() { predictors.push(top_right); }
345 346 347
    }
  }

348
  if !median_preds.is_empty() {
Vladimir Kazakov's avatar
Vladimir Kazakov committed
349
    let mut median_mv = MotionVector::default();
350
    for mv in median_preds.iter() {
351 352
      median_mv = median_mv + *mv;
    }
353 354 355
    median_mv = median_mv / (median_preds.len() as i16);
    let median_mv_quant = median_mv.quantize_to_fullpel();
    if !median_mv_quant.is_zero() { predictors.push(median_mv_quant); }
356 357 358 359 360
  }

  // EPZS subset C predictors.

  if let Some(ref frame_ref) = frame_ref_opt {
361
    let prev_frame_mvs = &frame_ref.frame_mvs[ref_frame_id];
362 363

    if bo.x > 0 {
Romain Vimont's avatar
Romain Vimont committed
364
      let left = prev_frame_mvs[bo.y][bo.x - 1];
365
      if !left.is_zero() { predictors.push(left); }
366 367
    }
    if bo.y > 0 {
Romain Vimont's avatar
Romain Vimont committed
368
      let top = prev_frame_mvs[bo.y - 1][bo.x];
369
      if !top.is_zero() { predictors.push(top); }
370
    }
371
    if bo.x < w_in_b - 1 {
Romain Vimont's avatar
Romain Vimont committed
372
      let right = prev_frame_mvs[bo.y][bo.x + 1];
373
      if !right.is_zero() { predictors.push(right); }
374
    }
375
    if bo.y < h_in_b - 1 {
Romain Vimont's avatar
Romain Vimont committed
376
      let bottom = prev_frame_mvs[bo.y + 1][bo.x];
377
      if !bottom.is_zero() { predictors.push(bottom); }
378 379
    }

380 381
    let previous = prev_frame_mvs[bo.y][bo.x];
    if !previous.is_zero() { predictors.push(previous); }
382 383 384 385 386
  }

  predictors
}

387
pub trait MotionEstimation {
388
  fn full_pixel_me<T: Pixel>(
389
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &ReferenceFrame<T>,
Romain Vimont's avatar
Romain Vimont committed
390
    bo: BlockOffset, lambda: u32,
391
    cmv: MotionVector, pmv: [MotionVector; 2],
392 393
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize, best_mv: &mut MotionVector,
Thomas Daede's avatar
Thomas Daede committed
394
    lowest_cost: &mut u64, ref_frame: RefType
395
  );
396

397
  fn sub_pixel_me<T: Pixel>(
398
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &ReferenceFrame<T>,
Romain Vimont's avatar
Romain Vimont committed
399
    bo: BlockOffset, lambda: u32, pmv: [MotionVector; 2],
400 401
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize, best_mv: &mut MotionVector,
Thomas Daede's avatar
Thomas Daede committed
402
    lowest_cost: &mut u64, ref_frame: RefType
403 404
  );

405 406
  fn motion_estimation<T: Pixel> (
    fi: &FrameInvariants<T>, fs: &FrameState<T>, bsize: BlockSize,
Thomas Daede's avatar
Thomas Daede committed
407
    bo: BlockOffset, ref_frame: RefType, cmv: MotionVector,
408
    pmv: [MotionVector; 2]
409
  ) -> MotionVector {
Thomas Daede's avatar
Thomas Daede committed
410
    match fi.rec_buffer.frames[fi.ref_frames[ref_frame.to_index()] as usize]
411 412 413 414 415 416 417 418 419 420 421 422 423 424 425
    {
      Some(ref rec) => {
        let blk_w = bsize.width();
        let blk_h = bsize.height();
        let (mvx_min, mvx_max, mvy_min, mvy_max) =
          get_mv_range(fi.w_in_b, fi.h_in_b, bo, blk_w, blk_h);

        // 0.5 is a fudge factor
        let lambda = (fi.me_lambda * 256.0 * 0.5) as u32;

        // Full-pixel motion estimation

        let mut lowest_cost = std::u64::MAX;
        let mut best_mv = MotionVector::default();

426
        Self::full_pixel_me(fi, fs, rec, bo, lambda, cmv, pmv,
427 428
                           mvx_min, mvx_max, mvy_min, mvy_max, blk_w, blk_h,
                           &mut best_mv, &mut lowest_cost, ref_frame);
429

430
        Self::sub_pixel_me(fi, fs, rec, bo, lambda, pmv,
431
                           mvx_min, mvx_max, mvy_min, mvy_max, blk_w, blk_h,
432
                           &mut best_mv, &mut lowest_cost, ref_frame);
433 434

        best_mv
Frank Bossen's avatar
Frank Bossen committed
435 436
      }

437
      None => MotionVector::default()
438
    }
439
  }
440 441 442

  fn estimate_motion_ss2<T: Pixel>(
    fi: &FrameInvariants<T>, fs: &FrameState<T>, bsize: BlockSize, ref_idx: usize,
Romain Vimont's avatar
Romain Vimont committed
443
    bo: BlockOffset, pmvs: &[Option<MotionVector>; 3], ref_frame: usize
444 445 446 447 448
  ) -> Option<MotionVector> {
    if let Some(ref rec) = fi.rec_buffer.frames[ref_idx] {
      let blk_w = bsize.width();
      let blk_h = bsize.height();
      let bo_adj = adjust_bo(bo, fi, blk_w, blk_h);
Romain Vimont's avatar
Romain Vimont committed
449
      let (mvx_min, mvx_max, mvy_min, mvy_max) = get_mv_range(fi.w_in_b, fi.h_in_b, bo_adj, blk_w, blk_h);
450 451 452

      let global_mv = [MotionVector{row: 0, col: 0}; 2];
      let frame_mvs = &fs.frame_mvs[ref_frame];
453
      let frame_ref_opt = fi.rec_buffer.frames[fi.ref_frames[0] as usize].as_ref().map(Arc::as_ref);
454 455 456 457 458 459 460 461

      let mut lowest_cost = std::u64::MAX;
      let mut best_mv = MotionVector::default();

      // Divide by 4 to account for subsampling, 0.125 is a fudge factor
      let lambda = (fi.me_lambda * 256.0 / 4.0 * 0.125) as u32;

      Self::me_ss2(
462 463
        fi, fs, pmvs, bo_adj,
        frame_mvs, frame_ref_opt, rec, global_mv, lambda,
464 465 466 467 468 469 470 471 472 473 474 475 476
        mvx_min, mvx_max, mvy_min, mvy_max, blk_w, blk_h,
        &mut best_mv, &mut lowest_cost
      );

      Some(MotionVector { row: best_mv.row * 2, col: best_mv.col * 2 })
    } else {
      None
    }
  }

  fn me_ss2<T: Pixel>(
    fi: &FrameInvariants<T>, fs: &FrameState<T>,
    pmvs: &[Option<MotionVector>; 3], bo_adj_h: BlockOffset,
477
    frame_mvs: &FrameMotionVectors, frame_ref_opt: Option<&ReferenceFrame<T>>,
478
    rec: &ReferenceFrame<T>, global_mv: [MotionVector; 2], lambda: u32,
479 480 481 482
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize,
    best_mv: &mut MotionVector, lowest_cost: &mut u64
  );
483
}
484

485 486 487
/// Motion estimator whose `MotionEstimation` impl searches with
/// `diamond_me_search`.
pub struct DiamondSearch {}
/// Motion estimator whose `MotionEstimation` impl searches with
/// `full_search` and `telescopic_subpel_search`.
pub struct FullSearch {}

488 489
impl MotionEstimation for DiamondSearch {
  fn full_pixel_me<T: Pixel>(
490
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &ReferenceFrame<T>,
491
    bo: BlockOffset, lambda: u32,
492 493
    cmv: MotionVector, pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
494
    best_mv: &mut MotionVector, lowest_cost: &mut u64, ref_frame: RefType
495
  ) {
Thomas Daede's avatar
Thomas Daede committed
496
    let frame_mvs = &fs.frame_mvs[ref_frame.to_index()];
497
    let frame_ref = fi.rec_buffer.frames[fi.ref_frames[0] as usize].as_ref().map(Arc::as_ref);
498
    let predictors =
Thomas Daede's avatar
Thomas Daede committed
499
      get_subset_predictors(bo, cmv, fi.w_in_b, fi.h_in_b, frame_mvs, frame_ref, ref_frame.to_index());
500 501 502

    diamond_me_search(
      fi,
503
      bo.to_luma_plane_offset(),
504 505 506 507 508 509 510 511 512 513 514 515 516 517
      &fs.input.planes[0],
      &rec.frame.planes[0],
      &predictors,
      fi.sequence.bit_depth,
      pmv,
      lambda,
      mvx_min,
      mvx_max,
      mvy_min,
      mvy_max,
      blk_w,
      blk_h,
      best_mv,
      lowest_cost,
518
      false,
519 520 521
      ref_frame
    );
  }
522 523

  fn sub_pixel_me<T: Pixel>(
524
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &ReferenceFrame<T>,
525
    bo: BlockOffset, lambda: u32,
526 527
    pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
528
    best_mv: &mut MotionVector, lowest_cost: &mut u64, ref_frame: RefType,
529 530 531 532 533
  )
  {
    let predictors = vec![*best_mv];
    diamond_me_search(
      fi,
534
      bo.to_luma_plane_offset(),
535 536 537 538 539 540 541 542 543 544 545 546 547 548
      &fs.input.planes[0],
      &rec.frame.planes[0],
      &predictors,
      fi.sequence.bit_depth,
      pmv,
      lambda,
      mvx_min,
      mvx_max,
      mvy_min,
      mvy_max,
      blk_w,
      blk_h,
      best_mv,
      lowest_cost,
549
      true,
550 551 552
      ref_frame
    );
  }
553 554 555

  fn me_ss2<T: Pixel>(
    fi: &FrameInvariants<T>, fs: &FrameState<T>,
556
    pmvs: &[Option<MotionVector>; 3], bo_adj: BlockOffset,
557
    frame_mvs: &FrameMotionVectors, frame_ref_opt: Option<&ReferenceFrame<T>>,
558
    rec: &ReferenceFrame<T>, global_mv: [MotionVector; 2], lambda: u32,
559 560 561 562
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize,
    best_mv: &mut MotionVector, lowest_cost: &mut u64
  ) {
563 564 565 566
    let po = PlaneOffset {
      x: (bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
      y: (bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
    };
567 568 569
    for omv in pmvs.iter() {
      if let Some(pmv) = omv {
        let mut predictors = get_subset_predictors::<T>(
570
          bo_adj,
571 572 573 574 575 576 577 578 579 580 581
          MotionVector{row: pmv.row, col: pmv.col},
          fi.w_in_b, fi.h_in_b,
          &frame_mvs, frame_ref_opt, 0
        );

        for predictor in &mut predictors {
          predictor.row >>= 1;
          predictor.col >>= 1;
        }

        diamond_me_search(
Romain Vimont's avatar
Romain Vimont committed
582
          fi, po,
583 584 585 586 587 588
          &fs.input_hres, &rec.input_hres,
          &predictors, fi.sequence.bit_depth,
          global_mv, lambda,
          mvx_min >> 1, mvx_max >> 1, mvy_min >> 1, mvy_max >> 1,
          blk_w >> 1, blk_h >> 1,
          best_mv, lowest_cost,
Thomas Daede's avatar
Thomas Daede committed
589
          false, LAST_FRAME
590 591 592 593
        );
      }
    }
  }
594 595 596 597
}

impl MotionEstimation for FullSearch {
  fn full_pixel_me<T: Pixel>(
598
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &ReferenceFrame<T>,
599
    bo: BlockOffset, lambda: u32,
600 601
    cmv: MotionVector, pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
602
    best_mv: &mut MotionVector, lowest_cost: &mut u64, _ref_frame: RefType
603
  ) {
604
    let po = bo.to_luma_plane_offset();
605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625
    let range = 16;
    let x_lo = po.x
      + ((-range + (cmv.col / 8) as isize).max(mvx_min / 8).min(mvx_max / 8));
    let x_hi = po.x
      + ((range + (cmv.col / 8) as isize).max(mvx_min / 8).min(mvx_max / 8));
    let y_lo = po.y
      + ((-range + (cmv.row / 8) as isize).max(mvy_min / 8).min(mvy_max / 8));
    let y_hi = po.y
      + ((range + (cmv.row / 8) as isize).max(mvy_min / 8).min(mvy_max / 8));

    full_search(
      x_lo,
      x_hi,
      y_lo,
      y_hi,
      blk_h,
      blk_w,
      &fs.input.planes[0],
      &rec.frame.planes[0],
      best_mv,
      lowest_cost,
Romain Vimont's avatar
Romain Vimont committed
626
      po,
627 628 629 630 631 632 633
      2,
      fi.sequence.bit_depth,
      lambda,
      pmv,
      fi.allow_high_precision_mv
    );
  }
634 635

  fn sub_pixel_me<T: Pixel>(
636
    fi: &FrameInvariants<T>, fs: &FrameState<T>, _rec: &ReferenceFrame<T>,
637
    bo: BlockOffset, lambda: u32,
638
    pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
639
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
640
    best_mv: &mut MotionVector, lowest_cost: &mut u64, ref_frame: RefType,
641 642 643 644 645
  )
  {
    telescopic_subpel_search(
      fi,
      fs,
646
      bo.to_luma_plane_offset(),
647 648 649 650 651 652 653
      lambda,
      ref_frame,
      pmv,
      mvx_min,
      mvx_max,
      mvy_min,
      mvy_max,
654 655
      blk_w,
      blk_h,
656 657 658 659
      best_mv,
      lowest_cost
    );
  }
660 661 662

  fn me_ss2<T: Pixel>(
    fi: &FrameInvariants<T>, fs: &FrameState<T>,
663
    pmvs: &[Option<MotionVector>; 3], bo_adj: BlockOffset,
664
    _frame_mvs: &FrameMotionVectors, _frame_ref_opt: Option<&ReferenceFrame<T>>,
665
    rec: &ReferenceFrame<T>, _global_mv: [MotionVector; 2], lambda: u32,
666 667 668 669
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize,
    best_mv: &mut MotionVector, lowest_cost: &mut u64
  ) {
670 671 672 673
    let po = PlaneOffset {
      x: (bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
      y: (bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
    };
674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691
    let range = 16;
    for omv in pmvs.iter() {
      if let Some(pmv) = omv {
        let x_lo = po.x + (((pmv.col as isize / 8 - range).max(mvx_min / 8).min(mvx_max / 8)) >> 1);
        let x_hi = po.x + (((pmv.col as isize / 8 + range).max(mvx_min / 8).min(mvx_max / 8)) >> 1);
        let y_lo = po.y + (((pmv.row as isize / 8 - range).max(mvy_min / 8).min(mvy_max / 8)) >> 1);
        let y_hi = po.y + (((pmv.row as isize / 8 + range).max(mvy_min / 8).min(mvy_max / 8)) >> 1);
        full_search(
          x_lo,
          x_hi,
          y_lo,
          y_hi,
          blk_h >> 1,
          blk_w >> 1,
          &fs.input_hres,
          &rec.input_hres,
          best_mv,
          lowest_cost,
Romain Vimont's avatar
Romain Vimont committed
692
          po,
693 694 695 696 697 698 699 700 701
          1,
          fi.sequence.bit_depth,
          lambda,
          [MotionVector::default(); 2],
          fi.allow_high_precision_mv
        );
      }
    }
  }
702
}
703

704 705
fn get_best_predictor<T: Pixel>(
  fi: &FrameInvariants<T>,
Romain Vimont's avatar
Romain Vimont committed
706
  po: PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>,
707
  predictors: &[MotionVector],
708
  bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
709 710
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
711
  center_mv: &mut MotionVector, center_mv_cost: &mut u64,
Thomas Daede's avatar
Thomas Daede committed
712
  tmp_plane_opt: &mut Option<Plane<T>>, ref_frame: RefType) {
Vladimir Kazakov's avatar
Vladimir Kazakov committed
713
  *center_mv = MotionVector::default();
714 715 716 717 718 719
  *center_mv_cost = std::u64::MAX;

  for &init_mv in predictors.iter() {
    let cost = get_mv_rd_cost(
      fi, po, p_org, p_ref, bit_depth,
      pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
720
      blk_w, blk_h, init_mv, tmp_plane_opt, ref_frame);
721 722 723 724 725 726 727 728

    if cost < *center_mv_cost {
      *center_mv = init_mv;
      *center_mv_cost = cost;
    }
  }
}

729 730
fn diamond_me_search<T: Pixel>(
  fi: &FrameInvariants<T>,
Romain Vimont's avatar
Romain Vimont committed
731
  po: PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>,
732
  predictors: &[MotionVector],
733
  bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
734 735
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
736
  center_mv: &mut MotionVector, center_mv_cost: &mut u64,
Thomas Daede's avatar
Thomas Daede committed
737
  subpixel: bool, ref_frame: RefType)
738 739
{
  let diamond_pattern = [(1i16, 0i16), (0, 1), (-1, 0), (0, -1)];
740 741
  let (mut diamond_radius, diamond_radius_end, mut tmp_plane_opt) = {
    if subpixel {
742
      // Sub-pixel motion estimation
743 744 745 746 747
      (
        4i16,
        if fi.allow_high_precision_mv {1i16} else {2i16},
        Some(Plane::new(blk_w, blk_h, 0, 0, 0, 0)),
      )
748 749
    } else {
      // Full pixel motion estimation
750
      (16i16, 8i16, None)
751 752
    }
  };
753 754 755 756

  get_best_predictor(
    fi, po, p_org, p_ref, &predictors,
    bit_depth, pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
757
    blk_w, blk_h, center_mv, center_mv_cost,
758
    &mut tmp_plane_opt, ref_frame);
759 760 761

  loop {
    let mut best_diamond_rd_cost = std::u64::MAX;
Vladimir Kazakov's avatar
Vladimir Kazakov committed
762
    let mut best_diamond_mv = MotionVector::default();
763 764 765 766 767 768 769 770 771

    for p in diamond_pattern.iter() {

        let cand_mv = MotionVector {
          row: center_mv.row + diamond_radius * p.0,
          col: center_mv.col + diamond_radius * p.1
        };

        let rd_cost = get_mv_rd_cost(
Romain Vimont's avatar
Romain Vimont committed
772
          fi, po, p_org, p_ref, bit_depth,
773
          pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
774
          blk_w, blk_h, cand_mv, &mut tmp_plane_opt, ref_frame);
775 776 777 778 779 780 781 782

        if rd_cost < best_diamond_rd_cost {
          best_diamond_rd_cost = rd_cost;
          best_diamond_mv = cand_mv;
        }
    }

    if *center_mv_cost <= best_diamond_rd_cost {
783
      if diamond_radius == diamond_radius_end {
784 785 786 787 788 789 790 791 792 793 794 795 796 797
        break;
      } else {
        diamond_radius /= 2;
      }
    }
    else {
      *center_mv = best_diamond_mv;
      *center_mv_cost = best_diamond_rd_cost;
    }
  }

  assert!(*center_mv_cost < std::u64::MAX);
}

798 799
fn get_mv_rd_cost<T: Pixel>(
  fi: &FrameInvariants<T>,
Romain Vimont's avatar
Romain Vimont committed
800
  po: PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>, bit_depth: usize,
801
  pmv: [MotionVector; 2], lambda: u32,
802 803
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
804
  cand_mv: MotionVector, tmp_plane_opt: &mut Option<Plane<T>>,
Thomas Daede's avatar
Thomas Daede committed
805
  ref_frame: RefType) -> u64
806 807 808 809 810 811 812 813 814 815
{
  if (cand_mv.col as isize) < mvx_min || (cand_mv.col as isize) > mvx_max {
    return std::u64::MAX;
  }
  if (cand_mv.row as isize) < mvy_min || (cand_mv.row as isize) > mvy_max {
    return std::u64::MAX;
  }

  let plane_org = p_org.slice(po);

816
  if let Some(ref mut tmp_plane) = tmp_plane_opt {
Romain Vimont's avatar
Romain Vimont committed
817
    let mut tmp_slice = &mut tmp_plane.mut_slice(PlaneOffset { x: 0, y: 0 });
818 819 820
    PredictionMode::NEWMV.predict_inter(
      fi,
      0,
Romain Vimont's avatar
Romain Vimont committed
821
      po,
822 823 824 825 826 827
      &mut tmp_slice,
      blk_w,
      blk_h,
      [ref_frame, NONE_FRAME],
      [cand_mv, MotionVector { row: 0, col: 0 }]
    );
Romain Vimont's avatar
Romain Vimont committed
828
    let plane_ref = tmp_plane.slice(PlaneOffset { x: 0, y: 0 });
829 830 831 832 833 834
    compute_mv_rd_cost(
      fi, pmv, lambda, bit_depth, blk_w, blk_h, cand_mv,
      &plane_org, &plane_ref
    )
  } else {
    // Full pixel motion vector
Romain Vimont's avatar
Romain Vimont committed
835
    let plane_ref = p_ref.slice(PlaneOffset {
836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852
      x: po.x + (cand_mv.col / 8) as isize,
      y: po.y + (cand_mv.row / 8) as isize
    });
    compute_mv_rd_cost(
      fi, pmv, lambda, bit_depth, blk_w, blk_h, cand_mv,
      &plane_org, &plane_ref
    )
  }
}

fn compute_mv_rd_cost<T: Pixel>(
  fi: &FrameInvariants<T>,
  pmv: [MotionVector; 2], lambda: u32,
  bit_depth: usize, blk_w: usize, blk_h: usize, cand_mv: MotionVector,
  plane_org: &PlaneSlice<T>, plane_ref: &PlaneSlice<T>
) -> u64
{
853 854 855 856 857 858 859 860 861
  let sad = get_sad(&plane_org, &plane_ref, blk_h, blk_w, bit_depth);

  let rate1 = get_mv_rate(cand_mv, pmv[0], fi.allow_high_precision_mv);
  let rate2 = get_mv_rate(cand_mv, pmv[1], fi.allow_high_precision_mv);
  let rate = rate1.min(rate2 + 1);

  256 * sad as u64 + rate as u64 * lambda as u64
}

862
fn telescopic_subpel_search<T: Pixel>(
863
  fi: &FrameInvariants<T>, fs: &FrameState<T>, po: PlaneOffset,
Thomas Daede's avatar
Thomas Daede committed
864
  lambda: u32, ref_frame: RefType, pmv: [MotionVector; 2],
865
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
866
  blk_w: usize, blk_h: usize,
Romain Vimont's avatar
Romain Vimont committed
867
  best_mv: &mut MotionVector, lowest_cost: &mut u64
868 869 870 871 872 873 874 875
) {
  let mode = PredictionMode::NEWMV;

  let mut steps = vec![8, 4, 2];
  if fi.allow_high_precision_mv {
    steps.push(1);
  }

Romain Vimont's avatar
Romain Vimont committed
876 877
  let mut tmp_plane = Plane::new(blk_w, blk_h, 0, 0, 0, 0);

878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900
  for step in steps {
    let center_mv_h = *best_mv;
    for i in 0..3 {
      for j in 0..3 {
        // Skip the center point that was already tested
        if i == 1 && j == 1 {
          continue;
        }

        let cand_mv = MotionVector {
          row: center_mv_h.row + step * (i as i16 - 1),
          col: center_mv_h.col + step * (j as i16 - 1)
        };

        if (cand_mv.col as isize) < mvx_min || (cand_mv.col as isize) > mvx_max {
          continue;
        }
        if (cand_mv.row as isize) < mvy_min || (cand_mv.row as isize) > mvy_max {
          continue;
        }

        {
          let tmp_slice =
Romain Vimont's avatar
Romain Vimont committed
901
            &mut tmp_plane.mut_slice(PlaneOffset { x: 0, y: 0 });
902 903 904 905

          mode.predict_inter(
            fi,
            0,
Romain Vimont's avatar
Romain Vimont committed
906
            po,
907 908 909 910 911 912 913 914
            tmp_slice,
            blk_w,
            blk_h,
            [ref_frame, NONE_FRAME],
            [cand_mv, MotionVector { row: 0, col: 0 }]
          );
        }

Romain Vimont's avatar
Romain Vimont committed
915 916
        let plane_org = fs.input.planes[0].slice(po);
        let plane_ref = tmp_plane.slice(PlaneOffset { x: 0, y: 0 });
917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933

        let sad = get_sad(&plane_org, &plane_ref, blk_h, blk_w, fi.sequence.bit_depth);

        let rate1 = get_mv_rate(cand_mv, pmv[0], fi.allow_high_precision_mv);
        let rate2 = get_mv_rate(cand_mv, pmv[1], fi.allow_high_precision_mv);
        let rate = rate1.min(rate2 + 1);
        let cost = 256 * sad as u64 + rate as u64 * lambda as u64;

        if cost < *lowest_cost {
          *lowest_cost = cost;
          *best_mv = cand_mv;
        }
      }
    }
  }
}

934
fn full_search<T: Pixel>(
Kyle Siefring's avatar
Kyle Siefring committed
935
  x_lo: isize, x_hi: isize, y_lo: isize, y_hi: isize, blk_h: usize,
936
  blk_w: usize, p_org: &Plane<T>, p_ref: &Plane<T>, best_mv: &mut MotionVector,
Romain Vimont's avatar
Romain Vimont committed
937
  lowest_cost: &mut u64, po: PlaneOffset, step: usize, bit_depth: usize,
938
  lambda: u32, pmv: [MotionVector; 2], allow_high_precision_mv: bool
Kyle Siefring's avatar
Kyle Siefring committed
939
) {
Luca Barbato's avatar
Luca Barbato committed
940 941 942 943 944
    let search_range_y = (y_lo..=y_hi).step_by(step);
    let search_range_x = (x_lo..=x_hi).step_by(step);
    let search_area = search_range_y.flat_map(|y| { search_range_x.clone().map(move |x| (y, x)) });

    let (cost, mv) = search_area.map(|(y, x)| {
945
      let plane_org = p_org.slice(po);
Romain Vimont's avatar
Romain Vimont committed
946
      let plane_ref = p_ref.slice(PlaneOffset { x, y });
947

Kyle Siefring's avatar
Kyle Siefring committed
948
      let sad = get_sad(&plane_org, &plane_ref, blk_h, blk_w, bit_depth);
949

Frank Bossen's avatar
Frank Bossen committed
950 951 952 953 954 955 956 957
      let mv = MotionVector {
        row: 8 * (y as i16 - po.y as i16),
        col: 8 * (x as i16 - po.x as i16)
      };

      let rate1 = get_mv_rate(mv, pmv[0], allow_high_precision_mv);
      let rate2 = get_mv_rate(mv, pmv[1], allow_high_precision_mv);
      let rate = rate1.min(rate2 + 1);
958
      let cost = 256 * sad as u64 + rate as u64 * lambda as u64;
Frank Bossen's avatar
Frank Bossen committed
959

Luca Barbato's avatar
Luca Barbato committed
960 961 962 963 964
      (cost, mv)
  }).min_by_key(|(c, _)| *c).unwrap();

    *lowest_cost = cost;
    *best_mv = mv;
965 966 967
}

// Adjust block offset such that entire block lies within frame boundaries
Romain Vimont's avatar
Romain Vimont committed
968
fn adjust_bo<T: Pixel>(bo: BlockOffset, fi: &FrameInvariants<T>, blk_w: usize, blk_h: usize) -> BlockOffset {
969 970 971 972 973 974
  BlockOffset {
    x: (bo.x as isize).min(fi.w_in_b as isize - blk_w as isize / 4).max(0) as usize,
    y: (bo.y as isize).min(fi.h_in_b as isize - blk_h as isize / 4).max(0) as usize
  }
}

975
#[inline(always)]
Frank Bossen's avatar
Frank Bossen committed
976
fn get_mv_rate(a: MotionVector, b: MotionVector, allow_high_precision_mv: bool) -> u32 {
977
  #[inline(always)]
Frank Bossen's avatar
Frank Bossen committed
978 979 980 981 982 983 984 985 986 987 988 989
  fn diff_to_rate(diff: i16, allow_high_precision_mv: bool) -> u32 {
    let d = if allow_high_precision_mv { diff } else { diff >> 1 };
    if d == 0 {
      0
    } else {
      2 * (16 - d.abs().leading_zeros())
    }
  }

  diff_to_rate(a.row - b.row, allow_high_precision_mv) + diff_to_rate(a.col - b.col, allow_high_precision_mv)
}

990 991
pub fn estimate_motion_ss4<T: Pixel>(
  fi: &FrameInvariants<T>, fs: &FrameState<T>, bsize: BlockSize, ref_idx: usize,
Romain Vimont's avatar
Romain Vimont committed
992
  bo: BlockOffset
993 994 995 996 997 998 999 1000 1001
) -> Option<MotionVector> {
  if let Some(ref rec) = fi.rec_buffer.frames[ref_idx] {
    let blk_w = bsize.width();
    let blk_h = bsize.height();
    let bo_adj = adjust_bo(bo, fi, blk_w, blk_h);
    let po = PlaneOffset {
      x: (bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 2,
      y: (bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 2
    };
1002

1003 1004
    let range_x = 192 * fi.me_range_scale as isize;
    let range_y = 64 * fi.me_range_scale as isize;
Romain Vimont's avatar
Romain Vimont committed
1005
    let (mvx_min, mvx_max, mvy_min, mvy_max) = get_mv_range(fi.w_in_b, fi.h_in_b, bo_adj, blk_w, blk_h);
1006 1007 1008 1009
    let x_lo = po.x + (((-range_x).max(mvx_min / 8)) >> 2);
    let x_hi = po.x + (((range_x).min(mvx_max / 8)) >> 2);
    let y_lo = po.y + (((-range_y).max(mvy_min / 8)) >> 2);
    let y_hi = po.y + (((range_y).min(mvy_max / 8)) >> 2);
1010

1011
    let mut lowest_cost = std::u64::MAX;
Vladimir Kazakov's avatar
Vladimir Kazakov committed
1012
    let mut best_mv = MotionVector::default();
1013

Frank Bossen's avatar
Frank Bossen committed
1014
    // Divide by 16 to account for subsampling, 0.125 is a fudge factor
1015
    let lambda = (fi.me_lambda * 256.0 / 16.0 * 0.125) as u32;
Frank Bossen's avatar
Frank Bossen committed
1016

1017
    full_search(
Kyle Siefring's avatar
Kyle Siefring committed
1018 1019 1020 1021 1022 1023 1024 1025 1026
      x_lo,
      x_hi,
      y_lo,
      y_hi,
      blk_h >> 2,
      blk_w >> 2,
      &fs.input_qres,
      &rec.input_qres,
      &mut best_mv,
Frank Bossen's avatar
Frank Bossen committed
1027
      &mut lowest_cost,
Romain Vimont's avatar
Romain Vimont committed
1028
      po,
Kyle Siefring's avatar
Kyle Siefring committed
1029
      1,
1030
      fi.sequence.bit_depth,
Frank Bossen's avatar
Frank Bossen committed
1031
      lambda,
Vladimir Kazakov's avatar
Vladimir Kazakov committed
1032
      [MotionVector::default(); 2],
Frank Bossen's avatar
Frank Bossen committed
1033
      fi.allow_high_precision_mv
1034 1035 1036 1037 1038 1039 1040 1041
    );

    Some(MotionVector { row: best_mv.row * 4, col: best_mv.col * 4 })
  } else {
    None
  }
}

1042 1043 1044
#[cfg(test)]
pub mod test {
  use super::*;
Raphaël Zumer's avatar
Raphaël Zumer committed
1045 1046
  use crate::partition::BlockSize;
  use crate::partition::BlockSize::*;
1047 1048

  // Generate plane data for get_sad_same()
1049
  fn setup_sad<T: Pixel>() -> (Plane<T>, Plane<T>) {
1050 1051
    let mut input_plane = Plane::new(640, 480, 0, 0, 128 + 8, 128 + 8);
    let mut rec_plane = input_plane.clone();
1052 1053
    // Make the test pattern robust to data alignment
    let xpad_off = (input_plane.cfg.xorigin - input_plane.cfg.xpad) as i32 - 8i32;
Luca Barbato's avatar
Luca Barbato committed
1054

1055
    for (i, row) in input_plane.data.chunks_mut(input_plane.cfg.stride).enumerate() {
1056
      for (j, pixel) in row.into_iter().enumerate() {
1057
        let val = (j + i) as i32 - xpad_off & 255i32;
Luca Barbato's avatar
Luca Barbato committed
1058
        assert!(val >= u8::min_value().into() &&
1059
            val <= u8::max_value().into());
1060
        *pixel = T::cast_from(val);
1061 1062 1063 1064
      }
    }

    for (i, row) in rec_plane.data.chunks_mut(rec_plane.cfg.stride).enumerate() {
1065
      for (j, pixel) in row.into_iter().enumerate() {
1066
        let val = j as i32 - i as i32 - xpad_off & 255i32;
Luca Barbato's avatar
Luca Barbato committed
1067
        assert!(val >= u8::min_value().into() &&
1068
            val <= u8::max_value().into());
1069
        *pixel = T::cast_from(val);
1070 1071 1072 1073