me.rs 34.6 KB
Newer Older
1 2 3 4 5 6 7 8 9
// Copyright (c) 2017-2018, The rav1e contributors. All rights reserved
//
// This source code is subject to the terms of the BSD 2 Clause License and
// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
// was not distributed with this source code in the LICENSE file, you can
// obtain it at www.aomedia.org/license/software. If the Alliance for Open
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.

10
#[cfg(all(target_arch = "x86_64", feature = "nasm"))]
Raphaël Zumer's avatar
Raphaël Zumer committed
11
pub use self::nasm::get_sad;
12
#[cfg(any(not(target_arch = "x86_64"), not(feature = "nasm")))]
Raphaël Zumer's avatar
Raphaël Zumer committed
13
pub use self::native::get_sad;
Raphaël Zumer's avatar
Raphaël Zumer committed
14
use crate::context::{BlockOffset, BLOCK_TO_PLANE_SHIFT, MI_SIZE};
15
use crate::encoder::ReferenceFrame;
Raphaël Zumer's avatar
Raphaël Zumer committed
16 17 18
use crate::FrameInvariants;
use crate::FrameState;
use crate::partition::*;
Thomas Daede's avatar
Thomas Daede committed
19
use crate::partition::RefType::*;
Raphaël Zumer's avatar
Raphaël Zumer committed
20
use crate::plane::*;
21
use crate::tiling::*;
22
use crate::util::Pixel;
23

Romain Vimont's avatar
Romain Vimont committed
24
use std::ops::{Index, IndexMut};
25
use std::sync::Arc;
26

27
/// SAD (sum of absolute differences) kernels implemented in x86_64 assembly.
///
/// `get_sad` picks a kernel at run time from the detected CPU features and
/// falls back to the portable `native` implementation otherwise.
#[cfg(all(target_arch = "x86_64", feature = "nasm"))]
mod nasm {
  use crate::plane::*;
  use crate::util::*;
  use std::mem;

  use libc;

  /// Declares the `extern` prototype of each listed assembly SAD kernel.
  ///
  /// Every `(name, T)` pair expands to
  /// `fn name(src: *const T, src_stride, dst: *const T, dst_stride) -> u32`.
  macro_rules! declare_asm_sad {
    ($(($name: ident, $T: ident)),+) => (
      $(
        extern { fn $name (
          src: *const $T, src_stride: libc::ptrdiff_t, dst: *const $T,
          dst_stride: libc::ptrdiff_t
        ) -> u32; }
      )+
    )
  }

  declare_asm_sad![
    // SSSE3
    (rav1e_sad_4x4_hbd_ssse3, u16),
    (rav1e_sad_8x8_hbd10_ssse3, u16),
    (rav1e_sad_16x16_hbd_ssse3, u16),
    (rav1e_sad_32x32_hbd10_ssse3, u16),
    (rav1e_sad_64x64_hbd10_ssse3, u16),
    (rav1e_sad_128x128_hbd10_ssse3, u16),

    // SSE2
    (rav1e_sad4x4_sse2, u8),
    (rav1e_sad4x8_sse2, u8),
    (rav1e_sad4x16_sse2, u8),

    (rav1e_sad8x4_sse2, u8),
    (rav1e_sad8x8_sse2, u8),
    (rav1e_sad8x16_sse2, u8),
    (rav1e_sad8x32_sse2, u8),

    (rav1e_sad16x16_sse2, u8),

    (rav1e_sad32x32_sse2, u8),

    (rav1e_sad64x64_sse2, u8),

    (rav1e_sad128x128_sse2, u8),

    // AVX2
    (rav1e_sad16x4_avx2, u8),
    (rav1e_sad16x8_avx2, u8),
    (rav1e_sad16x16_avx2, u8),
    (rav1e_sad16x32_avx2, u8),
    (rav1e_sad16x64_avx2, u8),

    (rav1e_sad32x8_avx2, u8),
    (rav1e_sad32x16_avx2, u8),
    (rav1e_sad32x32_avx2, u8),
    (rav1e_sad32x64_avx2, u8),

    (rav1e_sad64x16_avx2, u8),
    (rav1e_sad64x32_avx2, u8),
    (rav1e_sad64x64_avx2, u8),
    (rav1e_sad64x128_avx2, u8),

    (rav1e_sad128x64_avx2, u8),
    (rav1e_sad128x128_avx2, u8)
  ];

  /// SAD of a `blk_w` x `blk_h` block of `u16` samples using the SSSE3
  /// kernels.
  ///
  /// The block is tiled with square kernels whose side is the smaller block
  /// dimension, capped at 128 when `bit_depth <= 10` and at 4 otherwise.
  ///
  /// # Safety
  ///
  /// The CPU must support SSSE3, and both slices must be readable for
  /// `blk_w` x `blk_h` samples starting at their origin.
  ///
  /// # Panics
  ///
  /// Panics if `blk_w` or `blk_h` is smaller than 4.
  #[target_feature(enable = "ssse3")]
  unsafe fn sad_hbd_ssse3(
    plane_org: &PlaneSlice<'_, u16>, plane_ref: &PlaneSlice<'_, u16>, blk_w: usize,
    blk_h: usize, bit_depth: usize
  ) -> u32 {
    // The plane stride is doubled before being handed to the assembly;
    // presumably it is counted in samples and the kernels expect bytes.
    let org_stride = (plane_org.plane.cfg.stride * 2) as libc::ptrdiff_t;
    let ref_stride = (plane_ref.plane.cfg.stride * 2) as libc::ptrdiff_t;
    assert!(blk_h >= 4 && blk_w >= 4);

    // Beyond 10 bits only the 4x4 kernel is selected.
    // NOTE(review): presumably the `hbd10` kernels cannot accumulate deeper
    // samples without overflow - confirm against the assembly.
    let max_step = if bit_depth <= 10 { 128 } else { 4 };
    let step_size = blk_h.min(blk_w).min(max_step);
    let func = match step_size.ilog() {
      3 => rav1e_sad_4x4_hbd_ssse3,
      4 => rav1e_sad_8x8_hbd10_ssse3,
      5 => rav1e_sad_16x16_hbd_ssse3,
      6 => rav1e_sad_32x32_hbd10_ssse3,
      7 => rav1e_sad_64x64_hbd10_ssse3,
      8 => rav1e_sad_128x128_hbd10_ssse3,
      _ => rav1e_sad_128x128_hbd10_ssse3
    };

    let mut sum = 0u32;
    for r in (0..blk_h).step_by(step_size) {
      for c in (0..blk_w).step_by(step_size) {
        let org_tile = plane_org.subslice(c, r);
        let ref_tile = plane_ref.subslice(c, r);
        // FIXME for now, T == u16
        let org_ptr = org_tile.as_ptr() as *const u16;
        let ref_ptr = ref_tile.as_ptr() as *const u16;
        sum += func(org_ptr, org_stride, ref_ptr, ref_stride);
      }
    }
    sum
  }

  /// SAD of a `blk_w` x `blk_h` block of `u8` samples using the SSE2 kernels.
  ///
  /// The block is tiled with square kernels; the kernel side is limited by
  /// the smaller block dimension and by the alignment of the source pointer.
  ///
  /// # Safety
  ///
  /// The CPU must support SSE2, and both slices must be readable for
  /// `blk_w` x `blk_h` samples starting at their origin.
  #[target_feature(enable = "sse2")]
  unsafe fn sad_sse2(
    plane_org: &PlaneSlice<'_, u8>, plane_ref: &PlaneSlice<'_, u8>, blk_w: usize,
    blk_h: usize
  ) -> u32 {
    let org_ptr = plane_org.as_ptr();
    let ref_ptr = plane_ref.as_ptr();
    let org_stride = plane_org.plane.cfg.stride as libc::ptrdiff_t;
    let ref_stride = plane_ref.plane.cfg.stride as libc::ptrdiff_t;

    // Fast path: a 16-byte aligned 16x16 block needs a single kernel call.
    if blk_w == 16 && blk_h == 16 && (org_ptr as usize & 15) == 0 {
      return rav1e_sad16x16_sse2(org_ptr, org_stride, ref_ptr, ref_stride);
    }

    // Note: unaligned blocks come from hres/qres ME search
    let ptr_align_log2 = (org_ptr as usize).trailing_zeros() as usize;
    // The largest unaligned-safe function is for 8x8
    let ptr_align = 1 << ptr_align_log2.max(3);
    let step_size = blk_h.min(blk_w).min(ptr_align);
    let func = match step_size.ilog() {
      3 => rav1e_sad4x4_sse2,
      4 => rav1e_sad8x8_sse2,
      5 => rav1e_sad16x16_sse2,
      6 => rav1e_sad32x32_sse2,
      7 => rav1e_sad64x64_sse2,
      8 => rav1e_sad128x128_sse2,
      _ => rav1e_sad128x128_sse2
    };

    let mut sum = 0u32;
    for r in (0..blk_h as isize).step_by(step_size) {
      for c in (0..blk_w as isize).step_by(step_size) {
        let org_tile = org_ptr.offset(r * org_stride + c);
        let ref_tile = ref_ptr.offset(r * ref_stride + c);
        sum += func(org_tile, org_stride, ref_tile, ref_stride);
      }
    }
    sum
  }

  /// SAD of a `blk_w` x `blk_h` block of `u8` samples with one kernel call.
  ///
  /// Blocks 4 or 8 samples wide use the SSE2 kernels, wider blocks the AVX2
  /// ones.
  ///
  /// # Safety
  ///
  /// The CPU must support AVX2, and both slices must be readable for
  /// `blk_w` x `blk_h` samples starting at their origin.
  ///
  /// # Panics
  ///
  /// Panics (`unreachable!`) if `(blk_w, blk_h)` has no kernel in the table.
  #[target_feature(enable = "avx2")]
  unsafe fn sad_avx2(
    plane_org: &PlaneSlice<'_, u8>, plane_ref: &PlaneSlice<'_, u8>, blk_w: usize,
    blk_h: usize
  ) -> u32 {
    let org_ptr = plane_org.as_ptr();
    let ref_ptr = plane_ref.as_ptr();
    let org_stride = plane_org.plane.cfg.stride as libc::ptrdiff_t;
    let ref_stride = plane_ref.plane.cfg.stride as libc::ptrdiff_t;

    let func = match (blk_w, blk_h) {
      (4, 4) => rav1e_sad4x4_sse2,
      (4, 8) => rav1e_sad4x8_sse2,
      (4, 16) => rav1e_sad4x16_sse2,

      (8, 4) => rav1e_sad8x4_sse2,
      (8, 8) => rav1e_sad8x8_sse2,
      (8, 16) => rav1e_sad8x16_sse2,
      (8, 32) => rav1e_sad8x32_sse2,

      (16, 4) => rav1e_sad16x4_avx2,
      (16, 8) => rav1e_sad16x8_avx2,
      (16, 16) => rav1e_sad16x16_avx2,
      (16, 32) => rav1e_sad16x32_avx2,
      (16, 64) => rav1e_sad16x64_avx2,

      (32, 8) => rav1e_sad32x8_avx2,
      (32, 16) => rav1e_sad32x16_avx2,
      (32, 32) => rav1e_sad32x32_avx2,
      (32, 64) => rav1e_sad32x64_avx2,

      (64, 16) => rav1e_sad64x16_avx2,
      (64, 32) => rav1e_sad64x32_avx2,
      (64, 64) => rav1e_sad64x64_avx2,
      (64, 128) => rav1e_sad64x128_avx2,

      (128, 64) => rav1e_sad128x64_avx2,
      (128, 128) => rav1e_sad128x128_avx2,

      _ => unreachable!()
    };
    func(org_ptr, org_stride, ref_ptr, ref_stride)
  }

  /// Sum of absolute differences between two `blk_w` x `blk_h` blocks.
  ///
  /// Dispatch order for blocks of at least 4x4 samples: two-byte samples use
  /// SSSE3; one-byte samples use AVX2, then SSE2. Everything else, including
  /// smaller blocks and CPUs without the feature, goes to
  /// `native::get_sad`.
  #[inline(always)]
  pub fn get_sad<T: Pixel>(
    plane_org: &PlaneSlice<'_, T>, plane_ref: &PlaneSlice<'_, T>, blk_w: usize, blk_h: usize, bit_depth: usize
  ) -> u32 {
    // No inner `#[cfg]` needed: the whole module is already gated on
    // x86_64 + nasm.
    if blk_h >= 4 && blk_w >= 4 {
      let sample_bytes = mem::size_of::<T>();

      if sample_bytes == 2 && is_x86_feature_detected!("ssse3") {
        // SAFETY: SSSE3 was detected above. The reference casts rely on `T`
        // being `u16` whenever its size is 2.
        // NOTE(review): that holds only if `Pixel` is implemented for no
        // other two-byte type - confirm against the trait's impls.
        return unsafe {
          let plane_org = &*(plane_org as *const _ as *const PlaneSlice<'_, u16>);
          let plane_ref = &*(plane_ref as *const _ as *const PlaneSlice<'_, u16>);
          sad_hbd_ssse3(plane_org, plane_ref, blk_w, blk_h, bit_depth)
        };
      }

      if sample_bytes == 1 {
        if is_x86_feature_detected!("avx2") {
          // SAFETY: AVX2 was detected above. The casts rely on `T` being
          // `u8` whenever its size is 1 (same caveat as for `u16`).
          return unsafe {
            let plane_org = &*(plane_org as *const _ as *const PlaneSlice<'_, u8>);
            let plane_ref = &*(plane_ref as *const _ as *const PlaneSlice<'_, u8>);
            sad_avx2(plane_org, plane_ref, blk_w, blk_h)
          };
        }
        if is_x86_feature_detected!("sse2") {
          // SAFETY: SSE2 was detected above. The casts rely on `T` being
          // `u8` whenever its size is 1 (same caveat as for `u16`).
          return unsafe {
            let plane_org = &*(plane_org as *const _ as *const PlaneSlice<'_, u8>);
            let plane_ref = &*(plane_ref as *const _ as *const PlaneSlice<'_, u8>);
            sad_sse2(plane_org, plane_ref, blk_w, blk_h)
          };
        }
      }
    }
    super::native::get_sad(plane_org, plane_ref, blk_w, blk_h, bit_depth)
  }
}

mod native {
Raphaël Zumer's avatar
Raphaël Zumer committed
244
  use crate::plane::*;
245
  use crate::util::*;
246

Luca Barbato's avatar
Luca Barbato committed
247
  #[inline(always)]
248
  pub fn get_sad<T: Pixel>(
249 250
    plane_org: &PlaneSlice<'_, T>, plane_ref: &PlaneSlice<'_, T>, blk_w: usize,
    blk_h: usize, _bit_depth: usize
Luca Barbato's avatar
Luca Barbato committed
251 252
  ) -> u32 {
    let mut sum = 0 as u32;
253

Luca Barbato's avatar
Luca Barbato committed
254 255
    let org_iter = plane_org.iter_width(blk_w);
    let ref_iter = plane_ref.iter_width(blk_w);
256

Luca Barbato's avatar
Luca Barbato committed
257
    for (slice_org, slice_ref) in org_iter.take(blk_h).zip(ref_iter) {
258 259 260
      sum += slice_org
        .iter()
        .zip(slice_ref)
261
        .map(|(&a, &b)| (i32::cast_from(a) - i32::cast_from(b)).abs() as u32)
262
        .sum::<u32>();
Luca Barbato's avatar
Luca Barbato committed
263
    }
264

Luca Barbato's avatar
Luca Barbato committed
265 266
    sum
  }
267 268
}

Romain Vimont's avatar
Romain Vimont committed
269 270 271 272 273 274 275 276 277 278 279 280 281 282 283
/// Grid of motion vectors covering a frame, stored in one flat buffer.
///
/// Indexing with `[row]` yields a slice of `cols` vectors; callers in this
/// file address it as `frame_mvs[bo.y][bo.x]`.
#[derive(Debug, Clone)]
pub struct FrameMotionVectors {
  // Row-major storage: row `r` occupies `mvs[r * cols..(r + 1) * cols]`.
  mvs: Box<[MotionVector]>,
  /// Number of motion vectors per row.
  pub cols: usize,
  /// Number of rows.
  pub rows: usize,
}

impl FrameMotionVectors {
  pub fn new(cols: usize, rows: usize) -> Self {
    Self {
      mvs: vec![MotionVector::default(); cols * rows].into_boxed_slice(),
      cols,
      rows,
    }
  }
284 285 286 287 288 289 290 291 292 293

  #[inline(always)]
  pub fn as_tile_motion_vectors(&self) -> TileMotionVectors<'_> {
    TileMotionVectors::new(self, 0, 0, self.cols, self.rows)
  }

  #[inline(always)]
  pub fn as_tile_motion_vectors_mut(&mut self) -> TileMotionVectorsMut<'_> {
    TileMotionVectorsMut::new(self, 0, 0, self.cols, self.rows)
  }
Romain Vimont's avatar
Romain Vimont committed
294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310
}

impl Index<usize> for FrameMotionVectors {
  type Output = [MotionVector];

  /// Returns row `index` as a slice of `cols` motion vectors.
  ///
  /// Panics if the row lies outside the underlying buffer.
  #[inline]
  fn index(&self, index: usize) -> &Self::Output {
    let row_start = index * self.cols;
    let row_end = (index + 1) * self.cols;
    &self.mvs[row_start..row_end]
  }
}

impl IndexMut<usize> for FrameMotionVectors {
  /// Returns row `index` as a mutable slice of `cols` motion vectors.
  ///
  /// Panics if the row lies outside the underlying buffer.
  #[inline]
  fn index_mut(&mut self, index: usize) -> &mut Self::Output {
    let row_start = index * self.cols;
    let row_end = (index + 1) * self.cols;
    &mut self.mvs[row_start..row_end]
  }
}

311
fn get_mv_range(
Romain Vimont's avatar
Romain Vimont committed
312
  w_in_b: usize, h_in_b: usize, bo: BlockOffset, blk_w: usize, blk_h: usize
313
) -> (isize, isize, isize, isize) {
314 315 316
  let border_w = 128 + blk_w as isize * 8;
  let border_h = 128 + blk_h as isize * 8;
  let mvx_min = -(bo.x as isize) * (8 * MI_SIZE) as isize - border_w;
317
  let mvx_max = (w_in_b - bo.x - blk_w / MI_SIZE) as isize * (8 * MI_SIZE) as isize + border_w;
318
  let mvy_min = -(bo.y as isize) * (8 * MI_SIZE) as isize - border_h;
319
  let mvy_max = (h_in_b - bo.y - blk_h / MI_SIZE) as isize * (8 * MI_SIZE) as isize + border_h;
320 321 322 323

  (mvx_min, mvx_max, mvy_min, mvy_max)
}

324
pub fn get_subset_predictors<T: Pixel>(
Romain Vimont's avatar
Romain Vimont committed
325
  bo: BlockOffset, cmv: MotionVector,
326
  frame_mvs: &FrameMotionVectors, frame_ref_opt: Option<&ReferenceFrame<T>>,
327
  ref_frame_id: usize
328 329 330
) -> (Vec<MotionVector>) {
  let mut predictors = Vec::new();

331 332 333 334 335 336
  // Zero motion vector
  predictors.push(MotionVector::default());

  // Coarse motion estimation.
  predictors.push(cmv.quantize_to_fullpel());

337 338
  // EPZS subset A and B predictors.

339
  let mut median_preds = Vec::new();
340
  if bo.x > 0 {
Romain Vimont's avatar
Romain Vimont committed
341
    let left = frame_mvs[bo.y][bo.x - 1];
342 343
    median_preds.push(left);
    if !left.is_zero() { predictors.push(left); }
344 345
  }
  if bo.y > 0 {
Romain Vimont's avatar
Romain Vimont committed
346
    let top = frame_mvs[bo.y - 1][bo.x];
347 348
    median_preds.push(top);
    if !top.is_zero() { predictors.push(top); }
349

350
    if bo.x < frame_mvs.cols - 1 {
Romain Vimont's avatar
Romain Vimont committed
351
      let top_right = frame_mvs[bo.y - 1][bo.x + 1];
352 353
      median_preds.push(top_right);
      if !top_right.is_zero() { predictors.push(top_right); }
354 355 356
    }
  }

357
  if !median_preds.is_empty() {
Vladimir Kazakov's avatar
Vladimir Kazakov committed
358
    let mut median_mv = MotionVector::default();
359
    for mv in median_preds.iter() {
360 361
      median_mv = median_mv + *mv;
    }
362 363 364
    median_mv = median_mv / (median_preds.len() as i16);
    let median_mv_quant = median_mv.quantize_to_fullpel();
    if !median_mv_quant.is_zero() { predictors.push(median_mv_quant); }
365 366 367 368 369
  }

  // EPZS subset C predictors.

  if let Some(ref frame_ref) = frame_ref_opt {
370
    let prev_frame_mvs = &frame_ref.frame_mvs[ref_frame_id];
371 372

    if bo.x > 0 {
Romain Vimont's avatar
Romain Vimont committed
373
      let left = prev_frame_mvs[bo.y][bo.x - 1];
374
      if !left.is_zero() { predictors.push(left); }
375 376
    }
    if bo.y > 0 {
Romain Vimont's avatar
Romain Vimont committed
377
      let top = prev_frame_mvs[bo.y - 1][bo.x];
378
      if !top.is_zero() { predictors.push(top); }
379
    }
380
    if bo.x < frame_mvs.cols - 1 {
Romain Vimont's avatar
Romain Vimont committed
381
      let right = prev_frame_mvs[bo.y][bo.x + 1];
382
      if !right.is_zero() { predictors.push(right); }
383
    }
384
    if bo.y < frame_mvs.rows - 1 {
Romain Vimont's avatar
Romain Vimont committed
385
      let bottom = prev_frame_mvs[bo.y + 1][bo.x];
386
      if !bottom.is_zero() { predictors.push(bottom); }
387 388
    }

389 390
    let previous = prev_frame_mvs[bo.y][bo.x];
    if !previous.is_zero() { predictors.push(previous); }
391 392 393 394 395
  }

  predictors
}

396
pub trait MotionEstimation {
397
  fn full_pixel_me<T: Pixel>(
398
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &ReferenceFrame<T>,
Romain Vimont's avatar
Romain Vimont committed
399
    bo: BlockOffset, lambda: u32,
400
    cmv: MotionVector, pmv: [MotionVector; 2],
401 402
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize, best_mv: &mut MotionVector,
Thomas Daede's avatar
Thomas Daede committed
403
    lowest_cost: &mut u64, ref_frame: RefType
404
  );
405

406
  fn sub_pixel_me<T: Pixel>(
407
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &ReferenceFrame<T>,
Romain Vimont's avatar
Romain Vimont committed
408
    bo: BlockOffset, lambda: u32, pmv: [MotionVector; 2],
409 410
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize, best_mv: &mut MotionVector,
Thomas Daede's avatar
Thomas Daede committed
411
    lowest_cost: &mut u64, ref_frame: RefType
412 413
  );

414 415
  fn motion_estimation<T: Pixel> (
    fi: &FrameInvariants<T>, fs: &FrameState<T>, bsize: BlockSize,
Thomas Daede's avatar
Thomas Daede committed
416
    bo: BlockOffset, ref_frame: RefType, cmv: MotionVector,
417
    pmv: [MotionVector; 2]
418
  ) -> MotionVector {
Thomas Daede's avatar
Thomas Daede committed
419
    match fi.rec_buffer.frames[fi.ref_frames[ref_frame.to_index()] as usize]
420 421 422 423 424 425 426 427 428 429 430 431 432 433 434
    {
      Some(ref rec) => {
        let blk_w = bsize.width();
        let blk_h = bsize.height();
        let (mvx_min, mvx_max, mvy_min, mvy_max) =
          get_mv_range(fi.w_in_b, fi.h_in_b, bo, blk_w, blk_h);

        // 0.5 is a fudge factor
        let lambda = (fi.me_lambda * 256.0 * 0.5) as u32;

        // Full-pixel motion estimation

        let mut lowest_cost = std::u64::MAX;
        let mut best_mv = MotionVector::default();

435
        Self::full_pixel_me(fi, fs, rec, bo, lambda, cmv, pmv,
436 437
                           mvx_min, mvx_max, mvy_min, mvy_max, blk_w, blk_h,
                           &mut best_mv, &mut lowest_cost, ref_frame);
438

439
        Self::sub_pixel_me(fi, fs, rec, bo, lambda, pmv,
440
                           mvx_min, mvx_max, mvy_min, mvy_max, blk_w, blk_h,
441
                           &mut best_mv, &mut lowest_cost, ref_frame);
442 443

        best_mv
Frank Bossen's avatar
Frank Bossen committed
444 445
      }

446
      None => MotionVector::default()
447
    }
448
  }
449 450 451

  fn estimate_motion_ss2<T: Pixel>(
    fi: &FrameInvariants<T>, fs: &FrameState<T>, bsize: BlockSize, ref_idx: usize,
Romain Vimont's avatar
Romain Vimont committed
452
    bo: BlockOffset, pmvs: &[Option<MotionVector>; 3], ref_frame: usize
453 454 455 456 457
  ) -> Option<MotionVector> {
    if let Some(ref rec) = fi.rec_buffer.frames[ref_idx] {
      let blk_w = bsize.width();
      let blk_h = bsize.height();
      let bo_adj = adjust_bo(bo, fi, blk_w, blk_h);
Romain Vimont's avatar
Romain Vimont committed
458
      let (mvx_min, mvx_max, mvy_min, mvy_max) = get_mv_range(fi.w_in_b, fi.h_in_b, bo_adj, blk_w, blk_h);
459 460 461

      let global_mv = [MotionVector{row: 0, col: 0}; 2];
      let frame_mvs = &fs.frame_mvs[ref_frame];
462
      let frame_ref_opt = fi.rec_buffer.frames[fi.ref_frames[0] as usize].as_ref().map(Arc::as_ref);
463 464 465 466 467 468 469 470

      let mut lowest_cost = std::u64::MAX;
      let mut best_mv = MotionVector::default();

      // Divide by 4 to account for subsampling, 0.125 is a fudge factor
      let lambda = (fi.me_lambda * 256.0 / 4.0 * 0.125) as u32;

      Self::me_ss2(
471 472
        fi, fs, pmvs, bo_adj,
        frame_mvs, frame_ref_opt, rec, global_mv, lambda,
473 474 475 476 477 478 479 480 481 482 483 484 485
        mvx_min, mvx_max, mvy_min, mvy_max, blk_w, blk_h,
        &mut best_mv, &mut lowest_cost
      );

      Some(MotionVector { row: best_mv.row * 2, col: best_mv.col * 2 })
    } else {
      None
    }
  }

  fn me_ss2<T: Pixel>(
    fi: &FrameInvariants<T>, fs: &FrameState<T>,
    pmvs: &[Option<MotionVector>; 3], bo_adj_h: BlockOffset,
486
    frame_mvs: &FrameMotionVectors, frame_ref_opt: Option<&ReferenceFrame<T>>,
487
    rec: &ReferenceFrame<T>, global_mv: [MotionVector; 2], lambda: u32,
488 489 490 491
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize,
    best_mv: &mut MotionVector, lowest_cost: &mut u64
  );
492
}
493

494 495 496
/// Motion estimation strategy whose searches are all built on
/// `diamond_me_search`.
pub struct DiamondSearch {}
/// Motion estimation strategy that scans a window with `full_search` and
/// refines the result with `telescopic_subpel_search`.
pub struct FullSearch {}

497 498
impl MotionEstimation for DiamondSearch {
  fn full_pixel_me<T: Pixel>(
499
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &ReferenceFrame<T>,
500
    bo: BlockOffset, lambda: u32,
501 502
    cmv: MotionVector, pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
503
    best_mv: &mut MotionVector, lowest_cost: &mut u64, ref_frame: RefType
504
  ) {
Thomas Daede's avatar
Thomas Daede committed
505
    let frame_mvs = &fs.frame_mvs[ref_frame.to_index()];
506
    let frame_ref = fi.rec_buffer.frames[fi.ref_frames[0] as usize].as_ref().map(Arc::as_ref);
507
    let predictors =
508
      get_subset_predictors(bo, cmv, frame_mvs, frame_ref, ref_frame.to_index());
509 510 511

    diamond_me_search(
      fi,
512
      bo.to_luma_plane_offset(),
513 514 515 516 517 518 519 520 521 522 523 524 525 526
      &fs.input.planes[0],
      &rec.frame.planes[0],
      &predictors,
      fi.sequence.bit_depth,
      pmv,
      lambda,
      mvx_min,
      mvx_max,
      mvy_min,
      mvy_max,
      blk_w,
      blk_h,
      best_mv,
      lowest_cost,
527
      false,
528 529 530
      ref_frame
    );
  }
531 532

  fn sub_pixel_me<T: Pixel>(
533
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &ReferenceFrame<T>,
534
    bo: BlockOffset, lambda: u32,
535 536
    pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
537
    best_mv: &mut MotionVector, lowest_cost: &mut u64, ref_frame: RefType,
538 539 540 541 542
  )
  {
    let predictors = vec![*best_mv];
    diamond_me_search(
      fi,
543
      bo.to_luma_plane_offset(),
544 545 546 547 548 549 550 551 552 553 554 555 556 557
      &fs.input.planes[0],
      &rec.frame.planes[0],
      &predictors,
      fi.sequence.bit_depth,
      pmv,
      lambda,
      mvx_min,
      mvx_max,
      mvy_min,
      mvy_max,
      blk_w,
      blk_h,
      best_mv,
      lowest_cost,
558
      true,
559 560 561
      ref_frame
    );
  }
562 563 564

  fn me_ss2<T: Pixel>(
    fi: &FrameInvariants<T>, fs: &FrameState<T>,
565
    pmvs: &[Option<MotionVector>; 3], bo_adj: BlockOffset,
566
    frame_mvs: &FrameMotionVectors, frame_ref_opt: Option<&ReferenceFrame<T>>,
567
    rec: &ReferenceFrame<T>, global_mv: [MotionVector; 2], lambda: u32,
568 569 570 571
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize,
    best_mv: &mut MotionVector, lowest_cost: &mut u64
  ) {
572 573 574 575
    let po = PlaneOffset {
      x: (bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
      y: (bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
    };
576 577 578
    for omv in pmvs.iter() {
      if let Some(pmv) = omv {
        let mut predictors = get_subset_predictors::<T>(
579
          bo_adj,
580 581 582 583 584 585 586 587 588 589
          MotionVector{row: pmv.row, col: pmv.col},
          &frame_mvs, frame_ref_opt, 0
        );

        for predictor in &mut predictors {
          predictor.row >>= 1;
          predictor.col >>= 1;
        }

        diamond_me_search(
Romain Vimont's avatar
Romain Vimont committed
590
          fi, po,
591 592 593 594 595 596
          &fs.input_hres, &rec.input_hres,
          &predictors, fi.sequence.bit_depth,
          global_mv, lambda,
          mvx_min >> 1, mvx_max >> 1, mvy_min >> 1, mvy_max >> 1,
          blk_w >> 1, blk_h >> 1,
          best_mv, lowest_cost,
Thomas Daede's avatar
Thomas Daede committed
597
          false, LAST_FRAME
598 599 600 601
        );
      }
    }
  }
602 603 604 605
}

impl MotionEstimation for FullSearch {
  fn full_pixel_me<T: Pixel>(
606
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &ReferenceFrame<T>,
607
    bo: BlockOffset, lambda: u32,
608 609
    cmv: MotionVector, pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
610
    best_mv: &mut MotionVector, lowest_cost: &mut u64, _ref_frame: RefType
611
  ) {
612
    let po = bo.to_luma_plane_offset();
613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633
    let range = 16;
    let x_lo = po.x
      + ((-range + (cmv.col / 8) as isize).max(mvx_min / 8).min(mvx_max / 8));
    let x_hi = po.x
      + ((range + (cmv.col / 8) as isize).max(mvx_min / 8).min(mvx_max / 8));
    let y_lo = po.y
      + ((-range + (cmv.row / 8) as isize).max(mvy_min / 8).min(mvy_max / 8));
    let y_hi = po.y
      + ((range + (cmv.row / 8) as isize).max(mvy_min / 8).min(mvy_max / 8));

    full_search(
      x_lo,
      x_hi,
      y_lo,
      y_hi,
      blk_h,
      blk_w,
      &fs.input.planes[0],
      &rec.frame.planes[0],
      best_mv,
      lowest_cost,
Romain Vimont's avatar
Romain Vimont committed
634
      po,
635 636 637 638 639 640 641
      2,
      fi.sequence.bit_depth,
      lambda,
      pmv,
      fi.allow_high_precision_mv
    );
  }
642 643

  fn sub_pixel_me<T: Pixel>(
644
    fi: &FrameInvariants<T>, fs: &FrameState<T>, _rec: &ReferenceFrame<T>,
645
    bo: BlockOffset, lambda: u32,
646
    pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
647
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
648
    best_mv: &mut MotionVector, lowest_cost: &mut u64, ref_frame: RefType,
649 650 651 652 653
  )
  {
    telescopic_subpel_search(
      fi,
      fs,
654
      bo.to_luma_plane_offset(),
655 656 657 658 659 660 661
      lambda,
      ref_frame,
      pmv,
      mvx_min,
      mvx_max,
      mvy_min,
      mvy_max,
662 663
      blk_w,
      blk_h,
664 665 666 667
      best_mv,
      lowest_cost
    );
  }
668 669 670

  fn me_ss2<T: Pixel>(
    fi: &FrameInvariants<T>, fs: &FrameState<T>,
671
    pmvs: &[Option<MotionVector>; 3], bo_adj: BlockOffset,
672
    _frame_mvs: &FrameMotionVectors, _frame_ref_opt: Option<&ReferenceFrame<T>>,
673
    rec: &ReferenceFrame<T>, _global_mv: [MotionVector; 2], lambda: u32,
674 675 676 677
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize,
    best_mv: &mut MotionVector, lowest_cost: &mut u64
  ) {
678 679 680 681
    let po = PlaneOffset {
      x: (bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
      y: (bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
    };
682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699
    let range = 16;
    for omv in pmvs.iter() {
      if let Some(pmv) = omv {
        let x_lo = po.x + (((pmv.col as isize / 8 - range).max(mvx_min / 8).min(mvx_max / 8)) >> 1);
        let x_hi = po.x + (((pmv.col as isize / 8 + range).max(mvx_min / 8).min(mvx_max / 8)) >> 1);
        let y_lo = po.y + (((pmv.row as isize / 8 - range).max(mvy_min / 8).min(mvy_max / 8)) >> 1);
        let y_hi = po.y + (((pmv.row as isize / 8 + range).max(mvy_min / 8).min(mvy_max / 8)) >> 1);
        full_search(
          x_lo,
          x_hi,
          y_lo,
          y_hi,
          blk_h >> 1,
          blk_w >> 1,
          &fs.input_hres,
          &rec.input_hres,
          best_mv,
          lowest_cost,
Romain Vimont's avatar
Romain Vimont committed
700
          po,
701 702 703 704 705 706 707 708 709
          1,
          fi.sequence.bit_depth,
          lambda,
          [MotionVector::default(); 2],
          fi.allow_high_precision_mv
        );
      }
    }
  }
710
}
711

712 713
fn get_best_predictor<T: Pixel>(
  fi: &FrameInvariants<T>,
Romain Vimont's avatar
Romain Vimont committed
714
  po: PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>,
715
  predictors: &[MotionVector],
716
  bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
717 718
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
719
  center_mv: &mut MotionVector, center_mv_cost: &mut u64,
Thomas Daede's avatar
Thomas Daede committed
720
  tmp_plane_opt: &mut Option<Plane<T>>, ref_frame: RefType) {
Vladimir Kazakov's avatar
Vladimir Kazakov committed
721
  *center_mv = MotionVector::default();
722 723 724 725 726 727
  *center_mv_cost = std::u64::MAX;

  for &init_mv in predictors.iter() {
    let cost = get_mv_rd_cost(
      fi, po, p_org, p_ref, bit_depth,
      pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
728
      blk_w, blk_h, init_mv, tmp_plane_opt, ref_frame);
729 730 731 732 733 734 735 736

    if cost < *center_mv_cost {
      *center_mv = init_mv;
      *center_mv_cost = cost;
    }
  }
}

737 738
fn diamond_me_search<T: Pixel>(
  fi: &FrameInvariants<T>,
Romain Vimont's avatar
Romain Vimont committed
739
  po: PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>,
740
  predictors: &[MotionVector],
741
  bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
742 743
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
744
  center_mv: &mut MotionVector, center_mv_cost: &mut u64,
Thomas Daede's avatar
Thomas Daede committed
745
  subpixel: bool, ref_frame: RefType)
746 747
{
  let diamond_pattern = [(1i16, 0i16), (0, 1), (-1, 0), (0, -1)];
748 749
  let (mut diamond_radius, diamond_radius_end, mut tmp_plane_opt) = {
    if subpixel {
750
      // Sub-pixel motion estimation
751 752 753 754 755
      (
        4i16,
        if fi.allow_high_precision_mv {1i16} else {2i16},
        Some(Plane::new(blk_w, blk_h, 0, 0, 0, 0)),
      )
756 757
    } else {
      // Full pixel motion estimation
758
      (16i16, 8i16, None)
759 760
    }
  };
761 762 763 764

  get_best_predictor(
    fi, po, p_org, p_ref, &predictors,
    bit_depth, pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
765
    blk_w, blk_h, center_mv, center_mv_cost,
766
    &mut tmp_plane_opt, ref_frame);
767 768 769

  loop {
    let mut best_diamond_rd_cost = std::u64::MAX;
Vladimir Kazakov's avatar
Vladimir Kazakov committed
770
    let mut best_diamond_mv = MotionVector::default();
771 772 773 774 775 776 777 778 779

    for p in diamond_pattern.iter() {

        let cand_mv = MotionVector {
          row: center_mv.row + diamond_radius * p.0,
          col: center_mv.col + diamond_radius * p.1
        };

        let rd_cost = get_mv_rd_cost(
Romain Vimont's avatar
Romain Vimont committed
780
          fi, po, p_org, p_ref, bit_depth,
781
          pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
782
          blk_w, blk_h, cand_mv, &mut tmp_plane_opt, ref_frame);
783 784 785 786 787 788 789 790

        if rd_cost < best_diamond_rd_cost {
          best_diamond_rd_cost = rd_cost;
          best_diamond_mv = cand_mv;
        }
    }

    if *center_mv_cost <= best_diamond_rd_cost {
791
      if diamond_radius == diamond_radius_end {
792 793 794 795 796 797 798 799 800 801 802 803 804 805
        break;
      } else {
        diamond_radius /= 2;
      }
    }
    else {
      *center_mv = best_diamond_mv;
      *center_mv_cost = best_diamond_rd_cost;
    }
  }

  assert!(*center_mv_cost < std::u64::MAX);
}

806 807
fn get_mv_rd_cost<T: Pixel>(
  fi: &FrameInvariants<T>,
Romain Vimont's avatar
Romain Vimont committed
808
  po: PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>, bit_depth: usize,
809
  pmv: [MotionVector; 2], lambda: u32,
810 811
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
812
  cand_mv: MotionVector, tmp_plane_opt: &mut Option<Plane<T>>,
Thomas Daede's avatar
Thomas Daede committed
813
  ref_frame: RefType) -> u64
814 815 816 817 818 819 820 821 822 823
{
  if (cand_mv.col as isize) < mvx_min || (cand_mv.col as isize) > mvx_max {
    return std::u64::MAX;
  }
  if (cand_mv.row as isize) < mvy_min || (cand_mv.row as isize) > mvy_max {
    return std::u64::MAX;
  }

  let plane_org = p_org.slice(po);

824
  if let Some(ref mut tmp_plane) = tmp_plane_opt {
Romain Vimont's avatar
Romain Vimont committed
825
    let mut tmp_slice = &mut tmp_plane.mut_slice(PlaneOffset { x: 0, y: 0 });
826 827 828
    PredictionMode::NEWMV.predict_inter(
      fi,
      0,
Romain Vimont's avatar
Romain Vimont committed
829
      po,
830 831 832 833 834 835
      &mut tmp_slice,
      blk_w,
      blk_h,
      [ref_frame, NONE_FRAME],
      [cand_mv, MotionVector { row: 0, col: 0 }]
    );
Romain Vimont's avatar
Romain Vimont committed
836
    let plane_ref = tmp_plane.slice(PlaneOffset { x: 0, y: 0 });
837 838 839 840 841 842
    compute_mv_rd_cost(
      fi, pmv, lambda, bit_depth, blk_w, blk_h, cand_mv,
      &plane_org, &plane_ref
    )
  } else {
    // Full pixel motion vector
Romain Vimont's avatar
Romain Vimont committed
843
    let plane_ref = p_ref.slice(PlaneOffset {
844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860
      x: po.x + (cand_mv.col / 8) as isize,
      y: po.y + (cand_mv.row / 8) as isize
    });
    compute_mv_rd_cost(
      fi, pmv, lambda, bit_depth, blk_w, blk_h, cand_mv,
      &plane_org, &plane_ref
    )
  }
}

fn compute_mv_rd_cost<T: Pixel>(
  fi: &FrameInvariants<T>,
  pmv: [MotionVector; 2], lambda: u32,
  bit_depth: usize, blk_w: usize, blk_h: usize, cand_mv: MotionVector,
  plane_org: &PlaneSlice<T>, plane_ref: &PlaneSlice<T>
) -> u64
{
861
  let sad = get_sad(&plane_org, &plane_ref, blk_w, blk_h, bit_depth);
862 863 864 865 866 867 868 869

  let rate1 = get_mv_rate(cand_mv, pmv[0], fi.allow_high_precision_mv);
  let rate2 = get_mv_rate(cand_mv, pmv[1], fi.allow_high_precision_mv);
  let rate = rate1.min(rate2 + 1);

  256 * sad as u64 + rate as u64 * lambda as u64
}

870
fn telescopic_subpel_search<T: Pixel>(
871
  fi: &FrameInvariants<T>, fs: &FrameState<T>, po: PlaneOffset,
Thomas Daede's avatar
Thomas Daede committed
872
  lambda: u32, ref_frame: RefType, pmv: [MotionVector; 2],
873
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
874
  blk_w: usize, blk_h: usize,
Romain Vimont's avatar
Romain Vimont committed
875
  best_mv: &mut MotionVector, lowest_cost: &mut u64
876 877 878 879 880 881 882 883
) {
  let mode = PredictionMode::NEWMV;

  let mut steps = vec![8, 4, 2];
  if fi.allow_high_precision_mv {
    steps.push(1);
  }

Romain Vimont's avatar
Romain Vimont committed
884 885
  let mut tmp_plane = Plane::new(blk_w, blk_h, 0, 0, 0, 0);

886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908
  for step in steps {
    let center_mv_h = *best_mv;
    for i in 0..3 {
      for j in 0..3 {
        // Skip the center point that was already tested
        if i == 1 && j == 1 {
          continue;
        }

        let cand_mv = MotionVector {
          row: center_mv_h.row + step * (i as i16 - 1),
          col: center_mv_h.col + step * (j as i16 - 1)
        };

        if (cand_mv.col as isize) < mvx_min || (cand_mv.col as isize) > mvx_max {
          continue;
        }
        if (cand_mv.row as isize) < mvy_min || (cand_mv.row as isize) > mvy_max {
          continue;
        }

        {
          let tmp_slice =
Romain Vimont's avatar
Romain Vimont committed
909
            &mut tmp_plane.mut_slice(PlaneOffset { x: 0, y: 0 });
910 911 912 913

          mode.predict_inter(
            fi,
            0,
Romain Vimont's avatar
Romain Vimont committed
914
            po,
915 916 917 918 919 920 921 922
            tmp_slice,
            blk_w,
            blk_h,
            [ref_frame, NONE_FRAME],
            [cand_mv, MotionVector { row: 0, col: 0 }]
          );
        }

Romain Vimont's avatar
Romain Vimont committed
923 924
        let plane_org = fs.input.planes[0].slice(po);
        let plane_ref = tmp_plane.slice(PlaneOffset { x: 0, y: 0 });
925

926
        let sad = get_sad(&plane_org, &plane_ref, blk_w, blk_h, fi.sequence.bit_depth);
927 928 929 930 931 932 933 934 935 936 937 938 939 940 941

        let rate1 = get_mv_rate(cand_mv, pmv[0], fi.allow_high_precision_mv);
        let rate2 = get_mv_rate(cand_mv, pmv[1], fi.allow_high_precision_mv);
        let rate = rate1.min(rate2 + 1);
        let cost = 256 * sad as u64 + rate as u64 * lambda as u64;

        if cost < *lowest_cost {
          *lowest_cost = cost;
          *best_mv = cand_mv;
        }
      }
    }
  }
}

942
fn full_search<T: Pixel>(
Kyle Siefring's avatar
Kyle Siefring committed
943
  x_lo: isize, x_hi: isize, y_lo: isize, y_hi: isize, blk_h: usize,
944
  blk_w: usize, p_org: &Plane<T>, p_ref: &Plane<T>, best_mv: &mut MotionVector,
Romain Vimont's avatar
Romain Vimont committed
945
  lowest_cost: &mut u64, po: PlaneOffset, step: usize, bit_depth: usize,
946
  lambda: u32, pmv: [MotionVector; 2], allow_high_precision_mv: bool
Kyle Siefring's avatar
Kyle Siefring committed
947
) {
Luca Barbato's avatar
Luca Barbato committed
948 949 950 951 952
    let search_range_y = (y_lo..=y_hi).step_by(step);
    let search_range_x = (x_lo..=x_hi).step_by(step);
    let search_area = search_range_y.flat_map(|y| { search_range_x.clone().map(move |x| (y, x)) });

    let (cost, mv) = search_area.map(|(y, x)| {
953
      let plane_org = p_org.slice(po);
Romain Vimont's avatar
Romain Vimont committed
954
      let plane_ref = p_ref.slice(PlaneOffset { x, y });
955

956
      let sad = get_sad(&plane_org, &plane_ref, blk_w, blk_h, bit_depth);
957

Frank Bossen's avatar
Frank Bossen committed
958 959 960 961 962 963 964 965
      let mv = MotionVector {
        row: 8 * (y as i16 - po.y as i16),
        col: 8 * (x as i16 - po.x as i16)
      };

      let rate1 = get_mv_rate(mv, pmv[0], allow_high_precision_mv);
      let rate2 = get_mv_rate(mv, pmv[1], allow_high_precision_mv);
      let rate = rate1.min(rate2 + 1);
966
      let cost = 256 * sad as u64 + rate as u64 * lambda as u64;
Frank Bossen's avatar
Frank Bossen committed
967

Luca Barbato's avatar
Luca Barbato committed
968 969 970 971 972
      (cost, mv)
  }).min_by_key(|(c, _)| *c).unwrap();

    *lowest_cost = cost;
    *best_mv = mv;
973 974 975
}

// Adjust block offset such that entire block lies within frame boundaries
Romain Vimont's avatar
Romain Vimont committed
976
fn adjust_bo<T: Pixel>(bo: BlockOffset, fi: &FrameInvariants<T>, blk_w: usize, blk_h: usize) -> BlockOffset {
977 978 979 980 981 982
  BlockOffset {
    x: (bo.x as isize).min(fi.w_in_b as isize - blk_w as isize / 4).max(0) as usize,
    y: (bo.y as isize).min(fi.h_in_b as isize - blk_h as isize / 4).max(0) as usize
  }
}

983
#[inline(always)]
Frank Bossen's avatar
Frank Bossen committed
984
fn get_mv_rate(a: MotionVector, b: MotionVector, allow_high_precision_mv: bool) -> u32 {
985
  #[inline(always)]
Frank Bossen's avatar
Frank Bossen committed
986 987 988 989 990 991 992 993 994 995 996 997
  fn diff_to_rate(diff: i16, allow_high_precision_mv: bool) -> u32 {
    let d = if allow_high_precision_mv { diff } else { diff >> 1 };
    if d == 0 {
      0
    } else {
      2 * (16 - d.abs().leading_zeros())
    }
  }

  diff_to_rate(a.row - b.row, allow_high_precision_mv) + diff_to_rate(a.col - b.col, allow_high_precision_mv)
}

998 999
pub fn estimate_motion_ss4<T: Pixel>(
  fi: &FrameInvariants<T>, fs: &FrameState<T>, bsize: BlockSize, ref_idx: usize,
Romain Vimont's avatar
Romain Vimont committed
1000
  bo: BlockOffset
1001 1002 1003 1004 1005 1006 1007 1008 1009
) -> Option<MotionVector> {
  if let Some(ref rec) = fi.rec_buffer.frames[ref_idx] {
    let blk_w = bsize.width();
    let blk_h = bsize.height();
    let bo_adj = adjust_bo(bo, fi, blk_w, blk_h);
    let po = PlaneOffset {
      x: (bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 2,
      y: (bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 2
    };
1010

1011 1012
    let range_x = 192 * fi.me_range_scale as isize;
    let range_y = 64 * fi.me_range_scale as isize;
Romain Vimont's avatar
Romain Vimont committed
1013
    let (mvx_min, mvx_max, mvy_min, mvy_max) = get_mv_range(fi.w_in_b, fi.h_in_b, bo_adj, blk_w, blk_h);
1014 1015 1016 1017
    let x_lo = po.x + (((-range_x).max(mvx_min / 8)) >> 2);
    let x_hi = po.x + (((range_x).min(mvx_max / 8)) >> 2);
    let y_lo = po.y + (((-range_y).max(mvy_min / 8)) >> 2);
    let y_hi = po.y + (((range_y).min(mvy_max / 8)) >> 2);
1018

1019
    let mut lowest_cost = std::u64::MAX;
Vladimir Kazakov's avatar
Vladimir Kazakov committed
1020
    let mut best_mv = MotionVector::default();
1021

Frank Bossen's avatar
Frank Bossen committed
1022
    // Divide by 16 to account for subsampling, 0.125 is a fudge factor
1023
    let lambda = (fi.me_lambda * 256.0 / 16.0 * 0.125) as u32;
Frank Bossen's avatar
Frank Bossen committed
1024

1025
    full_search(
Kyle Siefring's avatar
Kyle Siefring committed
1026 1027 1028 1029 1030 1031 1032 1033 1034
      x_lo,
      x_hi,
      y_lo,
      y_hi,
      blk_h >> 2,
      blk_w >> 2,
      &fs.input_qres,
      &rec.input_qres,
      &mut best_mv,
Frank Bossen's avatar
Frank Bossen committed
1035
      &mut lowest_cost,
Romain Vimont's avatar
Romain Vimont committed
1036
      po,
Kyle Siefring's avatar
Kyle Siefring committed
1037
      1,
1038
      fi.sequence.bit_depth,
Frank Bossen's avatar
Frank Bossen committed
1039
      lambda,
Vladimir Kazakov's avatar
Vladimir Kazakov committed
1040
      [MotionVector::default(); 2],
Frank Bossen's avatar
Frank Bossen committed
1041
      fi.allow_high_precision_mv
1042 1043 1044 1045 1046 1047 1048 1049
    );

    Some(MotionVector { row: best_mv.row * 4, col: best_mv.col * 4 })
  } else {
    None
  }
}

1050 1051 1052
#[cfg(test)]
pub mod test {
  use super::*;
Raphaël Zumer's avatar
Raphaël Zumer committed
1053 1054
  use crate::partition::BlockSize;
  use crate::partition::BlockSize::*;
1055 1056

  // Generate plane data for get_sad_same()
1057
  fn setup_sad<T: Pixel>() -> (Plane<T>, Plane<T>) {
1058 1059
    let mut input_plane = Plane::new(640, 480, 0, 0, 128 + 8, 128 + 8);
    let mut rec_plane = input_plane.clone();
1060 1061
    // Make the test pattern robust to data alignment
    let xpad_off = (input_plane.cfg.xorigin - input_plane.cfg.xpad) as i32 - 8i32;
Luca Barbato's avatar
Luca Barbato committed
1062

1063
    for (i, row) in input_plane.data.chunks_mut(input_plane.cfg.stride).enumerate() {