me.rs 35 KB
Newer Older
1 2 3 4 5 6 7 8 9
// Copyright (c) 2017-2018, The rav1e contributors. All rights reserved
//
// This source code is subject to the terms of the BSD 2 Clause License and
// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
// was not distributed with this source code in the LICENSE file, you can
// obtain it at www.aomedia.org/license/software. If the Alliance for Open
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.

10
#[cfg(all(target_arch = "x86_64", feature = "nasm"))]
Raphaël Zumer's avatar
Raphaël Zumer committed
11
pub use self::nasm::get_sad;
12
#[cfg(any(not(target_arch = "x86_64"), not(feature = "nasm")))]
Raphaël Zumer's avatar
Raphaël Zumer committed
13
pub use self::native::get_sad;
Raphaël Zumer's avatar
Raphaël Zumer committed
14
use crate::context::{BlockOffset, BLOCK_TO_PLANE_SHIFT, MI_SIZE};
15
use crate::encoder::ReferenceFrame;
Raphaël Zumer's avatar
Raphaël Zumer committed
16 17 18
use crate::FrameInvariants;
use crate::FrameState;
use crate::partition::*;
Thomas Daede's avatar
Thomas Daede committed
19
use crate::partition::RefType::*;
Raphaël Zumer's avatar
Raphaël Zumer committed
20
use crate::plane::*;
21
use crate::tiling::*;
22
use crate::util::Pixel;
23

Romain Vimont's avatar
Romain Vimont committed
24
use std::ops::{Index, IndexMut};
25
use std::sync::Arc;
26

27
#[cfg(all(target_arch = "x86_64", feature = "nasm"))]
28
mod nasm {
Raphaël Zumer's avatar
Raphaël Zumer committed
29 30
  use crate::plane::*;
  use crate::util::*;
31
  use std::mem;
Luca Barbato's avatar
Luca Barbato committed
32

Raphaël Zumer's avatar
Raphaël Zumer committed
33 34
  use libc;

35 36 37 38 39 40 41 42 43
  // Declares external assembly SAD kernels.
  //
  // Input is a comma-separated list of `(symbol, sample_type)` pairs. For
  // every pair an `extern` declaration is emitted for a function that takes
  // two pointer/stride pairs over `sample_type` and returns a `u32`.
  macro_rules! declare_asm_sad {
    ($(($name: ident, $T: ident)),+) => (
      $(
        extern "C" {
          fn $name(
            src: *const $T,
            src_stride: libc::ptrdiff_t,
            dst: *const $T,
            dst_stride: libc::ptrdiff_t
          ) -> u32;
        }
      )+
    )
  }
Kyle Siefring's avatar
Kyle Siefring committed
45

46
  declare_asm_sad![
    // SSSE3 kernels operating on u16 samples
    (rav1e_sad_4x4_hbd_ssse3, u16),
    (rav1e_sad_8x8_hbd10_ssse3, u16),
    (rav1e_sad_16x16_hbd_ssse3, u16),
    (rav1e_sad_32x32_hbd10_ssse3, u16),
    (rav1e_sad_64x64_hbd10_ssse3, u16),
    (rav1e_sad_128x128_hbd10_ssse3, u16),

    // SSE2 kernels operating on u8 samples: width 4
    (rav1e_sad4x4_sse2, u8),
    (rav1e_sad4x8_sse2, u8),
    (rav1e_sad4x16_sse2, u8),

    // SSE2: width 8
    (rav1e_sad8x4_sse2, u8),
    (rav1e_sad8x8_sse2, u8),
    (rav1e_sad8x16_sse2, u8),
    (rav1e_sad8x32_sse2, u8),

    // SSE2: square blocks from 16x16 up
    (rav1e_sad16x16_sse2, u8),
    (rav1e_sad32x32_sse2, u8),
    (rav1e_sad64x64_sse2, u8),
    (rav1e_sad128x128_sse2, u8),

    // AVX2 kernels operating on u8 samples: width 16
    (rav1e_sad16x4_avx2, u8),
    (rav1e_sad16x8_avx2, u8),
    (rav1e_sad16x16_avx2, u8),
    (rav1e_sad16x32_avx2, u8),
    (rav1e_sad16x64_avx2, u8),

    // AVX2: width 32
    (rav1e_sad32x8_avx2, u8),
    (rav1e_sad32x16_avx2, u8),
    (rav1e_sad32x32_avx2, u8),
    (rav1e_sad32x64_avx2, u8),

    // AVX2: width 64
    (rav1e_sad64x16_avx2, u8),
    (rav1e_sad64x32_avx2, u8),
    (rav1e_sad64x64_avx2, u8),
    (rav1e_sad64x128_avx2, u8),

    // AVX2: width 128
    (rav1e_sad128x64_avx2, u8),
    (rav1e_sad128x128_avx2, u8)
  ];

Luca Barbato's avatar
Luca Barbato committed
94
  #[target_feature(enable = "ssse3")]
95
  unsafe fn sad_hbd_ssse3(
96 97
    plane_org: &PlaneSlice<'_, u16>, plane_ref: &PlaneSlice<'_, u16>, blk_w: usize,
    blk_h: usize, bit_depth: usize
Luca Barbato's avatar
Luca Barbato committed
98 99
  ) -> u32 {
    let mut sum = 0 as u32;
100 101
    let org_stride = (plane_org.plane.cfg.stride * 2) as libc::ptrdiff_t;
    let ref_stride = (plane_ref.plane.cfg.stride * 2) as libc::ptrdiff_t;
Luca Barbato's avatar
Luca Barbato committed
102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
    assert!(blk_h >= 4 && blk_w >= 4);
    let step_size =
      blk_h.min(blk_w).min(if bit_depth <= 10 { 128 } else { 4 });
    let func = match step_size.ilog() {
      3 => rav1e_sad_4x4_hbd_ssse3,
      4 => rav1e_sad_8x8_hbd10_ssse3,
      5 => rav1e_sad_16x16_hbd_ssse3,
      6 => rav1e_sad_32x32_hbd10_ssse3,
      7 => rav1e_sad_64x64_hbd10_ssse3,
      8 => rav1e_sad_128x128_hbd10_ssse3,
      _ => rav1e_sad_128x128_hbd10_ssse3
    };
    for r in (0..blk_h).step_by(step_size) {
      for c in (0..blk_w).step_by(step_size) {
        let org_slice = plane_org.subslice(c, r);
        let ref_slice = plane_ref.subslice(c, r);
Romain Vimont's avatar
Romain Vimont committed
118 119
        let org_ptr = org_slice.as_ptr();
        let ref_ptr = ref_slice.as_ptr();
120 121 122
        // FIXME for now, T == u16
        let org_ptr = org_ptr as *const u16;
        let ref_ptr = ref_ptr as *const u16;
Luca Barbato's avatar
Luca Barbato committed
123 124
        sum += func(org_ptr, org_stride, ref_ptr, ref_stride);
      }
Kyle Siefring's avatar
Kyle Siefring committed
125
    }
126
    sum
Kyle Siefring's avatar
Kyle Siefring committed
127 128
  }

129
  #[target_feature(enable = "sse2")]
130
  unsafe fn sad_sse2(
131 132
    plane_org: &PlaneSlice<'_, u8>, plane_ref: &PlaneSlice<'_, u8>, blk_w: usize,
    blk_h: usize
133
  ) -> u32 {
134 135
    let org_ptr = plane_org.as_ptr();
    let ref_ptr = plane_ref.as_ptr();
136 137
    let org_stride = plane_org.plane.cfg.stride as libc::ptrdiff_t;
    let ref_stride = plane_ref.plane.cfg.stride as libc::ptrdiff_t;
138 139 140 141 142 143 144
    if blk_w == 16 && blk_h == 16 && (org_ptr as usize & 15) == 0 {
      return rav1e_sad16x16_sse2(org_ptr, org_stride, ref_ptr, ref_stride);
    }
    // Note: unaligned blocks come from hres/qres ME search
    let ptr_align_log2 = (org_ptr as usize).trailing_zeros() as usize;
    // The largest unaligned-safe function is for 8x8
    let ptr_align = 1 << ptr_align_log2.max(3);
145
    let step_size = blk_h.min(blk_w).min(ptr_align);
146 147 148 149 150 151 152 153 154
    let func = match step_size.ilog() {
      3 => rav1e_sad4x4_sse2,
      4 => rav1e_sad8x8_sse2,
      5 => rav1e_sad16x16_sse2,
      6 => rav1e_sad32x32_sse2,
      7 => rav1e_sad64x64_sse2,
      8 => rav1e_sad128x128_sse2,
      _ => rav1e_sad128x128_sse2
    };
155 156 157 158 159
    let mut sum = 0 as u32;
    for r in (0..blk_h as isize).step_by(step_size) {
      for c in (0..blk_w as isize).step_by(step_size) {
        let org_ptr = org_ptr.offset(r * org_stride + c);
        let ref_ptr = ref_ptr.offset(r * ref_stride + c);
160 161 162 163 164 165
        sum += func(org_ptr, org_stride, ref_ptr, ref_stride);
      }
    }
    sum
  }

166 167
  #[target_feature(enable = "avx2")]
  unsafe fn sad_avx2(
168 169
    plane_org: &PlaneSlice<'_, u8>, plane_ref: &PlaneSlice<'_, u8>, blk_w: usize,
    blk_h: usize
170
  ) -> u32 {
David Michael Barr's avatar
David Michael Barr committed
171 172
    let org_ptr = plane_org.as_ptr();
    let ref_ptr = plane_ref.as_ptr();
173 174
    let org_stride = plane_org.plane.cfg.stride as libc::ptrdiff_t;
    let ref_stride = plane_ref.plane.cfg.stride as libc::ptrdiff_t;
175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205

    let func = match (blk_w, blk_h) {
      (4, 4) => rav1e_sad4x4_sse2,
      (4, 8) => rav1e_sad4x8_sse2,
      (4, 16) => rav1e_sad4x16_sse2,

      (8, 4) => rav1e_sad8x4_sse2,
      (8, 8) => rav1e_sad8x8_sse2,
      (8, 16) => rav1e_sad8x16_sse2,
      (8, 32) => rav1e_sad8x32_sse2,

      (16, 4) => rav1e_sad16x4_avx2,
      (16, 8) => rav1e_sad16x8_avx2,
      (16, 16) => rav1e_sad16x16_avx2,
      (16, 32) => rav1e_sad16x32_avx2,
      (16, 64) => rav1e_sad16x64_avx2,

      (32, 8) => rav1e_sad32x8_avx2,
      (32, 16) => rav1e_sad32x16_avx2,
      (32, 32) => rav1e_sad32x32_avx2,
      (32, 64) => rav1e_sad32x64_avx2,

      (64, 16) => rav1e_sad64x16_avx2,
      (64, 32) => rav1e_sad64x32_avx2,
      (64, 64) => rav1e_sad64x64_avx2,
      (64, 128) => rav1e_sad64x128_avx2,

      (128, 64) => rav1e_sad128x64_avx2,
      (128, 128) => rav1e_sad128x128_avx2,

      _ => unreachable!()
206
    };
207 208
    func(org_ptr, org_stride, ref_ptr, ref_stride)

209 210
  }

Luca Barbato's avatar
Luca Barbato committed
211
  #[inline(always)]
212
  pub fn get_sad<T: Pixel>(
213
    plane_org: &PlaneSlice<'_, T>, plane_ref: &PlaneSlice<'_, T>, blk_w: usize, blk_h: usize, bit_depth: usize
Luca Barbato's avatar
Luca Barbato committed
214
  ) -> u32 {
215
    #[cfg(all(target_arch = "x86_64", feature = "nasm"))]
Luca Barbato's avatar
Luca Barbato committed
216
    {
217
      if mem::size_of::<T>() == 2 && is_x86_feature_detected!("ssse3") && blk_h >= 4 && blk_w >= 4 {
Luca Barbato's avatar
Luca Barbato committed
218
        return unsafe {
219 220
          let plane_org = &*(plane_org as *const _ as *const PlaneSlice<'_, u16>);
          let plane_ref = &*(plane_ref as *const _ as *const PlaneSlice<'_, u16>);
221
          sad_hbd_ssse3(plane_org, plane_ref, blk_w, blk_h, bit_depth)
Luca Barbato's avatar
Luca Barbato committed
222 223
        };
      }
224
      if mem::size_of::<T>() == 1 && is_x86_feature_detected!("avx2") && blk_h >= 4 && blk_w >= 4 {
225 226 227
        return unsafe {
          let plane_org = &*(plane_org as *const _ as *const PlaneSlice<'_, u8>);
          let plane_ref = &*(plane_ref as *const _ as *const PlaneSlice<'_, u8>);
228
          sad_avx2(plane_org, plane_ref, blk_w, blk_h)
229 230
        };
      }
231 232
      if mem::size_of::<T>() == 1 && is_x86_feature_detected!("sse2") && blk_h >= 4 && blk_w >= 4 {
        return unsafe {
233 234
          let plane_org = &*(plane_org as *const _ as *const PlaneSlice<'_, u8>);
          let plane_ref = &*(plane_ref as *const _ as *const PlaneSlice<'_, u8>);
235
          sad_sse2(plane_org, plane_ref, blk_w, blk_h)
236 237
        };
      }
Kyle Siefring's avatar
Kyle Siefring committed
238
    }
239
    super::native::get_sad(plane_org, plane_ref, blk_w, blk_h, bit_depth)
Kyle Siefring's avatar
Kyle Siefring committed
240
  }
241 242 243
}

mod native {
Raphaël Zumer's avatar
Raphaël Zumer committed
244
  use crate::plane::*;
245
  use crate::util::*;
246

Luca Barbato's avatar
Luca Barbato committed
247
  #[inline(always)]
248
  pub fn get_sad<T: Pixel>(
249 250
    plane_org: &PlaneSlice<'_, T>, plane_ref: &PlaneSlice<'_, T>, blk_w: usize,
    blk_h: usize, _bit_depth: usize
Luca Barbato's avatar
Luca Barbato committed
251 252
  ) -> u32 {
    let mut sum = 0 as u32;
253

Luca Barbato's avatar
Luca Barbato committed
254 255
    let org_iter = plane_org.iter_width(blk_w);
    let ref_iter = plane_ref.iter_width(blk_w);
256

Luca Barbato's avatar
Luca Barbato committed
257
    for (slice_org, slice_ref) in org_iter.take(blk_h).zip(ref_iter) {
258 259 260
      sum += slice_org
        .iter()
        .zip(slice_ref)
261
        .map(|(&a, &b)| (i32::cast_from(a) - i32::cast_from(b)).abs() as u32)
262
        .sum::<u32>();
Luca Barbato's avatar
Luca Barbato committed
263
    }
264

Luca Barbato's avatar
Luca Barbato committed
265 266
    sum
  }
267 268
}

Romain Vimont's avatar
Romain Vimont committed
269 270 271 272 273 274 275 276 277 278 279 280 281 282 283
#[derive(Debug, Clone)]
pub struct FrameMotionVectors {
  mvs: Box<[MotionVector]>,
  pub cols: usize,
  pub rows: usize,
}

impl FrameMotionVectors {
  pub fn new(cols: usize, rows: usize) -> Self {
    Self {
      mvs: vec![MotionVector::default(); cols * rows].into_boxed_slice(),
      cols,
      rows,
    }
  }
284 285 286 287 288 289 290 291 292 293

  #[inline(always)]
  pub fn as_tile_motion_vectors(&self) -> TileMotionVectors<'_> {
    TileMotionVectors::new(self, 0, 0, self.cols, self.rows)
  }

  #[inline(always)]
  pub fn as_tile_motion_vectors_mut(&mut self) -> TileMotionVectorsMut<'_> {
    TileMotionVectorsMut::new(self, 0, 0, self.cols, self.rows)
  }
Romain Vimont's avatar
Romain Vimont committed
294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310
}

/// Indexing by row number yields that row as a slice of `cols` vectors.
impl Index<usize> for FrameMotionVectors {
  type Output = [MotionVector];
  #[inline]
  fn index(&self, index: usize) -> &Self::Output {
    let row_start = index * self.cols;
    let row_end = (index + 1) * self.cols;
    &self.mvs[row_start..row_end]
  }
}

/// Mutable indexing by row number yields that row as a mutable slice of
/// `cols` vectors.
impl IndexMut<usize> for FrameMotionVectors {
  #[inline]
  fn index_mut(&mut self, index: usize) -> &mut Self::Output {
    let row_start = index * self.cols;
    let row_end = (index + 1) * self.cols;
    &mut self.mvs[row_start..row_end]
  }
}

311
fn get_mv_range(
Romain Vimont's avatar
Romain Vimont committed
312
  w_in_b: usize, h_in_b: usize, bo: BlockOffset, blk_w: usize, blk_h: usize
313
) -> (isize, isize, isize, isize) {
314 315 316
  let border_w = 128 + blk_w as isize * 8;
  let border_h = 128 + blk_h as isize * 8;
  let mvx_min = -(bo.x as isize) * (8 * MI_SIZE) as isize - border_w;
317
  let mvx_max = (w_in_b - bo.x - blk_w / MI_SIZE) as isize * (8 * MI_SIZE) as isize + border_w;
318
  let mvy_min = -(bo.y as isize) * (8 * MI_SIZE) as isize - border_h;
319
  let mvy_max = (h_in_b - bo.y - blk_h / MI_SIZE) as isize * (8 * MI_SIZE) as isize + border_h;
320 321 322 323

  (mvx_min, mvx_max, mvy_min, mvy_max)
}

324
pub fn get_subset_predictors<T: Pixel>(
325
  tile_bo: BlockOffset, cmv: MotionVector,
326
  tile_mvs: &TileMotionVectors<'_>, frame_ref_opt: Option<&ReferenceFrame<T>>,
327
  ref_frame_id: usize
328 329 330
) -> (Vec<MotionVector>) {
  let mut predictors = Vec::new();

331 332 333 334 335 336
  // Zero motion vector
  predictors.push(MotionVector::default());

  // Coarse motion estimation.
  predictors.push(cmv.quantize_to_fullpel());

337 338
  // EPZS subset A and B predictors.

339
  let mut median_preds = Vec::new();
340 341
  if tile_bo.x > 0 {
    let left = tile_mvs[tile_bo.y][tile_bo.x - 1];
342 343
    median_preds.push(left);
    if !left.is_zero() { predictors.push(left); }
344
  }
345 346
  if tile_bo.y > 0 {
    let top = tile_mvs[tile_bo.y - 1][tile_bo.x];
347 348
    median_preds.push(top);
    if !top.is_zero() { predictors.push(top); }
349

350 351
    if tile_bo.x < tile_mvs.cols() - 1 {
      let top_right = tile_mvs[tile_bo.y - 1][tile_bo.x + 1];
352 353
      median_preds.push(top_right);
      if !top_right.is_zero() { predictors.push(top_right); }
354 355 356
    }
  }

357
  if !median_preds.is_empty() {
Vladimir Kazakov's avatar
Vladimir Kazakov committed
358
    let mut median_mv = MotionVector::default();
359
    for mv in median_preds.iter() {
360 361
      median_mv = median_mv + *mv;
    }
362 363 364
    median_mv = median_mv / (median_preds.len() as i16);
    let median_mv_quant = median_mv.quantize_to_fullpel();
    if !median_mv_quant.is_zero() { predictors.push(median_mv_quant); }
365 366 367 368 369
  }

  // EPZS subset C predictors.

  if let Some(ref frame_ref) = frame_ref_opt {
370
    let prev_frame_mvs = &frame_ref.frame_mvs[ref_frame_id];
371

372
    let frame_bo = BlockOffset {
373 374
      x: tile_mvs.x() + tile_bo.x,
      y: tile_mvs.y() + tile_bo.y,
375 376 377
    };
    if frame_bo.x > 0 {
      let left = prev_frame_mvs[frame_bo.y][frame_bo.x - 1];
378
      if !left.is_zero() { predictors.push(left); }
379
    }
380 381
    if frame_bo.y > 0 {
      let top = prev_frame_mvs[frame_bo.y - 1][frame_bo.x];
382
      if !top.is_zero() { predictors.push(top); }
383
    }
384 385
    if frame_bo.x < prev_frame_mvs.cols - 1 {
      let right = prev_frame_mvs[frame_bo.y][frame_bo.x + 1];
386
      if !right.is_zero() { predictors.push(right); }
387
    }
388 389
    if frame_bo.y < prev_frame_mvs.rows - 1 {
      let bottom = prev_frame_mvs[frame_bo.y + 1][frame_bo.x];
390
      if !bottom.is_zero() { predictors.push(bottom); }
391 392
    }

393
    let previous = prev_frame_mvs[frame_bo.y][frame_bo.x];
394
    if !previous.is_zero() { predictors.push(previous); }
395 396 397 398 399
  }

  predictors
}

400
pub trait MotionEstimation {
401
  fn full_pixel_me<T: Pixel>(
402
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &ReferenceFrame<T>,
Romain Vimont's avatar
Romain Vimont committed
403
    bo: BlockOffset, lambda: u32,
404
    cmv: MotionVector, pmv: [MotionVector; 2],
405 406
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize, best_mv: &mut MotionVector,
Thomas Daede's avatar
Thomas Daede committed
407
    lowest_cost: &mut u64, ref_frame: RefType
408
  );
409

410
  fn sub_pixel_me<T: Pixel>(
411
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &ReferenceFrame<T>,
Romain Vimont's avatar
Romain Vimont committed
412
    bo: BlockOffset, lambda: u32, pmv: [MotionVector; 2],
413 414
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize, best_mv: &mut MotionVector,
Thomas Daede's avatar
Thomas Daede committed
415
    lowest_cost: &mut u64, ref_frame: RefType
416 417
  );

418 419
  fn motion_estimation<T: Pixel> (
    fi: &FrameInvariants<T>, fs: &FrameState<T>, bsize: BlockSize,
Thomas Daede's avatar
Thomas Daede committed
420
    bo: BlockOffset, ref_frame: RefType, cmv: MotionVector,
421
    pmv: [MotionVector; 2]
422
  ) -> MotionVector {
Thomas Daede's avatar
Thomas Daede committed
423
    match fi.rec_buffer.frames[fi.ref_frames[ref_frame.to_index()] as usize]
424 425 426 427 428 429 430 431 432 433 434 435 436 437 438
    {
      Some(ref rec) => {
        let blk_w = bsize.width();
        let blk_h = bsize.height();
        let (mvx_min, mvx_max, mvy_min, mvy_max) =
          get_mv_range(fi.w_in_b, fi.h_in_b, bo, blk_w, blk_h);

        // 0.5 is a fudge factor
        let lambda = (fi.me_lambda * 256.0 * 0.5) as u32;

        // Full-pixel motion estimation

        let mut lowest_cost = std::u64::MAX;
        let mut best_mv = MotionVector::default();

439
        Self::full_pixel_me(fi, fs, rec, bo, lambda, cmv, pmv,
440 441
                           mvx_min, mvx_max, mvy_min, mvy_max, blk_w, blk_h,
                           &mut best_mv, &mut lowest_cost, ref_frame);
442

443
        Self::sub_pixel_me(fi, fs, rec, bo, lambda, pmv,
444
                           mvx_min, mvx_max, mvy_min, mvy_max, blk_w, blk_h,
445
                           &mut best_mv, &mut lowest_cost, ref_frame);
446 447

        best_mv
Frank Bossen's avatar
Frank Bossen committed
448 449
      }

450
      None => MotionVector::default()
451
    }
452
  }
453 454 455

  fn estimate_motion_ss2<T: Pixel>(
    fi: &FrameInvariants<T>, fs: &FrameState<T>, bsize: BlockSize, ref_idx: usize,
Romain Vimont's avatar
Romain Vimont committed
456
    bo: BlockOffset, pmvs: &[Option<MotionVector>; 3], ref_frame: usize
457 458 459 460 461
  ) -> Option<MotionVector> {
    if let Some(ref rec) = fi.rec_buffer.frames[ref_idx] {
      let blk_w = bsize.width();
      let blk_h = bsize.height();
      let bo_adj = adjust_bo(bo, fi, blk_w, blk_h);
Romain Vimont's avatar
Romain Vimont committed
462
      let (mvx_min, mvx_max, mvy_min, mvy_max) = get_mv_range(fi.w_in_b, fi.h_in_b, bo_adj, blk_w, blk_h);
463 464 465

      let global_mv = [MotionVector{row: 0, col: 0}; 2];
      let frame_mvs = &fs.frame_mvs[ref_frame];
466
      let tile_mvs = frame_mvs.as_tile_motion_vectors();
467
      let frame_ref_opt = fi.rec_buffer.frames[fi.ref_frames[0] as usize].as_ref().map(Arc::as_ref);
468 469 470 471 472 473 474 475

      let mut lowest_cost = std::u64::MAX;
      let mut best_mv = MotionVector::default();

      // Divide by 4 to account for subsampling, 0.125 is a fudge factor
      let lambda = (fi.me_lambda * 256.0 / 4.0 * 0.125) as u32;

      Self::me_ss2(
476
        fi, fs, pmvs, bo_adj,
477
        &tile_mvs, frame_ref_opt, rec, global_mv, lambda,
478 479 480 481 482 483 484 485 486 487 488 489
        mvx_min, mvx_max, mvy_min, mvy_max, blk_w, blk_h,
        &mut best_mv, &mut lowest_cost
      );

      Some(MotionVector { row: best_mv.row * 2, col: best_mv.col * 2 })
    } else {
      None
    }
  }

  fn me_ss2<T: Pixel>(
    fi: &FrameInvariants<T>, fs: &FrameState<T>,
490 491
    pmvs: &[Option<MotionVector>; 3], bo_adj: BlockOffset,
    tile_mvs: &TileMotionVectors<'_>, frame_ref_opt: Option<&ReferenceFrame<T>>,
492
    rec: &ReferenceFrame<T>, global_mv: [MotionVector; 2], lambda: u32,
493 494 495 496
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize,
    best_mv: &mut MotionVector, lowest_cost: &mut u64
  );
497
}
498

499 500 501
/// Field-less type whose `MotionEstimation` impl delegates to
/// `diamond_me_search`.
pub struct DiamondSearch {}
/// Field-less type whose `MotionEstimation` impl delegates to `full_search`
/// and `telescopic_subpel_search`.
pub struct FullSearch {}

502 503
impl MotionEstimation for DiamondSearch {
  fn full_pixel_me<T: Pixel>(
504
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &ReferenceFrame<T>,
505
    bo: BlockOffset, lambda: u32,
506 507
    cmv: MotionVector, pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
508
    best_mv: &mut MotionVector, lowest_cost: &mut u64, ref_frame: RefType
509
  ) {
Thomas Daede's avatar
Thomas Daede committed
510
    let frame_mvs = &fs.frame_mvs[ref_frame.to_index()];
511
    let tile_mvs = frame_mvs.as_tile_motion_vectors();
512
    let frame_ref = fi.rec_buffer.frames[fi.ref_frames[0] as usize].as_ref().map(Arc::as_ref);
513
    let predictors =
514
      get_subset_predictors(bo, cmv, &tile_mvs, frame_ref, ref_frame.to_index());
515 516 517

    diamond_me_search(
      fi,
518
      bo.to_luma_plane_offset(),
519 520 521 522 523 524 525 526 527 528 529 530 531 532
      &fs.input.planes[0],
      &rec.frame.planes[0],
      &predictors,
      fi.sequence.bit_depth,
      pmv,
      lambda,
      mvx_min,
      mvx_max,
      mvy_min,
      mvy_max,
      blk_w,
      blk_h,
      best_mv,
      lowest_cost,
533
      false,
534 535 536
      ref_frame
    );
  }
537 538

  fn sub_pixel_me<T: Pixel>(
539
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &ReferenceFrame<T>,
540
    bo: BlockOffset, lambda: u32,
541 542
    pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
543
    best_mv: &mut MotionVector, lowest_cost: &mut u64, ref_frame: RefType,
544 545 546 547 548
  )
  {
    let predictors = vec![*best_mv];
    diamond_me_search(
      fi,
549
      bo.to_luma_plane_offset(),
550 551 552 553 554 555 556 557 558 559 560 561 562 563
      &fs.input.planes[0],
      &rec.frame.planes[0],
      &predictors,
      fi.sequence.bit_depth,
      pmv,
      lambda,
      mvx_min,
      mvx_max,
      mvy_min,
      mvy_max,
      blk_w,
      blk_h,
      best_mv,
      lowest_cost,
564
      true,
565 566 567
      ref_frame
    );
  }
568 569 570

  fn me_ss2<T: Pixel>(
    fi: &FrameInvariants<T>, fs: &FrameState<T>,
571
    pmvs: &[Option<MotionVector>; 3], bo_adj: BlockOffset,
572
    tile_mvs: &TileMotionVectors<'_>, frame_ref_opt: Option<&ReferenceFrame<T>>,
573
    rec: &ReferenceFrame<T>, global_mv: [MotionVector; 2], lambda: u32,
574 575 576 577
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize,
    best_mv: &mut MotionVector, lowest_cost: &mut u64
  ) {
578 579 580 581
    let po = PlaneOffset {
      x: (bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
      y: (bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
    };
582 583 584
    for omv in pmvs.iter() {
      if let Some(pmv) = omv {
        let mut predictors = get_subset_predictors::<T>(
585
          bo_adj,
586
          MotionVector{row: pmv.row, col: pmv.col},
587
          &tile_mvs, frame_ref_opt, 0
588 589 590 591 592 593 594 595
        );

        for predictor in &mut predictors {
          predictor.row >>= 1;
          predictor.col >>= 1;
        }

        diamond_me_search(
Romain Vimont's avatar
Romain Vimont committed
596
          fi, po,
597 598 599 600 601 602
          &fs.input_hres, &rec.input_hres,
          &predictors, fi.sequence.bit_depth,
          global_mv, lambda,
          mvx_min >> 1, mvx_max >> 1, mvy_min >> 1, mvy_max >> 1,
          blk_w >> 1, blk_h >> 1,
          best_mv, lowest_cost,
Thomas Daede's avatar
Thomas Daede committed
603
          false, LAST_FRAME
604 605 606 607
        );
      }
    }
  }
608 609 610 611
}

impl MotionEstimation for FullSearch {
  fn full_pixel_me<T: Pixel>(
612
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &ReferenceFrame<T>,
613
    bo: BlockOffset, lambda: u32,
614 615
    cmv: MotionVector, pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
616
    best_mv: &mut MotionVector, lowest_cost: &mut u64, _ref_frame: RefType
617
  ) {
618
    let po = bo.to_luma_plane_offset();
619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639
    let range = 16;
    let x_lo = po.x
      + ((-range + (cmv.col / 8) as isize).max(mvx_min / 8).min(mvx_max / 8));
    let x_hi = po.x
      + ((range + (cmv.col / 8) as isize).max(mvx_min / 8).min(mvx_max / 8));
    let y_lo = po.y
      + ((-range + (cmv.row / 8) as isize).max(mvy_min / 8).min(mvy_max / 8));
    let y_hi = po.y
      + ((range + (cmv.row / 8) as isize).max(mvy_min / 8).min(mvy_max / 8));

    full_search(
      x_lo,
      x_hi,
      y_lo,
      y_hi,
      blk_h,
      blk_w,
      &fs.input.planes[0],
      &rec.frame.planes[0],
      best_mv,
      lowest_cost,
Romain Vimont's avatar
Romain Vimont committed
640
      po,
641 642 643 644 645 646 647
      2,
      fi.sequence.bit_depth,
      lambda,
      pmv,
      fi.allow_high_precision_mv
    );
  }
648 649

  fn sub_pixel_me<T: Pixel>(
650
    fi: &FrameInvariants<T>, fs: &FrameState<T>, _rec: &ReferenceFrame<T>,
651
    bo: BlockOffset, lambda: u32,
652
    pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
653
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
654
    best_mv: &mut MotionVector, lowest_cost: &mut u64, ref_frame: RefType,
655 656 657 658 659
  )
  {
    telescopic_subpel_search(
      fi,
      fs,
660
      bo.to_luma_plane_offset(),
661 662 663 664 665 666 667
      lambda,
      ref_frame,
      pmv,
      mvx_min,
      mvx_max,
      mvy_min,
      mvy_max,
668 669
      blk_w,
      blk_h,
670 671 672 673
      best_mv,
      lowest_cost
    );
  }
674 675 676

  fn me_ss2<T: Pixel>(
    fi: &FrameInvariants<T>, fs: &FrameState<T>,
677
    pmvs: &[Option<MotionVector>; 3], bo_adj: BlockOffset,
678
    _tile_mvs: &TileMotionVectors<'_>, _frame_ref_opt: Option<&ReferenceFrame<T>>,
679
    rec: &ReferenceFrame<T>, _global_mv: [MotionVector; 2], lambda: u32,
680 681 682 683
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize,
    best_mv: &mut MotionVector, lowest_cost: &mut u64
  ) {
684 685 686 687
    let po = PlaneOffset {
      x: (bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
      y: (bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
    };
688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705
    let range = 16;
    for omv in pmvs.iter() {
      if let Some(pmv) = omv {
        let x_lo = po.x + (((pmv.col as isize / 8 - range).max(mvx_min / 8).min(mvx_max / 8)) >> 1);
        let x_hi = po.x + (((pmv.col as isize / 8 + range).max(mvx_min / 8).min(mvx_max / 8)) >> 1);
        let y_lo = po.y + (((pmv.row as isize / 8 - range).max(mvy_min / 8).min(mvy_max / 8)) >> 1);
        let y_hi = po.y + (((pmv.row as isize / 8 + range).max(mvy_min / 8).min(mvy_max / 8)) >> 1);
        full_search(
          x_lo,
          x_hi,
          y_lo,
          y_hi,
          blk_h >> 1,
          blk_w >> 1,
          &fs.input_hres,
          &rec.input_hres,
          best_mv,
          lowest_cost,
Romain Vimont's avatar
Romain Vimont committed
706
          po,
707 708 709 710 711 712 713 714 715
          1,
          fi.sequence.bit_depth,
          lambda,
          [MotionVector::default(); 2],
          fi.allow_high_precision_mv
        );
      }
    }
  }
716
}
717

718 719
fn get_best_predictor<T: Pixel>(
  fi: &FrameInvariants<T>,
Romain Vimont's avatar
Romain Vimont committed
720
  po: PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>,
721
  predictors: &[MotionVector],
722
  bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
723 724
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
725
  center_mv: &mut MotionVector, center_mv_cost: &mut u64,
Thomas Daede's avatar
Thomas Daede committed
726
  tmp_plane_opt: &mut Option<Plane<T>>, ref_frame: RefType) {
Vladimir Kazakov's avatar
Vladimir Kazakov committed
727
  *center_mv = MotionVector::default();
728 729 730 731 732 733
  *center_mv_cost = std::u64::MAX;

  for &init_mv in predictors.iter() {
    let cost = get_mv_rd_cost(
      fi, po, p_org, p_ref, bit_depth,
      pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
734
      blk_w, blk_h, init_mv, tmp_plane_opt, ref_frame);
735 736 737 738 739 740 741 742

    if cost < *center_mv_cost {
      *center_mv = init_mv;
      *center_mv_cost = cost;
    }
  }
}

743 744
fn diamond_me_search<T: Pixel>(
  fi: &FrameInvariants<T>,
Romain Vimont's avatar
Romain Vimont committed
745
  po: PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>,
746
  predictors: &[MotionVector],
747
  bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
748 749
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
750
  center_mv: &mut MotionVector, center_mv_cost: &mut u64,
Thomas Daede's avatar
Thomas Daede committed
751
  subpixel: bool, ref_frame: RefType)
752 753
{
  let diamond_pattern = [(1i16, 0i16), (0, 1), (-1, 0), (0, -1)];
754 755
  let (mut diamond_radius, diamond_radius_end, mut tmp_plane_opt) = {
    if subpixel {
756
      // Sub-pixel motion estimation
757 758 759 760 761
      (
        4i16,
        if fi.allow_high_precision_mv {1i16} else {2i16},
        Some(Plane::new(blk_w, blk_h, 0, 0, 0, 0)),
      )
762 763
    } else {
      // Full pixel motion estimation
764
      (16i16, 8i16, None)
765 766
    }
  };
767 768 769 770

  get_best_predictor(
    fi, po, p_org, p_ref, &predictors,
    bit_depth, pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
771
    blk_w, blk_h, center_mv, center_mv_cost,
772
    &mut tmp_plane_opt, ref_frame);
773 774 775

  loop {
    let mut best_diamond_rd_cost = std::u64::MAX;
Vladimir Kazakov's avatar
Vladimir Kazakov committed
776
    let mut best_diamond_mv = MotionVector::default();
777 778 779 780 781 782 783 784 785

    for p in diamond_pattern.iter() {

        let cand_mv = MotionVector {
          row: center_mv.row + diamond_radius * p.0,
          col: center_mv.col + diamond_radius * p.1
        };

        let rd_cost = get_mv_rd_cost(
Romain Vimont's avatar
Romain Vimont committed
786
          fi, po, p_org, p_ref, bit_depth,
787
          pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
788
          blk_w, blk_h, cand_mv, &mut tmp_plane_opt, ref_frame);
789 790 791 792 793 794 795 796

        if rd_cost < best_diamond_rd_cost {
          best_diamond_rd_cost = rd_cost;
          best_diamond_mv = cand_mv;
        }
    }

    if *center_mv_cost <= best_diamond_rd_cost {
797
      if diamond_radius == diamond_radius_end {
798 799 800 801 802 803 804 805 806 807 808 809 810 811
        break;
      } else {
        diamond_radius /= 2;
      }
    }
    else {
      *center_mv = best_diamond_mv;
      *center_mv_cost = best_diamond_rd_cost;
    }
  }

  assert!(*center_mv_cost < std::u64::MAX);
}

812 813
fn get_mv_rd_cost<T: Pixel>(
  fi: &FrameInvariants<T>,
Romain Vimont's avatar
Romain Vimont committed
814
  po: PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>, bit_depth: usize,
815
  pmv: [MotionVector; 2], lambda: u32,
816 817
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
818
  cand_mv: MotionVector, tmp_plane_opt: &mut Option<Plane<T>>,
Thomas Daede's avatar
Thomas Daede committed
819
  ref_frame: RefType) -> u64
820 821 822 823 824 825 826 827 828 829
{
  if (cand_mv.col as isize) < mvx_min || (cand_mv.col as isize) > mvx_max {
    return std::u64::MAX;
  }
  if (cand_mv.row as isize) < mvy_min || (cand_mv.row as isize) > mvy_max {
    return std::u64::MAX;
  }

  let plane_org = p_org.slice(po);

830
  if let Some(ref mut tmp_plane) = tmp_plane_opt {
Romain Vimont's avatar
Romain Vimont committed
831
    let mut tmp_slice = &mut tmp_plane.mut_slice(PlaneOffset { x: 0, y: 0 });
832 833 834
    PredictionMode::NEWMV.predict_inter(
      fi,
      0,
Romain Vimont's avatar
Romain Vimont committed
835
      po,
836 837 838 839 840 841
      &mut tmp_slice,
      blk_w,
      blk_h,
      [ref_frame, NONE_FRAME],
      [cand_mv, MotionVector { row: 0, col: 0 }]
    );
Romain Vimont's avatar
Romain Vimont committed
842
    let plane_ref = tmp_plane.slice(PlaneOffset { x: 0, y: 0 });
843 844 845 846 847 848
    compute_mv_rd_cost(
      fi, pmv, lambda, bit_depth, blk_w, blk_h, cand_mv,
      &plane_org, &plane_ref
    )
  } else {
    // Full pixel motion vector
Romain Vimont's avatar
Romain Vimont committed
849
    let plane_ref = p_ref.slice(PlaneOffset {
850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866
      x: po.x + (cand_mv.col / 8) as isize,
      y: po.y + (cand_mv.row / 8) as isize
    });
    compute_mv_rd_cost(
      fi, pmv, lambda, bit_depth, blk_w, blk_h, cand_mv,
      &plane_org, &plane_ref
    )
  }
}

fn compute_mv_rd_cost<T: Pixel>(
  fi: &FrameInvariants<T>,
  pmv: [MotionVector; 2], lambda: u32,
  bit_depth: usize, blk_w: usize, blk_h: usize, cand_mv: MotionVector,
  plane_org: &PlaneSlice<T>, plane_ref: &PlaneSlice<T>
) -> u64
{
867
  let sad = get_sad(&plane_org, &plane_ref, blk_w, blk_h, bit_depth);
868 869 870 871 872 873 874 875

  let rate1 = get_mv_rate(cand_mv, pmv[0], fi.allow_high_precision_mv);
  let rate2 = get_mv_rate(cand_mv, pmv[1], fi.allow_high_precision_mv);
  let rate = rate1.min(rate2 + 1);

  256 * sad as u64 + rate as u64 * lambda as u64
}

876
fn telescopic_subpel_search<T: Pixel>(
877
  fi: &FrameInvariants<T>, fs: &FrameState<T>, po: PlaneOffset,
Thomas Daede's avatar
Thomas Daede committed
878
  lambda: u32, ref_frame: RefType, pmv: [MotionVector; 2],
879
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
880
  blk_w: usize, blk_h: usize,
Romain Vimont's avatar
Romain Vimont committed
881
  best_mv: &mut MotionVector, lowest_cost: &mut u64
882 883 884 885 886 887 888 889
) {
  let mode = PredictionMode::NEWMV;

  let mut steps = vec![8, 4, 2];
  if fi.allow_high_precision_mv {
    steps.push(1);
  }

Romain Vimont's avatar
Romain Vimont committed
890 891
  let mut tmp_plane = Plane::new(blk_w, blk_h, 0, 0, 0, 0);

892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914
  for step in steps {
    let center_mv_h = *best_mv;
    for i in 0..3 {
      for j in 0..3 {
        // Skip the center point that was already tested
        if i == 1 && j == 1 {
          continue;
        }

        let cand_mv = MotionVector {
          row: center_mv_h.row + step * (i as i16 - 1),
          col: center_mv_h.col + step * (j as i16 - 1)
        };

        if (cand_mv.col as isize) < mvx_min || (cand_mv.col as isize) > mvx_max {
          continue;
        }
        if (cand_mv.row as isize) < mvy_min || (cand_mv.row as isize) > mvy_max {
          continue;
        }

        {
          let tmp_slice =
Romain Vimont's avatar
Romain Vimont committed
915
            &mut tmp_plane.mut_slice(PlaneOffset { x: 0, y: 0 });
916 917 918 919

          mode.predict_inter(
            fi,
            0,
Romain Vimont's avatar
Romain Vimont committed
920
            po,
921 922 923 924 925 926 927 928
            tmp_slice,
            blk_w,
            blk_h,
            [ref_frame, NONE_FRAME],
            [cand_mv, MotionVector { row: 0, col: 0 }]
          );
        }

Romain Vimont's avatar
Romain Vimont committed
929 930
        let plane_org = fs.input.planes[0].slice(po);
        let plane_ref = tmp_plane.slice(PlaneOffset { x: 0, y: 0 });
931

932
        let sad = get_sad(&plane_org, &plane_ref, blk_w, blk_h, fi.sequence.bit_depth);
933 934 935 936 937 938 939 940 941 942 943 944 945 946 947

        let rate1 = get_mv_rate(cand_mv, pmv[0], fi.allow_high_precision_mv);
        let rate2 = get_mv_rate(cand_mv, pmv[1], fi.allow_high_precision_mv);
        let rate = rate1.min(rate2 + 1);
        let cost = 256 * sad as u64 + rate as u64 * lambda as u64;

        if cost < *lowest_cost {
          *lowest_cost = cost;
          *best_mv = cand_mv;
        }
      }
    }
  }
}

948
fn full_search<T: Pixel>(
Kyle Siefring's avatar
Kyle Siefring committed
949
  x_lo: isize, x_hi: isize, y_lo: isize, y_hi: isize, blk_h: usize,
950
  blk_w: usize, p_org: &Plane<T>, p_ref: &Plane<T>, best_mv: &mut MotionVector,
Romain Vimont's avatar
Romain Vimont committed
951
  lowest_cost: &mut u64, po: PlaneOffset, step: usize, bit_depth: usize,
952
  lambda: u32, pmv: [MotionVector; 2], allow_high_precision_mv: bool
Kyle Siefring's avatar
Kyle Siefring committed
953
) {
Luca Barbato's avatar
Luca Barbato committed
954 955 956 957 958
    let search_range_y = (y_lo..=y_hi).step_by(step);
    let search_range_x = (x_lo..=x_hi).step_by(step);
    let search_area = search_range_y.flat_map(|y| { search_range_x.clone().map(move |x| (y, x)) });

    let (cost, mv) = search_area.map(|(y, x)| {
959
      let plane_org = p_org.slice(po);
Romain Vimont's avatar
Romain Vimont committed
960
      let plane_ref = p_ref.slice(PlaneOffset { x, y });
961

962
      let sad = get_sad(&plane_org, &plane_ref, blk_w, blk_h, bit_depth);
963

Frank Bossen's avatar
Frank Bossen committed
964 965 966 967 968 969 970 971
      let mv = MotionVector {
        row: 8 * (y as i16 - po.y as i16),
        col: 8 * (x as i16 - po.x as i16)
      };

      let rate1 = get_mv_rate(mv, pmv[0], allow_high_precision_mv);
      let rate2 = get_mv_rate(mv, pmv[1], allow_high_precision_mv);
      let rate = rate1.min(rate2 + 1);
972
      let cost = 256 * sad as u64 + rate as u64 * lambda as u64;
Frank Bossen's avatar
Frank Bossen committed
973

Luca Barbato's avatar
Luca Barbato committed
974 975 976 977 978
      (cost, mv)
  }).min_by_key(|(c, _)| *c).unwrap();

    *lowest_cost = cost;
    *best_mv = mv;
979 980 981
}

// Adjust block offset such that entire block lies within frame boundaries
Romain Vimont's avatar
Romain Vimont committed
982
fn adjust_bo<T: Pixel>(bo: BlockOffset, fi: &FrameInvariants<T>, blk_w: usize, blk_h: usize) -> BlockOffset {
983 984 985 986 987 988
  BlockOffset {
    x: (bo.x as isize).min(fi.w_in_b as isize - blk_w as isize / 4).max(0) as usize,
    y: (bo.y as isize).min(fi.h_in_b as isize - blk_h as isize / 4).max(0) as usize
  }
}

989
#[inline(always)]
Frank Bossen's avatar
Frank Bossen committed
990
fn get_mv_rate(a: MotionVector, b: MotionVector, allow_high_precision_mv: bool) -> u32 {
991
  #[inline(always)]
Frank Bossen's avatar
Frank Bossen committed
992 993 994 995 996 997 998 999 1000 1001 1002 1003
  fn diff_to_rate(diff: i16, allow_high_precision_mv: bool) -> u32 {
    let d = if allow_high_precision_mv { diff } else { diff >> 1 };
    if d == 0 {
      0
    } else {
      2 * (16 - d.abs().leading_zeros())
    }
  }

  diff_to_rate(a.row - b.row, allow_high_precision_mv) + diff_to_rate(a.col - b.col, allow_high_precision_mv)
}

1004 1005
pub fn estimate_motion_ss4<T: Pixel>(
  fi: &FrameInvariants<T>, fs: &FrameState<T>, bsize: BlockSize, ref_idx: usize,
Romain Vimont's avatar
Romain Vimont committed
1006
  bo: BlockOffset
1007 1008 1009 1010 1011 1012 1013 1014 1015
) -> Option<MotionVector> {
  if let Some(ref rec) = fi.rec_buffer.frames[ref_idx] {
    let blk_w = bsize.width();
    let blk_h = bsize.height();
    let bo_adj = adjust_bo(bo, fi, blk_w, blk_h);
    let po = PlaneOffset {
      x: (bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 2,
      y: (bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 2
    };
1016

1017 1018
    let range_x = 192 * fi.me_range_scale as isize;
    let range_y = 64 * fi.me_range_scale as isize;
Romain Vimont's avatar
Romain Vimont committed
1019
    let (mvx_min, mvx_max, mvy_min, mvy_max) = get_mv_range(fi.w_in_b, fi.h_in_b, bo_adj, blk_w, blk_h);
1020 1021 1022 1023
    let x_lo = po.x + (((-range_x).max(mvx_min / 8)) >> 2);
    let x_hi = po.x + (((range_x).min(mvx_max / 8)) >> 2);
    let y_lo = po.y + (((-range_y).max(mvy_min / 8)) >> 2);
    let y_hi = po.y + (((range_y).min(mvy_max / 8)) >> 2);
1024

1025
    let mut lowest_cost = std::u64::MAX;
Vladimir Kazakov's avatar
Vladimir Kazakov committed
1026
    let mut best_mv = MotionVector::default();
1027

Frank Bossen's avatar
Frank Bossen committed
1028
    // Divide by 16 to account for subsampling, 0.125 is a fudge factor
1029
    let lambda = (fi.me_lambda * 256.0 / 16.0 * 0.125) as u32;
Frank Bossen's avatar
Frank Bossen committed
1030

1031
    full_search(
Kyle Siefring's avatar
Kyle Siefring committed
1032 1033 1034 1035 1036 1037 1038 1039 1040
      x_lo,
      x_hi,
      y_lo,
      y_hi,
      blk_h >> 2,
      blk_w >> 2,
      &fs.input_qres,
      &rec.input_qres,
      &mut best_mv,
Frank Bossen's avatar
Frank Bossen committed
1041
      &mut lowest_cost,
Romain Vimont's avatar
Romain Vimont committed
1042
      po,
Kyle Siefring's avatar
Kyle Siefring committed
1043
      1,
1044
      fi.sequence.bit_depth,
Frank Bossen's avatar
Frank Bossen committed
1045
      lambda,
Vladimir Kazakov's avatar
Vladimir Kazakov committed
1046
      [MotionVector::default(); 2],
Frank Bossen's avatar
Frank Bossen committed
1047
      fi.allow_high_precision_mv
1048 1049 1050 1051 1052 1053 1054 1055
    );

    Some(MotionVector { row: best_mv.row * 4, col: best_mv.col * 4 })
  } else {
    None
  }
}

1056 1057 1058
#[cfg(test)]
pub mod test {
  use super::*;
Raphaël Zumer's avatar
Raphaël Zumer committed
1059 1060
  use crate::partition::BlockSize;
  use crate::partition::BlockSize::*;
1061 1062

  // Generate plane data for get_sad_same()
1063
  fn setup_sad<T: Pixel>() -> (Plane<T>, Plane<T>) {