me.rs 34.4 KB
Newer Older
1 2 3 4 5 6 7 8 9
// Copyright (c) 2017-2018, The rav1e contributors. All rights reserved
//
// This source code is subject to the terms of the BSD 2 Clause License and
// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
// was not distributed with this source code in the LICENSE file, you can
// obtain it at www.aomedia.org/license/software. If the Alliance for Open
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.

10
#[cfg(all(target_arch = "x86_64", feature = "nasm"))]
Raphaël Zumer's avatar
Raphaël Zumer committed
11
pub use self::nasm::get_sad;
12
#[cfg(any(not(target_arch = "x86_64"), not(feature = "nasm")))]
Raphaël Zumer's avatar
Raphaël Zumer committed
13
pub use self::native::get_sad;
Raphaël Zumer's avatar
Raphaël Zumer committed
14
use crate::context::{BlockOffset, BLOCK_TO_PLANE_SHIFT, MI_SIZE};
15
use crate::encoder::ReferenceFrame;
Raphaël Zumer's avatar
Raphaël Zumer committed
16 17 18
use crate::FrameInvariants;
use crate::FrameState;
use crate::partition::*;
Thomas Daede's avatar
Thomas Daede committed
19
use crate::partition::RefType::*;
Raphaël Zumer's avatar
Raphaël Zumer committed
20
use crate::plane::*;
21
use crate::util::Pixel;
22

Romain Vimont's avatar
Romain Vimont committed
23
use std::ops::{Index, IndexMut};
24
use std::sync::Arc;
25

26
#[cfg(all(target_arch = "x86_64", feature = "nasm"))]
27
mod nasm {
Raphaël Zumer's avatar
Raphaël Zumer committed
28 29
  use crate::plane::*;
  use crate::util::*;
30
  use std::mem;
Luca Barbato's avatar
Luca Barbato committed
31

Raphaël Zumer's avatar
Raphaël Zumer committed
32 33
  use libc;

34 35 36 37 38 39 40 41 42
  // Declares the SAD kernels that are implemented outside of Rust.
  //
  // Input is a comma-separated list of `(symbol, sample_type)` pairs (no
  // trailing comma). Each pair expands to one foreign function taking two
  // `(pointer, stride)` pairs and returning the accumulated SAD as `u32`.
  macro_rules! declare_asm_sad {
    ($(($name: ident, $T: ident)),+) => (
      $(
        extern "C" {
          fn $name(
            src: *const $T,
            src_stride: libc::ptrdiff_t,
            dst: *const $T,
            dst_stride: libc::ptrdiff_t
          ) -> u32;
        }
      )+
    )
  }
Kyle Siefring's avatar
Kyle Siefring committed
44

45
  // Foreign SAD kernels, grouped by the instruction set named in the symbol.
  declare_asm_sad![
    // SSSE3, 16-bit samples
    (rav1e_sad_4x4_hbd_ssse3, u16),
    (rav1e_sad_8x8_hbd10_ssse3, u16),
    (rav1e_sad_16x16_hbd_ssse3, u16),
    (rav1e_sad_32x32_hbd10_ssse3, u16),
    (rav1e_sad_64x64_hbd10_ssse3, u16),
    (rav1e_sad_128x128_hbd10_ssse3, u16),
    // SSE2, 8-bit samples: 4xN
    (rav1e_sad4x4_sse2, u8),
    (rav1e_sad4x8_sse2, u8),
    (rav1e_sad4x16_sse2, u8),
    // SSE2, 8-bit samples: 8xN
    (rav1e_sad8x4_sse2, u8),
    (rav1e_sad8x8_sse2, u8),
    (rav1e_sad8x16_sse2, u8),
    (rav1e_sad8x32_sse2, u8),
    // SSE2, 8-bit samples: square blocks from 16x16 up
    (rav1e_sad16x16_sse2, u8),
    (rav1e_sad32x32_sse2, u8),
    (rav1e_sad64x64_sse2, u8),
    (rav1e_sad128x128_sse2, u8),
    // AVX2, 8-bit samples: 16xN
    (rav1e_sad16x4_avx2, u8),
    (rav1e_sad16x8_avx2, u8),
    (rav1e_sad16x16_avx2, u8),
    (rav1e_sad16x32_avx2, u8),
    (rav1e_sad16x64_avx2, u8),
    // AVX2, 8-bit samples: 32xN
    (rav1e_sad32x8_avx2, u8),
    (rav1e_sad32x16_avx2, u8),
    (rav1e_sad32x32_avx2, u8),
    (rav1e_sad32x64_avx2, u8),
    // AVX2, 8-bit samples: 64xN
    (rav1e_sad64x16_avx2, u8),
    (rav1e_sad64x32_avx2, u8),
    (rav1e_sad64x64_avx2, u8),
    (rav1e_sad64x128_avx2, u8),
    // AVX2, 8-bit samples: 128xN
    (rav1e_sad128x64_avx2, u8),
    (rav1e_sad128x128_avx2, u8)
  ];

Luca Barbato's avatar
Luca Barbato committed
93
  #[target_feature(enable = "ssse3")]
94
  unsafe fn sad_hbd_ssse3(
95 96
    plane_org: &PlaneSlice<'_, u16>, plane_ref: &PlaneSlice<'_, u16>, blk_w: usize,
    blk_h: usize, bit_depth: usize
Luca Barbato's avatar
Luca Barbato committed
97 98
  ) -> u32 {
    let mut sum = 0 as u32;
99 100
    let org_stride = (plane_org.plane.cfg.stride * 2) as libc::ptrdiff_t;
    let ref_stride = (plane_ref.plane.cfg.stride * 2) as libc::ptrdiff_t;
Luca Barbato's avatar
Luca Barbato committed
101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116
    assert!(blk_h >= 4 && blk_w >= 4);
    let step_size =
      blk_h.min(blk_w).min(if bit_depth <= 10 { 128 } else { 4 });
    let func = match step_size.ilog() {
      3 => rav1e_sad_4x4_hbd_ssse3,
      4 => rav1e_sad_8x8_hbd10_ssse3,
      5 => rav1e_sad_16x16_hbd_ssse3,
      6 => rav1e_sad_32x32_hbd10_ssse3,
      7 => rav1e_sad_64x64_hbd10_ssse3,
      8 => rav1e_sad_128x128_hbd10_ssse3,
      _ => rav1e_sad_128x128_hbd10_ssse3
    };
    for r in (0..blk_h).step_by(step_size) {
      for c in (0..blk_w).step_by(step_size) {
        let org_slice = plane_org.subslice(c, r);
        let ref_slice = plane_ref.subslice(c, r);
Romain Vimont's avatar
Romain Vimont committed
117 118
        let org_ptr = org_slice.as_ptr();
        let ref_ptr = ref_slice.as_ptr();
119 120 121
        // FIXME for now, T == u16
        let org_ptr = org_ptr as *const u16;
        let ref_ptr = ref_ptr as *const u16;
Luca Barbato's avatar
Luca Barbato committed
122 123
        sum += func(org_ptr, org_stride, ref_ptr, ref_stride);
      }
Kyle Siefring's avatar
Kyle Siefring committed
124
    }
125
    sum
Kyle Siefring's avatar
Kyle Siefring committed
126 127
  }

128
  #[target_feature(enable = "sse2")]
129
  unsafe fn sad_sse2(
130 131
    plane_org: &PlaneSlice<'_, u8>, plane_ref: &PlaneSlice<'_, u8>, blk_w: usize,
    blk_h: usize
132
  ) -> u32 {
133 134
    let org_ptr = plane_org.as_ptr();
    let ref_ptr = plane_ref.as_ptr();
135 136
    let org_stride = plane_org.plane.cfg.stride as libc::ptrdiff_t;
    let ref_stride = plane_ref.plane.cfg.stride as libc::ptrdiff_t;
137 138 139 140 141 142 143
    if blk_w == 16 && blk_h == 16 && (org_ptr as usize & 15) == 0 {
      return rav1e_sad16x16_sse2(org_ptr, org_stride, ref_ptr, ref_stride);
    }
    // Note: unaligned blocks come from hres/qres ME search
    let ptr_align_log2 = (org_ptr as usize).trailing_zeros() as usize;
    // The largest unaligned-safe function is for 8x8
    let ptr_align = 1 << ptr_align_log2.max(3);
144
    let step_size = blk_h.min(blk_w).min(ptr_align);
145 146 147 148 149 150 151 152 153
    let func = match step_size.ilog() {
      3 => rav1e_sad4x4_sse2,
      4 => rav1e_sad8x8_sse2,
      5 => rav1e_sad16x16_sse2,
      6 => rav1e_sad32x32_sse2,
      7 => rav1e_sad64x64_sse2,
      8 => rav1e_sad128x128_sse2,
      _ => rav1e_sad128x128_sse2
    };
154 155 156 157 158
    let mut sum = 0 as u32;
    for r in (0..blk_h as isize).step_by(step_size) {
      for c in (0..blk_w as isize).step_by(step_size) {
        let org_ptr = org_ptr.offset(r * org_stride + c);
        let ref_ptr = ref_ptr.offset(r * ref_stride + c);
159 160 161 162 163 164
        sum += func(org_ptr, org_stride, ref_ptr, ref_stride);
      }
    }
    sum
  }

165 166
  #[target_feature(enable = "avx2")]
  unsafe fn sad_avx2(
167 168
    plane_org: &PlaneSlice<'_, u8>, plane_ref: &PlaneSlice<'_, u8>, blk_w: usize,
    blk_h: usize
169
  ) -> u32 {
David Michael Barr's avatar
David Michael Barr committed
170 171
    let org_ptr = plane_org.as_ptr();
    let ref_ptr = plane_ref.as_ptr();
172 173
    let org_stride = plane_org.plane.cfg.stride as libc::ptrdiff_t;
    let ref_stride = plane_ref.plane.cfg.stride as libc::ptrdiff_t;
174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204

    let func = match (blk_w, blk_h) {
      (4, 4) => rav1e_sad4x4_sse2,
      (4, 8) => rav1e_sad4x8_sse2,
      (4, 16) => rav1e_sad4x16_sse2,

      (8, 4) => rav1e_sad8x4_sse2,
      (8, 8) => rav1e_sad8x8_sse2,
      (8, 16) => rav1e_sad8x16_sse2,
      (8, 32) => rav1e_sad8x32_sse2,

      (16, 4) => rav1e_sad16x4_avx2,
      (16, 8) => rav1e_sad16x8_avx2,
      (16, 16) => rav1e_sad16x16_avx2,
      (16, 32) => rav1e_sad16x32_avx2,
      (16, 64) => rav1e_sad16x64_avx2,

      (32, 8) => rav1e_sad32x8_avx2,
      (32, 16) => rav1e_sad32x16_avx2,
      (32, 32) => rav1e_sad32x32_avx2,
      (32, 64) => rav1e_sad32x64_avx2,

      (64, 16) => rav1e_sad64x16_avx2,
      (64, 32) => rav1e_sad64x32_avx2,
      (64, 64) => rav1e_sad64x64_avx2,
      (64, 128) => rav1e_sad64x128_avx2,

      (128, 64) => rav1e_sad128x64_avx2,
      (128, 128) => rav1e_sad128x128_avx2,

      _ => unreachable!()
205
    };
206 207
    func(org_ptr, org_stride, ref_ptr, ref_stride)

208 209
  }

Luca Barbato's avatar
Luca Barbato committed
210
  #[inline(always)]
211
  pub fn get_sad<T: Pixel>(
212
    plane_org: &PlaneSlice<'_, T>, plane_ref: &PlaneSlice<'_, T>, blk_w: usize, blk_h: usize, bit_depth: usize
Luca Barbato's avatar
Luca Barbato committed
213
  ) -> u32 {
214
    #[cfg(all(target_arch = "x86_64", feature = "nasm"))]
Luca Barbato's avatar
Luca Barbato committed
215
    {
216
      if mem::size_of::<T>() == 2 && is_x86_feature_detected!("ssse3") && blk_h >= 4 && blk_w >= 4 {
Luca Barbato's avatar
Luca Barbato committed
217
        return unsafe {
218 219
          let plane_org = &*(plane_org as *const _ as *const PlaneSlice<'_, u16>);
          let plane_ref = &*(plane_ref as *const _ as *const PlaneSlice<'_, u16>);
220
          sad_hbd_ssse3(plane_org, plane_ref, blk_w, blk_h, bit_depth)
Luca Barbato's avatar
Luca Barbato committed
221 222
        };
      }
223
      if mem::size_of::<T>() == 1 && is_x86_feature_detected!("avx2") && blk_h >= 4 && blk_w >= 4 {
224 225 226
        return unsafe {
          let plane_org = &*(plane_org as *const _ as *const PlaneSlice<'_, u8>);
          let plane_ref = &*(plane_ref as *const _ as *const PlaneSlice<'_, u8>);
227
          sad_avx2(plane_org, plane_ref, blk_w, blk_h)
228 229
        };
      }
230 231
      if mem::size_of::<T>() == 1 && is_x86_feature_detected!("sse2") && blk_h >= 4 && blk_w >= 4 {
        return unsafe {
232 233
          let plane_org = &*(plane_org as *const _ as *const PlaneSlice<'_, u8>);
          let plane_ref = &*(plane_ref as *const _ as *const PlaneSlice<'_, u8>);
234
          sad_sse2(plane_org, plane_ref, blk_w, blk_h)
235 236
        };
      }
Kyle Siefring's avatar
Kyle Siefring committed
237
    }
238
    super::native::get_sad(plane_org, plane_ref, blk_w, blk_h, bit_depth)
Kyle Siefring's avatar
Kyle Siefring committed
239
  }
240 241 242
}

mod native {
Raphaël Zumer's avatar
Raphaël Zumer committed
243
  use crate::plane::*;
244
  use crate::util::*;
245

Luca Barbato's avatar
Luca Barbato committed
246
  #[inline(always)]
247
  pub fn get_sad<T: Pixel>(
248 249
    plane_org: &PlaneSlice<'_, T>, plane_ref: &PlaneSlice<'_, T>, blk_w: usize,
    blk_h: usize, _bit_depth: usize
Luca Barbato's avatar
Luca Barbato committed
250 251
  ) -> u32 {
    let mut sum = 0 as u32;
252

Luca Barbato's avatar
Luca Barbato committed
253 254
    let org_iter = plane_org.iter_width(blk_w);
    let ref_iter = plane_ref.iter_width(blk_w);
255

Luca Barbato's avatar
Luca Barbato committed
256
    for (slice_org, slice_ref) in org_iter.take(blk_h).zip(ref_iter) {
257 258 259
      sum += slice_org
        .iter()
        .zip(slice_ref)
260
        .map(|(&a, &b)| (i32::cast_from(a) - i32::cast_from(b)).abs() as u32)
261
        .sum::<u32>();
Luca Barbato's avatar
Luca Barbato committed
262
    }
263

Luca Barbato's avatar
Luca Barbato committed
264 265
    sum
  }
266 267
}

Romain Vimont's avatar
Romain Vimont committed
268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299
/// A `rows` x `cols` grid of motion vectors kept in one flat buffer.
///
/// Indexing with `[row]` yields that row as a slice, so a single vector is
/// addressed as `mvs[row][col]`.
#[derive(Debug, Clone)]
pub struct FrameMotionVectors {
  // Row-major storage; row `r` occupies `mvs[r * cols..(r + 1) * cols]`.
  mvs: Box<[MotionVector]>,
  /// Number of motion vectors in each row.
  pub cols: usize,
  /// Number of rows.
  pub rows: usize,
}

impl FrameMotionVectors {
  /// Creates a grid of `cols * rows` motion vectors, each set to
  /// `MotionVector::default()`.
  pub fn new(cols: usize, rows: usize) -> Self {
    let zeroed = vec![MotionVector::default(); cols * rows];
    Self { mvs: zeroed.into_boxed_slice(), cols, rows }
  }
}

/// Row access: `mvs[row]` is the slice of `cols` vectors for that row.
impl Index<usize> for FrameMotionVectors {
  type Output = [MotionVector];

  /// Returns row `index`; panics if the row lies outside the buffer.
  #[inline]
  fn index(&self, index: usize) -> &Self::Output {
    let row_start = index * self.cols;
    let row_end = (index + 1) * self.cols;
    &self.mvs[row_start..row_end]
  }
}

/// Mutable row access: `mvs[row][col] = mv` writes a single vector.
impl IndexMut<usize> for FrameMotionVectors {
  /// Returns row `index` mutably; panics if the row lies outside the buffer.
  #[inline]
  fn index_mut(&mut self, index: usize) -> &mut Self::Output {
    let row_start = index * self.cols;
    let row_end = (index + 1) * self.cols;
    &mut self.mvs[row_start..row_end]
  }
}

300
fn get_mv_range(
Romain Vimont's avatar
Romain Vimont committed
301
  w_in_b: usize, h_in_b: usize, bo: BlockOffset, blk_w: usize, blk_h: usize
302
) -> (isize, isize, isize, isize) {
303 304 305
  let border_w = 128 + blk_w as isize * 8;
  let border_h = 128 + blk_h as isize * 8;
  let mvx_min = -(bo.x as isize) * (8 * MI_SIZE) as isize - border_w;
306
  let mvx_max = (w_in_b - bo.x - blk_w / MI_SIZE) as isize * (8 * MI_SIZE) as isize + border_w;
307
  let mvy_min = -(bo.y as isize) * (8 * MI_SIZE) as isize - border_h;
308
  let mvy_max = (h_in_b - bo.y - blk_h / MI_SIZE) as isize * (8 * MI_SIZE) as isize + border_h;
309 310 311 312

  (mvx_min, mvx_max, mvy_min, mvy_max)
}

313
pub fn get_subset_predictors<T: Pixel>(
Romain Vimont's avatar
Romain Vimont committed
314
  bo: BlockOffset, cmv: MotionVector,
315
  w_in_b: usize, h_in_b: usize,
316
  frame_mvs: &FrameMotionVectors, frame_ref_opt: Option<&ReferenceFrame<T>>,
317
  ref_frame_id: usize
318 319 320
) -> (Vec<MotionVector>) {
  let mut predictors = Vec::new();

321 322 323 324 325 326
  // Zero motion vector
  predictors.push(MotionVector::default());

  // Coarse motion estimation.
  predictors.push(cmv.quantize_to_fullpel());

327 328
  // EPZS subset A and B predictors.

329
  let mut median_preds = Vec::new();
330
  if bo.x > 0 {
Romain Vimont's avatar
Romain Vimont committed
331
    let left = frame_mvs[bo.y][bo.x - 1];
332 333
    median_preds.push(left);
    if !left.is_zero() { predictors.push(left); }
334 335
  }
  if bo.y > 0 {
Romain Vimont's avatar
Romain Vimont committed
336
    let top = frame_mvs[bo.y - 1][bo.x];
337 338
    median_preds.push(top);
    if !top.is_zero() { predictors.push(top); }
339

340
    if bo.x < w_in_b - 1 {
Romain Vimont's avatar
Romain Vimont committed
341
      let top_right = frame_mvs[bo.y - 1][bo.x + 1];
342 343
      median_preds.push(top_right);
      if !top_right.is_zero() { predictors.push(top_right); }
344 345 346
    }
  }

347
  if !median_preds.is_empty() {
Vladimir Kazakov's avatar
Vladimir Kazakov committed
348
    let mut median_mv = MotionVector::default();
349
    for mv in median_preds.iter() {
350 351
      median_mv = median_mv + *mv;
    }
352 353 354
    median_mv = median_mv / (median_preds.len() as i16);
    let median_mv_quant = median_mv.quantize_to_fullpel();
    if !median_mv_quant.is_zero() { predictors.push(median_mv_quant); }
355 356 357 358 359
  }

  // EPZS subset C predictors.

  if let Some(ref frame_ref) = frame_ref_opt {
360
    let prev_frame_mvs = &frame_ref.frame_mvs[ref_frame_id];
361 362

    if bo.x > 0 {
Romain Vimont's avatar
Romain Vimont committed
363
      let left = prev_frame_mvs[bo.y][bo.x - 1];
364
      if !left.is_zero() { predictors.push(left); }
365 366
    }
    if bo.y > 0 {
Romain Vimont's avatar
Romain Vimont committed
367
      let top = prev_frame_mvs[bo.y - 1][bo.x];
368
      if !top.is_zero() { predictors.push(top); }
369
    }
370
    if bo.x < w_in_b - 1 {
Romain Vimont's avatar
Romain Vimont committed
371
      let right = prev_frame_mvs[bo.y][bo.x + 1];
372
      if !right.is_zero() { predictors.push(right); }
373
    }
374
    if bo.y < h_in_b - 1 {
Romain Vimont's avatar
Romain Vimont committed
375
      let bottom = prev_frame_mvs[bo.y + 1][bo.x];
376
      if !bottom.is_zero() { predictors.push(bottom); }
377 378
    }

379 380
    let previous = prev_frame_mvs[bo.y][bo.x];
    if !previous.is_zero() { predictors.push(previous); }
381 382 383 384 385
  }

  predictors
}

386
pub trait MotionEstimation {
387
  fn full_pixel_me<T: Pixel>(
388
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &ReferenceFrame<T>,
Romain Vimont's avatar
Romain Vimont committed
389
    bo: BlockOffset, lambda: u32,
390
    cmv: MotionVector, pmv: [MotionVector; 2],
391 392
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize, best_mv: &mut MotionVector,
Thomas Daede's avatar
Thomas Daede committed
393
    lowest_cost: &mut u64, ref_frame: RefType
394
  );
395

396
  fn sub_pixel_me<T: Pixel>(
397
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &ReferenceFrame<T>,
Romain Vimont's avatar
Romain Vimont committed
398
    bo: BlockOffset, lambda: u32, pmv: [MotionVector; 2],
399 400
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize, best_mv: &mut MotionVector,
Thomas Daede's avatar
Thomas Daede committed
401
    lowest_cost: &mut u64, ref_frame: RefType
402 403
  );

404 405
  fn motion_estimation<T: Pixel> (
    fi: &FrameInvariants<T>, fs: &FrameState<T>, bsize: BlockSize,
Thomas Daede's avatar
Thomas Daede committed
406
    bo: BlockOffset, ref_frame: RefType, cmv: MotionVector,
407
    pmv: [MotionVector; 2]
408
  ) -> MotionVector {
Thomas Daede's avatar
Thomas Daede committed
409
    match fi.rec_buffer.frames[fi.ref_frames[ref_frame.to_index()] as usize]
410 411 412 413 414 415 416 417 418 419 420 421 422 423 424
    {
      Some(ref rec) => {
        let blk_w = bsize.width();
        let blk_h = bsize.height();
        let (mvx_min, mvx_max, mvy_min, mvy_max) =
          get_mv_range(fi.w_in_b, fi.h_in_b, bo, blk_w, blk_h);

        // 0.5 is a fudge factor
        let lambda = (fi.me_lambda * 256.0 * 0.5) as u32;

        // Full-pixel motion estimation

        let mut lowest_cost = std::u64::MAX;
        let mut best_mv = MotionVector::default();

425
        Self::full_pixel_me(fi, fs, rec, bo, lambda, cmv, pmv,
426 427
                           mvx_min, mvx_max, mvy_min, mvy_max, blk_w, blk_h,
                           &mut best_mv, &mut lowest_cost, ref_frame);
428

429
        Self::sub_pixel_me(fi, fs, rec, bo, lambda, pmv,
430
                           mvx_min, mvx_max, mvy_min, mvy_max, blk_w, blk_h,
431
                           &mut best_mv, &mut lowest_cost, ref_frame);
432 433

        best_mv
Frank Bossen's avatar
Frank Bossen committed
434 435
      }

436
      None => MotionVector::default()
437
    }
438
  }
439 440 441

  fn estimate_motion_ss2<T: Pixel>(
    fi: &FrameInvariants<T>, fs: &FrameState<T>, bsize: BlockSize, ref_idx: usize,
Romain Vimont's avatar
Romain Vimont committed
442
    bo: BlockOffset, pmvs: &[Option<MotionVector>; 3], ref_frame: usize
443 444 445 446 447
  ) -> Option<MotionVector> {
    if let Some(ref rec) = fi.rec_buffer.frames[ref_idx] {
      let blk_w = bsize.width();
      let blk_h = bsize.height();
      let bo_adj = adjust_bo(bo, fi, blk_w, blk_h);
Romain Vimont's avatar
Romain Vimont committed
448
      let (mvx_min, mvx_max, mvy_min, mvy_max) = get_mv_range(fi.w_in_b, fi.h_in_b, bo_adj, blk_w, blk_h);
449 450 451

      let global_mv = [MotionVector{row: 0, col: 0}; 2];
      let frame_mvs = &fs.frame_mvs[ref_frame];
452
      let frame_ref_opt = fi.rec_buffer.frames[fi.ref_frames[0] as usize].as_ref().map(Arc::as_ref);
453 454 455 456 457 458 459 460

      let mut lowest_cost = std::u64::MAX;
      let mut best_mv = MotionVector::default();

      // Divide by 4 to account for subsampling, 0.125 is a fudge factor
      let lambda = (fi.me_lambda * 256.0 / 4.0 * 0.125) as u32;

      Self::me_ss2(
461 462
        fi, fs, pmvs, bo_adj,
        frame_mvs, frame_ref_opt, rec, global_mv, lambda,
463 464 465 466 467 468 469 470 471 472 473 474 475
        mvx_min, mvx_max, mvy_min, mvy_max, blk_w, blk_h,
        &mut best_mv, &mut lowest_cost
      );

      Some(MotionVector { row: best_mv.row * 2, col: best_mv.col * 2 })
    } else {
      None
    }
  }

  fn me_ss2<T: Pixel>(
    fi: &FrameInvariants<T>, fs: &FrameState<T>,
    pmvs: &[Option<MotionVector>; 3], bo_adj_h: BlockOffset,
476
    frame_mvs: &FrameMotionVectors, frame_ref_opt: Option<&ReferenceFrame<T>>,
477
    rec: &ReferenceFrame<T>, global_mv: [MotionVector; 2], lambda: u32,
478 479 480 481
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize,
    best_mv: &mut MotionVector, lowest_cost: &mut u64
  );
482
}
483

484 485 486
/// Motion estimation strategy whose stages are built on `diamond_me_search`.
pub struct DiamondSearch {}
/// Motion estimation strategy that uses `full_search` for the full-pixel
/// and half-resolution stages and `telescopic_subpel_search` for the
/// sub-pixel stage.
pub struct FullSearch {}

487 488
impl MotionEstimation for DiamondSearch {
  fn full_pixel_me<T: Pixel>(
489
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &ReferenceFrame<T>,
490
    bo: BlockOffset, lambda: u32,
491 492
    cmv: MotionVector, pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
493
    best_mv: &mut MotionVector, lowest_cost: &mut u64, ref_frame: RefType
494
  ) {
Thomas Daede's avatar
Thomas Daede committed
495
    let frame_mvs = &fs.frame_mvs[ref_frame.to_index()];
496
    let frame_ref = fi.rec_buffer.frames[fi.ref_frames[0] as usize].as_ref().map(Arc::as_ref);
497
    let predictors =
Thomas Daede's avatar
Thomas Daede committed
498
      get_subset_predictors(bo, cmv, fi.w_in_b, fi.h_in_b, frame_mvs, frame_ref, ref_frame.to_index());
499 500 501

    diamond_me_search(
      fi,
502
      bo.to_luma_plane_offset(),
503 504 505 506 507 508 509 510 511 512 513 514 515 516
      &fs.input.planes[0],
      &rec.frame.planes[0],
      &predictors,
      fi.sequence.bit_depth,
      pmv,
      lambda,
      mvx_min,
      mvx_max,
      mvy_min,
      mvy_max,
      blk_w,
      blk_h,
      best_mv,
      lowest_cost,
517
      false,
518 519 520
      ref_frame
    );
  }
521 522

  fn sub_pixel_me<T: Pixel>(
523
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &ReferenceFrame<T>,
524
    bo: BlockOffset, lambda: u32,
525 526
    pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
527
    best_mv: &mut MotionVector, lowest_cost: &mut u64, ref_frame: RefType,
528 529 530 531 532
  )
  {
    let predictors = vec![*best_mv];
    diamond_me_search(
      fi,
533
      bo.to_luma_plane_offset(),
534 535 536 537 538 539 540 541 542 543 544 545 546 547
      &fs.input.planes[0],
      &rec.frame.planes[0],
      &predictors,
      fi.sequence.bit_depth,
      pmv,
      lambda,
      mvx_min,
      mvx_max,
      mvy_min,
      mvy_max,
      blk_w,
      blk_h,
      best_mv,
      lowest_cost,
548
      true,
549 550 551
      ref_frame
    );
  }
552 553 554

  fn me_ss2<T: Pixel>(
    fi: &FrameInvariants<T>, fs: &FrameState<T>,
555
    pmvs: &[Option<MotionVector>; 3], bo_adj: BlockOffset,
556
    frame_mvs: &FrameMotionVectors, frame_ref_opt: Option<&ReferenceFrame<T>>,
557
    rec: &ReferenceFrame<T>, global_mv: [MotionVector; 2], lambda: u32,
558 559 560 561
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize,
    best_mv: &mut MotionVector, lowest_cost: &mut u64
  ) {
562 563 564 565
    let po = PlaneOffset {
      x: (bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
      y: (bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
    };
566 567 568
    for omv in pmvs.iter() {
      if let Some(pmv) = omv {
        let mut predictors = get_subset_predictors::<T>(
569
          bo_adj,
570 571 572 573 574 575 576 577 578 579 580
          MotionVector{row: pmv.row, col: pmv.col},
          fi.w_in_b, fi.h_in_b,
          &frame_mvs, frame_ref_opt, 0
        );

        for predictor in &mut predictors {
          predictor.row >>= 1;
          predictor.col >>= 1;
        }

        diamond_me_search(
Romain Vimont's avatar
Romain Vimont committed
581
          fi, po,
582 583 584 585 586 587
          &fs.input_hres, &rec.input_hres,
          &predictors, fi.sequence.bit_depth,
          global_mv, lambda,
          mvx_min >> 1, mvx_max >> 1, mvy_min >> 1, mvy_max >> 1,
          blk_w >> 1, blk_h >> 1,
          best_mv, lowest_cost,
Thomas Daede's avatar
Thomas Daede committed
588
          false, LAST_FRAME
589 590 591 592
        );
      }
    }
  }
593 594 595 596
}

impl MotionEstimation for FullSearch {
  fn full_pixel_me<T: Pixel>(
597
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &ReferenceFrame<T>,
598
    bo: BlockOffset, lambda: u32,
599 600
    cmv: MotionVector, pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
601
    best_mv: &mut MotionVector, lowest_cost: &mut u64, _ref_frame: RefType
602
  ) {
603
    let po = bo.to_luma_plane_offset();
604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624
    let range = 16;
    let x_lo = po.x
      + ((-range + (cmv.col / 8) as isize).max(mvx_min / 8).min(mvx_max / 8));
    let x_hi = po.x
      + ((range + (cmv.col / 8) as isize).max(mvx_min / 8).min(mvx_max / 8));
    let y_lo = po.y
      + ((-range + (cmv.row / 8) as isize).max(mvy_min / 8).min(mvy_max / 8));
    let y_hi = po.y
      + ((range + (cmv.row / 8) as isize).max(mvy_min / 8).min(mvy_max / 8));

    full_search(
      x_lo,
      x_hi,
      y_lo,
      y_hi,
      blk_h,
      blk_w,
      &fs.input.planes[0],
      &rec.frame.planes[0],
      best_mv,
      lowest_cost,
Romain Vimont's avatar
Romain Vimont committed
625
      po,
626 627 628 629 630 631 632
      2,
      fi.sequence.bit_depth,
      lambda,
      pmv,
      fi.allow_high_precision_mv
    );
  }
633 634

  fn sub_pixel_me<T: Pixel>(
635
    fi: &FrameInvariants<T>, fs: &FrameState<T>, _rec: &ReferenceFrame<T>,
636
    bo: BlockOffset, lambda: u32,
637
    pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
638
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
639
    best_mv: &mut MotionVector, lowest_cost: &mut u64, ref_frame: RefType,
640 641 642 643 644
  )
  {
    telescopic_subpel_search(
      fi,
      fs,
645
      bo.to_luma_plane_offset(),
646 647 648 649 650 651 652
      lambda,
      ref_frame,
      pmv,
      mvx_min,
      mvx_max,
      mvy_min,
      mvy_max,
653 654
      blk_w,
      blk_h,
655 656 657 658
      best_mv,
      lowest_cost
    );
  }
659 660 661

  fn me_ss2<T: Pixel>(
    fi: &FrameInvariants<T>, fs: &FrameState<T>,
662
    pmvs: &[Option<MotionVector>; 3], bo_adj: BlockOffset,
663
    _frame_mvs: &FrameMotionVectors, _frame_ref_opt: Option<&ReferenceFrame<T>>,
664
    rec: &ReferenceFrame<T>, _global_mv: [MotionVector; 2], lambda: u32,
665 666 667 668
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize,
    best_mv: &mut MotionVector, lowest_cost: &mut u64
  ) {
669 670 671 672
    let po = PlaneOffset {
      x: (bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
      y: (bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
    };
673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690
    let range = 16;
    for omv in pmvs.iter() {
      if let Some(pmv) = omv {
        let x_lo = po.x + (((pmv.col as isize / 8 - range).max(mvx_min / 8).min(mvx_max / 8)) >> 1);
        let x_hi = po.x + (((pmv.col as isize / 8 + range).max(mvx_min / 8).min(mvx_max / 8)) >> 1);
        let y_lo = po.y + (((pmv.row as isize / 8 - range).max(mvy_min / 8).min(mvy_max / 8)) >> 1);
        let y_hi = po.y + (((pmv.row as isize / 8 + range).max(mvy_min / 8).min(mvy_max / 8)) >> 1);
        full_search(
          x_lo,
          x_hi,
          y_lo,
          y_hi,
          blk_h >> 1,
          blk_w >> 1,
          &fs.input_hres,
          &rec.input_hres,
          best_mv,
          lowest_cost,
Romain Vimont's avatar
Romain Vimont committed
691
          po,
692 693 694 695 696 697 698 699 700
          1,
          fi.sequence.bit_depth,
          lambda,
          [MotionVector::default(); 2],
          fi.allow_high_precision_mv
        );
      }
    }
  }
701
}
702

703 704
fn get_best_predictor<T: Pixel>(
  fi: &FrameInvariants<T>,
Romain Vimont's avatar
Romain Vimont committed
705
  po: PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>,
706
  predictors: &[MotionVector],
707
  bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
708 709
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
710
  center_mv: &mut MotionVector, center_mv_cost: &mut u64,
Thomas Daede's avatar
Thomas Daede committed
711
  tmp_plane_opt: &mut Option<Plane<T>>, ref_frame: RefType) {
Vladimir Kazakov's avatar
Vladimir Kazakov committed
712
  *center_mv = MotionVector::default();
713 714 715 716 717 718
  *center_mv_cost = std::u64::MAX;

  for &init_mv in predictors.iter() {
    let cost = get_mv_rd_cost(
      fi, po, p_org, p_ref, bit_depth,
      pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
719
      blk_w, blk_h, init_mv, tmp_plane_opt, ref_frame);
720 721 722 723 724 725 726 727

    if cost < *center_mv_cost {
      *center_mv = init_mv;
      *center_mv_cost = cost;
    }
  }
}

728 729
fn diamond_me_search<T: Pixel>(
  fi: &FrameInvariants<T>,
Romain Vimont's avatar
Romain Vimont committed
730
  po: PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>,
731
  predictors: &[MotionVector],
732
  bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
733 734
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
735
  center_mv: &mut MotionVector, center_mv_cost: &mut u64,
Thomas Daede's avatar
Thomas Daede committed
736
  subpixel: bool, ref_frame: RefType)
737 738
{
  let diamond_pattern = [(1i16, 0i16), (0, 1), (-1, 0), (0, -1)];
739 740
  let (mut diamond_radius, diamond_radius_end, mut tmp_plane_opt) = {
    if subpixel {
741
      // Sub-pixel motion estimation
742 743 744 745 746
      (
        4i16,
        if fi.allow_high_precision_mv {1i16} else {2i16},
        Some(Plane::new(blk_w, blk_h, 0, 0, 0, 0)),
      )
747 748
    } else {
      // Full pixel motion estimation
749
      (16i16, 8i16, None)
750 751
    }
  };
752 753 754 755

  get_best_predictor(
    fi, po, p_org, p_ref, &predictors,
    bit_depth, pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
756
    blk_w, blk_h, center_mv, center_mv_cost,
757
    &mut tmp_plane_opt, ref_frame);
758 759 760

  loop {
    let mut best_diamond_rd_cost = std::u64::MAX;
Vladimir Kazakov's avatar
Vladimir Kazakov committed
761
    let mut best_diamond_mv = MotionVector::default();
762 763 764 765 766 767 768 769 770

    for p in diamond_pattern.iter() {

        let cand_mv = MotionVector {
          row: center_mv.row + diamond_radius * p.0,
          col: center_mv.col + diamond_radius * p.1
        };

        let rd_cost = get_mv_rd_cost(
Romain Vimont's avatar
Romain Vimont committed
771
          fi, po, p_org, p_ref, bit_depth,
772
          pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
773
          blk_w, blk_h, cand_mv, &mut tmp_plane_opt, ref_frame);
774 775 776 777 778 779 780 781

        if rd_cost < best_diamond_rd_cost {
          best_diamond_rd_cost = rd_cost;
          best_diamond_mv = cand_mv;
        }
    }

    if *center_mv_cost <= best_diamond_rd_cost {
782
      if diamond_radius == diamond_radius_end {
783 784 785 786 787 788 789 790 791 792 793 794 795 796
        break;
      } else {
        diamond_radius /= 2;
      }
    }
    else {
      *center_mv = best_diamond_mv;
      *center_mv_cost = best_diamond_rd_cost;
    }
  }

  assert!(*center_mv_cost < std::u64::MAX);
}

797 798
fn get_mv_rd_cost<T: Pixel>(
  fi: &FrameInvariants<T>,
Romain Vimont's avatar
Romain Vimont committed
799
  po: PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>, bit_depth: usize,
800
  pmv: [MotionVector; 2], lambda: u32,
801 802
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
803
  cand_mv: MotionVector, tmp_plane_opt: &mut Option<Plane<T>>,
Thomas Daede's avatar
Thomas Daede committed
804
  ref_frame: RefType) -> u64
805 806 807 808 809 810 811 812 813 814
{
  if (cand_mv.col as isize) < mvx_min || (cand_mv.col as isize) > mvx_max {
    return std::u64::MAX;
  }
  if (cand_mv.row as isize) < mvy_min || (cand_mv.row as isize) > mvy_max {
    return std::u64::MAX;
  }

  let plane_org = p_org.slice(po);

815
  if let Some(ref mut tmp_plane) = tmp_plane_opt {
Romain Vimont's avatar
Romain Vimont committed
816
    let mut tmp_slice = &mut tmp_plane.mut_slice(PlaneOffset { x: 0, y: 0 });
817 818 819
    PredictionMode::NEWMV.predict_inter(
      fi,
      0,
Romain Vimont's avatar
Romain Vimont committed
820
      po,
821 822 823 824 825 826
      &mut tmp_slice,
      blk_w,
      blk_h,
      [ref_frame, NONE_FRAME],
      [cand_mv, MotionVector { row: 0, col: 0 }]
    );
Romain Vimont's avatar
Romain Vimont committed
827
    let plane_ref = tmp_plane.slice(PlaneOffset { x: 0, y: 0 });
828 829 830 831 832 833
    compute_mv_rd_cost(
      fi, pmv, lambda, bit_depth, blk_w, blk_h, cand_mv,
      &plane_org, &plane_ref
    )
  } else {
    // Full pixel motion vector
Romain Vimont's avatar
Romain Vimont committed
834
    let plane_ref = p_ref.slice(PlaneOffset {
835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851
      x: po.x + (cand_mv.col / 8) as isize,
      y: po.y + (cand_mv.row / 8) as isize
    });
    compute_mv_rd_cost(
      fi, pmv, lambda, bit_depth, blk_w, blk_h, cand_mv,
      &plane_org, &plane_ref
    )
  }
}

fn compute_mv_rd_cost<T: Pixel>(
  fi: &FrameInvariants<T>,
  pmv: [MotionVector; 2], lambda: u32,
  bit_depth: usize, blk_w: usize, blk_h: usize, cand_mv: MotionVector,
  plane_org: &PlaneSlice<T>, plane_ref: &PlaneSlice<T>
) -> u64
{
852
  let sad = get_sad(&plane_org, &plane_ref, blk_w, blk_h, bit_depth);
853 854 855 856 857 858 859 860

  let rate1 = get_mv_rate(cand_mv, pmv[0], fi.allow_high_precision_mv);
  let rate2 = get_mv_rate(cand_mv, pmv[1], fi.allow_high_precision_mv);
  let rate = rate1.min(rate2 + 1);

  256 * sad as u64 + rate as u64 * lambda as u64
}

861
fn telescopic_subpel_search<T: Pixel>(
862
  fi: &FrameInvariants<T>, fs: &FrameState<T>, po: PlaneOffset,
Thomas Daede's avatar
Thomas Daede committed
863
  lambda: u32, ref_frame: RefType, pmv: [MotionVector; 2],
864
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
865
  blk_w: usize, blk_h: usize,
Romain Vimont's avatar
Romain Vimont committed
866
  best_mv: &mut MotionVector, lowest_cost: &mut u64
867 868 869 870 871 872 873 874
) {
  let mode = PredictionMode::NEWMV;

  let mut steps = vec![8, 4, 2];
  if fi.allow_high_precision_mv {
    steps.push(1);
  }

Romain Vimont's avatar
Romain Vimont committed
875 876
  let mut tmp_plane = Plane::new(blk_w, blk_h, 0, 0, 0, 0);

877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899
  for step in steps {
    let center_mv_h = *best_mv;
    for i in 0..3 {
      for j in 0..3 {
        // Skip the center point that was already tested
        if i == 1 && j == 1 {
          continue;
        }

        let cand_mv = MotionVector {
          row: center_mv_h.row + step * (i as i16 - 1),
          col: center_mv_h.col + step * (j as i16 - 1)
        };

        if (cand_mv.col as isize) < mvx_min || (cand_mv.col as isize) > mvx_max {
          continue;
        }
        if (cand_mv.row as isize) < mvy_min || (cand_mv.row as isize) > mvy_max {
          continue;
        }

        {
          let tmp_slice =
Romain Vimont's avatar
Romain Vimont committed
900
            &mut tmp_plane.mut_slice(PlaneOffset { x: 0, y: 0 });
901 902 903 904

          mode.predict_inter(
            fi,
            0,
Romain Vimont's avatar
Romain Vimont committed
905
            po,
906 907 908 909 910 911 912 913
            tmp_slice,
            blk_w,
            blk_h,
            [ref_frame, NONE_FRAME],
            [cand_mv, MotionVector { row: 0, col: 0 }]
          );
        }

Romain Vimont's avatar
Romain Vimont committed
914 915
        let plane_org = fs.input.planes[0].slice(po);
        let plane_ref = tmp_plane.slice(PlaneOffset { x: 0, y: 0 });
916

917
        let sad = get_sad(&plane_org, &plane_ref, blk_w, blk_h, fi.sequence.bit_depth);
918 919 920 921 922 923 924 925 926 927 928 929 930 931 932

        let rate1 = get_mv_rate(cand_mv, pmv[0], fi.allow_high_precision_mv);
        let rate2 = get_mv_rate(cand_mv, pmv[1], fi.allow_high_precision_mv);
        let rate = rate1.min(rate2 + 1);
        let cost = 256 * sad as u64 + rate as u64 * lambda as u64;

        if cost < *lowest_cost {
          *lowest_cost = cost;
          *best_mv = cand_mv;
        }
      }
    }
  }
}

933
fn full_search<T: Pixel>(
Kyle Siefring's avatar
Kyle Siefring committed
934
  x_lo: isize, x_hi: isize, y_lo: isize, y_hi: isize, blk_h: usize,
935
  blk_w: usize, p_org: &Plane<T>, p_ref: &Plane<T>, best_mv: &mut MotionVector,
Romain Vimont's avatar
Romain Vimont committed
936
  lowest_cost: &mut u64, po: PlaneOffset, step: usize, bit_depth: usize,
937
  lambda: u32, pmv: [MotionVector; 2], allow_high_precision_mv: bool
Kyle Siefring's avatar
Kyle Siefring committed
938
) {
Luca Barbato's avatar
Luca Barbato committed
939 940 941 942 943
    let search_range_y = (y_lo..=y_hi).step_by(step);
    let search_range_x = (x_lo..=x_hi).step_by(step);
    let search_area = search_range_y.flat_map(|y| { search_range_x.clone().map(move |x| (y, x)) });

    let (cost, mv) = search_area.map(|(y, x)| {
944
      let plane_org = p_org.slice(po);
Romain Vimont's avatar
Romain Vimont committed
945
      let plane_ref = p_ref.slice(PlaneOffset { x, y });
946

947
      let sad = get_sad(&plane_org, &plane_ref, blk_w, blk_h, bit_depth);
948

Frank Bossen's avatar
Frank Bossen committed
949 950 951 952 953 954 955 956
      let mv = MotionVector {
        row: 8 * (y as i16 - po.y as i16),
        col: 8 * (x as i16 - po.x as i16)
      };

      let rate1 = get_mv_rate(mv, pmv[0], allow_high_precision_mv);
      let rate2 = get_mv_rate(mv, pmv[1], allow_high_precision_mv);
      let rate = rate1.min(rate2 + 1);
957
      let cost = 256 * sad as u64 + rate as u64 * lambda as u64;
Frank Bossen's avatar
Frank Bossen committed
958

Luca Barbato's avatar
Luca Barbato committed
959 960 961 962 963
      (cost, mv)
  }).min_by_key(|(c, _)| *c).unwrap();

    *lowest_cost = cost;
    *best_mv = mv;
964 965 966
}

// Adjust block offset such that entire block lies within frame boundaries
Romain Vimont's avatar
Romain Vimont committed
967
fn adjust_bo<T: Pixel>(bo: BlockOffset, fi: &FrameInvariants<T>, blk_w: usize, blk_h: usize) -> BlockOffset {
968 969 970 971 972 973
  BlockOffset {
    x: (bo.x as isize).min(fi.w_in_b as isize - blk_w as isize / 4).max(0) as usize,
    y: (bo.y as isize).min(fi.h_in_b as isize - blk_h as isize / 4).max(0) as usize
  }
}

974
#[inline(always)]
Frank Bossen's avatar
Frank Bossen committed
975
fn get_mv_rate(a: MotionVector, b: MotionVector, allow_high_precision_mv: bool) -> u32 {
976
  #[inline(always)]
Frank Bossen's avatar
Frank Bossen committed
977 978 979 980 981 982 983 984 985 986 987 988
  fn diff_to_rate(diff: i16, allow_high_precision_mv: bool) -> u32 {
    let d = if allow_high_precision_mv { diff } else { diff >> 1 };
    if d == 0 {
      0
    } else {
      2 * (16 - d.abs().leading_zeros())
    }
  }

  diff_to_rate(a.row - b.row, allow_high_precision_mv) + diff_to_rate(a.col - b.col, allow_high_precision_mv)
}

989 990
pub fn estimate_motion_ss4<T: Pixel>(
  fi: &FrameInvariants<T>, fs: &FrameState<T>, bsize: BlockSize, ref_idx: usize,
Romain Vimont's avatar
Romain Vimont committed
991
  bo: BlockOffset
992 993 994 995 996 997 998 999 1000
) -> Option<MotionVector> {
  if let Some(ref rec) = fi.rec_buffer.frames[ref_idx] {
    let blk_w = bsize.width();
    let blk_h = bsize.height();
    let bo_adj = adjust_bo(bo, fi, blk_w, blk_h);
    let po = PlaneOffset {
      x: (bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 2,
      y: (bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 2
    };
1001

1002 1003
    let range_x = 192 * fi.me_range_scale as isize;
    let range_y = 64 * fi.me_range_scale as isize;
Romain Vimont's avatar
Romain Vimont committed
1004
    let (mvx_min, mvx_max, mvy_min, mvy_max) = get_mv_range(fi.w_in_b, fi.h_in_b, bo_adj, blk_w, blk_h);
1005 1006 1007 1008
    let x_lo = po.x + (((-range_x).max(mvx_min / 8)) >> 2);
    let x_hi = po.x + (((range_x).min(mvx_max / 8)) >> 2);
    let y_lo = po.y + (((-range_y).max(mvy_min / 8)) >> 2);
    let y_hi = po.y + (((range_y).min(mvy_max / 8)) >> 2);
1009

1010
    let mut lowest_cost = std::u64::MAX;
Vladimir Kazakov's avatar
Vladimir Kazakov committed
1011
    let mut best_mv = MotionVector::default();
1012

Frank Bossen's avatar
Frank Bossen committed
1013
    // Divide by 16 to account for subsampling, 0.125 is a fudge factor
1014
    let lambda = (fi.me_lambda * 256.0 / 16.0 * 0.125) as u32;
Frank Bossen's avatar
Frank Bossen committed
1015

1016
    full_search(
Kyle Siefring's avatar
Kyle Siefring committed
1017 1018 1019 1020 1021 1022 1023 1024 1025
      x_lo,
      x_hi,
      y_lo,
      y_hi,
      blk_h >> 2,
      blk_w >> 2,
      &fs.input_qres,
      &rec.input_qres,
      &mut best_mv,
Frank Bossen's avatar
Frank Bossen committed
1026
      &mut lowest_cost,
Romain Vimont's avatar
Romain Vimont committed
1027
      po,
Kyle Siefring's avatar
Kyle Siefring committed
1028
      1,
1029
      fi.sequence.bit_depth,
Frank Bossen's avatar
Frank Bossen committed
1030
      lambda,
Vladimir Kazakov's avatar
Vladimir Kazakov committed
1031
      [MotionVector::default(); 2],
Frank Bossen's avatar
Frank Bossen committed
1032
      fi.allow_high_precision_mv
1033 1034 1035 1036 1037 1038 1039 1040
    );

    Some(MotionVector { row: best_mv.row * 4, col: best_mv.col * 4 })
  } else {
    None
  }
}

1041 1042 1043
#[cfg(test)]
pub mod test {
  use super::*;
Raphaël Zumer's avatar
Raphaël Zumer committed
1044 1045
  use crate::partition::BlockSize;
  use crate::partition::BlockSize::*;
1046 1047

  // Generate plane data for get_sad_same()
1048
  fn setup_sad<T: Pixel>() -> (Plane<T>, Plane<T>) {
1049 1050
    let mut input_plane = Plane::new(640, 480, 0, 0, 128 + 8, 128 + 8);
    let mut rec_plane = input_plane.clone();
1051 1052
    // Make the test pattern robust to data alignment
    let xpad_off = (input_plane.cfg.xorigin - input_plane.cfg.xpad) as i32 - 8i32;
Luca Barbato's avatar
Luca Barbato committed
1053

1054
    for (i, row) in input_plane.data.chunks_mut(input_plane.cfg.stride).enumerate() {
1055
      for (j, pixel) in row.into_iter().enumerate() {
1056
        let val = (j + i) as i32 - xpad_off & 255i32;
Luca Barbato's avatar
Luca Barbato committed
1057
        assert!(val >= u8::min_value().into() &&