me.rs 35.1 KB
Newer Older
1 2 3 4 5 6 7 8 9
// Copyright (c) 2017-2018, The rav1e contributors. All rights reserved
//
// This source code is subject to the terms of the BSD 2 Clause License and
// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
// was not distributed with this source code in the LICENSE file, you can
// obtain it at www.aomedia.org/license/software. If the Alliance for Open
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.

10
#[cfg(all(target_arch = "x86_64", feature = "nasm"))]
Raphaël Zumer's avatar
Raphaël Zumer committed
11
pub use self::nasm::get_sad;
12
#[cfg(any(not(target_arch = "x86_64"), not(feature = "nasm")))]
Raphaël Zumer's avatar
Raphaël Zumer committed
13
pub use self::native::get_sad;
Raphaël Zumer's avatar
Raphaël Zumer committed
14
use crate::context::{BlockOffset, BLOCK_TO_PLANE_SHIFT, MI_SIZE};
15
use crate::encoder::ReferenceFrame;
Raphaël Zumer's avatar
Raphaël Zumer committed
16 17 18
use crate::FrameInvariants;
use crate::FrameState;
use crate::partition::*;
Thomas Daede's avatar
Thomas Daede committed
19
use crate::partition::RefType::*;
Raphaël Zumer's avatar
Raphaël Zumer committed
20
use crate::plane::*;
21
use crate::util::Pixel;
22

Romain Vimont's avatar
Romain Vimont committed
23
use std::ops::{Index, IndexMut};
24
use std::sync::Arc;
25

26
/// SAD (sum of absolute differences) entry points backed by external SIMD
/// kernels. Only built for x86_64 with the `nasm` feature enabled; every
/// other configuration uses `native::get_sad` directly.
#[cfg(all(target_arch = "x86_64", feature = "nasm"))]
mod nasm {
  use crate::plane::*;
  use crate::util::*;
  use std::mem;

  use libc;

  // External kernels. Each one handles a single square block of the size in
  // its name. Callers in this module pass strides in bytes (the u16 path
  // multiplies the element stride by 2).
  extern {
    // High bit-depth (u16 samples) kernels.
    fn rav1e_sad_4x4_hbd_ssse3(
      src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad_8x8_hbd10_ssse3(
      src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad_16x16_hbd_ssse3(
      src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad_32x32_hbd10_ssse3(
      src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad_64x64_hbd10_ssse3(
      src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad_128x128_hbd10_ssse3(
      src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    // 8-bit (u8 samples) SSE2 kernels.
    fn rav1e_sad4x4_sse2(
      src: *const u8, src_stride: libc::ptrdiff_t, dst: *const u8,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad8x8_sse2(
      src: *const u8, src_stride: libc::ptrdiff_t, dst: *const u8,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad16x16_sse2(
      src: *const u8, src_stride: libc::ptrdiff_t, dst: *const u8,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad32x32_sse2(
      src: *const u8, src_stride: libc::ptrdiff_t, dst: *const u8,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad64x64_sse2(
      src: *const u8, src_stride: libc::ptrdiff_t, dst: *const u8,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad128x128_sse2(
      src: *const u8, src_stride: libc::ptrdiff_t, dst: *const u8,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    // 8-bit (u8 samples) AVX2 kernels.
    fn rav1e_sad16x16_avx2(
      src: *const u8, src_stride: libc::ptrdiff_t, dst: *const u8,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad32x32_avx2(
      src: *const u8, src_stride: libc::ptrdiff_t, dst: *const u8,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad64x64_avx2(
      src: *const u8, src_stride: libc::ptrdiff_t, dst: *const u8,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad128x128_avx2(
      src: *const u8, src_stride: libc::ptrdiff_t, dst: *const u8,
      dst_stride: libc::ptrdiff_t
    ) -> u32;
  }

  /// SAD of a `blk_w` x `blk_h` block of u16 samples, computed by tiling the
  /// block with square SSSE3 kernels and summing their results.
  ///
  /// For `bit_depth > 10` only the 4x4 kernel is used; otherwise the tile
  /// side is `min(blk_w, blk_h)` (capped at 128).
  ///
  /// # Safety
  /// The CPU must support SSSE3, and both slices must cover at least
  /// `blk_w` x `blk_h` readable samples.
  #[target_feature(enable = "ssse3")]
  unsafe fn sad_hbd_ssse3(
    plane_org: &PlaneSlice<'_, u16>, plane_ref: &PlaneSlice<'_, u16>, blk_h: usize,
    blk_w: usize, bit_depth: usize
  ) -> u32 {
    let mut sum = 0 as u32;
    // Element stride times two: the kernels receive the stride of u16 rows
    // in bytes.
    let org_stride = (plane_org.plane.cfg.stride * 2) as libc::ptrdiff_t;
    let ref_stride = (plane_ref.plane.cfg.stride * 2) as libc::ptrdiff_t;
    assert!(blk_h >= 4 && blk_w >= 4);
    let step_size =
      blk_h.min(blk_w).min(if bit_depth <= 10 { 128 } else { 4 });
    // NOTE(review): `ilog()` comes from `crate::util`; judging by the arms
    // it yields the bit length (4 -> 3, 8 -> 4, ...) - confirm.
    let func = match step_size.ilog() {
      3 => rav1e_sad_4x4_hbd_ssse3,
      4 => rav1e_sad_8x8_hbd10_ssse3,
      5 => rav1e_sad_16x16_hbd_ssse3,
      6 => rav1e_sad_32x32_hbd10_ssse3,
      7 => rav1e_sad_64x64_hbd10_ssse3,
      8 => rav1e_sad_128x128_hbd10_ssse3,
      _ => rav1e_sad_128x128_hbd10_ssse3
    };
    for r in (0..blk_h).step_by(step_size) {
      for c in (0..blk_w).step_by(step_size) {
        let org_slice = plane_org.subslice(c, r);
        let ref_slice = plane_ref.subslice(c, r);
        let org_ptr = org_slice.as_ptr();
        let ref_ptr = ref_slice.as_ptr();
        // FIXME for now, T == u16
        let org_ptr = org_ptr as *const u16;
        let ref_ptr = ref_ptr as *const u16;
        sum += func(org_ptr, org_stride, ref_ptr, ref_stride);
      }
    }
    sum
  }

  /// SAD of a `blk_w` x `blk_h` block of u8 samples using the SSE2 kernels.
  ///
  /// The tile size is limited by the alignment of the source pointer so
  /// that kernels larger than 8x8 only see suitably aligned source
  /// addresses.
  ///
  /// # Safety
  /// The CPU must support SSE2, and both slices must cover at least
  /// `blk_w` x `blk_h` readable samples.
  #[target_feature(enable = "sse2")]
  unsafe fn sad_sse2(
    plane_org: &PlaneSlice<'_, u8>, plane_ref: &PlaneSlice<'_, u8>, blk_h: usize,
    blk_w: usize
  ) -> u32 {
    let org_ptr = plane_org.as_ptr();
    let ref_ptr = plane_ref.as_ptr();
    let org_stride = plane_org.plane.cfg.stride as libc::ptrdiff_t;
    let ref_stride = plane_ref.plane.cfg.stride as libc::ptrdiff_t;
    // Fast path: a 16-byte-aligned 16x16 block needs a single kernel call.
    if blk_w == 16 && blk_h == 16 && (org_ptr as usize & 15) == 0 {
      return rav1e_sad16x16_sse2(org_ptr, org_stride, ref_ptr, ref_stride);
    }
    // Note: unaligned blocks come from hres/qres ME search
    let ptr_align_log2 = (org_ptr as usize).trailing_zeros() as usize;
    // The largest unaligned-safe function is for 8x8
    let ptr_align = 1 << ptr_align_log2.max(3);
    let step_size = blk_h.min(blk_w).min(ptr_align);
    let func = match step_size.ilog() {
      3 => rav1e_sad4x4_sse2,
      4 => rav1e_sad8x8_sse2,
      5 => rav1e_sad16x16_sse2,
      6 => rav1e_sad32x32_sse2,
      7 => rav1e_sad64x64_sse2,
      8 => rav1e_sad128x128_sse2,
      _ => rav1e_sad128x128_sse2
    };
    let mut sum = 0 as u32;
    for r in (0..blk_h as isize).step_by(step_size) {
      for c in (0..blk_w as isize).step_by(step_size) {
        let org_ptr = org_ptr.offset(r * org_stride + c);
        let ref_ptr = ref_ptr.offset(r * ref_stride + c);
        sum += func(org_ptr, org_stride, ref_ptr, ref_stride);
      }
    }
    sum
  }

  /// SAD of a `blk_w` x `blk_h` block of u8 samples using the AVX2 kernels
  /// for tiles of 16x16 and larger, and the SSE2 kernels for 4x4 and 8x8.
  ///
  /// # Safety
  /// The CPU must support AVX2, and both slices must cover at least
  /// `blk_w` x `blk_h` readable samples.
  #[target_feature(enable = "avx2")]
  unsafe fn sad_avx2(
    plane_org: &PlaneSlice<'_, u8>, plane_ref: &PlaneSlice<'_, u8>, blk_h: usize,
    blk_w: usize
  ) -> u32 {
    let org_ptr = plane_org.as_ptr();
    let ref_ptr = plane_ref.as_ptr();
    let org_stride = plane_org.plane.cfg.stride as libc::ptrdiff_t;
    let ref_stride = plane_ref.plane.cfg.stride as libc::ptrdiff_t;
    let step_size = blk_h.min(blk_w);
    let func = match step_size.ilog() {
      3 => rav1e_sad4x4_sse2,
      4 => rav1e_sad8x8_sse2,
      5 => rav1e_sad16x16_avx2,
      6 => rav1e_sad32x32_avx2,
      7 => rav1e_sad64x64_avx2,
      8 => rav1e_sad128x128_avx2,
      _ => rav1e_sad128x128_avx2
    };
    // Square block: one kernel call covers it entirely.
    if blk_w == blk_h {
      return func(org_ptr, org_stride, ref_ptr, ref_stride);
    }
    let mut sum = 0 as u32;
    for r in (0..blk_h as isize).step_by(step_size) {
      for c in (0..blk_w as isize).step_by(step_size) {
        let org_ptr = org_ptr.offset(r * org_stride + c);
        let ref_ptr = ref_ptr.offset(r * ref_stride + c);
        sum += func(org_ptr, org_stride, ref_ptr, ref_stride);
      }
    }
    sum
  }

  /// Sum of absolute differences between two `blk_w` x `blk_h` blocks.
  ///
  /// Dispatches at run time to the fastest kernel the CPU supports:
  /// SSSE3 for 2-byte pixels, then AVX2 or SSE2 for 1-byte pixels. Blocks
  /// narrower or shorter than 4, or CPUs without the required feature,
  /// fall back to `native::get_sad`.
  #[inline(always)]
  pub fn get_sad<T: Pixel>(
    plane_org: &PlaneSlice<'_, T>, plane_ref: &PlaneSlice<'_, T>, blk_h: usize,
    blk_w: usize, bit_depth: usize
  ) -> u32 {
    if mem::size_of::<T>() == 2 && is_x86_feature_detected!("ssse3") && blk_h >= 4 && blk_w >= 4 {
      // SAFETY: SSSE3 support was just detected. The reinterpretation as
      // `PlaneSlice<u16>` relies on a 2-byte `T` being `u16` (see the FIXME
      // in `sad_hbd_ssse3`).
      return unsafe {
        let plane_org = &*(plane_org as *const _ as *const PlaneSlice<'_, u16>);
        let plane_ref = &*(plane_ref as *const _ as *const PlaneSlice<'_, u16>);
        sad_hbd_ssse3(plane_org, plane_ref, blk_h, blk_w, bit_depth)
      };
    }
    // `sad_avx2` is compiled with the "avx2" target feature and calls AVX2
    // kernels, so AVX2 itself must be detected; plain "avx" is not enough.
    if mem::size_of::<T>() == 1 && is_x86_feature_detected!("avx2") && blk_h >= 4 && blk_w >= 4 {
      // SAFETY: AVX2 support was just detected. The reinterpretation as
      // `PlaneSlice<u8>` relies on a 1-byte `T` being `u8`.
      return unsafe {
        let plane_org = &*(plane_org as *const _ as *const PlaneSlice<'_, u8>);
        let plane_ref = &*(plane_ref as *const _ as *const PlaneSlice<'_, u8>);
        sad_avx2(plane_org, plane_ref, blk_h, blk_w)
      };
    }
    if mem::size_of::<T>() == 1 && is_x86_feature_detected!("sse2") && blk_h >= 4 && blk_w >= 4 {
      // SAFETY: SSE2 support was just detected. The reinterpretation as
      // `PlaneSlice<u8>` relies on a 1-byte `T` being `u8`.
      return unsafe {
        let plane_org = &*(plane_org as *const _ as *const PlaneSlice<'_, u8>);
        let plane_ref = &*(plane_ref as *const _ as *const PlaneSlice<'_, u8>);
        sad_sse2(plane_org, plane_ref, blk_h, blk_w)
      };
    }
    super::native::get_sad(plane_org, plane_ref, blk_h, blk_w, bit_depth)
  }
}

mod native {
Raphaël Zumer's avatar
Raphaël Zumer committed
255
  use crate::plane::*;
256
  use crate::util::*;
257

Luca Barbato's avatar
Luca Barbato committed
258
  #[inline(always)]
259 260
  pub fn get_sad<T: Pixel>(
    plane_org: &PlaneSlice<'_, T>, plane_ref: &PlaneSlice<'_, T>, blk_h: usize,
Luca Barbato's avatar
Luca Barbato committed
261 262 263
    blk_w: usize, _bit_depth: usize
  ) -> u32 {
    let mut sum = 0 as u32;
264

Luca Barbato's avatar
Luca Barbato committed
265 266
    let org_iter = plane_org.iter_width(blk_w);
    let ref_iter = plane_ref.iter_width(blk_w);
267

Luca Barbato's avatar
Luca Barbato committed
268
    for (slice_org, slice_ref) in org_iter.take(blk_h).zip(ref_iter) {
269 270 271
      sum += slice_org
        .iter()
        .zip(slice_ref)
272
        .map(|(&a, &b)| (i32::cast_from(a) - i32::cast_from(b)).abs() as u32)
273
        .sum::<u32>();
Luca Barbato's avatar
Luca Barbato committed
274
    }
275

Luca Barbato's avatar
Luca Barbato committed
276 277
    sum
  }
278 279
}

Romain Vimont's avatar
Romain Vimont committed
280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311
/// A `rows` x `cols` grid of motion vectors kept in one flat, row-major
/// allocation. Indexing with `[row]` yields that row as a slice, so
/// `mvs[row][col]` addresses a single vector.
#[derive(Debug, Clone)]
pub struct FrameMotionVectors {
  mvs: Box<[MotionVector]>,
  pub cols: usize,
  pub rows: usize,
}

impl FrameMotionVectors {
  /// Creates a grid of `cols * rows` default-initialised motion vectors.
  pub fn new(cols: usize, rows: usize) -> Self {
    let len = cols * rows;
    let mvs = vec![MotionVector::default(); len].into_boxed_slice();
    Self { mvs, cols, rows }
  }
}

impl Index<usize> for FrameMotionVectors {
  type Output = [MotionVector];

  /// Returns row `index` as a slice of `cols` vectors.
  #[inline]
  fn index(&self, index: usize) -> &Self::Output {
    let start = index * self.cols;
    let end = (index + 1) * self.cols;
    &self.mvs[start..end]
  }
}

impl IndexMut<usize> for FrameMotionVectors {
  /// Returns row `index` as a mutable slice of `cols` vectors.
  #[inline]
  fn index_mut(&mut self, index: usize) -> &mut Self::Output {
    let start = index * self.cols;
    let end = (index + 1) * self.cols;
    &mut self.mvs[start..end]
  }
}

312
fn get_mv_range(
Romain Vimont's avatar
Romain Vimont committed
313
  w_in_b: usize, h_in_b: usize, bo: BlockOffset, blk_w: usize, blk_h: usize
314
) -> (isize, isize, isize, isize) {
315 316 317
  let border_w = 128 + blk_w as isize * 8;
  let border_h = 128 + blk_h as isize * 8;
  let mvx_min = -(bo.x as isize) * (8 * MI_SIZE) as isize - border_w;
318
  let mvx_max = (w_in_b - bo.x - blk_w / MI_SIZE) as isize * (8 * MI_SIZE) as isize + border_w;
319
  let mvy_min = -(bo.y as isize) * (8 * MI_SIZE) as isize - border_h;
320
  let mvy_max = (h_in_b - bo.y - blk_h / MI_SIZE) as isize * (8 * MI_SIZE) as isize + border_h;
321 322 323 324

  (mvx_min, mvx_max, mvy_min, mvy_max)
}

325
pub fn get_subset_predictors<T: Pixel>(
Romain Vimont's avatar
Romain Vimont committed
326
  bo: BlockOffset, cmv: MotionVector,
327
  w_in_b: usize, h_in_b: usize,
328
  frame_mvs: &FrameMotionVectors, frame_ref_opt: Option<&ReferenceFrame<T>>,
329
  ref_frame_id: usize
330 331 332
) -> (Vec<MotionVector>) {
  let mut predictors = Vec::new();

333 334 335 336 337 338
  // Zero motion vector
  predictors.push(MotionVector::default());

  // Coarse motion estimation.
  predictors.push(cmv.quantize_to_fullpel());

339 340
  // EPZS subset A and B predictors.

341
  let mut median_preds = Vec::new();
342
  if bo.x > 0 {
Romain Vimont's avatar
Romain Vimont committed
343
    let left = frame_mvs[bo.y][bo.x - 1];
344 345
    median_preds.push(left);
    if !left.is_zero() { predictors.push(left); }
346 347
  }
  if bo.y > 0 {
Romain Vimont's avatar
Romain Vimont committed
348
    let top = frame_mvs[bo.y - 1][bo.x];
349 350
    median_preds.push(top);
    if !top.is_zero() { predictors.push(top); }
351

352
    if bo.x < w_in_b - 1 {
Romain Vimont's avatar
Romain Vimont committed
353
      let top_right = frame_mvs[bo.y - 1][bo.x + 1];
354 355
      median_preds.push(top_right);
      if !top_right.is_zero() { predictors.push(top_right); }
356 357 358
    }
  }

359
  if !median_preds.is_empty() {
Vladimir Kazakov's avatar
Vladimir Kazakov committed
360
    let mut median_mv = MotionVector::default();
361
    for mv in median_preds.iter() {
362 363
      median_mv = median_mv + *mv;
    }
364 365 366
    median_mv = median_mv / (median_preds.len() as i16);
    let median_mv_quant = median_mv.quantize_to_fullpel();
    if !median_mv_quant.is_zero() { predictors.push(median_mv_quant); }
367 368 369 370 371
  }

  // EPZS subset C predictors.

  if let Some(ref frame_ref) = frame_ref_opt {
372
    let prev_frame_mvs = &frame_ref.frame_mvs[ref_frame_id];
373 374

    if bo.x > 0 {
Romain Vimont's avatar
Romain Vimont committed
375
      let left = prev_frame_mvs[bo.y][bo.x - 1];
376
      if !left.is_zero() { predictors.push(left); }
377 378
    }
    if bo.y > 0 {
Romain Vimont's avatar
Romain Vimont committed
379
      let top = prev_frame_mvs[bo.y - 1][bo.x];
380
      if !top.is_zero() { predictors.push(top); }
381
    }
382
    if bo.x < w_in_b - 1 {
Romain Vimont's avatar
Romain Vimont committed
383
      let right = prev_frame_mvs[bo.y][bo.x + 1];
384
      if !right.is_zero() { predictors.push(right); }
385
    }
386
    if bo.y < h_in_b - 1 {
Romain Vimont's avatar
Romain Vimont committed
387
      let bottom = prev_frame_mvs[bo.y + 1][bo.x];
388
      if !bottom.is_zero() { predictors.push(bottom); }
389 390
    }

391 392
    let previous = prev_frame_mvs[bo.y][bo.x];
    if !previous.is_zero() { predictors.push(previous); }
393 394 395 396 397
  }

  predictors
}

398
pub trait MotionEstimation {
399
  fn full_pixel_me<T: Pixel>(
400
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &ReferenceFrame<T>,
Romain Vimont's avatar
Romain Vimont committed
401
    bo: BlockOffset, lambda: u32,
402
    cmv: MotionVector, pmv: [MotionVector; 2],
403 404
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize, best_mv: &mut MotionVector,
Thomas Daede's avatar
Thomas Daede committed
405
    lowest_cost: &mut u64, ref_frame: RefType
406
  );
407

408
  fn sub_pixel_me<T: Pixel>(
409
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &ReferenceFrame<T>,
Romain Vimont's avatar
Romain Vimont committed
410
    bo: BlockOffset, lambda: u32, pmv: [MotionVector; 2],
411 412
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize, best_mv: &mut MotionVector,
Thomas Daede's avatar
Thomas Daede committed
413
    lowest_cost: &mut u64, ref_frame: RefType
414 415
  );

416 417
  fn motion_estimation<T: Pixel> (
    fi: &FrameInvariants<T>, fs: &FrameState<T>, bsize: BlockSize,
Thomas Daede's avatar
Thomas Daede committed
418
    bo: BlockOffset, ref_frame: RefType, cmv: MotionVector,
419
    pmv: [MotionVector; 2]
420
  ) -> MotionVector {
Thomas Daede's avatar
Thomas Daede committed
421
    match fi.rec_buffer.frames[fi.ref_frames[ref_frame.to_index()] as usize]
422 423 424 425 426 427 428 429 430 431 432 433 434 435 436
    {
      Some(ref rec) => {
        let blk_w = bsize.width();
        let blk_h = bsize.height();
        let (mvx_min, mvx_max, mvy_min, mvy_max) =
          get_mv_range(fi.w_in_b, fi.h_in_b, bo, blk_w, blk_h);

        // 0.5 is a fudge factor
        let lambda = (fi.me_lambda * 256.0 * 0.5) as u32;

        // Full-pixel motion estimation

        let mut lowest_cost = std::u64::MAX;
        let mut best_mv = MotionVector::default();

437
        Self::full_pixel_me(fi, fs, rec, bo, lambda, cmv, pmv,
438 439
                           mvx_min, mvx_max, mvy_min, mvy_max, blk_w, blk_h,
                           &mut best_mv, &mut lowest_cost, ref_frame);
440

441
        Self::sub_pixel_me(fi, fs, rec, bo, lambda, pmv,
442
                           mvx_min, mvx_max, mvy_min, mvy_max, blk_w, blk_h,
443
                           &mut best_mv, &mut lowest_cost, ref_frame);
444 445

        best_mv
Frank Bossen's avatar
Frank Bossen committed
446 447
      }

448
      None => MotionVector::default()
449
    }
450
  }
451 452 453

  fn estimate_motion_ss2<T: Pixel>(
    fi: &FrameInvariants<T>, fs: &FrameState<T>, bsize: BlockSize, ref_idx: usize,
Romain Vimont's avatar
Romain Vimont committed
454
    bo: BlockOffset, pmvs: &[Option<MotionVector>; 3], ref_frame: usize
455 456 457 458 459
  ) -> Option<MotionVector> {
    if let Some(ref rec) = fi.rec_buffer.frames[ref_idx] {
      let blk_w = bsize.width();
      let blk_h = bsize.height();
      let bo_adj = adjust_bo(bo, fi, blk_w, blk_h);
Romain Vimont's avatar
Romain Vimont committed
460
      let (mvx_min, mvx_max, mvy_min, mvy_max) = get_mv_range(fi.w_in_b, fi.h_in_b, bo_adj, blk_w, blk_h);
461 462 463

      let global_mv = [MotionVector{row: 0, col: 0}; 2];
      let frame_mvs = &fs.frame_mvs[ref_frame];
464
      let frame_ref_opt = fi.rec_buffer.frames[fi.ref_frames[0] as usize].as_ref().map(Arc::as_ref);
465 466 467 468 469 470 471 472

      let mut lowest_cost = std::u64::MAX;
      let mut best_mv = MotionVector::default();

      // Divide by 4 to account for subsampling, 0.125 is a fudge factor
      let lambda = (fi.me_lambda * 256.0 / 4.0 * 0.125) as u32;

      Self::me_ss2(
473 474
        fi, fs, pmvs, bo_adj,
        frame_mvs, frame_ref_opt, rec, global_mv, lambda,
475 476 477 478 479 480 481 482 483 484 485 486 487
        mvx_min, mvx_max, mvy_min, mvy_max, blk_w, blk_h,
        &mut best_mv, &mut lowest_cost
      );

      Some(MotionVector { row: best_mv.row * 2, col: best_mv.col * 2 })
    } else {
      None
    }
  }

  fn me_ss2<T: Pixel>(
    fi: &FrameInvariants<T>, fs: &FrameState<T>,
    pmvs: &[Option<MotionVector>; 3], bo_adj_h: BlockOffset,
488
    frame_mvs: &FrameMotionVectors, frame_ref_opt: Option<&ReferenceFrame<T>>,
489
    rec: &ReferenceFrame<T>, global_mv: [MotionVector; 2], lambda: u32,
490 491 492 493
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize,
    best_mv: &mut MotionVector, lowest_cost: &mut u64
  );
494
}
495

496 497 498
/// Motion-estimation strategy whose `MotionEstimation` impl runs
/// `diamond_me_search` for every stage.
pub struct DiamondSearch {}
/// Motion-estimation strategy whose `MotionEstimation` impl runs
/// `full_search` for full-pel stages and `telescopic_subpel_search` for
/// sub-pel refinement.
pub struct FullSearch {}

499 500
impl MotionEstimation for DiamondSearch {
  fn full_pixel_me<T: Pixel>(
501
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &ReferenceFrame<T>,
502
    bo: BlockOffset, lambda: u32,
503 504
    cmv: MotionVector, pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
505
    best_mv: &mut MotionVector, lowest_cost: &mut u64, ref_frame: RefType
506
  ) {
Thomas Daede's avatar
Thomas Daede committed
507
    let frame_mvs = &fs.frame_mvs[ref_frame.to_index()];
508
    let frame_ref = fi.rec_buffer.frames[fi.ref_frames[0] as usize].as_ref().map(Arc::as_ref);
509
    let predictors =
Thomas Daede's avatar
Thomas Daede committed
510
      get_subset_predictors(bo, cmv, fi.w_in_b, fi.h_in_b, frame_mvs, frame_ref, ref_frame.to_index());
511 512 513

    diamond_me_search(
      fi,
514
      bo.to_luma_plane_offset(),
515 516 517 518 519 520 521 522 523 524 525 526 527 528
      &fs.input.planes[0],
      &rec.frame.planes[0],
      &predictors,
      fi.sequence.bit_depth,
      pmv,
      lambda,
      mvx_min,
      mvx_max,
      mvy_min,
      mvy_max,
      blk_w,
      blk_h,
      best_mv,
      lowest_cost,
529
      false,
530 531 532
      ref_frame
    );
  }
533 534

  fn sub_pixel_me<T: Pixel>(
535
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &ReferenceFrame<T>,
536
    bo: BlockOffset, lambda: u32,
537 538
    pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
539
    best_mv: &mut MotionVector, lowest_cost: &mut u64, ref_frame: RefType,
540 541 542 543 544
  )
  {
    let predictors = vec![*best_mv];
    diamond_me_search(
      fi,
545
      bo.to_luma_plane_offset(),
546 547 548 549 550 551 552 553 554 555 556 557 558 559
      &fs.input.planes[0],
      &rec.frame.planes[0],
      &predictors,
      fi.sequence.bit_depth,
      pmv,
      lambda,
      mvx_min,
      mvx_max,
      mvy_min,
      mvy_max,
      blk_w,
      blk_h,
      best_mv,
      lowest_cost,
560
      true,
561 562 563
      ref_frame
    );
  }
564 565 566

  fn me_ss2<T: Pixel>(
    fi: &FrameInvariants<T>, fs: &FrameState<T>,
567
    pmvs: &[Option<MotionVector>; 3], bo_adj: BlockOffset,
568
    frame_mvs: &FrameMotionVectors, frame_ref_opt: Option<&ReferenceFrame<T>>,
569
    rec: &ReferenceFrame<T>, global_mv: [MotionVector; 2], lambda: u32,
570 571 572 573
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize,
    best_mv: &mut MotionVector, lowest_cost: &mut u64
  ) {
574 575 576 577
    let po = PlaneOffset {
      x: (bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
      y: (bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
    };
578 579 580
    for omv in pmvs.iter() {
      if let Some(pmv) = omv {
        let mut predictors = get_subset_predictors::<T>(
581
          bo_adj,
582 583 584 585 586 587 588 589 590 591 592
          MotionVector{row: pmv.row, col: pmv.col},
          fi.w_in_b, fi.h_in_b,
          &frame_mvs, frame_ref_opt, 0
        );

        for predictor in &mut predictors {
          predictor.row >>= 1;
          predictor.col >>= 1;
        }

        diamond_me_search(
Romain Vimont's avatar
Romain Vimont committed
593
          fi, po,
594 595 596 597 598 599
          &fs.input_hres, &rec.input_hres,
          &predictors, fi.sequence.bit_depth,
          global_mv, lambda,
          mvx_min >> 1, mvx_max >> 1, mvy_min >> 1, mvy_max >> 1,
          blk_w >> 1, blk_h >> 1,
          best_mv, lowest_cost,
Thomas Daede's avatar
Thomas Daede committed
600
          false, LAST_FRAME
601 602 603 604
        );
      }
    }
  }
605 606 607 608
}

impl MotionEstimation for FullSearch {
  fn full_pixel_me<T: Pixel>(
609
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &ReferenceFrame<T>,
610
    bo: BlockOffset, lambda: u32,
611 612
    cmv: MotionVector, pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
613
    best_mv: &mut MotionVector, lowest_cost: &mut u64, _ref_frame: RefType
614
  ) {
615
    let po = bo.to_luma_plane_offset();
616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636
    let range = 16;
    let x_lo = po.x
      + ((-range + (cmv.col / 8) as isize).max(mvx_min / 8).min(mvx_max / 8));
    let x_hi = po.x
      + ((range + (cmv.col / 8) as isize).max(mvx_min / 8).min(mvx_max / 8));
    let y_lo = po.y
      + ((-range + (cmv.row / 8) as isize).max(mvy_min / 8).min(mvy_max / 8));
    let y_hi = po.y
      + ((range + (cmv.row / 8) as isize).max(mvy_min / 8).min(mvy_max / 8));

    full_search(
      x_lo,
      x_hi,
      y_lo,
      y_hi,
      blk_h,
      blk_w,
      &fs.input.planes[0],
      &rec.frame.planes[0],
      best_mv,
      lowest_cost,
Romain Vimont's avatar
Romain Vimont committed
637
      po,
638 639 640 641 642 643 644
      2,
      fi.sequence.bit_depth,
      lambda,
      pmv,
      fi.allow_high_precision_mv
    );
  }
645 646

  fn sub_pixel_me<T: Pixel>(
647
    fi: &FrameInvariants<T>, fs: &FrameState<T>, _rec: &ReferenceFrame<T>,
648
    bo: BlockOffset, lambda: u32,
649
    pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
650
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
651
    best_mv: &mut MotionVector, lowest_cost: &mut u64, ref_frame: RefType,
652 653 654 655 656
  )
  {
    telescopic_subpel_search(
      fi,
      fs,
657
      bo.to_luma_plane_offset(),
658 659 660 661 662 663 664
      lambda,
      ref_frame,
      pmv,
      mvx_min,
      mvx_max,
      mvy_min,
      mvy_max,
665 666
      blk_w,
      blk_h,
667 668 669 670
      best_mv,
      lowest_cost
    );
  }
671 672 673

  fn me_ss2<T: Pixel>(
    fi: &FrameInvariants<T>, fs: &FrameState<T>,
674
    pmvs: &[Option<MotionVector>; 3], bo_adj: BlockOffset,
675
    _frame_mvs: &FrameMotionVectors, _frame_ref_opt: Option<&ReferenceFrame<T>>,
676
    rec: &ReferenceFrame<T>, _global_mv: [MotionVector; 2], lambda: u32,
677 678 679 680
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize,
    best_mv: &mut MotionVector, lowest_cost: &mut u64
  ) {
681 682 683 684
    let po = PlaneOffset {
      x: (bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
      y: (bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
    };
685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702
    let range = 16;
    for omv in pmvs.iter() {
      if let Some(pmv) = omv {
        let x_lo = po.x + (((pmv.col as isize / 8 - range).max(mvx_min / 8).min(mvx_max / 8)) >> 1);
        let x_hi = po.x + (((pmv.col as isize / 8 + range).max(mvx_min / 8).min(mvx_max / 8)) >> 1);
        let y_lo = po.y + (((pmv.row as isize / 8 - range).max(mvy_min / 8).min(mvy_max / 8)) >> 1);
        let y_hi = po.y + (((pmv.row as isize / 8 + range).max(mvy_min / 8).min(mvy_max / 8)) >> 1);
        full_search(
          x_lo,
          x_hi,
          y_lo,
          y_hi,
          blk_h >> 1,
          blk_w >> 1,
          &fs.input_hres,
          &rec.input_hres,
          best_mv,
          lowest_cost,
Romain Vimont's avatar
Romain Vimont committed
703
          po,
704 705 706 707 708 709 710 711 712
          1,
          fi.sequence.bit_depth,
          lambda,
          [MotionVector::default(); 2],
          fi.allow_high_precision_mv
        );
      }
    }
  }
713
}
714

715 716
fn get_best_predictor<T: Pixel>(
  fi: &FrameInvariants<T>,
Romain Vimont's avatar
Romain Vimont committed
717
  po: PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>,
718
  predictors: &[MotionVector],
719
  bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
720 721
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
722
  center_mv: &mut MotionVector, center_mv_cost: &mut u64,
Thomas Daede's avatar
Thomas Daede committed
723
  tmp_plane_opt: &mut Option<Plane<T>>, ref_frame: RefType) {
Vladimir Kazakov's avatar
Vladimir Kazakov committed
724
  *center_mv = MotionVector::default();
725 726 727 728 729 730
  *center_mv_cost = std::u64::MAX;

  for &init_mv in predictors.iter() {
    let cost = get_mv_rd_cost(
      fi, po, p_org, p_ref, bit_depth,
      pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
731
      blk_w, blk_h, init_mv, tmp_plane_opt, ref_frame);
732 733 734 735 736 737 738 739

    if cost < *center_mv_cost {
      *center_mv = init_mv;
      *center_mv_cost = cost;
    }
  }
}

740 741
fn diamond_me_search<T: Pixel>(
  fi: &FrameInvariants<T>,
Romain Vimont's avatar
Romain Vimont committed
742
  po: PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>,
743
  predictors: &[MotionVector],
744
  bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
745 746
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
747
  center_mv: &mut MotionVector, center_mv_cost: &mut u64,
Thomas Daede's avatar
Thomas Daede committed
748
  subpixel: bool, ref_frame: RefType)
749 750
{
  let diamond_pattern = [(1i16, 0i16), (0, 1), (-1, 0), (0, -1)];
751 752
  let (mut diamond_radius, diamond_radius_end, mut tmp_plane_opt) = {
    if subpixel {
753
      // Sub-pixel motion estimation
754 755 756 757 758
      (
        4i16,
        if fi.allow_high_precision_mv {1i16} else {2i16},
        Some(Plane::new(blk_w, blk_h, 0, 0, 0, 0)),
      )
759 760
    } else {
      // Full pixel motion estimation
761
      (16i16, 8i16, None)
762 763
    }
  };
764 765 766 767

  get_best_predictor(
    fi, po, p_org, p_ref, &predictors,
    bit_depth, pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
768
    blk_w, blk_h, center_mv, center_mv_cost,
769
    &mut tmp_plane_opt, ref_frame);
770 771 772

  loop {
    let mut best_diamond_rd_cost = std::u64::MAX;
Vladimir Kazakov's avatar
Vladimir Kazakov committed
773
    let mut best_diamond_mv = MotionVector::default();
774 775 776 777 778 779 780 781 782

    for p in diamond_pattern.iter() {

        let cand_mv = MotionVector {
          row: center_mv.row + diamond_radius * p.0,
          col: center_mv.col + diamond_radius * p.1
        };

        let rd_cost = get_mv_rd_cost(
Romain Vimont's avatar
Romain Vimont committed
783
          fi, po, p_org, p_ref, bit_depth,
784
          pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
785
          blk_w, blk_h, cand_mv, &mut tmp_plane_opt, ref_frame);
786 787 788 789 790 791 792 793

        if rd_cost < best_diamond_rd_cost {
          best_diamond_rd_cost = rd_cost;
          best_diamond_mv = cand_mv;
        }
    }

    if *center_mv_cost <= best_diamond_rd_cost {
794
      if diamond_radius == diamond_radius_end {
795 796 797 798 799 800 801 802 803 804 805 806 807 808
        break;
      } else {
        diamond_radius /= 2;
      }
    }
    else {
      *center_mv = best_diamond_mv;
      *center_mv_cost = best_diamond_rd_cost;
    }
  }

  assert!(*center_mv_cost < std::u64::MAX);
}

809 810
fn get_mv_rd_cost<T: Pixel>(
  fi: &FrameInvariants<T>,
Romain Vimont's avatar
Romain Vimont committed
811
  po: PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>, bit_depth: usize,
812
  pmv: [MotionVector; 2], lambda: u32,
813 814
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
815
  cand_mv: MotionVector, tmp_plane_opt: &mut Option<Plane<T>>,
Thomas Daede's avatar
Thomas Daede committed
816
  ref_frame: RefType) -> u64
817 818 819 820 821 822 823 824 825 826
{
  if (cand_mv.col as isize) < mvx_min || (cand_mv.col as isize) > mvx_max {
    return std::u64::MAX;
  }
  if (cand_mv.row as isize) < mvy_min || (cand_mv.row as isize) > mvy_max {
    return std::u64::MAX;
  }

  let plane_org = p_org.slice(po);

827
  if let Some(ref mut tmp_plane) = tmp_plane_opt {
Romain Vimont's avatar
Romain Vimont committed
828
    let mut tmp_slice = &mut tmp_plane.mut_slice(PlaneOffset { x: 0, y: 0 });
829 830 831
    PredictionMode::NEWMV.predict_inter(
      fi,
      0,
Romain Vimont's avatar
Romain Vimont committed
832
      po,
833 834 835 836 837 838
      &mut tmp_slice,
      blk_w,
      blk_h,
      [ref_frame, NONE_FRAME],
      [cand_mv, MotionVector { row: 0, col: 0 }]
    );
Romain Vimont's avatar
Romain Vimont committed
839
    let plane_ref = tmp_plane.slice(PlaneOffset { x: 0, y: 0 });
840 841 842 843 844 845
    compute_mv_rd_cost(
      fi, pmv, lambda, bit_depth, blk_w, blk_h, cand_mv,
      &plane_org, &plane_ref
    )
  } else {
    // Full pixel motion vector
Romain Vimont's avatar
Romain Vimont committed
846
    let plane_ref = p_ref.slice(PlaneOffset {
847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863
      x: po.x + (cand_mv.col / 8) as isize,
      y: po.y + (cand_mv.row / 8) as isize
    });
    compute_mv_rd_cost(
      fi, pmv, lambda, bit_depth, blk_w, blk_h, cand_mv,
      &plane_org, &plane_ref
    )
  }
}

fn compute_mv_rd_cost<T: Pixel>(
  fi: &FrameInvariants<T>,
  pmv: [MotionVector; 2], lambda: u32,
  bit_depth: usize, blk_w: usize, blk_h: usize, cand_mv: MotionVector,
  plane_org: &PlaneSlice<T>, plane_ref: &PlaneSlice<T>
) -> u64
{
864 865 866 867 868 869 870 871 872
  let sad = get_sad(&plane_org, &plane_ref, blk_h, blk_w, bit_depth);

  let rate1 = get_mv_rate(cand_mv, pmv[0], fi.allow_high_precision_mv);
  let rate2 = get_mv_rate(cand_mv, pmv[1], fi.allow_high_precision_mv);
  let rate = rate1.min(rate2 + 1);

  256 * sad as u64 + rate as u64 * lambda as u64
}

873
fn telescopic_subpel_search<T: Pixel>(
874
  fi: &FrameInvariants<T>, fs: &FrameState<T>, po: PlaneOffset,
Thomas Daede's avatar
Thomas Daede committed
875
  lambda: u32, ref_frame: RefType, pmv: [MotionVector; 2],
876
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
877
  blk_w: usize, blk_h: usize,
Romain Vimont's avatar
Romain Vimont committed
878
  best_mv: &mut MotionVector, lowest_cost: &mut u64
879 880 881 882 883 884 885 886
) {
  let mode = PredictionMode::NEWMV;

  let mut steps = vec![8, 4, 2];
  if fi.allow_high_precision_mv {
    steps.push(1);
  }

Romain Vimont's avatar
Romain Vimont committed
887 888
  let mut tmp_plane = Plane::new(blk_w, blk_h, 0, 0, 0, 0);

889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911
  for step in steps {
    let center_mv_h = *best_mv;
    for i in 0..3 {
      for j in 0..3 {
        // Skip the center point that was already tested
        if i == 1 && j == 1 {
          continue;
        }

        let cand_mv = MotionVector {
          row: center_mv_h.row + step * (i as i16 - 1),
          col: center_mv_h.col + step * (j as i16 - 1)
        };

        if (cand_mv.col as isize) < mvx_min || (cand_mv.col as isize) > mvx_max {
          continue;
        }
        if (cand_mv.row as isize) < mvy_min || (cand_mv.row as isize) > mvy_max {
          continue;
        }

        {
          let tmp_slice =
Romain Vimont's avatar
Romain Vimont committed
912
            &mut tmp_plane.mut_slice(PlaneOffset { x: 0, y: 0 });
913 914 915 916

          mode.predict_inter(
            fi,
            0,
Romain Vimont's avatar
Romain Vimont committed
917
            po,
918 919 920 921 922 923 924 925
            tmp_slice,
            blk_w,
            blk_h,
            [ref_frame, NONE_FRAME],
            [cand_mv, MotionVector { row: 0, col: 0 }]
          );
        }

Romain Vimont's avatar
Romain Vimont committed
926 927
        let plane_org = fs.input.planes[0].slice(po);
        let plane_ref = tmp_plane.slice(PlaneOffset { x: 0, y: 0 });
928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944

        let sad = get_sad(&plane_org, &plane_ref, blk_h, blk_w, fi.sequence.bit_depth);

        let rate1 = get_mv_rate(cand_mv, pmv[0], fi.allow_high_precision_mv);
        let rate2 = get_mv_rate(cand_mv, pmv[1], fi.allow_high_precision_mv);
        let rate = rate1.min(rate2 + 1);
        let cost = 256 * sad as u64 + rate as u64 * lambda as u64;

        if cost < *lowest_cost {
          *lowest_cost = cost;
          *best_mv = cand_mv;
        }
      }
    }
  }
}

945
fn full_search<T: Pixel>(
Kyle Siefring's avatar
Kyle Siefring committed
946
  x_lo: isize, x_hi: isize, y_lo: isize, y_hi: isize, blk_h: usize,
947
  blk_w: usize, p_org: &Plane<T>, p_ref: &Plane<T>, best_mv: &mut MotionVector,
Romain Vimont's avatar
Romain Vimont committed
948
  lowest_cost: &mut u64, po: PlaneOffset, step: usize, bit_depth: usize,
949
  lambda: u32, pmv: [MotionVector; 2], allow_high_precision_mv: bool
Kyle Siefring's avatar
Kyle Siefring committed
950
) {
Luca Barbato's avatar
Luca Barbato committed
951 952 953 954 955
    let search_range_y = (y_lo..=y_hi).step_by(step);
    let search_range_x = (x_lo..=x_hi).step_by(step);
    let search_area = search_range_y.flat_map(|y| { search_range_x.clone().map(move |x| (y, x)) });

    let (cost, mv) = search_area.map(|(y, x)| {
956
      let plane_org = p_org.slice(po);
Romain Vimont's avatar
Romain Vimont committed
957
      let plane_ref = p_ref.slice(PlaneOffset { x, y });
958

Kyle Siefring's avatar
Kyle Siefring committed
959
      let sad = get_sad(&plane_org, &plane_ref, blk_h, blk_w, bit_depth);
960

Frank Bossen's avatar
Frank Bossen committed
961 962 963 964 965 966 967 968
      let mv = MotionVector {
        row: 8 * (y as i16 - po.y as i16),
        col: 8 * (x as i16 - po.x as i16)
      };

      let rate1 = get_mv_rate(mv, pmv[0], allow_high_precision_mv);
      let rate2 = get_mv_rate(mv, pmv[1], allow_high_precision_mv);
      let rate = rate1.min(rate2 + 1);
969
      let cost = 256 * sad as u64 + rate as u64 * lambda as u64;
Frank Bossen's avatar
Frank Bossen committed
970

Luca Barbato's avatar
Luca Barbato committed
971 972 973 974 975
      (cost, mv)
  }).min_by_key(|(c, _)| *c).unwrap();

    *lowest_cost = cost;
    *best_mv = mv;
976 977 978
}

// Adjust block offset such that entire block lies within frame boundaries
Romain Vimont's avatar
Romain Vimont committed
979
fn adjust_bo<T: Pixel>(bo: BlockOffset, fi: &FrameInvariants<T>, blk_w: usize, blk_h: usize) -> BlockOffset {
980 981 982 983 984 985
  BlockOffset {
    x: (bo.x as isize).min(fi.w_in_b as isize - blk_w as isize / 4).max(0) as usize,
    y: (bo.y as isize).min(fi.h_in_b as isize - blk_h as isize / 4).max(0) as usize
  }
}

986
#[inline(always)]
Frank Bossen's avatar
Frank Bossen committed
987
fn get_mv_rate(a: MotionVector, b: MotionVector, allow_high_precision_mv: bool) -> u32 {
988
  #[inline(always)]
Frank Bossen's avatar
Frank Bossen committed
989 990 991 992 993 994 995 996 997 998 999 1000
  fn diff_to_rate(diff: i16, allow_high_precision_mv: bool) -> u32 {
    let d = if allow_high_precision_mv { diff } else { diff >> 1 };
    if d == 0 {
      0
    } else {
      2 * (16 - d.abs().leading_zeros())
    }
  }

  diff_to_rate(a.row - b.row, allow_high_precision_mv) + diff_to_rate(a.col - b.col, allow_high_precision_mv)
}

1001 1002
pub fn estimate_motion_ss4<T: Pixel>(
  fi: &FrameInvariants<T>, fs: &FrameState<T>, bsize: BlockSize, ref_idx: usize,
Romain Vimont's avatar
Romain Vimont committed
1003
  bo: BlockOffset
1004 1005 1006 1007 1008 1009 1010 1011 1012
) -> Option<MotionVector> {
  if let Some(ref rec) = fi.rec_buffer.frames[ref_idx] {
    let blk_w = bsize.width();
    let blk_h = bsize.height();
    let bo_adj = adjust_bo(bo, fi, blk_w, blk_h);
    let po = PlaneOffset {
      x: (bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 2,
      y: (bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 2
    };
1013

1014 1015
    let range_x = 192 * fi.me_range_scale as isize;
    let range_y = 64 * fi.me_range_scale as isize;
Romain Vimont's avatar
Romain Vimont committed
1016
    let (mvx_min, mvx_max, mvy_min, mvy_max) = get_mv_range(fi.w_in_b, fi.h_in_b, bo_adj, blk_w, blk_h);
1017 1018 1019 1020
    let x_lo = po.x + (((-range_x).max(mvx_min / 8)) >> 2);
    let x_hi = po.x + (((range_x).min(mvx_max / 8)) >> 2);
    let y_lo = po.y + (((-range_y).max(mvy_min / 8)) >> 2);
    let y_hi = po.y + (((range_y).min(mvy_max / 8)) >> 2);
1021

1022
    let mut lowest_cost = std::u64::MAX;
Vladimir Kazakov's avatar
Vladimir Kazakov committed
1023
    let mut best_mv = MotionVector::default();
1024

Frank Bossen's avatar
Frank Bossen committed
1025
    // Divide by 16 to account for subsampling, 0.125 is a fudge factor
1026
    let lambda = (fi.me_lambda * 256.0 / 16.0 * 0.125) as u32;
Frank Bossen's avatar
Frank Bossen committed
1027

1028
    full_search(
Kyle Siefring's avatar
Kyle Siefring committed
1029 1030 1031 1032 1033 1034 1035 1036 1037
      x_lo,
      x_hi,
      y_lo,
      y_hi,
      blk_h >> 2,
      blk_w >> 2,
      &fs.input_qres,
      &rec.input_qres,
      &mut best_mv,
Frank Bossen's avatar
Frank Bossen committed
1038
      &mut lowest_cost,
Romain Vimont's avatar
Romain Vimont committed
1039
      po,
Kyle Siefring's avatar
Kyle Siefring committed
1040
      1,
1041
      fi.sequence.bit_depth,
Frank Bossen's avatar
Frank Bossen committed
1042
      lambda,
Vladimir Kazakov's avatar
Vladimir Kazakov committed
1043
      [MotionVector::default(); 2],
Frank Bossen's avatar
Frank Bossen committed
1044
      fi.allow_high_precision_mv
1045 1046 1047 1048 1049 1050 1051 1052
    );

    Some(MotionVector { row: best_mv.row * 4, col: best_mv.col * 4 })
  } else {
    None
  }
}

1053 1054 1055
#[cfg(test)]
pub mod test {
  use super::*;
Raphaël Zumer's avatar
Raphaël Zumer committed
1056 1057
  use crate::partition::BlockSize;
  use crate::partition::BlockSize::*;
1058 1059

  // Generate plane data for get_sad_same()
1060
  fn setup_sad<T: Pixel>() -> (Plane<T>, Plane<T>) {
1061 1062
    let mut input_plane = Plane::new(640, 480, 0, 0, 128 + 8, 128 + 8);
    let mut rec_plane = input_plane.clone();
1063 1064
    // Make the test pattern robust to data alignment
    let xpad_off = (input_plane.cfg.xorigin - input_plane.cfg.xpad) as i32 - 8i32;
Luca Barbato's avatar
Luca Barbato committed
1065

1066
    for (i, row) in input_plane.data.chunks_mut(input_plane.cfg.stride).enumerate() {
1067
      for (j, pixel) in row.into_iter().enumerate() {
1068
        let val = (j + i) as i32 - xpad_off & 255i32;
Luca Barbato's avatar
Luca Barbato committed
1069
        assert!(val >= u8::min_value().into() &&
1070
            val <= u8::max_value().into());
1071
        *pixel = T::cast_from(val);
1072 1073 1074 1075
      }
    }

    for (i, row) in rec_plane.data.chunks_mut(rec_plane.cfg.stride).enumerate() {
1076
      for (j, pixel) in row.into_iter().enumerate() {
1077
        let val = j as i32 - i as i32 - xpad_off & 255i32;
Luca Barbato's avatar
Luca Barbato committed
1078
        assert!(val >= u8::min_value().into() &&
1079
            val <= u8::max_value().into());
1080
        *pixel = T::cast_from(val);
1081 1082 1083 1084 1085 1086 1087
      }
    }

    (input_plane, rec_plane)
  }

  // Regression and validation test for SAD computation
1088
  fn get_sad_same_inner<T: Pixel>() {
1089
    let blocks: Vec<(BlockSize, u32)> = vec![
1090
      (BLOCK_4X4, 1912),
1091 1092
      (BLOCK_4X8, 4296),
      (BLOCK_8X4, 3496),
1093
      (BLOCK_8X8, 7824),
1094 1095
      (BLOCK_8X16, 16592),
      (BLOCK_16X8, 14416),
1096
      (BLOCK_16X16, 31136),
1097 1098
      (BLOCK_16X32, 60064),
      (BLOCK_32X16, 59552),
1099
      (BLOCK_32X32, 120128),
1100 1101
      (BLOCK_32X64, 186688),
      (BLOCK_64X32, 250176),
1102
      (BLOCK_64X64, 438912),
1103 1104
      (BLOCK_64X128, 654272),
      (BLOCK_128X64, 1016768),
1105
      (BLOCK_128X128, 1689792),