me.rs 35.2 KB
Newer Older
1 2 3 4 5 6 7 8 9
// Copyright (c) 2017-2018, The rav1e contributors. All rights reserved
//
// This source code is subject to the terms of the BSD 2 Clause License and
// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
// was not distributed with this source code in the LICENSE file, you can
// obtain it at www.aomedia.org/license/software. If the Alliance for Open
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.

10
#[cfg(all(target_arch = "x86_64", feature = "nasm"))]
Raphaël Zumer's avatar
Raphaël Zumer committed
11
pub use self::nasm::get_sad;
12
#[cfg(any(not(target_arch = "x86_64"), not(feature = "nasm")))]
Raphaël Zumer's avatar
Raphaël Zumer committed
13
pub use self::native::get_sad;
Raphaël Zumer's avatar
Raphaël Zumer committed
14
use crate::context::{BlockOffset, BLOCK_TO_PLANE_SHIFT, MI_SIZE};
15
use crate::encoder::ReferenceFrame;
Raphaël Zumer's avatar
Raphaël Zumer committed
16 17 18
use crate::FrameInvariants;
use crate::FrameState;
use crate::partition::*;
Thomas Daede's avatar
Thomas Daede committed
19
use crate::partition::RefType::*;
Raphaël Zumer's avatar
Raphaël Zumer committed
20
use crate::plane::*;
21
use crate::util::Pixel;
22

Romain Vimont's avatar
Romain Vimont committed
23
use std::ops::{Index, IndexMut};
24
use std::sync::Arc;
25

26
/// SAD (sum of absolute differences) implemented on top of external
/// assembly kernels. Only built for x86_64 with the `nasm` feature; the
/// instruction set that is actually used is picked at run time in `get_sad`.
#[cfg(all(target_arch = "x86_64", feature = "nasm"))]
mod nasm {
  use crate::plane::*;
  use crate::util::*;
  use std::mem;

  use libc;

  // Declares a family of external SAD kernels working on `$sample` data.
  // Every kernel shares one signature: two (pointer, stride) pairs in, the SAD
  // of one fixed-size square block out.
  macro_rules! declare_sad_kernels {
    ($sample:ty: $($kernel:ident),+) => {
      extern "C" {
        $(
          fn $kernel(
            src: *const $sample, src_stride: libc::ptrdiff_t,
            dst: *const $sample, dst_stride: libc::ptrdiff_t
          ) -> u32;
        )+
      }
    };
  }

  // 16-bit sample kernels. For bit depths above 10, `sad_hbd_ssse3` only ever
  // selects the 4x4 kernel.
  declare_sad_kernels!(u16:
    rav1e_sad_4x4_hbd_ssse3,
    rav1e_sad_8x8_hbd10_ssse3,
    rav1e_sad_16x16_hbd_ssse3,
    rav1e_sad_32x32_hbd10_ssse3,
    rav1e_sad_64x64_hbd10_ssse3,
    rav1e_sad_128x128_hbd10_ssse3
  );

  // 8-bit sample kernels (SSE2 and AVX2 flavours).
  declare_sad_kernels!(u8:
    rav1e_sad4x4_sse2,
    rav1e_sad8x8_sse2,
    rav1e_sad16x16_sse2,
    rav1e_sad32x32_sse2,
    rav1e_sad64x64_sse2,
    rav1e_sad128x128_sse2,
    rav1e_sad16x16_avx2,
    rav1e_sad32x32_avx2,
    rav1e_sad64x64_avx2,
    rav1e_sad128x128_avx2
  );

  /// SAD of a `blk_w` x `blk_h` block of 16-bit samples, computed by tiling
  /// the block with the largest square SSSE3 kernel that fits.
  ///
  /// # Safety
  /// The CPU must support SSSE3, and both slices must cover the whole block
  /// (the kernels read through raw pointers without bounds checks).
  ///
  /// # Panics
  /// Panics if either block dimension is smaller than 4.
  #[target_feature(enable = "ssse3")]
  unsafe fn sad_hbd_ssse3(
    plane_org: &PlaneSlice<'_, u16>, plane_ref: &PlaneSlice<'_, u16>, blk_h: usize,
    blk_w: usize, bit_depth: usize
  ) -> u32 {
    let mut sum = 0 as u32;
    // The stride is doubled before being handed to the kernels; presumably
    // cfg.stride counts samples and the kernels expect bytes.
    let org_stride = (plane_org.plane.cfg.stride * 2) as libc::ptrdiff_t;
    let ref_stride = (plane_ref.plane.cfg.stride * 2) as libc::ptrdiff_t;
    assert!(blk_h >= 4 && blk_w >= 4);
    // Above 10 bits the tile size is capped at 4, so only the 4x4 kernel runs.
    let step_size =
      blk_h.min(blk_w).min(if bit_depth <= 10 { 128 } else { 4 });
    let func = match step_size.ilog() {
      3 => rav1e_sad_4x4_hbd_ssse3,
      4 => rav1e_sad_8x8_hbd10_ssse3,
      5 => rav1e_sad_16x16_hbd_ssse3,
      6 => rav1e_sad_32x32_hbd10_ssse3,
      7 => rav1e_sad_64x64_hbd10_ssse3,
      8 => rav1e_sad_128x128_hbd10_ssse3,
      _ => rav1e_sad_128x128_hbd10_ssse3
    };
    for r in (0..blk_h).step_by(step_size) {
      for c in (0..blk_w).step_by(step_size) {
        let org_slice = plane_org.subslice(c, r);
        let ref_slice = plane_ref.subslice(c, r);
        // FIXME for now, T == u16
        let org_ptr = org_slice.as_ptr() as *const u16;
        let ref_ptr = ref_slice.as_ptr() as *const u16;
        sum += func(org_ptr, org_stride, ref_ptr, ref_stride);
      }
    }
    sum
  }

  /// SAD of a `blk_w` x `blk_h` block of 8-bit samples using the SSE2
  /// kernels. The tile size is additionally limited by the alignment of the
  /// source pointer.
  ///
  /// # Safety
  /// The CPU must support SSE2, and both slices must cover the whole block.
  ///
  /// # Panics
  /// Panics if either block dimension is smaller than 4.
  #[target_feature(enable = "sse2")]
  unsafe fn sad_sse2(
    plane_org: &PlaneSlice<'_, u8>, plane_ref: &PlaneSlice<'_, u8>, blk_h: usize,
    blk_w: usize
  ) -> u32 {
    // Same precondition as the sibling implementations: without it the
    // kernel lookup below would fall through to the 128x128 kernel.
    assert!(blk_h >= 4 && blk_w >= 4);
    let org_ptr = plane_org.as_ptr();
    let ref_ptr = plane_ref.as_ptr();
    let org_stride = plane_org.plane.cfg.stride as libc::ptrdiff_t;
    let ref_stride = plane_ref.plane.cfg.stride as libc::ptrdiff_t;
    // Fast path: a 16-byte aligned 16x16 block needs a single kernel call.
    if blk_w == 16 && blk_h == 16 && (org_ptr as usize & 15) == 0 {
      return rav1e_sad16x16_sse2(org_ptr, org_stride, ref_ptr, ref_stride);
    }
    // Note: unaligned blocks come from hres/qres ME search
    let ptr_align_log2 = (org_ptr as usize).trailing_zeros() as usize;
    // The largest unaligned-safe function is for 8x8
    let ptr_align = 1 << ptr_align_log2.max(3);
    let step_size = blk_h.min(blk_w).min(ptr_align);
    let func = match step_size.ilog() {
      3 => rav1e_sad4x4_sse2,
      4 => rav1e_sad8x8_sse2,
      5 => rav1e_sad16x16_sse2,
      6 => rav1e_sad32x32_sse2,
      7 => rav1e_sad64x64_sse2,
      8 => rav1e_sad128x128_sse2,
      _ => rav1e_sad128x128_sse2
    };
    let mut sum = 0 as u32;
    for r in (0..blk_h as isize).step_by(step_size) {
      for c in (0..blk_w as isize).step_by(step_size) {
        let org_ptr = org_ptr.offset(r * org_stride + c);
        let ref_ptr = ref_ptr.offset(r * ref_stride + c);
        sum += func(org_ptr, org_stride, ref_ptr, ref_stride);
      }
    }
    sum
  }

  /// SAD of a `blk_w` x `blk_h` block of 8-bit samples. Tiles of 16x16 and
  /// larger use the AVX2 kernels; 4x4 and 8x8 tiles reuse the SSE2 kernels.
  ///
  /// # Safety
  /// The CPU must support AVX2, and both slices must cover the whole block.
  ///
  /// # Panics
  /// Panics if either block dimension is smaller than 4.
  #[target_feature(enable = "avx2")]
  unsafe fn sad_avx2(
    plane_org: &PlaneSlice<'_, u8>, plane_ref: &PlaneSlice<'_, u8>, blk_h: usize,
    blk_w: usize
  ) -> u32 {
    let mut sum = 0 as u32;
    let org_stride = plane_org.plane.cfg.stride as libc::ptrdiff_t;
    let ref_stride = plane_ref.plane.cfg.stride as libc::ptrdiff_t;
    assert!(blk_h >= 4 && blk_w >= 4);
    let step_size = blk_h.min(blk_w);
    let func = match step_size.ilog() {
      3 => rav1e_sad4x4_sse2,
      4 => rav1e_sad8x8_sse2,
      5 => rav1e_sad16x16_avx2,
      6 => rav1e_sad32x32_avx2,
      7 => rav1e_sad64x64_avx2,
      8 => rav1e_sad128x128_avx2,
      _ => rav1e_sad128x128_avx2
    };
    for r in (0..blk_h).step_by(step_size) {
      for c in (0..blk_w).step_by(step_size) {
        let org_slice = plane_org.subslice(c, r);
        let ref_slice = plane_ref.subslice(c, r);
        // FIXME for now, T == u8
        let org_ptr = org_slice.as_ptr() as *const u8;
        let ref_ptr = ref_slice.as_ptr() as *const u8;
        sum += func(org_ptr, org_stride, ref_ptr, ref_stride);
      }
    }
    sum
  }

  /// Computes the SAD between two `blk_w` x `blk_h` blocks, dispatching at
  /// run time to the fastest available assembly implementation and falling
  /// back to the portable one (blocks smaller than 4 in either dimension, or
  /// no matching CPU feature).
  #[inline(always)]
  pub fn get_sad<T: Pixel>(
    plane_org: &PlaneSlice<'_, T>, plane_ref: &PlaneSlice<'_, T>, blk_h: usize,
    blk_w: usize, bit_depth: usize
  ) -> u32 {
    // The assembly kernels only exist for blocks of at least 4x4.
    let kernels_apply = blk_h >= 4 && blk_w >= 4;

    if kernels_apply
      && mem::size_of::<T>() == 2
      && is_x86_feature_detected!("ssse3")
    {
      // SAFETY: SSSE3 was just detected. The cast treats a 2-byte `T` as
      // `u16` (the code assumes this is the only 2-byte pixel type).
      return unsafe {
        let plane_org = &*(plane_org as *const _ as *const PlaneSlice<'_, u16>);
        let plane_ref = &*(plane_ref as *const _ as *const PlaneSlice<'_, u16>);
        sad_hbd_ssse3(plane_org, plane_ref, blk_h, blk_w, bit_depth)
      };
    }

    if kernels_apply && mem::size_of::<T>() == 1 {
      // `sad_avx2` is compiled with AVX2 enabled, so AVX2 itself (not merely
      // AVX) must be detected before calling it.
      if is_x86_feature_detected!("avx2") {
        // SAFETY: AVX2 was just detected. The cast treats a 1-byte `T` as
        // `u8` (the code assumes this is the only 1-byte pixel type).
        return unsafe {
          let plane_org = &*(plane_org as *const _ as *const PlaneSlice<'_, u8>);
          let plane_ref = &*(plane_ref as *const _ as *const PlaneSlice<'_, u8>);
          sad_avx2(plane_org, plane_ref, blk_h, blk_w)
        };
      }
      if is_x86_feature_detected!("sse2") {
        // SAFETY: SSE2 was just detected; same 1-byte `T` == `u8` assumption.
        return unsafe {
          let plane_org = &*(plane_org as *const _ as *const PlaneSlice<'_, u8>);
          let plane_ref = &*(plane_ref as *const _ as *const PlaneSlice<'_, u8>);
          sad_sse2(plane_org, plane_ref, blk_h, blk_w)
        };
      }
    }

    super::native::get_sad(plane_org, plane_ref, blk_h, blk_w, bit_depth)
  }
}

mod native {
Raphaël Zumer's avatar
Raphaël Zumer committed
256
  use crate::plane::*;
257
  use crate::util::*;
258

Luca Barbato's avatar
Luca Barbato committed
259
  #[inline(always)]
260 261
  pub fn get_sad<T: Pixel>(
    plane_org: &PlaneSlice<'_, T>, plane_ref: &PlaneSlice<'_, T>, blk_h: usize,
Luca Barbato's avatar
Luca Barbato committed
262 263 264
    blk_w: usize, _bit_depth: usize
  ) -> u32 {
    let mut sum = 0 as u32;
265

Luca Barbato's avatar
Luca Barbato committed
266 267
    let org_iter = plane_org.iter_width(blk_w);
    let ref_iter = plane_ref.iter_width(blk_w);
268

Luca Barbato's avatar
Luca Barbato committed
269
    for (slice_org, slice_ref) in org_iter.take(blk_h).zip(ref_iter) {
270 271 272
      sum += slice_org
        .iter()
        .zip(slice_ref)
273
        .map(|(&a, &b)| (i32::cast_from(a) - i32::cast_from(b)).abs() as u32)
274
        .sum::<u32>();
Luca Barbato's avatar
Luca Barbato committed
275
    }
276

Luca Barbato's avatar
Luca Barbato committed
277 278
    sum
  }
279 280
}

Romain Vimont's avatar
Romain Vimont committed
281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312
/// A `cols` x `rows` grid of motion vectors stored in one flat buffer.
/// Indexing with `[row]` yields that row as a slice of `cols` vectors.
#[derive(Debug, Clone)]
pub struct FrameMotionVectors {
  // Flat backing storage, `cols * rows` entries, one row after another.
  mvs: Box<[MotionVector]>,
  // Number of motion vectors per row.
  pub cols: usize,
  // Number of rows.
  pub rows: usize,
}

impl FrameMotionVectors {
  /// Creates a `cols` x `rows` grid with every entry set to the default
  /// motion vector.
  pub fn new(cols: usize, rows: usize) -> Self {
    let entries = vec![MotionVector::default(); cols * rows];
    Self { mvs: entries.into_boxed_slice(), cols, rows }
  }
}

/// Row access: `mvs[y]` is the slice holding the `cols` vectors of row `y`.
impl Index<usize> for FrameMotionVectors {
  type Output = [MotionVector];
  #[inline]
  fn index(&self, index: usize) -> &Self::Output {
    let row_start = index * self.cols;
    let row_end = (index + 1) * self.cols;
    &self.mvs[row_start..row_end]
  }
}

/// Mutable row access: `mvs[y]` is the slice holding the `cols` vectors of
/// row `y`.
impl IndexMut<usize> for FrameMotionVectors {
  #[inline]
  fn index_mut(&mut self, index: usize) -> &mut Self::Output {
    let row_start = index * self.cols;
    let row_end = (index + 1) * self.cols;
    &mut self.mvs[row_start..row_end]
  }
}

313
fn get_mv_range(
Romain Vimont's avatar
Romain Vimont committed
314
  w_in_b: usize, h_in_b: usize, bo: BlockOffset, blk_w: usize, blk_h: usize
315
) -> (isize, isize, isize, isize) {
316 317 318
  let border_w = 128 + blk_w as isize * 8;
  let border_h = 128 + blk_h as isize * 8;
  let mvx_min = -(bo.x as isize) * (8 * MI_SIZE) as isize - border_w;
319
  let mvx_max = (w_in_b - bo.x - blk_w / MI_SIZE) as isize * (8 * MI_SIZE) as isize + border_w;
320
  let mvy_min = -(bo.y as isize) * (8 * MI_SIZE) as isize - border_h;
321
  let mvy_max = (h_in_b - bo.y - blk_h / MI_SIZE) as isize * (8 * MI_SIZE) as isize + border_h;
322 323 324 325

  (mvx_min, mvx_max, mvy_min, mvy_max)
}

326
pub fn get_subset_predictors<T: Pixel>(
Romain Vimont's avatar
Romain Vimont committed
327
  bo: BlockOffset, cmv: MotionVector,
328
  w_in_b: usize, h_in_b: usize,
329
  frame_mvs: &FrameMotionVectors, frame_ref_opt: Option<&ReferenceFrame<T>>,
330
  ref_frame_id: usize
331 332 333
) -> (Vec<MotionVector>) {
  let mut predictors = Vec::new();

334 335 336 337 338 339
  // Zero motion vector
  predictors.push(MotionVector::default());

  // Coarse motion estimation.
  predictors.push(cmv.quantize_to_fullpel());

340 341
  // EPZS subset A and B predictors.

342
  let mut median_preds = Vec::new();
343
  if bo.x > 0 {
Romain Vimont's avatar
Romain Vimont committed
344
    let left = frame_mvs[bo.y][bo.x - 1];
345 346
    median_preds.push(left);
    if !left.is_zero() { predictors.push(left); }
347 348
  }
  if bo.y > 0 {
Romain Vimont's avatar
Romain Vimont committed
349
    let top = frame_mvs[bo.y - 1][bo.x];
350 351
    median_preds.push(top);
    if !top.is_zero() { predictors.push(top); }
352

353
    if bo.x < w_in_b - 1 {
Romain Vimont's avatar
Romain Vimont committed
354
      let top_right = frame_mvs[bo.y - 1][bo.x + 1];
355 356
      median_preds.push(top_right);
      if !top_right.is_zero() { predictors.push(top_right); }
357 358 359
    }
  }

360
  if !median_preds.is_empty() {
Vladimir Kazakov's avatar
Vladimir Kazakov committed
361
    let mut median_mv = MotionVector::default();
362
    for mv in median_preds.iter() {
363 364
      median_mv = median_mv + *mv;
    }
365 366 367
    median_mv = median_mv / (median_preds.len() as i16);
    let median_mv_quant = median_mv.quantize_to_fullpel();
    if !median_mv_quant.is_zero() { predictors.push(median_mv_quant); }
368 369 370 371 372
  }

  // EPZS subset C predictors.

  if let Some(ref frame_ref) = frame_ref_opt {
373
    let prev_frame_mvs = &frame_ref.frame_mvs[ref_frame_id];
374 375

    if bo.x > 0 {
Romain Vimont's avatar
Romain Vimont committed
376
      let left = prev_frame_mvs[bo.y][bo.x - 1];
377
      if !left.is_zero() { predictors.push(left); }
378 379
    }
    if bo.y > 0 {
Romain Vimont's avatar
Romain Vimont committed
380
      let top = prev_frame_mvs[bo.y - 1][bo.x];
381
      if !top.is_zero() { predictors.push(top); }
382
    }
383
    if bo.x < w_in_b - 1 {
Romain Vimont's avatar
Romain Vimont committed
384
      let right = prev_frame_mvs[bo.y][bo.x + 1];
385
      if !right.is_zero() { predictors.push(right); }
386
    }
387
    if bo.y < h_in_b - 1 {
Romain Vimont's avatar
Romain Vimont committed
388
      let bottom = prev_frame_mvs[bo.y + 1][bo.x];
389
      if !bottom.is_zero() { predictors.push(bottom); }
390 391
    }

392 393
    let previous = prev_frame_mvs[bo.y][bo.x];
    if !previous.is_zero() { predictors.push(previous); }
394 395 396 397 398
  }

  predictors
}

399
pub trait MotionEstimation {
400
  fn full_pixel_me<T: Pixel>(
401
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &ReferenceFrame<T>,
Romain Vimont's avatar
Romain Vimont committed
402
    bo: BlockOffset, lambda: u32,
403
    cmv: MotionVector, pmv: [MotionVector; 2],
404 405
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize, best_mv: &mut MotionVector,
Thomas Daede's avatar
Thomas Daede committed
406
    lowest_cost: &mut u64, ref_frame: RefType
407
  );
408

409
  fn sub_pixel_me<T: Pixel>(
410
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &ReferenceFrame<T>,
Romain Vimont's avatar
Romain Vimont committed
411
    bo: BlockOffset, lambda: u32, pmv: [MotionVector; 2],
412 413
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize, best_mv: &mut MotionVector,
Thomas Daede's avatar
Thomas Daede committed
414
    lowest_cost: &mut u64, ref_frame: RefType
415 416
  );

417 418
  fn motion_estimation<T: Pixel> (
    fi: &FrameInvariants<T>, fs: &FrameState<T>, bsize: BlockSize,
Thomas Daede's avatar
Thomas Daede committed
419
    bo: BlockOffset, ref_frame: RefType, cmv: MotionVector,
420
    pmv: [MotionVector; 2]
421
  ) -> MotionVector {
Thomas Daede's avatar
Thomas Daede committed
422
    match fi.rec_buffer.frames[fi.ref_frames[ref_frame.to_index()] as usize]
423 424 425 426 427 428 429 430 431 432 433 434 435 436 437
    {
      Some(ref rec) => {
        let blk_w = bsize.width();
        let blk_h = bsize.height();
        let (mvx_min, mvx_max, mvy_min, mvy_max) =
          get_mv_range(fi.w_in_b, fi.h_in_b, bo, blk_w, blk_h);

        // 0.5 is a fudge factor
        let lambda = (fi.me_lambda * 256.0 * 0.5) as u32;

        // Full-pixel motion estimation

        let mut lowest_cost = std::u64::MAX;
        let mut best_mv = MotionVector::default();

438
        Self::full_pixel_me(fi, fs, rec, bo, lambda, cmv, pmv,
439 440
                           mvx_min, mvx_max, mvy_min, mvy_max, blk_w, blk_h,
                           &mut best_mv, &mut lowest_cost, ref_frame);
441

442
        Self::sub_pixel_me(fi, fs, rec, bo, lambda, pmv,
443
                           mvx_min, mvx_max, mvy_min, mvy_max, blk_w, blk_h,
444
                           &mut best_mv, &mut lowest_cost, ref_frame);
445 446

        best_mv
Frank Bossen's avatar
Frank Bossen committed
447 448
      }

449
      None => MotionVector::default()
450
    }
451
  }
452 453 454

  fn estimate_motion_ss2<T: Pixel>(
    fi: &FrameInvariants<T>, fs: &FrameState<T>, bsize: BlockSize, ref_idx: usize,
Romain Vimont's avatar
Romain Vimont committed
455
    bo: BlockOffset, pmvs: &[Option<MotionVector>; 3], ref_frame: usize
456 457 458 459 460
  ) -> Option<MotionVector> {
    if let Some(ref rec) = fi.rec_buffer.frames[ref_idx] {
      let blk_w = bsize.width();
      let blk_h = bsize.height();
      let bo_adj = adjust_bo(bo, fi, blk_w, blk_h);
Romain Vimont's avatar
Romain Vimont committed
461
      let (mvx_min, mvx_max, mvy_min, mvy_max) = get_mv_range(fi.w_in_b, fi.h_in_b, bo_adj, blk_w, blk_h);
462 463 464

      let global_mv = [MotionVector{row: 0, col: 0}; 2];
      let frame_mvs = &fs.frame_mvs[ref_frame];
465
      let frame_ref_opt = fi.rec_buffer.frames[fi.ref_frames[0] as usize].as_ref().map(Arc::as_ref);
466 467 468 469 470 471 472 473

      let mut lowest_cost = std::u64::MAX;
      let mut best_mv = MotionVector::default();

      // Divide by 4 to account for subsampling, 0.125 is a fudge factor
      let lambda = (fi.me_lambda * 256.0 / 4.0 * 0.125) as u32;

      Self::me_ss2(
474 475
        fi, fs, pmvs, bo_adj,
        frame_mvs, frame_ref_opt, rec, global_mv, lambda,
476 477 478 479 480 481 482 483 484 485 486 487 488
        mvx_min, mvx_max, mvy_min, mvy_max, blk_w, blk_h,
        &mut best_mv, &mut lowest_cost
      );

      Some(MotionVector { row: best_mv.row * 2, col: best_mv.col * 2 })
    } else {
      None
    }
  }

  fn me_ss2<T: Pixel>(
    fi: &FrameInvariants<T>, fs: &FrameState<T>,
    pmvs: &[Option<MotionVector>; 3], bo_adj_h: BlockOffset,
489
    frame_mvs: &FrameMotionVectors, frame_ref_opt: Option<&ReferenceFrame<T>>,
490
    rec: &ReferenceFrame<T>, global_mv: [MotionVector; 2], lambda: u32,
491 492 493 494
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize,
    best_mv: &mut MotionVector, lowest_cost: &mut u64
  );
495
}
496

497 498 499
/// Motion estimation strategy built on `diamond_me_search`.
pub struct DiamondSearch {}
/// Motion estimation strategy built on `full_search` and
/// `telescopic_subpel_search`.
pub struct FullSearch {}

500 501
impl MotionEstimation for DiamondSearch {
  fn full_pixel_me<T: Pixel>(
502
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &ReferenceFrame<T>,
503
    bo: BlockOffset, lambda: u32,
504 505
    cmv: MotionVector, pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
506
    best_mv: &mut MotionVector, lowest_cost: &mut u64, ref_frame: RefType
507
  ) {
Thomas Daede's avatar
Thomas Daede committed
508
    let frame_mvs = &fs.frame_mvs[ref_frame.to_index()];
509
    let frame_ref = fi.rec_buffer.frames[fi.ref_frames[0] as usize].as_ref().map(Arc::as_ref);
510
    let predictors =
Thomas Daede's avatar
Thomas Daede committed
511
      get_subset_predictors(bo, cmv, fi.w_in_b, fi.h_in_b, frame_mvs, frame_ref, ref_frame.to_index());
512 513 514

    diamond_me_search(
      fi,
515
      bo.to_luma_plane_offset(),
516 517 518 519 520 521 522 523 524 525 526 527 528 529
      &fs.input.planes[0],
      &rec.frame.planes[0],
      &predictors,
      fi.sequence.bit_depth,
      pmv,
      lambda,
      mvx_min,
      mvx_max,
      mvy_min,
      mvy_max,
      blk_w,
      blk_h,
      best_mv,
      lowest_cost,
530
      false,
531 532 533
      ref_frame
    );
  }
534 535

  fn sub_pixel_me<T: Pixel>(
536
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &ReferenceFrame<T>,
537
    bo: BlockOffset, lambda: u32,
538 539
    pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
540
    best_mv: &mut MotionVector, lowest_cost: &mut u64, ref_frame: RefType,
541 542 543 544 545
  )
  {
    let predictors = vec![*best_mv];
    diamond_me_search(
      fi,
546
      bo.to_luma_plane_offset(),
547 548 549 550 551 552 553 554 555 556 557 558 559 560
      &fs.input.planes[0],
      &rec.frame.planes[0],
      &predictors,
      fi.sequence.bit_depth,
      pmv,
      lambda,
      mvx_min,
      mvx_max,
      mvy_min,
      mvy_max,
      blk_w,
      blk_h,
      best_mv,
      lowest_cost,
561
      true,
562 563 564
      ref_frame
    );
  }
565 566 567

  fn me_ss2<T: Pixel>(
    fi: &FrameInvariants<T>, fs: &FrameState<T>,
568
    pmvs: &[Option<MotionVector>; 3], bo_adj: BlockOffset,
569
    frame_mvs: &FrameMotionVectors, frame_ref_opt: Option<&ReferenceFrame<T>>,
570
    rec: &ReferenceFrame<T>, global_mv: [MotionVector; 2], lambda: u32,
571 572 573 574
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize,
    best_mv: &mut MotionVector, lowest_cost: &mut u64
  ) {
575 576 577 578
    let po = PlaneOffset {
      x: (bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
      y: (bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
    };
579 580 581
    for omv in pmvs.iter() {
      if let Some(pmv) = omv {
        let mut predictors = get_subset_predictors::<T>(
582
          bo_adj,
583 584 585 586 587 588 589 590 591 592 593
          MotionVector{row: pmv.row, col: pmv.col},
          fi.w_in_b, fi.h_in_b,
          &frame_mvs, frame_ref_opt, 0
        );

        for predictor in &mut predictors {
          predictor.row >>= 1;
          predictor.col >>= 1;
        }

        diamond_me_search(
Romain Vimont's avatar
Romain Vimont committed
594
          fi, po,
595 596 597 598 599 600
          &fs.input_hres, &rec.input_hres,
          &predictors, fi.sequence.bit_depth,
          global_mv, lambda,
          mvx_min >> 1, mvx_max >> 1, mvy_min >> 1, mvy_max >> 1,
          blk_w >> 1, blk_h >> 1,
          best_mv, lowest_cost,
Thomas Daede's avatar
Thomas Daede committed
601
          false, LAST_FRAME
602 603 604 605
        );
      }
    }
  }
606 607 608 609
}

impl MotionEstimation for FullSearch {
  fn full_pixel_me<T: Pixel>(
610
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &ReferenceFrame<T>,
611
    bo: BlockOffset, lambda: u32,
612 613
    cmv: MotionVector, pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
614
    best_mv: &mut MotionVector, lowest_cost: &mut u64, _ref_frame: RefType
615
  ) {
616
    let po = bo.to_luma_plane_offset();
617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637
    let range = 16;
    let x_lo = po.x
      + ((-range + (cmv.col / 8) as isize).max(mvx_min / 8).min(mvx_max / 8));
    let x_hi = po.x
      + ((range + (cmv.col / 8) as isize).max(mvx_min / 8).min(mvx_max / 8));
    let y_lo = po.y
      + ((-range + (cmv.row / 8) as isize).max(mvy_min / 8).min(mvy_max / 8));
    let y_hi = po.y
      + ((range + (cmv.row / 8) as isize).max(mvy_min / 8).min(mvy_max / 8));

    full_search(
      x_lo,
      x_hi,
      y_lo,
      y_hi,
      blk_h,
      blk_w,
      &fs.input.planes[0],
      &rec.frame.planes[0],
      best_mv,
      lowest_cost,
Romain Vimont's avatar
Romain Vimont committed
638
      po,
639 640 641 642 643 644 645
      2,
      fi.sequence.bit_depth,
      lambda,
      pmv,
      fi.allow_high_precision_mv
    );
  }
646 647

  fn sub_pixel_me<T: Pixel>(
648
    fi: &FrameInvariants<T>, fs: &FrameState<T>, _rec: &ReferenceFrame<T>,
649
    bo: BlockOffset, lambda: u32,
650
    pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
651
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
652
    best_mv: &mut MotionVector, lowest_cost: &mut u64, ref_frame: RefType,
653 654 655 656 657
  )
  {
    telescopic_subpel_search(
      fi,
      fs,
658
      bo.to_luma_plane_offset(),
659 660 661 662 663 664 665
      lambda,
      ref_frame,
      pmv,
      mvx_min,
      mvx_max,
      mvy_min,
      mvy_max,
666 667
      blk_w,
      blk_h,
668 669 670 671
      best_mv,
      lowest_cost
    );
  }
672 673 674

  fn me_ss2<T: Pixel>(
    fi: &FrameInvariants<T>, fs: &FrameState<T>,
675
    pmvs: &[Option<MotionVector>; 3], bo_adj: BlockOffset,
676
    _frame_mvs: &FrameMotionVectors, _frame_ref_opt: Option<&ReferenceFrame<T>>,
677
    rec: &ReferenceFrame<T>, _global_mv: [MotionVector; 2], lambda: u32,
678 679 680 681
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize,
    best_mv: &mut MotionVector, lowest_cost: &mut u64
  ) {
682 683 684 685
    let po = PlaneOffset {
      x: (bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
      y: (bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
    };
686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703
    let range = 16;
    for omv in pmvs.iter() {
      if let Some(pmv) = omv {
        let x_lo = po.x + (((pmv.col as isize / 8 - range).max(mvx_min / 8).min(mvx_max / 8)) >> 1);
        let x_hi = po.x + (((pmv.col as isize / 8 + range).max(mvx_min / 8).min(mvx_max / 8)) >> 1);
        let y_lo = po.y + (((pmv.row as isize / 8 - range).max(mvy_min / 8).min(mvy_max / 8)) >> 1);
        let y_hi = po.y + (((pmv.row as isize / 8 + range).max(mvy_min / 8).min(mvy_max / 8)) >> 1);
        full_search(
          x_lo,
          x_hi,
          y_lo,
          y_hi,
          blk_h >> 1,
          blk_w >> 1,
          &fs.input_hres,
          &rec.input_hres,
          best_mv,
          lowest_cost,
Romain Vimont's avatar
Romain Vimont committed
704
          po,
705 706 707 708 709 710 711 712 713
          1,
          fi.sequence.bit_depth,
          lambda,
          [MotionVector::default(); 2],
          fi.allow_high_precision_mv
        );
      }
    }
  }
714
}
715

716 717
fn get_best_predictor<T: Pixel>(
  fi: &FrameInvariants<T>,
Romain Vimont's avatar
Romain Vimont committed
718
  po: PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>,
719
  predictors: &[MotionVector],
720
  bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
721 722
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
723
  center_mv: &mut MotionVector, center_mv_cost: &mut u64,
Thomas Daede's avatar
Thomas Daede committed
724
  tmp_plane_opt: &mut Option<Plane<T>>, ref_frame: RefType) {
Vladimir Kazakov's avatar
Vladimir Kazakov committed
725
  *center_mv = MotionVector::default();
726 727 728 729 730 731
  *center_mv_cost = std::u64::MAX;

  for &init_mv in predictors.iter() {
    let cost = get_mv_rd_cost(
      fi, po, p_org, p_ref, bit_depth,
      pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
732
      blk_w, blk_h, init_mv, tmp_plane_opt, ref_frame);
733 734 735 736 737 738 739 740

    if cost < *center_mv_cost {
      *center_mv = init_mv;
      *center_mv_cost = cost;
    }
  }
}

741 742
fn diamond_me_search<T: Pixel>(
  fi: &FrameInvariants<T>,
Romain Vimont's avatar
Romain Vimont committed
743
  po: PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>,
744
  predictors: &[MotionVector],
745
  bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
746 747
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
748
  center_mv: &mut MotionVector, center_mv_cost: &mut u64,
Thomas Daede's avatar
Thomas Daede committed
749
  subpixel: bool, ref_frame: RefType)
750 751
{
  let diamond_pattern = [(1i16, 0i16), (0, 1), (-1, 0), (0, -1)];
752 753
  let (mut diamond_radius, diamond_radius_end, mut tmp_plane_opt) = {
    if subpixel {
754
      // Sub-pixel motion estimation
755 756 757 758 759
      (
        4i16,
        if fi.allow_high_precision_mv {1i16} else {2i16},
        Some(Plane::new(blk_w, blk_h, 0, 0, 0, 0)),
      )
760 761
    } else {
      // Full pixel motion estimation
762
      (16i16, 8i16, None)
763 764
    }
  };
765 766 767 768

  get_best_predictor(
    fi, po, p_org, p_ref, &predictors,
    bit_depth, pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
769
    blk_w, blk_h, center_mv, center_mv_cost,
770
    &mut tmp_plane_opt, ref_frame);
771 772 773

  loop {
    let mut best_diamond_rd_cost = std::u64::MAX;
Vladimir Kazakov's avatar
Vladimir Kazakov committed
774
    let mut best_diamond_mv = MotionVector::default();
775 776 777 778 779 780 781 782 783

    for p in diamond_pattern.iter() {

        let cand_mv = MotionVector {
          row: center_mv.row + diamond_radius * p.0,
          col: center_mv.col + diamond_radius * p.1
        };

        let rd_cost = get_mv_rd_cost(
Romain Vimont's avatar
Romain Vimont committed
784
          fi, po, p_org, p_ref, bit_depth,
785
          pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
786
          blk_w, blk_h, cand_mv, &mut tmp_plane_opt, ref_frame);
787 788 789 790 791 792 793 794

        if rd_cost < best_diamond_rd_cost {
          best_diamond_rd_cost = rd_cost;
          best_diamond_mv = cand_mv;
        }
    }

    if *center_mv_cost <= best_diamond_rd_cost {
795
      if diamond_radius == diamond_radius_end {
796 797 798 799 800 801 802 803 804 805 806 807 808 809
        break;
      } else {
        diamond_radius /= 2;
      }
    }
    else {
      *center_mv = best_diamond_mv;
      *center_mv_cost = best_diamond_rd_cost;
    }
  }

  assert!(*center_mv_cost < std::u64::MAX);
}

810 811
fn get_mv_rd_cost<T: Pixel>(
  fi: &FrameInvariants<T>,
Romain Vimont's avatar
Romain Vimont committed
812
  po: PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>, bit_depth: usize,
813
  pmv: [MotionVector; 2], lambda: u32,
814 815
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
816
  cand_mv: MotionVector, tmp_plane_opt: &mut Option<Plane<T>>,
Thomas Daede's avatar
Thomas Daede committed
817
  ref_frame: RefType) -> u64
818 819 820 821 822 823 824 825 826 827
{
  if (cand_mv.col as isize) < mvx_min || (cand_mv.col as isize) > mvx_max {
    return std::u64::MAX;
  }
  if (cand_mv.row as isize) < mvy_min || (cand_mv.row as isize) > mvy_max {
    return std::u64::MAX;
  }

  let plane_org = p_org.slice(po);

828
  if let Some(ref mut tmp_plane) = tmp_plane_opt {
Romain Vimont's avatar
Romain Vimont committed
829
    let mut tmp_slice = &mut tmp_plane.mut_slice(PlaneOffset { x: 0, y: 0 });
830 831 832
    PredictionMode::NEWMV.predict_inter(
      fi,
      0,
Romain Vimont's avatar
Romain Vimont committed
833
      po,
834 835 836 837 838 839
      &mut tmp_slice,
      blk_w,
      blk_h,
      [ref_frame, NONE_FRAME],
      [cand_mv, MotionVector { row: 0, col: 0 }]
    );
Romain Vimont's avatar
Romain Vimont committed
840
    let plane_ref = tmp_plane.slice(PlaneOffset { x: 0, y: 0 });
841 842 843 844 845 846
    compute_mv_rd_cost(
      fi, pmv, lambda, bit_depth, blk_w, blk_h, cand_mv,
      &plane_org, &plane_ref
    )
  } else {
    // Full pixel motion vector
Romain Vimont's avatar
Romain Vimont committed
847
    let plane_ref = p_ref.slice(PlaneOffset {
848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864
      x: po.x + (cand_mv.col / 8) as isize,
      y: po.y + (cand_mv.row / 8) as isize
    });
    compute_mv_rd_cost(
      fi, pmv, lambda, bit_depth, blk_w, blk_h, cand_mv,
      &plane_org, &plane_ref
    )
  }
}

fn compute_mv_rd_cost<T: Pixel>(
  fi: &FrameInvariants<T>,
  pmv: [MotionVector; 2], lambda: u32,
  bit_depth: usize, blk_w: usize, blk_h: usize, cand_mv: MotionVector,
  plane_org: &PlaneSlice<T>, plane_ref: &PlaneSlice<T>
) -> u64
{
865 866 867 868 869 870 871 872 873
  let sad = get_sad(&plane_org, &plane_ref, blk_h, blk_w, bit_depth);

  let rate1 = get_mv_rate(cand_mv, pmv[0], fi.allow_high_precision_mv);
  let rate2 = get_mv_rate(cand_mv, pmv[1], fi.allow_high_precision_mv);
  let rate = rate1.min(rate2 + 1);

  256 * sad as u64 + rate as u64 * lambda as u64
}

874
fn telescopic_subpel_search<T: Pixel>(
875
  fi: &FrameInvariants<T>, fs: &FrameState<T>, po: PlaneOffset,
Thomas Daede's avatar
Thomas Daede committed
876
  lambda: u32, ref_frame: RefType, pmv: [MotionVector; 2],
877
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
878
  blk_w: usize, blk_h: usize,
Romain Vimont's avatar
Romain Vimont committed
879
  best_mv: &mut MotionVector, lowest_cost: &mut u64
880 881 882 883 884 885 886 887
) {
  let mode = PredictionMode::NEWMV;

  let mut steps = vec![8, 4, 2];
  if fi.allow_high_precision_mv {
    steps.push(1);
  }

Romain Vimont's avatar
Romain Vimont committed
888 889
  let mut tmp_plane = Plane::new(blk_w, blk_h, 0, 0, 0, 0);

890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912
  for step in steps {
    let center_mv_h = *best_mv;
    for i in 0..3 {
      for j in 0..3 {
        // Skip the center point that was already tested
        if i == 1 && j == 1 {
          continue;
        }

        let cand_mv = MotionVector {
          row: center_mv_h.row + step * (i as i16 - 1),
          col: center_mv_h.col + step * (j as i16 - 1)
        };

        if (cand_mv.col as isize) < mvx_min || (cand_mv.col as isize) > mvx_max {
          continue;
        }
        if (cand_mv.row as isize) < mvy_min || (cand_mv.row as isize) > mvy_max {
          continue;
        }

        {
          let tmp_slice =
Romain Vimont's avatar
Romain Vimont committed
913
            &mut tmp_plane.mut_slice(PlaneOffset { x: 0, y: 0 });
914 915 916 917

          mode.predict_inter(
            fi,
            0,
Romain Vimont's avatar
Romain Vimont committed
918
            po,
919 920 921 922 923 924 925 926
            tmp_slice,
            blk_w,
            blk_h,
            [ref_frame, NONE_FRAME],
            [cand_mv, MotionVector { row: 0, col: 0 }]
          );
        }

Romain Vimont's avatar
Romain Vimont committed
927 928
        let plane_org = fs.input.planes[0].slice(po);
        let plane_ref = tmp_plane.slice(PlaneOffset { x: 0, y: 0 });
929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945

        let sad = get_sad(&plane_org, &plane_ref, blk_h, blk_w, fi.sequence.bit_depth);

        let rate1 = get_mv_rate(cand_mv, pmv[0], fi.allow_high_precision_mv);
        let rate2 = get_mv_rate(cand_mv, pmv[1], fi.allow_high_precision_mv);
        let rate = rate1.min(rate2 + 1);
        let cost = 256 * sad as u64 + rate as u64 * lambda as u64;

        if cost < *lowest_cost {
          *lowest_cost = cost;
          *best_mv = cand_mv;
        }
      }
    }
  }
}

946
fn full_search<T: Pixel>(
Kyle Siefring's avatar
Kyle Siefring committed
947
  x_lo: isize, x_hi: isize, y_lo: isize, y_hi: isize, blk_h: usize,
948
  blk_w: usize, p_org: &Plane<T>, p_ref: &Plane<T>, best_mv: &mut MotionVector,
Romain Vimont's avatar
Romain Vimont committed
949
  lowest_cost: &mut u64, po: PlaneOffset, step: usize, bit_depth: usize,
950
  lambda: u32, pmv: [MotionVector; 2], allow_high_precision_mv: bool
Kyle Siefring's avatar
Kyle Siefring committed
951
) {
Luca Barbato's avatar
Luca Barbato committed
952 953 954 955 956
    let search_range_y = (y_lo..=y_hi).step_by(step);
    let search_range_x = (x_lo..=x_hi).step_by(step);
    let search_area = search_range_y.flat_map(|y| { search_range_x.clone().map(move |x| (y, x)) });

    let (cost, mv) = search_area.map(|(y, x)| {
957
      let plane_org = p_org.slice(po);
Romain Vimont's avatar
Romain Vimont committed
958
      let plane_ref = p_ref.slice(PlaneOffset { x, y });
959

Kyle Siefring's avatar
Kyle Siefring committed
960
      let sad = get_sad(&plane_org, &plane_ref, blk_h, blk_w, bit_depth);
961

Frank Bossen's avatar
Frank Bossen committed
962 963 964 965 966 967 968 969
      let mv = MotionVector {
        row: 8 * (y as i16 - po.y as i16),
        col: 8 * (x as i16 - po.x as i16)
      };

      let rate1 = get_mv_rate(mv, pmv[0], allow_high_precision_mv);
      let rate2 = get_mv_rate(mv, pmv[1], allow_high_precision_mv);
      let rate = rate1.min(rate2 + 1);
970
      let cost = 256 * sad as u64 + rate as u64 * lambda as u64;
Frank Bossen's avatar
Frank Bossen committed
971

Luca Barbato's avatar
Luca Barbato committed
972 973 974 975 976
      (cost, mv)
  }).min_by_key(|(c, _)| *c).unwrap();

    *lowest_cost = cost;
    *best_mv = mv;
977 978 979
}

// Adjust block offset such that entire block lies within frame boundaries
Romain Vimont's avatar
Romain Vimont committed
980
fn adjust_bo<T: Pixel>(bo: BlockOffset, fi: &FrameInvariants<T>, blk_w: usize, blk_h: usize) -> BlockOffset {
981 982 983 984 985 986
  BlockOffset {
    x: (bo.x as isize).min(fi.w_in_b as isize - blk_w as isize / 4).max(0) as usize,
    y: (bo.y as isize).min(fi.h_in_b as isize - blk_h as isize / 4).max(0) as usize
  }
}

987
#[inline(always)]
Frank Bossen's avatar
Frank Bossen committed
988
fn get_mv_rate(a: MotionVector, b: MotionVector, allow_high_precision_mv: bool) -> u32 {
989
  #[inline(always)]
Frank Bossen's avatar
Frank Bossen committed
990 991 992 993 994 995 996 997 998 999 1000 1001
  fn diff_to_rate(diff: i16, allow_high_precision_mv: bool) -> u32 {
    let d = if allow_high_precision_mv { diff } else { diff >> 1 };
    if d == 0 {
      0
    } else {
      2 * (16 - d.abs().leading_zeros())
    }
  }

  diff_to_rate(a.row - b.row, allow_high_precision_mv) + diff_to_rate(a.col - b.col, allow_high_precision_mv)
}

1002 1003
pub fn estimate_motion_ss4<T: Pixel>(
  fi: &FrameInvariants<T>, fs: &FrameState<T>, bsize: BlockSize, ref_idx: usize,
Romain Vimont's avatar
Romain Vimont committed
1004
  bo: BlockOffset
1005 1006 1007 1008 1009 1010 1011 1012 1013
) -> Option<MotionVector> {
  if let Some(ref rec) = fi.rec_buffer.frames[ref_idx] {
    let blk_w = bsize.width();
    let blk_h = bsize.height();
    let bo_adj = adjust_bo(bo, fi, blk_w, blk_h);
    let po = PlaneOffset {
      x: (bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 2,
      y: (bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 2
    };
1014

1015 1016
    let range_x = 192 * fi.me_range_scale as isize;
    let range_y = 64 * fi.me_range_scale as isize;
Romain Vimont's avatar
Romain Vimont committed
1017
    let (mvx_min, mvx_max, mvy_min, mvy_max) = get_mv_range(fi.w_in_b, fi.h_in_b, bo_adj, blk_w, blk_h);
1018 1019 1020 1021
    let x_lo = po.x + (((-range_x).max(mvx_min / 8)) >> 2);
    let x_hi = po.x + (((range_x).min