me.rs 35.5 KB
Newer Older
1 2 3 4 5 6 7 8 9
// Copyright (c) 2017-2018, The rav1e contributors. All rights reserved
//
// This source code is subject to the terms of the BSD 2 Clause License and
// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
// was not distributed with this source code in the LICENSE file, you can
// obtain it at www.aomedia.org/license/software. If the Alliance for Open
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.

10
#[cfg(all(target_arch = "x86_64", feature = "nasm"))]
Raphaël Zumer's avatar
Raphaël Zumer committed
11
pub use self::nasm::get_sad;
12
#[cfg(any(not(target_arch = "x86_64"), not(feature = "nasm")))]
Raphaël Zumer's avatar
Raphaël Zumer committed
13
pub use self::native::get_sad;
Raphaël Zumer's avatar
Raphaël Zumer committed
14
use crate::context::{BlockOffset, BLOCK_TO_PLANE_SHIFT, MI_SIZE};
15
use crate::encoder::ReferenceFrame;
Raphaël Zumer's avatar
Raphaël Zumer committed
16 17
use crate::FrameInvariants;
use crate::partition::*;
Thomas Daede's avatar
Thomas Daede committed
18
use crate::partition::RefType::*;
Raphaël Zumer's avatar
Raphaël Zumer committed
19
use crate::plane::*;
20
use crate::tiling::*;
21
use crate::util::Pixel;
22

Romain Vimont's avatar
Romain Vimont committed
23
use std::ops::{Index, IndexMut};
24
use std::sync::Arc;
25

26
#[cfg(all(target_arch = "x86_64", feature = "nasm"))]
27
mod nasm {
Raphaël Zumer's avatar
Raphaël Zumer committed
28 29
  use crate::plane::*;
  use crate::util::*;
30
  use std::mem;
Luca Barbato's avatar
Luca Barbato committed
31

Raphaël Zumer's avatar
Raphaël Zumer committed
32 33
  use libc;

34 35 36 37 38 39 40 41 42
  macro_rules! declare_asm_sad {
    ($(($name: ident, $T: ident)),+) => (
      $(
        extern { fn $name (
          src: *const $T, src_stride: libc::ptrdiff_t, dst: *const $T,
          dst_stride: libc::ptrdiff_t
        ) -> u32; }
      )+
    )
Luca Barbato's avatar
Luca Barbato committed
43
  }
Kyle Siefring's avatar
Kyle Siefring committed
44

45
  declare_asm_sad![
46
    // SSSE3
47 48 49 50 51 52 53
    (rav1e_sad_4x4_hbd_ssse3, u16),
    (rav1e_sad_8x8_hbd10_ssse3, u16),
    (rav1e_sad_16x16_hbd_ssse3, u16),
    (rav1e_sad_32x32_hbd10_ssse3, u16),
    (rav1e_sad_64x64_hbd10_ssse3, u16),
    (rav1e_sad_128x128_hbd10_ssse3, u16),

54
    // SSE2
55
    (rav1e_sad4x4_sse2, u8),
56 57 58 59
    (rav1e_sad4x8_sse2, u8),
    (rav1e_sad4x16_sse2, u8),

    (rav1e_sad8x4_sse2, u8),
60
    (rav1e_sad8x8_sse2, u8),
61 62 63
    (rav1e_sad8x16_sse2, u8),
    (rav1e_sad8x32_sse2, u8),

64
    (rav1e_sad16x16_sse2, u8),
65

66
    (rav1e_sad32x32_sse2, u8),
67

68
    (rav1e_sad64x64_sse2, u8),
69

70 71
    (rav1e_sad128x128_sse2, u8),

72 73 74
    // AVX
    (rav1e_sad16x4_avx2, u8),
    (rav1e_sad16x8_avx2, u8),
75
    (rav1e_sad16x16_avx2, u8),
76 77 78 79 80
    (rav1e_sad16x32_avx2, u8),
    (rav1e_sad16x64_avx2, u8),

    (rav1e_sad32x8_avx2, u8),
    (rav1e_sad32x16_avx2, u8),
81
    (rav1e_sad32x32_avx2, u8),
82 83 84 85
    (rav1e_sad32x64_avx2, u8),

    (rav1e_sad64x16_avx2, u8),
    (rav1e_sad64x32_avx2, u8),
86
    (rav1e_sad64x64_avx2, u8),
87 88 89
    (rav1e_sad64x128_avx2, u8),

    (rav1e_sad128x64_avx2, u8),
90 91 92
    (rav1e_sad128x128_avx2, u8)
  ];

Luca Barbato's avatar
Luca Barbato committed
93
  #[target_feature(enable = "ssse3")]
94
  unsafe fn sad_hbd_ssse3(
95 96
    plane_org: &PlaneSlice<'_, u16>, plane_ref: &PlaneSlice<'_, u16>, blk_w: usize,
    blk_h: usize, bit_depth: usize
Luca Barbato's avatar
Luca Barbato committed
97 98
  ) -> u32 {
    let mut sum = 0 as u32;
99 100
    let org_stride = (plane_org.plane.cfg.stride * 2) as libc::ptrdiff_t;
    let ref_stride = (plane_ref.plane.cfg.stride * 2) as libc::ptrdiff_t;
Luca Barbato's avatar
Luca Barbato committed
101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116
    assert!(blk_h >= 4 && blk_w >= 4);
    let step_size =
      blk_h.min(blk_w).min(if bit_depth <= 10 { 128 } else { 4 });
    let func = match step_size.ilog() {
      3 => rav1e_sad_4x4_hbd_ssse3,
      4 => rav1e_sad_8x8_hbd10_ssse3,
      5 => rav1e_sad_16x16_hbd_ssse3,
      6 => rav1e_sad_32x32_hbd10_ssse3,
      7 => rav1e_sad_64x64_hbd10_ssse3,
      8 => rav1e_sad_128x128_hbd10_ssse3,
      _ => rav1e_sad_128x128_hbd10_ssse3
    };
    for r in (0..blk_h).step_by(step_size) {
      for c in (0..blk_w).step_by(step_size) {
        let org_slice = plane_org.subslice(c, r);
        let ref_slice = plane_ref.subslice(c, r);
Romain Vimont's avatar
Romain Vimont committed
117 118
        let org_ptr = org_slice.as_ptr();
        let ref_ptr = ref_slice.as_ptr();
119 120 121
        // FIXME for now, T == u16
        let org_ptr = org_ptr as *const u16;
        let ref_ptr = ref_ptr as *const u16;
Luca Barbato's avatar
Luca Barbato committed
122 123
        sum += func(org_ptr, org_stride, ref_ptr, ref_stride);
      }
Kyle Siefring's avatar
Kyle Siefring committed
124
    }
125
    sum
Kyle Siefring's avatar
Kyle Siefring committed
126 127
  }

128
  #[target_feature(enable = "sse2")]
129
  unsafe fn sad_sse2(
130 131
    plane_org: &PlaneSlice<'_, u8>, plane_ref: &PlaneSlice<'_, u8>, blk_w: usize,
    blk_h: usize
132
  ) -> u32 {
133 134
    let org_ptr = plane_org.as_ptr();
    let ref_ptr = plane_ref.as_ptr();
135 136
    let org_stride = plane_org.plane.cfg.stride as libc::ptrdiff_t;
    let ref_stride = plane_ref.plane.cfg.stride as libc::ptrdiff_t;
137 138 139 140 141 142 143
    if blk_w == 16 && blk_h == 16 && (org_ptr as usize & 15) == 0 {
      return rav1e_sad16x16_sse2(org_ptr, org_stride, ref_ptr, ref_stride);
    }
    // Note: unaligned blocks come from hres/qres ME search
    let ptr_align_log2 = (org_ptr as usize).trailing_zeros() as usize;
    // The largest unaligned-safe function is for 8x8
    let ptr_align = 1 << ptr_align_log2.max(3);
144
    let step_size = blk_h.min(blk_w).min(ptr_align);
145 146 147 148 149 150 151 152 153
    let func = match step_size.ilog() {
      3 => rav1e_sad4x4_sse2,
      4 => rav1e_sad8x8_sse2,
      5 => rav1e_sad16x16_sse2,
      6 => rav1e_sad32x32_sse2,
      7 => rav1e_sad64x64_sse2,
      8 => rav1e_sad128x128_sse2,
      _ => rav1e_sad128x128_sse2
    };
154 155 156 157 158
    let mut sum = 0 as u32;
    for r in (0..blk_h as isize).step_by(step_size) {
      for c in (0..blk_w as isize).step_by(step_size) {
        let org_ptr = org_ptr.offset(r * org_stride + c);
        let ref_ptr = ref_ptr.offset(r * ref_stride + c);
159 160 161 162 163 164
        sum += func(org_ptr, org_stride, ref_ptr, ref_stride);
      }
    }
    sum
  }

165 166
  #[target_feature(enable = "avx2")]
  unsafe fn sad_avx2(
167 168
    plane_org: &PlaneSlice<'_, u8>, plane_ref: &PlaneSlice<'_, u8>, blk_w: usize,
    blk_h: usize
169
  ) -> u32 {
David Michael Barr's avatar
David Michael Barr committed
170 171
    let org_ptr = plane_org.as_ptr();
    let ref_ptr = plane_ref.as_ptr();
172 173
    let org_stride = plane_org.plane.cfg.stride as libc::ptrdiff_t;
    let ref_stride = plane_ref.plane.cfg.stride as libc::ptrdiff_t;
174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204

    let func = match (blk_w, blk_h) {
      (4, 4) => rav1e_sad4x4_sse2,
      (4, 8) => rav1e_sad4x8_sse2,
      (4, 16) => rav1e_sad4x16_sse2,

      (8, 4) => rav1e_sad8x4_sse2,
      (8, 8) => rav1e_sad8x8_sse2,
      (8, 16) => rav1e_sad8x16_sse2,
      (8, 32) => rav1e_sad8x32_sse2,

      (16, 4) => rav1e_sad16x4_avx2,
      (16, 8) => rav1e_sad16x8_avx2,
      (16, 16) => rav1e_sad16x16_avx2,
      (16, 32) => rav1e_sad16x32_avx2,
      (16, 64) => rav1e_sad16x64_avx2,

      (32, 8) => rav1e_sad32x8_avx2,
      (32, 16) => rav1e_sad32x16_avx2,
      (32, 32) => rav1e_sad32x32_avx2,
      (32, 64) => rav1e_sad32x64_avx2,

      (64, 16) => rav1e_sad64x16_avx2,
      (64, 32) => rav1e_sad64x32_avx2,
      (64, 64) => rav1e_sad64x64_avx2,
      (64, 128) => rav1e_sad64x128_avx2,

      (128, 64) => rav1e_sad128x64_avx2,
      (128, 128) => rav1e_sad128x128_avx2,

      _ => unreachable!()
205
    };
206 207
    func(org_ptr, org_stride, ref_ptr, ref_stride)

208 209
  }

Luca Barbato's avatar
Luca Barbato committed
210
  #[inline(always)]
211
  pub fn get_sad<T: Pixel>(
212
    plane_org: &PlaneSlice<'_, T>, plane_ref: &PlaneSlice<'_, T>, blk_w: usize, blk_h: usize, bit_depth: usize
Luca Barbato's avatar
Luca Barbato committed
213
  ) -> u32 {
214
    #[cfg(all(target_arch = "x86_64", feature = "nasm"))]
Luca Barbato's avatar
Luca Barbato committed
215
    {
216
      if mem::size_of::<T>() == 2 && is_x86_feature_detected!("ssse3") && blk_h >= 4 && blk_w >= 4 {
Luca Barbato's avatar
Luca Barbato committed
217
        return unsafe {
218 219
          let plane_org = &*(plane_org as *const _ as *const PlaneSlice<'_, u16>);
          let plane_ref = &*(plane_ref as *const _ as *const PlaneSlice<'_, u16>);
220
          sad_hbd_ssse3(plane_org, plane_ref, blk_w, blk_h, bit_depth)
Luca Barbato's avatar
Luca Barbato committed
221 222
        };
      }
223
      if mem::size_of::<T>() == 1 && is_x86_feature_detected!("avx2") && blk_h >= 4 && blk_w >= 4 {
224 225 226
        return unsafe {
          let plane_org = &*(plane_org as *const _ as *const PlaneSlice<'_, u8>);
          let plane_ref = &*(plane_ref as *const _ as *const PlaneSlice<'_, u8>);
227
          sad_avx2(plane_org, plane_ref, blk_w, blk_h)
228 229
        };
      }
230 231
      if mem::size_of::<T>() == 1 && is_x86_feature_detected!("sse2") && blk_h >= 4 && blk_w >= 4 {
        return unsafe {
232 233
          let plane_org = &*(plane_org as *const _ as *const PlaneSlice<'_, u8>);
          let plane_ref = &*(plane_ref as *const _ as *const PlaneSlice<'_, u8>);
234
          sad_sse2(plane_org, plane_ref, blk_w, blk_h)
235 236
        };
      }
Kyle Siefring's avatar
Kyle Siefring committed
237
    }
238
    super::native::get_sad(plane_org, plane_ref, blk_w, blk_h, bit_depth)
Kyle Siefring's avatar
Kyle Siefring committed
239
  }
240 241 242
}

mod native {
Raphaël Zumer's avatar
Raphaël Zumer committed
243
  use crate::plane::*;
244
  use crate::util::*;
245

Luca Barbato's avatar
Luca Barbato committed
246
  #[inline(always)]
247
  pub fn get_sad<T: Pixel>(
248 249
    plane_org: &PlaneSlice<'_, T>, plane_ref: &PlaneSlice<'_, T>, blk_w: usize,
    blk_h: usize, _bit_depth: usize
Luca Barbato's avatar
Luca Barbato committed
250 251
  ) -> u32 {
    let mut sum = 0 as u32;
252

Luca Barbato's avatar
Luca Barbato committed
253 254
    let org_iter = plane_org.iter_width(blk_w);
    let ref_iter = plane_ref.iter_width(blk_w);
255

Luca Barbato's avatar
Luca Barbato committed
256
    for (slice_org, slice_ref) in org_iter.take(blk_h).zip(ref_iter) {
257 258 259
      sum += slice_org
        .iter()
        .zip(slice_ref)
260
        .map(|(&a, &b)| (i32::cast_from(a) - i32::cast_from(b)).abs() as u32)
261
        .sum::<u32>();
Luca Barbato's avatar
Luca Barbato committed
262
    }
263

Luca Barbato's avatar
Luca Barbato committed
264 265
    sum
  }
266 267
}

Romain Vimont's avatar
Romain Vimont committed
268 269 270 271 272 273 274 275 276 277 278 279 280 281 282
/// A `cols` x `rows` grid of motion vectors kept in one flat boxed slice.
#[derive(Debug, Clone)]
pub struct FrameMotionVectors {
  // Flat storage; `new` allocates `cols * rows` entries.
  mvs: Box<[MotionVector]>,
  // Number of entries per row.
  pub cols: usize,
  // Number of rows.
  pub rows: usize,
}

impl FrameMotionVectors {
  pub fn new(cols: usize, rows: usize) -> Self {
    Self {
      mvs: vec![MotionVector::default(); cols * rows].into_boxed_slice(),
      cols,
      rows,
    }
  }
283 284 285 286 287 288 289 290 291 292

  #[inline(always)]
  pub fn as_tile_motion_vectors(&self) -> TileMotionVectors<'_> {
    TileMotionVectors::new(self, 0, 0, self.cols, self.rows)
  }

  #[inline(always)]
  pub fn as_tile_motion_vectors_mut(&mut self) -> TileMotionVectorsMut<'_> {
    TileMotionVectorsMut::new(self, 0, 0, self.cols, self.rows)
  }
Romain Vimont's avatar
Romain Vimont committed
293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309
}

impl Index<usize> for FrameMotionVectors {
  type Output = [MotionVector];

  /// Returns row `index` as a slice of `cols` motion vectors.
  #[inline]
  fn index(&self, index: usize) -> &Self::Output {
    let row_start = index * self.cols;
    let row_end = (index + 1) * self.cols;
    &self.mvs[row_start..row_end]
  }
}

impl IndexMut<usize> for FrameMotionVectors {
  /// Returns row `index` as a mutable slice of `cols` motion vectors.
  #[inline]
  fn index_mut(&mut self, index: usize) -> &mut Self::Output {
    let row_start = index * self.cols;
    let row_end = (index + 1) * self.cols;
    &mut self.mvs[row_start..row_end]
  }
}

310
fn get_mv_range(
Romain Vimont's avatar
Romain Vimont committed
311
  w_in_b: usize, h_in_b: usize, bo: BlockOffset, blk_w: usize, blk_h: usize
312
) -> (isize, isize, isize, isize) {
313 314 315
  let border_w = 128 + blk_w as isize * 8;
  let border_h = 128 + blk_h as isize * 8;
  let mvx_min = -(bo.x as isize) * (8 * MI_SIZE) as isize - border_w;
316
  let mvx_max = (w_in_b - bo.x - blk_w / MI_SIZE) as isize * (8 * MI_SIZE) as isize + border_w;
317
  let mvy_min = -(bo.y as isize) * (8 * MI_SIZE) as isize - border_h;
318
  let mvy_max = (h_in_b - bo.y - blk_h / MI_SIZE) as isize * (8 * MI_SIZE) as isize + border_h;
319 320 321 322

  (mvx_min, mvx_max, mvy_min, mvy_max)
}

323
pub fn get_subset_predictors<T: Pixel>(
324
  tile_bo: BlockOffset, cmv: MotionVector,
325
  tile_mvs: &TileMotionVectors<'_>, frame_ref_opt: Option<&ReferenceFrame<T>>,
326
  ref_frame_id: usize
327 328 329
) -> (Vec<MotionVector>) {
  let mut predictors = Vec::new();

330 331 332 333 334 335
  // Zero motion vector
  predictors.push(MotionVector::default());

  // Coarse motion estimation.
  predictors.push(cmv.quantize_to_fullpel());

336 337
  // EPZS subset A and B predictors.

338
  let mut median_preds = Vec::new();
339 340
  if tile_bo.x > 0 {
    let left = tile_mvs[tile_bo.y][tile_bo.x - 1];
341 342
    median_preds.push(left);
    if !left.is_zero() { predictors.push(left); }
343
  }
344 345
  if tile_bo.y > 0 {
    let top = tile_mvs[tile_bo.y - 1][tile_bo.x];
346 347
    median_preds.push(top);
    if !top.is_zero() { predictors.push(top); }
348

349 350
    if tile_bo.x < tile_mvs.cols() - 1 {
      let top_right = tile_mvs[tile_bo.y - 1][tile_bo.x + 1];
351 352
      median_preds.push(top_right);
      if !top_right.is_zero() { predictors.push(top_right); }
353 354 355
    }
  }

356
  if !median_preds.is_empty() {
Vladimir Kazakov's avatar
Vladimir Kazakov committed
357
    let mut median_mv = MotionVector::default();
358
    for mv in median_preds.iter() {
359 360
      median_mv = median_mv + *mv;
    }
361 362 363
    median_mv = median_mv / (median_preds.len() as i16);
    let median_mv_quant = median_mv.quantize_to_fullpel();
    if !median_mv_quant.is_zero() { predictors.push(median_mv_quant); }
364 365 366 367 368
  }

  // EPZS subset C predictors.

  if let Some(ref frame_ref) = frame_ref_opt {
369
    let prev_frame_mvs = &frame_ref.frame_mvs[ref_frame_id];
370

371
    let frame_bo = BlockOffset {
372 373
      x: tile_mvs.x() + tile_bo.x,
      y: tile_mvs.y() + tile_bo.y,
374 375 376
    };
    if frame_bo.x > 0 {
      let left = prev_frame_mvs[frame_bo.y][frame_bo.x - 1];
377
      if !left.is_zero() { predictors.push(left); }
378
    }
379 380
    if frame_bo.y > 0 {
      let top = prev_frame_mvs[frame_bo.y - 1][frame_bo.x];
381
      if !top.is_zero() { predictors.push(top); }
382
    }
383 384
    if frame_bo.x < prev_frame_mvs.cols - 1 {
      let right = prev_frame_mvs[frame_bo.y][frame_bo.x + 1];
385
      if !right.is_zero() { predictors.push(right); }
386
    }
387 388
    if frame_bo.y < prev_frame_mvs.rows - 1 {
      let bottom = prev_frame_mvs[frame_bo.y + 1][frame_bo.x];
389
      if !bottom.is_zero() { predictors.push(bottom); }
390 391
    }

392
    let previous = prev_frame_mvs[frame_bo.y][frame_bo.x];
393
    if !previous.is_zero() { predictors.push(previous); }
394 395 396 397 398
  }

  predictors
}

399
pub trait MotionEstimation {
400
  fn full_pixel_me<T: Pixel>(
401
    fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, rec: &ReferenceFrame<T>,
Romain Vimont's avatar
Romain Vimont committed
402
    bo: BlockOffset, lambda: u32,
403
    cmv: MotionVector, pmv: [MotionVector; 2],
404 405
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize, best_mv: &mut MotionVector,
Thomas Daede's avatar
Thomas Daede committed
406
    lowest_cost: &mut u64, ref_frame: RefType
407
  );
408

409
  fn sub_pixel_me<T: Pixel>(
410
    fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, rec: &ReferenceFrame<T>,
Romain Vimont's avatar
Romain Vimont committed
411
    bo: BlockOffset, lambda: u32, pmv: [MotionVector; 2],
412 413
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize, best_mv: &mut MotionVector,
Thomas Daede's avatar
Thomas Daede committed
414
    lowest_cost: &mut u64, ref_frame: RefType
415 416
  );

417
  fn motion_estimation<T: Pixel> (
418
    fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, bsize: BlockSize,
Thomas Daede's avatar
Thomas Daede committed
419
    bo: BlockOffset, ref_frame: RefType, cmv: MotionVector,
420
    pmv: [MotionVector; 2]
421
  ) -> MotionVector {
Thomas Daede's avatar
Thomas Daede committed
422
    match fi.rec_buffer.frames[fi.ref_frames[ref_frame.to_index()] as usize]
423 424 425 426
    {
      Some(ref rec) => {
        let blk_w = bsize.width();
        let blk_h = bsize.height();
427
        let frame_bo = ts.to_frame_block_offset(bo);
428
        let (mvx_min, mvx_max, mvy_min, mvy_max) =
429
          get_mv_range(fi.w_in_b, fi.h_in_b, frame_bo, blk_w, blk_h);
430 431 432 433 434 435 436 437 438

        // 0.5 is a fudge factor
        let lambda = (fi.me_lambda * 256.0 * 0.5) as u32;

        // Full-pixel motion estimation

        let mut lowest_cost = std::u64::MAX;
        let mut best_mv = MotionVector::default();

439
        Self::full_pixel_me(fi, ts, rec, bo, lambda, cmv, pmv,
440 441
                           mvx_min, mvx_max, mvy_min, mvy_max, blk_w, blk_h,
                           &mut best_mv, &mut lowest_cost, ref_frame);
442

443
        Self::sub_pixel_me(fi, ts, rec, bo, lambda, pmv,
444
                           mvx_min, mvx_max, mvy_min, mvy_max, blk_w, blk_h,
445
                           &mut best_mv, &mut lowest_cost, ref_frame);
446 447

        best_mv
Frank Bossen's avatar
Frank Bossen committed
448 449
      }

450
      None => MotionVector::default()
451
    }
452
  }
453 454

  fn estimate_motion_ss2<T: Pixel>(
455
    fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, bsize: BlockSize, ref_idx: usize,
Romain Vimont's avatar
Romain Vimont committed
456
    bo: BlockOffset, pmvs: &[Option<MotionVector>; 3], ref_frame: usize
457 458 459 460
  ) -> Option<MotionVector> {
    if let Some(ref rec) = fi.rec_buffer.frames[ref_idx] {
      let blk_w = bsize.width();
      let blk_h = bsize.height();
461 462 463
      let bo_adj = adjust_bo(bo, ts.mi_width, ts.mi_height, blk_w, blk_h);
      let frame_bo_adj = ts.to_frame_block_offset(bo_adj);
      let (mvx_min, mvx_max, mvy_min, mvy_max) = get_mv_range(fi.w_in_b, fi.h_in_b, frame_bo_adj, blk_w, blk_h);
464 465

      let global_mv = [MotionVector{row: 0, col: 0}; 2];
466
      let tile_mvs = &ts.mvs[ref_frame].as_const();
467
      let frame_ref_opt = fi.rec_buffer.frames[fi.ref_frames[0] as usize].as_ref().map(Arc::as_ref);
468 469 470 471 472 473 474 475

      let mut lowest_cost = std::u64::MAX;
      let mut best_mv = MotionVector::default();

      // Divide by 4 to account for subsampling, 0.125 is a fudge factor
      let lambda = (fi.me_lambda * 256.0 / 4.0 * 0.125) as u32;

      Self::me_ss2(
476
        fi, ts, pmvs, bo_adj,
477
        &tile_mvs, frame_ref_opt, rec, global_mv, lambda,
478 479 480 481 482 483 484 485 486 487 488
        mvx_min, mvx_max, mvy_min, mvy_max, blk_w, blk_h,
        &mut best_mv, &mut lowest_cost
      );

      Some(MotionVector { row: best_mv.row * 2, col: best_mv.col * 2 })
    } else {
      None
    }
  }

  fn me_ss2<T: Pixel>(
489
    fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>,
490 491
    pmvs: &[Option<MotionVector>; 3], bo_adj: BlockOffset,
    tile_mvs: &TileMotionVectors<'_>, frame_ref_opt: Option<&ReferenceFrame<T>>,
492
    rec: &ReferenceFrame<T>, global_mv: [MotionVector; 2], lambda: u32,
493 494 495 496
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize,
    best_mv: &mut MotionVector, lowest_cost: &mut u64
  );
497
}
498

499 500 501
/// Motion estimation strategy whose `MotionEstimation` impl runs
/// `diamond_me_search` for every step.
pub struct DiamondSearch {}
/// Motion estimation strategy whose `MotionEstimation` impl runs
/// `full_search` for the full-pixel and `me_ss2` steps and
/// `telescopic_subpel_search` for the sub-pixel step.
pub struct FullSearch {}

502 503
impl MotionEstimation for DiamondSearch {
  fn full_pixel_me<T: Pixel>(
504
    fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, rec: &ReferenceFrame<T>,
505
    bo: BlockOffset, lambda: u32,
506 507
    cmv: MotionVector, pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
508
    best_mv: &mut MotionVector, lowest_cost: &mut u64, ref_frame: RefType
509
  ) {
510
    let tile_mvs = &ts.mvs[ref_frame.to_index()].as_const();
511
    let frame_ref = fi.rec_buffer.frames[fi.ref_frames[0] as usize].as_ref().map(Arc::as_ref);
512
    let predictors =
513
      get_subset_predictors(bo, cmv, &tile_mvs, frame_ref, ref_frame.to_index());
514

515
    let frame_bo = ts.to_frame_block_offset(bo);
516 517
    diamond_me_search(
      fi,
518 519
      frame_bo.to_luma_plane_offset(),
      &ts.input.planes[0],
520 521 522 523 524 525 526 527 528 529 530 531 532
      &rec.frame.planes[0],
      &predictors,
      fi.sequence.bit_depth,
      pmv,
      lambda,
      mvx_min,
      mvx_max,
      mvy_min,
      mvy_max,
      blk_w,
      blk_h,
      best_mv,
      lowest_cost,
533
      false,
534 535 536
      ref_frame
    );
  }
537 538

  fn sub_pixel_me<T: Pixel>(
539
    fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, rec: &ReferenceFrame<T>,
540
    bo: BlockOffset, lambda: u32,
541 542
    pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
543
    best_mv: &mut MotionVector, lowest_cost: &mut u64, ref_frame: RefType,
544 545 546
  )
  {
    let predictors = vec![*best_mv];
547
    let frame_bo = ts.to_frame_block_offset(bo);
548 549
    diamond_me_search(
      fi,
550 551
      frame_bo.to_luma_plane_offset(),
      &ts.input.planes[0],
552 553 554 555 556 557 558 559 560 561 562 563 564
      &rec.frame.planes[0],
      &predictors,
      fi.sequence.bit_depth,
      pmv,
      lambda,
      mvx_min,
      mvx_max,
      mvy_min,
      mvy_max,
      blk_w,
      blk_h,
      best_mv,
      lowest_cost,
565
      true,
566 567 568
      ref_frame
    );
  }
569 570

  fn me_ss2<T: Pixel>(
571
    fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>,
572
    pmvs: &[Option<MotionVector>; 3], bo_adj: BlockOffset,
573
    tile_mvs: &TileMotionVectors<'_>, frame_ref_opt: Option<&ReferenceFrame<T>>,
574
    rec: &ReferenceFrame<T>, global_mv: [MotionVector; 2], lambda: u32,
575 576 577 578
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize,
    best_mv: &mut MotionVector, lowest_cost: &mut u64
  ) {
579
    let frame_bo_adj = ts.to_frame_block_offset(bo_adj);
580
    let po = PlaneOffset {
581 582
      x: (frame_bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
      y: (frame_bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
583
    };
584 585 586
    for omv in pmvs.iter() {
      if let Some(pmv) = omv {
        let mut predictors = get_subset_predictors::<T>(
587
          bo_adj,
588
          MotionVector{row: pmv.row, col: pmv.col},
589
          &tile_mvs, frame_ref_opt, 0
590 591 592 593 594 595 596 597
        );

        for predictor in &mut predictors {
          predictor.row >>= 1;
          predictor.col >>= 1;
        }

        diamond_me_search(
Romain Vimont's avatar
Romain Vimont committed
598
          fi, po,
599
          &ts.input_hres, &rec.input_hres,
600 601 602 603 604
          &predictors, fi.sequence.bit_depth,
          global_mv, lambda,
          mvx_min >> 1, mvx_max >> 1, mvy_min >> 1, mvy_max >> 1,
          blk_w >> 1, blk_h >> 1,
          best_mv, lowest_cost,
Thomas Daede's avatar
Thomas Daede committed
605
          false, LAST_FRAME
606 607 608 609
        );
      }
    }
  }
610 611 612 613
}

impl MotionEstimation for FullSearch {
  fn full_pixel_me<T: Pixel>(
614
    fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, rec: &ReferenceFrame<T>,
615
    bo: BlockOffset, lambda: u32,
616 617
    cmv: MotionVector, pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
618
    best_mv: &mut MotionVector, lowest_cost: &mut u64, _ref_frame: RefType
619
  ) {
620 621
    let frame_bo = ts.to_frame_block_offset(bo);
    let po = frame_bo.to_luma_plane_offset();
622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638
    let range = 16;
    let x_lo = po.x
      + ((-range + (cmv.col / 8) as isize).max(mvx_min / 8).min(mvx_max / 8));
    let x_hi = po.x
      + ((range + (cmv.col / 8) as isize).max(mvx_min / 8).min(mvx_max / 8));
    let y_lo = po.y
      + ((-range + (cmv.row / 8) as isize).max(mvy_min / 8).min(mvy_max / 8));
    let y_hi = po.y
      + ((range + (cmv.row / 8) as isize).max(mvy_min / 8).min(mvy_max / 8));

    full_search(
      x_lo,
      x_hi,
      y_lo,
      y_hi,
      blk_h,
      blk_w,
639
      &ts.input.planes[0],
640 641 642
      &rec.frame.planes[0],
      best_mv,
      lowest_cost,
Romain Vimont's avatar
Romain Vimont committed
643
      po,
644 645 646 647 648 649 650
      2,
      fi.sequence.bit_depth,
      lambda,
      pmv,
      fi.allow_high_precision_mv
    );
  }
651 652

  fn sub_pixel_me<T: Pixel>(
653
    fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, _rec: &ReferenceFrame<T>,
654
    bo: BlockOffset, lambda: u32,
655
    pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
656
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
657
    best_mv: &mut MotionVector, lowest_cost: &mut u64, ref_frame: RefType,
658 659
  )
  {
660
    let frame_bo = ts.to_frame_block_offset(bo);
661 662
    telescopic_subpel_search(
      fi,
663 664
      ts,
      frame_bo.to_luma_plane_offset(),
665 666 667 668 669 670 671
      lambda,
      ref_frame,
      pmv,
      mvx_min,
      mvx_max,
      mvy_min,
      mvy_max,
672 673
      blk_w,
      blk_h,
674 675 676 677
      best_mv,
      lowest_cost
    );
  }
678 679

  fn me_ss2<T: Pixel>(
680
    fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>,
681
    pmvs: &[Option<MotionVector>; 3], bo_adj: BlockOffset,
682
    _tile_mvs: &TileMotionVectors<'_>, _frame_ref_opt: Option<&ReferenceFrame<T>>,
683
    rec: &ReferenceFrame<T>, _global_mv: [MotionVector; 2], lambda: u32,
684 685 686 687
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize,
    best_mv: &mut MotionVector, lowest_cost: &mut u64
  ) {
688
    let frame_bo_adj = ts.to_frame_block_offset(bo_adj);
689
    let po = PlaneOffset {
690 691
      x: (frame_bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
      y: (frame_bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
692
    };
693 694 695 696 697 698 699 700 701 702 703 704 705 706
    let range = 16;
    for omv in pmvs.iter() {
      if let Some(pmv) = omv {
        let x_lo = po.x + (((pmv.col as isize / 8 - range).max(mvx_min / 8).min(mvx_max / 8)) >> 1);
        let x_hi = po.x + (((pmv.col as isize / 8 + range).max(mvx_min / 8).min(mvx_max / 8)) >> 1);
        let y_lo = po.y + (((pmv.row as isize / 8 - range).max(mvy_min / 8).min(mvy_max / 8)) >> 1);
        let y_hi = po.y + (((pmv.row as isize / 8 + range).max(mvy_min / 8).min(mvy_max / 8)) >> 1);
        full_search(
          x_lo,
          x_hi,
          y_lo,
          y_hi,
          blk_h >> 1,
          blk_w >> 1,
707
          &ts.input_hres,
708 709 710
          &rec.input_hres,
          best_mv,
          lowest_cost,
Romain Vimont's avatar
Romain Vimont committed
711
          po,
712 713 714 715 716 717 718 719 720
          1,
          fi.sequence.bit_depth,
          lambda,
          [MotionVector::default(); 2],
          fi.allow_high_precision_mv
        );
      }
    }
  }
721
}
722

723 724
fn get_best_predictor<T: Pixel>(
  fi: &FrameInvariants<T>,
Romain Vimont's avatar
Romain Vimont committed
725
  po: PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>,
726
  predictors: &[MotionVector],
727
  bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
728 729
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
730
  center_mv: &mut MotionVector, center_mv_cost: &mut u64,
Thomas Daede's avatar
Thomas Daede committed
731
  tmp_plane_opt: &mut Option<Plane<T>>, ref_frame: RefType) {
Vladimir Kazakov's avatar
Vladimir Kazakov committed
732
  *center_mv = MotionVector::default();
733 734 735 736 737 738
  *center_mv_cost = std::u64::MAX;

  for &init_mv in predictors.iter() {
    let cost = get_mv_rd_cost(
      fi, po, p_org, p_ref, bit_depth,
      pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
739
      blk_w, blk_h, init_mv, tmp_plane_opt, ref_frame);
740 741 742 743 744 745 746 747

    if cost < *center_mv_cost {
      *center_mv = init_mv;
      *center_mv_cost = cost;
    }
  }
}

748 749
fn diamond_me_search<T: Pixel>(
  fi: &FrameInvariants<T>,
Romain Vimont's avatar
Romain Vimont committed
750
  po: PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>,
751
  predictors: &[MotionVector],
752
  bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
753 754
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
755
  center_mv: &mut MotionVector, center_mv_cost: &mut u64,
Thomas Daede's avatar
Thomas Daede committed
756
  subpixel: bool, ref_frame: RefType)
757 758
{
  let diamond_pattern = [(1i16, 0i16), (0, 1), (-1, 0), (0, -1)];
759 760
  let (mut diamond_radius, diamond_radius_end, mut tmp_plane_opt) = {
    if subpixel {
761
      // Sub-pixel motion estimation
762 763 764 765 766
      (
        4i16,
        if fi.allow_high_precision_mv {1i16} else {2i16},
        Some(Plane::new(blk_w, blk_h, 0, 0, 0, 0)),
      )
767 768
    } else {
      // Full pixel motion estimation
769
      (16i16, 8i16, None)
770 771
    }
  };
772 773 774 775

  get_best_predictor(
    fi, po, p_org, p_ref, &predictors,
    bit_depth, pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
776
    blk_w, blk_h, center_mv, center_mv_cost,
777
    &mut tmp_plane_opt, ref_frame);
778 779 780

  loop {
    let mut best_diamond_rd_cost = std::u64::MAX;
Vladimir Kazakov's avatar
Vladimir Kazakov committed
781
    let mut best_diamond_mv = MotionVector::default();
782 783 784 785 786 787 788 789 790

    for p in diamond_pattern.iter() {

        let cand_mv = MotionVector {
          row: center_mv.row + diamond_radius * p.0,
          col: center_mv.col + diamond_radius * p.1
        };

        let rd_cost = get_mv_rd_cost(
Romain Vimont's avatar
Romain Vimont committed
791
          fi, po, p_org, p_ref, bit_depth,
792
          pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
793
          blk_w, blk_h, cand_mv, &mut tmp_plane_opt, ref_frame);
794 795 796 797 798 799 800 801

        if rd_cost < best_diamond_rd_cost {
          best_diamond_rd_cost = rd_cost;
          best_diamond_mv = cand_mv;
        }
    }

    if *center_mv_cost <= best_diamond_rd_cost {
802
      if diamond_radius == diamond_radius_end {
803 804 805 806 807 808 809 810 811 812 813 814 815 816
        break;
      } else {
        diamond_radius /= 2;
      }
    }
    else {
      *center_mv = best_diamond_mv;
      *center_mv_cost = best_diamond_rd_cost;
    }
  }

  assert!(*center_mv_cost < std::u64::MAX);
}

817 818
fn get_mv_rd_cost<T: Pixel>(
  fi: &FrameInvariants<T>,
Romain Vimont's avatar
Romain Vimont committed
819
  po: PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>, bit_depth: usize,
820
  pmv: [MotionVector; 2], lambda: u32,
821 822
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
823
  cand_mv: MotionVector, tmp_plane_opt: &mut Option<Plane<T>>,
Thomas Daede's avatar
Thomas Daede committed
824
  ref_frame: RefType) -> u64
825 826 827 828 829 830 831 832 833 834
{
  if (cand_mv.col as isize) < mvx_min || (cand_mv.col as isize) > mvx_max {
    return std::u64::MAX;
  }
  if (cand_mv.row as isize) < mvy_min || (cand_mv.row as isize) > mvy_max {
    return std::u64::MAX;
  }

  let plane_org = p_org.slice(po);

835
  if let Some(ref mut tmp_plane) = tmp_plane_opt {
Romain Vimont's avatar
Romain Vimont committed
836
    let mut tmp_slice = &mut tmp_plane.mut_slice(PlaneOffset { x: 0, y: 0 });
837 838 839
    PredictionMode::NEWMV.predict_inter(
      fi,
      0,
Romain Vimont's avatar
Romain Vimont committed
840
      po,
841 842 843 844 845 846
      &mut tmp_slice,
      blk_w,
      blk_h,
      [ref_frame, NONE_FRAME],
      [cand_mv, MotionVector { row: 0, col: 0 }]
    );
Romain Vimont's avatar
Romain Vimont committed
847
    let plane_ref = tmp_plane.slice(PlaneOffset { x: 0, y: 0 });
848 849 850 851 852 853
    compute_mv_rd_cost(
      fi, pmv, lambda, bit_depth, blk_w, blk_h, cand_mv,
      &plane_org, &plane_ref
    )
  } else {
    // Full pixel motion vector
Romain Vimont's avatar
Romain Vimont committed
854
    let plane_ref = p_ref.slice(PlaneOffset {
855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871
      x: po.x + (cand_mv.col / 8) as isize,
      y: po.y + (cand_mv.row / 8) as isize
    });
    compute_mv_rd_cost(
      fi, pmv, lambda, bit_depth, blk_w, blk_h, cand_mv,
      &plane_org, &plane_ref
    )
  }
}

fn compute_mv_rd_cost<T: Pixel>(
  fi: &FrameInvariants<T>,
  pmv: [MotionVector; 2], lambda: u32,
  bit_depth: usize, blk_w: usize, blk_h: usize, cand_mv: MotionVector,
  plane_org: &PlaneSlice<T>, plane_ref: &PlaneSlice<T>
) -> u64
{
872
  let sad = get_sad(&plane_org, &plane_ref, blk_w, blk_h, bit_depth);
873 874 875 876 877 878 879 880

  let rate1 = get_mv_rate(cand_mv, pmv[0], fi.allow_high_precision_mv);
  let rate2 = get_mv_rate(cand_mv, pmv[1], fi.allow_high_precision_mv);
  let rate = rate1.min(rate2 + 1);

  256 * sad as u64 + rate as u64 * lambda as u64
}

881
fn telescopic_subpel_search<T: Pixel>(
882
  fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, po: PlaneOffset,
Thomas Daede's avatar
Thomas Daede committed
883
  lambda: u32, ref_frame: RefType, pmv: [MotionVector; 2],
884
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
885
  blk_w: usize, blk_h: usize,
Romain Vimont's avatar
Romain Vimont committed
886
  best_mv: &mut MotionVector, lowest_cost: &mut u64
887 888 889 890 891 892 893 894
) {
  let mode = PredictionMode::NEWMV;

  let mut steps = vec![8, 4, 2];
  if fi.allow_high_precision_mv {
    steps.push(1);
  }

Romain Vimont's avatar
Romain Vimont committed
895 896
  let mut tmp_plane = Plane::new(blk_w, blk_h, 0, 0, 0, 0);

897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919
  for step in steps {
    let center_mv_h = *best_mv;
    for i in 0..3 {
      for j in 0..3 {
        // Skip the center point that was already tested
        if i == 1 && j == 1 {
          continue;
        }

        let cand_mv = MotionVector {
          row: center_mv_h.row + step * (i as i16 - 1),
          col: center_mv_h.col + step * (j as i16 - 1)
        };

        if (cand_mv.col as isize) < mvx_min || (cand_mv.col as isize) > mvx_max {
          continue;
        }
        if (cand_mv.row as isize) < mvy_min || (cand_mv.row as isize) > mvy_max {
          continue;
        }

        {
          let tmp_slice =
Romain Vimont's avatar
Romain Vimont committed
920
            &mut tmp_plane.mut_slice(PlaneOffset { x: 0, y: 0 });
921 922 923 924

          mode.predict_inter(
            fi,
            0,
Romain Vimont's avatar
Romain Vimont committed
925
            po,
926 927 928 929 930 931 932 933
            tmp_slice,
            blk_w,
            blk_h,
            [ref_frame, NONE_FRAME],
            [cand_mv, MotionVector { row: 0, col: 0 }]
          );
        }

934
        let plane_org = ts.input.planes[0].slice(po);
Romain Vimont's avatar
Romain Vimont committed
935
        let plane_ref = tmp_plane.slice(PlaneOffset { x: 0, y: 0 });
936

937
        let sad = get_sad(&plane_org, &plane_ref, blk_w, blk_h, fi.sequence.bit_depth);
938 939 940 941 942 943 944 945 946 947 948 949 950 951 952

        let rate1 = get_mv_rate(cand_mv, pmv[0], fi.allow_high_precision_mv);
        let rate2 = get_mv_rate(cand_mv, pmv[1], fi.allow_high_precision_mv);
        let rate = rate1.min(rate2 + 1);
        let cost = 256 * sad as u64 + rate as u64 * lambda as u64;

        if cost < *lowest_cost {
          *lowest_cost = cost;
          *best_mv = cand_mv;
        }
      }
    }
  }
}

953
fn full_search<T: Pixel>(
Kyle Siefring's avatar
Kyle Siefring committed
954
  x_lo: isize, x_hi: isize, y_lo: isize, y_hi: isize, blk_h: usize,
955
  blk_w: usize, p_org: &Plane<T>, p_ref: &Plane<T>, best_mv: &mut MotionVector,
Romain Vimont's avatar
Romain Vimont committed
956
  lowest_cost: &mut u64, po: PlaneOffset, step: usize, bit_depth: usize,
957
  lambda: u32, pmv: [MotionVector; 2], allow_high_precision_mv: bool
Kyle Siefring's avatar
Kyle Siefring committed
958
) {
Luca Barbato's avatar
Luca Barbato committed
959 960 961 962 963
    let search_range_y = (y_lo..=y_hi).step_by(step);
    let search_range_x = (x_lo..=x_hi).step_by(step);
    let search_area = search_range_y.flat_map(|y| { search_range_x.clone().map(move |x| (y, x)) });

    let (cost, mv) = search_area.map(|(y, x)| {
964
      let plane_org = p_org.slice(po);
Romain Vimont's avatar
Romain Vimont committed
965
      let plane_ref = p_ref.slice(PlaneOffset { x, y });
966

967
      let sad = get_sad(&plane_org, &plane_ref, blk_w, blk_h, bit_depth);
968

Frank Bossen's avatar
Frank Bossen committed
969 970 971 972 973 974 975 976
      let mv = MotionVector {
        row: 8 * (y as i16 - po.y as i16),
        col: 8 * (x as i16 - po.x as i16)
      };

      let rate1 = get_mv_rate(mv, pmv[0], allow_high_precision_mv);
      let rate2 = get_mv_rate(mv, pmv[1], allow_high_precision_mv);
      let rate = rate1.min(rate2 + 1);
977
      let cost = 256 * sad as u64 + rate as u64 * lambda as u64;
Frank Bossen's avatar
Frank Bossen committed
978

Luca Barbato's avatar
Luca Barbato committed
979 980 981 982 983
      (cost, mv)
  }).min_by_key(|(c, _)| *c).unwrap();

    *lowest_cost = cost;
    *best_mv = mv;
984 985
}

986 987
/// Adjusts a block offset so that the entire block lies within boundaries.
///
/// Each coordinate is limited to at most the corresponding `mi_*` extent
/// minus the block dimension divided by 4, and is never allowed below zero.
/// The division by 4 presumably converts pixels to the units of the offset;
/// confirm against the definition of `BlockOffset`.
fn adjust_bo(
  bo: BlockOffset, mi_width: usize, mi_height: usize, blk_w: usize,
  blk_h: usize
) -> BlockOffset {
  // Signed arithmetic: the upper limit is negative when the block is larger
  // than the available extent, in which case the result clamps to zero.
  let clamp_axis = |pos: usize, extent: usize, blk_len: usize| -> usize {
    let upper = extent as isize - blk_len as isize / 4;
    (pos as isize).min(upper).max(0) as usize
  };

  BlockOffset {
    x: clamp_axis(bo.x, mi_width, blk_w),
    y: clamp_axis(bo.y, mi_height, blk_h)
  }
}

994
#[inline(always)]
Frank Bossen's avatar
Frank Bossen committed
995
fn get_mv_rate(a: MotionVector, b: MotionVector, allow_high_precision_mv: bool) -> u32 {
996
  #[inline(always)]
Frank Bossen's avatar
Frank Bossen committed
997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008
  fn diff_to_rate(diff: i16, allow_high_precision_mv: bool) -> u32 {
    let d = if allow_high_precision_mv { diff } else { diff >> 1 };
    if d == 0 {
      0
    } else {
      2 * (16 - d.abs().leading_zeros())
    }
  }

  diff_to_rate(a.row - b.row, allow_high_precision_mv) + diff_to_rate(a.col - b.col, allow_high_precision_mv)
}

1009
pub fn estimate_motion_ss4<T: Pixel>(
1010
  fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, bsize: BlockSize, ref_idx: usize,
Romain Vimont's avatar
Romain Vimont committed
1011
  bo: BlockOffset
1012 1013 1014 1015
) -> Option<MotionVector> {
  if let Some(ref rec) = fi.rec_buffer.frames[ref_idx] {
    let blk_w = bsize.width();
    let blk_h = bsize.height();
1016 1017
    let bo_adj = adjust_bo(bo, ts.mi_width, ts.mi_height, blk_w, blk_h);
    let frame_bo_adj = ts.to_frame_block_offset(bo_adj);
1018
    let po = PlaneOffset {
1019 1020
      x: (frame_bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 2,
      y: (frame_bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 2
1021
    };
1022