me.rs 35.6 KB
Newer Older
1 2 3 4 5 6 7 8 9
// Copyright (c) 2017-2018, The rav1e contributors. All rights reserved
//
// This source code is subject to the terms of the BSD 2 Clause License and
// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
// was not distributed with this source code in the LICENSE file, you can
// obtain it at www.aomedia.org/license/software. If the Alliance for Open
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.

10
#[cfg(all(target_arch = "x86_64", feature = "nasm"))]
Raphaël Zumer's avatar
Raphaël Zumer committed
11
pub use self::nasm::get_sad;
12
#[cfg(any(not(target_arch = "x86_64"), not(feature = "nasm")))]
Raphaël Zumer's avatar
Raphaël Zumer committed
13
pub use self::native::get_sad;
Raphaël Zumer's avatar
Raphaël Zumer committed
14
use crate::context::{BlockOffset, BLOCK_TO_PLANE_SHIFT, MI_SIZE};
15
use crate::encoder::ReferenceFrame;
Raphaël Zumer's avatar
Raphaël Zumer committed
16 17
use crate::FrameInvariants;
use crate::partition::*;
Thomas Daede's avatar
Thomas Daede committed
18
use crate::partition::RefType::*;
Raphaël Zumer's avatar
Raphaël Zumer committed
19
use crate::plane::*;
20
use crate::tiling::*;
21
use crate::util::Pixel;
22

Romain Vimont's avatar
Romain Vimont committed
23
use std::ops::{Index, IndexMut};
24
use std::sync::Arc;
25

26
/// SAD (sum of absolute differences) kernels implemented by external
/// `rav1e_sad*` routines, selected at run time by CPU feature detection.
#[cfg(all(target_arch = "x86_64", feature = "nasm"))]
mod nasm {
  use crate::tiling::*;
  use crate::util::*;
  use std::mem;

  use libc;

  // Expands to one `extern` declaration per `(symbol, sample type)` pair.
  // Every routine shares the same shape:
  // `(src, src_stride, dst, dst_stride) -> u32`.
  macro_rules! declare_asm_sad {
    ($(($name: ident, $T: ident)),+) => (
      $(
        extern { fn $name (
          src: *const $T, src_stride: libc::ptrdiff_t, dst: *const $T,
          dst_stride: libc::ptrdiff_t
        ) -> u32; }
      )+
    )
  }

  declare_asm_sad![
    // SSSE3
    (rav1e_sad_4x4_hbd_ssse3, u16),
    (rav1e_sad_8x8_hbd10_ssse3, u16),
    (rav1e_sad_16x16_hbd_ssse3, u16),
    (rav1e_sad_32x32_hbd10_ssse3, u16),
    (rav1e_sad_64x64_hbd10_ssse3, u16),
    (rav1e_sad_128x128_hbd10_ssse3, u16),

    // SSE2
    (rav1e_sad4x4_sse2, u8),
    (rav1e_sad4x8_sse2, u8),
    (rav1e_sad4x16_sse2, u8),

    (rav1e_sad8x4_sse2, u8),
    (rav1e_sad8x8_sse2, u8),
    (rav1e_sad8x16_sse2, u8),
    (rav1e_sad8x32_sse2, u8),

    (rav1e_sad16x16_sse2, u8),

    (rav1e_sad32x32_sse2, u8),

    (rav1e_sad64x64_sse2, u8),

    (rav1e_sad128x128_sse2, u8),

    // AVX
    (rav1e_sad16x4_avx2, u8),
    (rav1e_sad16x8_avx2, u8),
    (rav1e_sad16x16_avx2, u8),
    (rav1e_sad16x32_avx2, u8),
    (rav1e_sad16x64_avx2, u8),

    (rav1e_sad32x8_avx2, u8),
    (rav1e_sad32x16_avx2, u8),
    (rav1e_sad32x32_avx2, u8),
    (rav1e_sad32x64_avx2, u8),

    (rav1e_sad64x16_avx2, u8),
    (rav1e_sad64x32_avx2, u8),
    (rav1e_sad64x64_avx2, u8),
    (rav1e_sad64x128_avx2, u8),

    (rav1e_sad128x64_avx2, u8),
    (rav1e_sad128x128_avx2, u8)
  ];

  /// SAD of a `blk_w` x `blk_h` block of `u16` samples using the SSSE3
  /// routines.
  ///
  /// The block is covered with square tiles whose side is
  /// `min(blk_w, blk_h)`, additionally capped at 4 when `bit_depth > 10`
  /// (128 otherwise); the per-tile results are summed.
  ///
  /// # Panics
  ///
  /// Panics if `blk_w < 4` or `blk_h < 4`.
  ///
  /// # Safety
  ///
  /// The CPU must support SSSE3, and both regions must provide at least
  /// `blk_w` x `blk_h` readable samples from their origin.
  #[target_feature(enable = "ssse3")]
  unsafe fn sad_hbd_ssse3(
    plane_org: &PlaneRegion<'_, u16>,
    plane_ref: &PlaneRegion<'_, u16>,
    blk_w: usize,
    blk_h: usize,
    bit_depth: usize,
  ) -> u32 {
    // The plane stride is doubled for `u16` samples -- presumably the
    // routines expect a stride in bytes; confirm against the assembly.
    let org_stride = (plane_org.plane_cfg.stride * 2) as libc::ptrdiff_t;
    let ref_stride = (plane_ref.plane_cfg.stride * 2) as libc::ptrdiff_t;
    assert!(blk_h >= 4 && blk_w >= 4);
    let step_size =
      blk_h.min(blk_w).min(if bit_depth <= 10 { 128 } else { 4 });
    // `ilog()` of the tile side selects the matching square kernel.
    let func = match step_size.ilog() {
      3 => rav1e_sad_4x4_hbd_ssse3,
      4 => rav1e_sad_8x8_hbd10_ssse3,
      5 => rav1e_sad_16x16_hbd_ssse3,
      6 => rav1e_sad_32x32_hbd10_ssse3,
      7 => rav1e_sad_64x64_hbd10_ssse3,
      8 => rav1e_sad_128x128_hbd10_ssse3,
      _ => rav1e_sad_128x128_hbd10_ssse3
    };
    let mut sum = 0u32;
    for r in (0..blk_h).step_by(step_size) {
      for c in (0..blk_w).step_by(step_size) {
        // FIXME for now, T == u16
        let org_ptr = &plane_org[r][c] as *const u16;
        let ref_ptr = &plane_ref[r][c] as *const u16;
        sum += func(org_ptr, org_stride, ref_ptr, ref_stride);
      }
    }
    sum
  }

  /// SAD of a `blk_w` x `blk_h` block of `u8` samples using the SSE2
  /// routines.
  ///
  /// A 16x16 block whose source pointer is 16-byte aligned is handled by a
  /// single call. Otherwise the block is covered with square tiles whose
  /// side is limited by both the block dimensions and the alignment of the
  /// source pointer.
  ///
  /// # Safety
  ///
  /// The CPU must support SSE2, and both regions must provide at least
  /// `blk_w` x `blk_h` readable samples from their origin.
  #[target_feature(enable = "sse2")]
  unsafe fn sad_sse2(
    plane_org: &PlaneRegion<'_, u8>,
    plane_ref: &PlaneRegion<'_, u8>,
    blk_w: usize,
    blk_h: usize,
  ) -> u32 {
    let org_ptr = plane_org.data_ptr();
    let ref_ptr = plane_ref.data_ptr();
    let org_stride = plane_org.plane_cfg.stride as libc::ptrdiff_t;
    let ref_stride = plane_ref.plane_cfg.stride as libc::ptrdiff_t;
    if blk_w == 16 && blk_h == 16 && (org_ptr as usize & 15) == 0 {
      return rav1e_sad16x16_sse2(org_ptr, org_stride, ref_ptr, ref_stride);
    }
    // Note: unaligned blocks come from hres/qres ME search
    let ptr_align_log2 = (org_ptr as usize).trailing_zeros() as usize;
    // The largest unaligned-safe function is for 8x8
    let ptr_align = 1 << ptr_align_log2.max(3);
    let step_size = blk_h.min(blk_w).min(ptr_align);
    let func = match step_size.ilog() {
      3 => rav1e_sad4x4_sse2,
      4 => rav1e_sad8x8_sse2,
      5 => rav1e_sad16x16_sse2,
      6 => rav1e_sad32x32_sse2,
      7 => rav1e_sad64x64_sse2,
      8 => rav1e_sad128x128_sse2,
      _ => rav1e_sad128x128_sse2
    };
    let mut sum = 0u32;
    for r in (0..blk_h).step_by(step_size) {
      for c in (0..blk_w).step_by(step_size) {
        let org_ptr = &plane_org[r][c] as *const u8;
        let ref_ptr = &plane_ref[r][c] as *const u8;
        sum += func(org_ptr, org_stride, ref_ptr, ref_stride);
      }
    }
    sum
  }

  /// SAD of a `blk_w` x `blk_h` block of `u8` samples using a single
  /// size-specific routine (SSE2 kernels for widths 4 and 8, AVX2 kernels
  /// for widths 16 and up).
  ///
  /// # Panics
  ///
  /// Hits `unreachable!()` when `(blk_w, blk_h)` is not one of the sizes
  /// listed in the dispatch table below.
  ///
  /// # Safety
  ///
  /// The CPU must support AVX2, and both regions must provide at least
  /// `blk_w` x `blk_h` readable samples from their origin.
  #[target_feature(enable = "avx2")]
  unsafe fn sad_avx2(
    plane_org: &PlaneRegion<'_, u8>,
    plane_ref: &PlaneRegion<'_, u8>,
    blk_w: usize,
    blk_h: usize,
  ) -> u32 {
    let org_ptr = plane_org.data_ptr();
    let ref_ptr = plane_ref.data_ptr();
    let org_stride = plane_org.plane_cfg.stride as libc::ptrdiff_t;
    let ref_stride = plane_ref.plane_cfg.stride as libc::ptrdiff_t;

    let func = match (blk_w, blk_h) {
      (4, 4) => rav1e_sad4x4_sse2,
      (4, 8) => rav1e_sad4x8_sse2,
      (4, 16) => rav1e_sad4x16_sse2,

      (8, 4) => rav1e_sad8x4_sse2,
      (8, 8) => rav1e_sad8x8_sse2,
      (8, 16) => rav1e_sad8x16_sse2,
      (8, 32) => rav1e_sad8x32_sse2,

      (16, 4) => rav1e_sad16x4_avx2,
      (16, 8) => rav1e_sad16x8_avx2,
      (16, 16) => rav1e_sad16x16_avx2,
      (16, 32) => rav1e_sad16x32_avx2,
      (16, 64) => rav1e_sad16x64_avx2,

      (32, 8) => rav1e_sad32x8_avx2,
      (32, 16) => rav1e_sad32x16_avx2,
      (32, 32) => rav1e_sad32x32_avx2,
      (32, 64) => rav1e_sad32x64_avx2,

      (64, 16) => rav1e_sad64x16_avx2,
      (64, 32) => rav1e_sad64x32_avx2,
      (64, 64) => rav1e_sad64x64_avx2,
      (64, 128) => rav1e_sad64x128_avx2,

      (128, 64) => rav1e_sad128x64_avx2,
      (128, 128) => rav1e_sad128x128_avx2,

      _ => unreachable!()
    };
    func(org_ptr, org_stride, ref_ptr, ref_stride)
  }

  /// Sum of absolute differences between the top-left `blk_w` x `blk_h`
  /// samples of `plane_org` and `plane_ref`.
  ///
  /// Blocks of at least 4x4 are routed to an accelerated kernel chosen by
  /// sample size and detected CPU features (SSSE3 for 2-byte samples; AVX2,
  /// then SSE2, for 1-byte samples). Everything else falls back to
  /// `super::native::get_sad`.
  #[inline(always)]
  pub fn get_sad<T: Pixel>(
    plane_org: &PlaneRegion<'_, T>,
    plane_ref: &PlaneRegion<'_, T>,
    blk_w: usize,
    blk_h: usize,
    bit_depth: usize,
  ) -> u32 {
    // This module is already gated on x86_64 + "nasm", so no inner `cfg`
    // is needed around the accelerated paths.
    if blk_h >= 4 && blk_w >= 4 {
      match mem::size_of::<T>() {
        2 if is_x86_feature_detected!("ssse3") => {
          // SAFETY: relies on every 2-byte `Pixel` being `u16`, so that
          // `PlaneRegion<T>` and `PlaneRegion<u16>` are the same type --
          // TODO confirm against the `Pixel` implementors. SSSE3 support
          // was just detected.
          return unsafe {
            let plane_org =
              &*(plane_org as *const _ as *const PlaneRegion<'_, u16>);
            let plane_ref =
              &*(plane_ref as *const _ as *const PlaneRegion<'_, u16>);
            sad_hbd_ssse3(plane_org, plane_ref, blk_w, blk_h, bit_depth)
          };
        }
        1 if is_x86_feature_detected!("avx2") => {
          // SAFETY: relies on every 1-byte `Pixel` being `u8` -- TODO
          // confirm. AVX2 support was just detected.
          return unsafe {
            let plane_org =
              &*(plane_org as *const _ as *const PlaneRegion<'_, u8>);
            let plane_ref =
              &*(plane_ref as *const _ as *const PlaneRegion<'_, u8>);
            sad_avx2(plane_org, plane_ref, blk_w, blk_h)
          };
        }
        1 if is_x86_feature_detected!("sse2") => {
          // SAFETY: as above for `u8`; SSE2 support was just detected.
          return unsafe {
            let plane_org =
              &*(plane_org as *const _ as *const PlaneRegion<'_, u8>);
            let plane_ref =
              &*(plane_ref as *const _ as *const PlaneRegion<'_, u8>);
            sad_sse2(plane_org, plane_ref, blk_w, blk_h)
          };
        }
        _ => {}
      }
    }
    super::native::get_sad(plane_org, plane_ref, blk_w, blk_h, bit_depth)
  }
}

mod native {
Romain Vimont's avatar
Romain Vimont committed
250
  use crate::tiling::*;
251
  use crate::util::*;
252

Luca Barbato's avatar
Luca Barbato committed
253
  #[inline(always)]
254
  pub fn get_sad<T: Pixel>(
Romain Vimont's avatar
Romain Vimont committed
255 256 257 258 259
    plane_org: &PlaneRegion<'_, T>,
    plane_ref: &PlaneRegion<'_, T>,
    blk_w: usize,
    blk_h: usize,
    _bit_depth: usize,
Luca Barbato's avatar
Luca Barbato committed
260 261
  ) -> u32 {
    let mut sum = 0 as u32;
262

Romain Vimont's avatar
Romain Vimont committed
263
    for (slice_org, slice_ref) in plane_org.rows_iter().take(blk_h).zip(plane_ref.rows_iter()) {
264 265
      sum += slice_org
        .iter()
Romain Vimont's avatar
Romain Vimont committed
266
        .take(blk_w)
267
        .zip(slice_ref)
268
        .map(|(&a, &b)| (i32::cast_from(a) - i32::cast_from(b)).abs() as u32)
269
        .sum::<u32>();
Luca Barbato's avatar
Luca Barbato committed
270
    }
271

Luca Barbato's avatar
Luca Barbato committed
272 273
    sum
  }
274 275
}

Romain Vimont's avatar
Romain Vimont committed
276 277 278 279 280 281 282 283 284 285 286 287 288 289 290
/// Motion vectors for a whole frame, stored as one flat buffer of
/// `cols * rows` entries. Indexing with a row number yields that row as a
/// slice of `cols` vectors.
#[derive(Debug, Clone)]
pub struct FrameMotionVectors {
  // Flat storage; row `r` occupies `mvs[r * cols..(r + 1) * cols]`.
  mvs: Box<[MotionVector]>,
  // Number of motion vectors per row.
  pub cols: usize,
  // Number of rows.
  pub rows: usize,
}

impl FrameMotionVectors {
  pub fn new(cols: usize, rows: usize) -> Self {
    Self {
      mvs: vec![MotionVector::default(); cols * rows].into_boxed_slice(),
      cols,
      rows,
    }
  }
291 292 293 294 295 296 297 298 299 300

  #[inline(always)]
  pub fn as_tile_motion_vectors(&self) -> TileMotionVectors<'_> {
    TileMotionVectors::new(self, 0, 0, self.cols, self.rows)
  }

  #[inline(always)]
  pub fn as_tile_motion_vectors_mut(&mut self) -> TileMotionVectorsMut<'_> {
    TileMotionVectorsMut::new(self, 0, 0, self.cols, self.rows)
  }
Romain Vimont's avatar
Romain Vimont committed
301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317
}

/// Row access: `frame_mvs[r]` is the slice of `cols` vectors in row `r`.
impl Index<usize> for FrameMotionVectors {
  type Output = [MotionVector];
  #[inline]
  fn index(&self, index: usize) -> &Self::Output {
    let row_start = index * self.cols;
    let row_end = (index + 1) * self.cols;
    &self.mvs[row_start..row_end]
  }
}

/// Mutable row access: `frame_mvs[r]` is the slice of `cols` vectors in
/// row `r`.
impl IndexMut<usize> for FrameMotionVectors {
  #[inline]
  fn index_mut(&mut self, index: usize) -> &mut Self::Output {
    let row_start = index * self.cols;
    let row_end = (index + 1) * self.cols;
    &mut self.mvs[row_start..row_end]
  }
}

318
fn get_mv_range(
Romain Vimont's avatar
Romain Vimont committed
319
  w_in_b: usize, h_in_b: usize, bo: BlockOffset, blk_w: usize, blk_h: usize
320
) -> (isize, isize, isize, isize) {
321 322 323
  let border_w = 128 + blk_w as isize * 8;
  let border_h = 128 + blk_h as isize * 8;
  let mvx_min = -(bo.x as isize) * (8 * MI_SIZE) as isize - border_w;
324
  let mvx_max = (w_in_b - bo.x - blk_w / MI_SIZE) as isize * (8 * MI_SIZE) as isize + border_w;
325
  let mvy_min = -(bo.y as isize) * (8 * MI_SIZE) as isize - border_h;
326
  let mvy_max = (h_in_b - bo.y - blk_h / MI_SIZE) as isize * (8 * MI_SIZE) as isize + border_h;
327 328 329 330

  (mvx_min, mvx_max, mvy_min, mvy_max)
}

331
pub fn get_subset_predictors<T: Pixel>(
332
  tile_bo: BlockOffset, cmv: MotionVector,
333
  tile_mvs: &TileMotionVectors<'_>, frame_ref_opt: Option<&ReferenceFrame<T>>,
334
  ref_frame_id: usize
335 336 337
) -> (Vec<MotionVector>) {
  let mut predictors = Vec::new();

338 339 340 341 342 343
  // Zero motion vector
  predictors.push(MotionVector::default());

  // Coarse motion estimation.
  predictors.push(cmv.quantize_to_fullpel());

344 345
  // EPZS subset A and B predictors.

346
  let mut median_preds = Vec::new();
347 348
  if tile_bo.x > 0 {
    let left = tile_mvs[tile_bo.y][tile_bo.x - 1];
349 350
    median_preds.push(left);
    if !left.is_zero() { predictors.push(left); }
351
  }
352 353
  if tile_bo.y > 0 {
    let top = tile_mvs[tile_bo.y - 1][tile_bo.x];
354 355
    median_preds.push(top);
    if !top.is_zero() { predictors.push(top); }
356

357 358
    if tile_bo.x < tile_mvs.cols() - 1 {
      let top_right = tile_mvs[tile_bo.y - 1][tile_bo.x + 1];
359 360
      median_preds.push(top_right);
      if !top_right.is_zero() { predictors.push(top_right); }
361 362 363
    }
  }

364
  if !median_preds.is_empty() {
Vladimir Kazakov's avatar
Vladimir Kazakov committed
365
    let mut median_mv = MotionVector::default();
366
    for mv in median_preds.iter() {
367 368
      median_mv = median_mv + *mv;
    }
369 370 371
    median_mv = median_mv / (median_preds.len() as i16);
    let median_mv_quant = median_mv.quantize_to_fullpel();
    if !median_mv_quant.is_zero() { predictors.push(median_mv_quant); }
372 373 374 375 376
  }

  // EPZS subset C predictors.

  if let Some(ref frame_ref) = frame_ref_opt {
377
    let prev_frame_mvs = &frame_ref.frame_mvs[ref_frame_id];
378

379
    let frame_bo = BlockOffset {
380 381
      x: tile_mvs.x() + tile_bo.x,
      y: tile_mvs.y() + tile_bo.y,
382 383 384
    };
    if frame_bo.x > 0 {
      let left = prev_frame_mvs[frame_bo.y][frame_bo.x - 1];
385
      if !left.is_zero() { predictors.push(left); }
386
    }
387 388
    if frame_bo.y > 0 {
      let top = prev_frame_mvs[frame_bo.y - 1][frame_bo.x];
389
      if !top.is_zero() { predictors.push(top); }
390
    }
391 392
    if frame_bo.x < prev_frame_mvs.cols - 1 {
      let right = prev_frame_mvs[frame_bo.y][frame_bo.x + 1];
393
      if !right.is_zero() { predictors.push(right); }
394
    }
395 396
    if frame_bo.y < prev_frame_mvs.rows - 1 {
      let bottom = prev_frame_mvs[frame_bo.y + 1][frame_bo.x];
397
      if !bottom.is_zero() { predictors.push(bottom); }
398 399
    }

400
    let previous = prev_frame_mvs[frame_bo.y][frame_bo.x];
401
    if !previous.is_zero() { predictors.push(previous); }
402 403 404 405 406
  }

  predictors
}

407
pub trait MotionEstimation {
408
  fn full_pixel_me<T: Pixel>(
409
    fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, rec: &ReferenceFrame<T>,
410
    tile_bo: BlockOffset, lambda: u32,
411
    cmv: MotionVector, pmv: [MotionVector; 2],
412 413
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize, best_mv: &mut MotionVector,
Thomas Daede's avatar
Thomas Daede committed
414
    lowest_cost: &mut u64, ref_frame: RefType
415
  );
416

417
  fn sub_pixel_me<T: Pixel>(
418
    fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, rec: &ReferenceFrame<T>,
419
    tile_bo: BlockOffset, lambda: u32, pmv: [MotionVector; 2],
420 421
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize, best_mv: &mut MotionVector,
Thomas Daede's avatar
Thomas Daede committed
422
    lowest_cost: &mut u64, ref_frame: RefType
423 424
  );

425
  fn motion_estimation<T: Pixel> (
426
    fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, bsize: BlockSize,
427
    tile_bo: BlockOffset, ref_frame: RefType, cmv: MotionVector,
428
    pmv: [MotionVector; 2]
429
  ) -> MotionVector {
Thomas Daede's avatar
Thomas Daede committed
430
    match fi.rec_buffer.frames[fi.ref_frames[ref_frame.to_index()] as usize]
431 432 433 434
    {
      Some(ref rec) => {
        let blk_w = bsize.width();
        let blk_h = bsize.height();
435
        let frame_bo = ts.to_frame_block_offset(tile_bo);
436
        let (mvx_min, mvx_max, mvy_min, mvy_max) =
437
          get_mv_range(fi.w_in_b, fi.h_in_b, frame_bo, blk_w, blk_h);
438 439 440 441 442 443 444 445 446

        // 0.5 is a fudge factor
        let lambda = (fi.me_lambda * 256.0 * 0.5) as u32;

        // Full-pixel motion estimation

        let mut lowest_cost = std::u64::MAX;
        let mut best_mv = MotionVector::default();

447
        Self::full_pixel_me(fi, ts, rec, tile_bo, lambda, cmv, pmv,
448 449
                           mvx_min, mvx_max, mvy_min, mvy_max, blk_w, blk_h,
                           &mut best_mv, &mut lowest_cost, ref_frame);
450

451
        Self::sub_pixel_me(fi, ts, rec, tile_bo, lambda, pmv,
452
                           mvx_min, mvx_max, mvy_min, mvy_max, blk_w, blk_h,
453
                           &mut best_mv, &mut lowest_cost, ref_frame);
454 455

        best_mv
Frank Bossen's avatar
Frank Bossen committed
456 457
      }

458
      None => MotionVector::default()
459
    }
460
  }
461 462

  fn estimate_motion_ss2<T: Pixel>(
463
    fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, bsize: BlockSize, ref_idx: usize,
464
    tile_bo: BlockOffset, pmvs: &[Option<MotionVector>; 3], ref_frame: usize
465 466 467 468
  ) -> Option<MotionVector> {
    if let Some(ref rec) = fi.rec_buffer.frames[ref_idx] {
      let blk_w = bsize.width();
      let blk_h = bsize.height();
469 470
      let tile_bo_adj = adjust_bo(tile_bo, ts.mi_width, ts.mi_height, blk_w, blk_h);
      let frame_bo_adj = ts.to_frame_block_offset(tile_bo_adj);
471
      let (mvx_min, mvx_max, mvy_min, mvy_max) = get_mv_range(fi.w_in_b, fi.h_in_b, frame_bo_adj, blk_w, blk_h);
472 473

      let global_mv = [MotionVector{row: 0, col: 0}; 2];
474
      let tile_mvs = &ts.mvs[ref_frame].as_const();
475
      let frame_ref_opt = fi.rec_buffer.frames[fi.ref_frames[0] as usize].as_ref().map(Arc::as_ref);
476 477 478 479 480 481 482 483

      let mut lowest_cost = std::u64::MAX;
      let mut best_mv = MotionVector::default();

      // Divide by 4 to account for subsampling, 0.125 is a fudge factor
      let lambda = (fi.me_lambda * 256.0 / 4.0 * 0.125) as u32;

      Self::me_ss2(
484
        fi, ts, pmvs, tile_bo_adj,
Romain Vimont's avatar
Romain Vimont committed
485
        tile_mvs, frame_ref_opt, rec, global_mv, lambda,
486 487 488 489 490 491 492 493 494 495 496
        mvx_min, mvx_max, mvy_min, mvy_max, blk_w, blk_h,
        &mut best_mv, &mut lowest_cost
      );

      Some(MotionVector { row: best_mv.row * 2, col: best_mv.col * 2 })
    } else {
      None
    }
  }

  fn me_ss2<T: Pixel>(
497
    fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>,
498
    pmvs: &[Option<MotionVector>; 3], tile_bo_adj: BlockOffset,
499
    tile_mvs: &TileMotionVectors<'_>, frame_ref_opt: Option<&ReferenceFrame<T>>,
500
    rec: &ReferenceFrame<T>, global_mv: [MotionVector; 2], lambda: u32,
501 502 503 504
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize,
    best_mv: &mut MotionVector, lowest_cost: &mut u64
  );
505
}
506

507 508 509
/// `MotionEstimation` strategy that seeds from predictor vectors and
/// refines with `diamond_me_search`.
pub struct DiamondSearch {}
/// `MotionEstimation` strategy that scans a search window with
/// `full_search` and refines with `telescopic_subpel_search`.
pub struct FullSearch {}

510 511
impl MotionEstimation for DiamondSearch {
  fn full_pixel_me<T: Pixel>(
512
    fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, rec: &ReferenceFrame<T>,
513
    tile_bo: BlockOffset, lambda: u32,
514 515
    cmv: MotionVector, pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
516
    best_mv: &mut MotionVector, lowest_cost: &mut u64, ref_frame: RefType
517
  ) {
518
    let tile_mvs = &ts.mvs[ref_frame.to_index()].as_const();
519
    let frame_ref = fi.rec_buffer.frames[fi.ref_frames[0] as usize].as_ref().map(Arc::as_ref);
520
    let predictors =
Romain Vimont's avatar
Romain Vimont committed
521
      get_subset_predictors(tile_bo, cmv, tile_mvs, frame_ref, ref_frame.to_index());
522

523
    let frame_bo = ts.to_frame_block_offset(tile_bo);
524 525
    diamond_me_search(
      fi,
526 527
      frame_bo.to_luma_plane_offset(),
      &ts.input.planes[0],
528 529 530 531 532 533 534 535 536 537 538 539 540
      &rec.frame.planes[0],
      &predictors,
      fi.sequence.bit_depth,
      pmv,
      lambda,
      mvx_min,
      mvx_max,
      mvy_min,
      mvy_max,
      blk_w,
      blk_h,
      best_mv,
      lowest_cost,
541
      false,
542 543 544
      ref_frame
    );
  }
545 546

  fn sub_pixel_me<T: Pixel>(
547
    fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, rec: &ReferenceFrame<T>,
548
    tile_bo: BlockOffset, lambda: u32,
549 550
    pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
551
    best_mv: &mut MotionVector, lowest_cost: &mut u64, ref_frame: RefType,
552 553 554
  )
  {
    let predictors = vec![*best_mv];
555
    let frame_bo = ts.to_frame_block_offset(tile_bo);
556 557
    diamond_me_search(
      fi,
558 559
      frame_bo.to_luma_plane_offset(),
      &ts.input.planes[0],
560 561 562 563 564 565 566 567 568 569 570 571 572
      &rec.frame.planes[0],
      &predictors,
      fi.sequence.bit_depth,
      pmv,
      lambda,
      mvx_min,
      mvx_max,
      mvy_min,
      mvy_max,
      blk_w,
      blk_h,
      best_mv,
      lowest_cost,
573
      true,
574 575 576
      ref_frame
    );
  }
577 578

  fn me_ss2<T: Pixel>(
579
    fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>,
580
    pmvs: &[Option<MotionVector>; 3], tile_bo_adj: BlockOffset,
581
    tile_mvs: &TileMotionVectors<'_>, frame_ref_opt: Option<&ReferenceFrame<T>>,
582
    rec: &ReferenceFrame<T>, global_mv: [MotionVector; 2], lambda: u32,
583 584 585 586
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize,
    best_mv: &mut MotionVector, lowest_cost: &mut u64
  ) {
587 588
    let frame_bo_adj = ts.to_frame_block_offset(tile_bo_adj);
    let frame_po = PlaneOffset {
589 590
      x: (frame_bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
      y: (frame_bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
591
    };
592 593 594
    for omv in pmvs.iter() {
      if let Some(pmv) = omv {
        let mut predictors = get_subset_predictors::<T>(
595
          tile_bo_adj,
596
          MotionVector{row: pmv.row, col: pmv.col},
597
          &tile_mvs, frame_ref_opt, 0
598 599 600 601 602 603 604 605
        );

        for predictor in &mut predictors {
          predictor.row >>= 1;
          predictor.col >>= 1;
        }

        diamond_me_search(
606
          fi, frame_po,
607
          &ts.input_hres, &rec.input_hres,
608 609 610 611 612
          &predictors, fi.sequence.bit_depth,
          global_mv, lambda,
          mvx_min >> 1, mvx_max >> 1, mvy_min >> 1, mvy_max >> 1,
          blk_w >> 1, blk_h >> 1,
          best_mv, lowest_cost,
Thomas Daede's avatar
Thomas Daede committed
613
          false, LAST_FRAME
614 615 616 617
        );
      }
    }
  }
618 619 620 621
}

impl MotionEstimation for FullSearch {
  fn full_pixel_me<T: Pixel>(
622
    fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, rec: &ReferenceFrame<T>,
623
    tile_bo: BlockOffset, lambda: u32,
624 625
    cmv: MotionVector, pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
626
    best_mv: &mut MotionVector, lowest_cost: &mut u64, _ref_frame: RefType
627
  ) {
628 629
    let frame_bo = ts.to_frame_block_offset(tile_bo);
    let frame_po = frame_bo.to_luma_plane_offset();
630
    let range = 16;
631
    let x_lo = frame_po.x
632
      + ((-range + (cmv.col / 8) as isize).max(mvx_min / 8).min(mvx_max / 8));
633
    let x_hi = frame_po.x
634
      + ((range + (cmv.col / 8) as isize).max(mvx_min / 8).min(mvx_max / 8));
635
    let y_lo = frame_po.y
636
      + ((-range + (cmv.row / 8) as isize).max(mvy_min / 8).min(mvy_max / 8));
637
    let y_hi = frame_po.y
638 639 640 641 642 643 644 645 646
      + ((range + (cmv.row / 8) as isize).max(mvy_min / 8).min(mvy_max / 8));

    full_search(
      x_lo,
      x_hi,
      y_lo,
      y_hi,
      blk_h,
      blk_w,
647
      &ts.input.planes[0],
648 649 650
      &rec.frame.planes[0],
      best_mv,
      lowest_cost,
651
      frame_po,
652 653 654 655 656 657 658
      2,
      fi.sequence.bit_depth,
      lambda,
      pmv,
      fi.allow_high_precision_mv
    );
  }
659 660

  fn sub_pixel_me<T: Pixel>(
661
    fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, _rec: &ReferenceFrame<T>,
662
    tile_bo: BlockOffset, lambda: u32,
663
    pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
664
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
Thomas Daede's avatar
Thomas Daede committed
665
    best_mv: &mut MotionVector, lowest_cost: &mut u64, ref_frame: RefType,
666 667
  )
  {
668
    let frame_bo = ts.to_frame_block_offset(tile_bo);
669 670
    telescopic_subpel_search(
      fi,
671 672
      ts,
      frame_bo.to_luma_plane_offset(),
673 674 675 676 677 678 679
      lambda,
      ref_frame,
      pmv,
      mvx_min,
      mvx_max,
      mvy_min,
      mvy_max,
680 681
      blk_w,
      blk_h,
682 683 684 685
      best_mv,
      lowest_cost
    );
  }
686 687

  fn me_ss2<T: Pixel>(
688
    fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>,
689
    pmvs: &[Option<MotionVector>; 3], tile_bo_adj: BlockOffset,
690
    _tile_mvs: &TileMotionVectors<'_>, _frame_ref_opt: Option<&ReferenceFrame<T>>,
691
    rec: &ReferenceFrame<T>, _global_mv: [MotionVector; 2], lambda: u32,
692 693 694 695
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize,
    best_mv: &mut MotionVector, lowest_cost: &mut u64
  ) {
696 697
    let frame_bo_adj = ts.to_frame_block_offset(tile_bo_adj);
    let frame_po = PlaneOffset {
698 699
      x: (frame_bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
      y: (frame_bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
700
    };
701 702 703
    let range = 16;
    for omv in pmvs.iter() {
      if let Some(pmv) = omv {
704 705 706 707
        let x_lo = frame_po.x + (((pmv.col as isize / 8 - range).max(mvx_min / 8).min(mvx_max / 8)) >> 1);
        let x_hi = frame_po.x + (((pmv.col as isize / 8 + range).max(mvx_min / 8).min(mvx_max / 8)) >> 1);
        let y_lo = frame_po.y + (((pmv.row as isize / 8 - range).max(mvy_min / 8).min(mvy_max / 8)) >> 1);
        let y_hi = frame_po.y + (((pmv.row as isize / 8 + range).max(mvy_min / 8).min(mvy_max / 8)) >> 1);
708 709 710 711 712 713 714
        full_search(
          x_lo,
          x_hi,
          y_lo,
          y_hi,
          blk_h >> 1,
          blk_w >> 1,
715
          &ts.input_hres,
716 717 718
          &rec.input_hres,
          best_mv,
          lowest_cost,
719
          frame_po,
720 721 722 723 724 725 726 727 728
          1,
          fi.sequence.bit_depth,
          lambda,
          [MotionVector::default(); 2],
          fi.allow_high_precision_mv
        );
      }
    }
  }
729
}
730

731 732
fn get_best_predictor<T: Pixel>(
  fi: &FrameInvariants<T>,
Romain Vimont's avatar
Romain Vimont committed
733
  po: PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>,
734
  predictors: &[MotionVector],
735
  bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
736 737
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
738
  center_mv: &mut MotionVector, center_mv_cost: &mut u64,
Thomas Daede's avatar
Thomas Daede committed
739
  tmp_plane_opt: &mut Option<Plane<T>>, ref_frame: RefType) {
Vladimir Kazakov's avatar
Vladimir Kazakov committed
740
  *center_mv = MotionVector::default();
741 742 743 744 745 746
  *center_mv_cost = std::u64::MAX;

  for &init_mv in predictors.iter() {
    let cost = get_mv_rd_cost(
      fi, po, p_org, p_ref, bit_depth,
      pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
747
      blk_w, blk_h, init_mv, tmp_plane_opt, ref_frame);
748 749 750 751 752 753 754 755

    if cost < *center_mv_cost {
      *center_mv = init_mv;
      *center_mv_cost = cost;
    }
  }
}

756 757
fn diamond_me_search<T: Pixel>(
  fi: &FrameInvariants<T>,
Romain Vimont's avatar
Romain Vimont committed
758
  po: PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>,
759
  predictors: &[MotionVector],
760
  bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
761 762
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
763
  center_mv: &mut MotionVector, center_mv_cost: &mut u64,
Thomas Daede's avatar
Thomas Daede committed
764
  subpixel: bool, ref_frame: RefType)
765 766
{
  let diamond_pattern = [(1i16, 0i16), (0, 1), (-1, 0), (0, -1)];
767 768
  let (mut diamond_radius, diamond_radius_end, mut tmp_plane_opt) = {
    if subpixel {
769
      // Sub-pixel motion estimation
770 771 772 773 774
      (
        4i16,
        if fi.allow_high_precision_mv {1i16} else {2i16},
        Some(Plane::new(blk_w, blk_h, 0, 0, 0, 0)),
      )
775 776
    } else {
      // Full pixel motion estimation
777
      (16i16, 8i16, None)
778 779
    }
  };
780 781 782 783

  get_best_predictor(
    fi, po, p_org, p_ref, &predictors,
    bit_depth, pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
784
    blk_w, blk_h, center_mv, center_mv_cost,
785
    &mut tmp_plane_opt, ref_frame);
786 787 788

  loop {
    let mut best_diamond_rd_cost = std::u64::MAX;
Vladimir Kazakov's avatar
Vladimir Kazakov committed
789
    let mut best_diamond_mv = MotionVector::default();
790 791 792 793 794 795 796 797 798

    for p in diamond_pattern.iter() {

        let cand_mv = MotionVector {
          row: center_mv.row + diamond_radius * p.0,
          col: center_mv.col + diamond_radius * p.1
        };

        let rd_cost = get_mv_rd_cost(
Romain Vimont's avatar
Romain Vimont committed
799
          fi, po, p_org, p_ref, bit_depth,
800
          pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
801
          blk_w, blk_h, cand_mv, &mut tmp_plane_opt, ref_frame);
802 803 804 805 806 807 808 809

        if rd_cost < best_diamond_rd_cost {
          best_diamond_rd_cost = rd_cost;
          best_diamond_mv = cand_mv;
        }
    }

    if *center_mv_cost <= best_diamond_rd_cost {
810
      if diamond_radius == diamond_radius_end {
811 812 813 814 815 816 817 818 819 820 821 822 823 824
        break;
      } else {
        diamond_radius /= 2;
      }
    }
    else {
      *center_mv = best_diamond_mv;
      *center_mv_cost = best_diamond_rd_cost;
    }
  }

  assert!(*center_mv_cost < std::u64::MAX);
}

825 826
fn get_mv_rd_cost<T: Pixel>(
  fi: &FrameInvariants<T>,
Romain Vimont's avatar
Romain Vimont committed
827
  po: PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>, bit_depth: usize,
828
  pmv: [MotionVector; 2], lambda: u32,
829 830
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
831
  cand_mv: MotionVector, tmp_plane_opt: &mut Option<Plane<T>>,
Thomas Daede's avatar
Thomas Daede committed
832
  ref_frame: RefType) -> u64
833 834 835 836 837 838 839 840
{
  if (cand_mv.col as isize) < mvx_min || (cand_mv.col as isize) > mvx_max {
    return std::u64::MAX;
  }
  if (cand_mv.row as isize) < mvy_min || (cand_mv.row as isize) > mvy_max {
    return std::u64::MAX;
  }

Romain Vimont's avatar
Romain Vimont committed
841
  let plane_org = p_org.region(Area::StartingAt { x: po.x, y: po.y });
842

843 844 845 846
  if let Some(ref mut tmp_plane) = tmp_plane_opt {
    PredictionMode::NEWMV.predict_inter(
      fi,
      0,
Romain Vimont's avatar
Romain Vimont committed
847
      po,
Romain Vimont's avatar
Romain Vimont committed
848
      &mut tmp_plane.as_region_mut(),
849 850 851 852 853
      blk_w,
      blk_h,
      [ref_frame, NONE_FRAME],
      [cand_mv, MotionVector { row: 0, col: 0 }]
    );
Romain Vimont's avatar
Romain Vimont committed
854
    let plane_ref = tmp_plane.as_region();
855 856 857 858 859 860
    compute_mv_rd_cost(
      fi, pmv, lambda, bit_depth, blk_w, blk_h, cand_mv,
      &plane_org, &plane_ref
    )
  } else {
    // Full pixel motion vector
Romain Vimont's avatar
Romain Vimont committed
861
    let plane_ref = p_ref.region(Area::StartingAt {
862 863 864 865 866 867 868 869 870 871 872 873 874 875
      x: po.x + (cand_mv.col / 8) as isize,
      y: po.y + (cand_mv.row / 8) as isize
    });
    compute_mv_rd_cost(
      fi, pmv, lambda, bit_depth, blk_w, blk_h, cand_mv,
      &plane_org, &plane_ref
    )
  }
}

fn compute_mv_rd_cost<T: Pixel>(
  fi: &FrameInvariants<T>,
  pmv: [MotionVector; 2], lambda: u32,
  bit_depth: usize, blk_w: usize, blk_h: usize, cand_mv: MotionVector,
Romain Vimont's avatar
Romain Vimont committed
876
  plane_org: &PlaneRegion<'_, T>, plane_ref: &PlaneRegion<'_, T>
877 878
) -> u64
{
879
  let sad = get_sad(&plane_org, &plane_ref, blk_w, blk_h, bit_depth);
880 881 882 883 884 885 886 887

  let rate1 = get_mv_rate(cand_mv, pmv[0], fi.allow_high_precision_mv);
  let rate2 = get_mv_rate(cand_mv, pmv[1], fi.allow_high_precision_mv);
  let rate = rate1.min(rate2 + 1);

  256 * sad as u64 + rate as u64 * lambda as u64
}

888
fn telescopic_subpel_search<T: Pixel>(
889
  fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, po: PlaneOffset,
Thomas Daede's avatar
Thomas Daede committed
890
  lambda: u32, ref_frame: RefType, pmv: [MotionVector; 2],
891
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
892
  blk_w: usize, blk_h: usize,
Romain Vimont's avatar
Romain Vimont committed
893
  best_mv: &mut MotionVector, lowest_cost: &mut u64
894 895 896 897 898 899 900 901
) {
  let mode = PredictionMode::NEWMV;

  let mut steps = vec![8, 4, 2];
  if fi.allow_high_precision_mv {
    steps.push(1);
  }

Romain Vimont's avatar
Romain Vimont committed
902 903
  let mut tmp_plane = Plane::new(blk_w, blk_h, 0, 0, 0, 0);

904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928
  for step in steps {
    let center_mv_h = *best_mv;
    for i in 0..3 {
      for j in 0..3 {
        // Skip the center point that was already tested
        if i == 1 && j == 1 {
          continue;
        }

        let cand_mv = MotionVector {
          row: center_mv_h.row + step * (i as i16 - 1),
          col: center_mv_h.col + step * (j as i16 - 1)
        };

        if (cand_mv.col as isize) < mvx_min || (cand_mv.col as isize) > mvx_max {
          continue;
        }
        if (cand_mv.row as isize) < mvy_min || (cand_mv.row as isize) > mvy_max {
          continue;
        }

        {
          mode.predict_inter(
            fi,
            0,
Romain Vimont's avatar
Romain Vimont committed
929
            po,
Romain Vimont's avatar
Romain Vimont committed
930
            &mut tmp_plane.as_region_mut(),
931 932 933 934 935 936 937
            blk_w,
            blk_h,
            [ref_frame, NONE_FRAME],
            [cand_mv, MotionVector { row: 0, col: 0 }]
          );
        }

Romain Vimont's avatar
Romain Vimont committed
938 939
        let plane_org = ts.input.planes[0].region(Area::StartingAt { x: po.x, y: po.y });
        let plane_ref = tmp_plane.as_region();
940

941
        let sad = get_sad(&plane_org, &plane_ref, blk_w, blk_h, fi.sequence.bit_depth);
942 943 944 945 946 947 948 949 950 951 952 953 954 955 956

        let rate1 = get_mv_rate(cand_mv, pmv[0], fi.allow_high_precision_mv);
        let rate2 = get_mv_rate(cand_mv, pmv[1], fi.allow_high_precision_mv);
        let rate = rate1.min(rate2 + 1);
        let cost = 256 * sad as u64 + rate as u64 * lambda as u64;

        if cost < *lowest_cost {
          *lowest_cost = cost;
          *best_mv = cand_mv;
        }
      }
    }
  }
}

957
fn full_search<T: Pixel>(
Kyle Siefring's avatar
Kyle Siefring committed
958
  x_lo: isize, x_hi: isize, y_lo: isize, y_hi: isize, blk_h: usize,
959
  blk_w: usize, p_org: &Plane<T>, p_ref: &Plane<T>, best_mv: &mut MotionVector,
Romain Vimont's avatar
Romain Vimont committed
960
  lowest_cost: &mut u64, po: PlaneOffset, step: usize, bit_depth: usize,
961
  lambda: u32, pmv: [MotionVector; 2], allow_high_precision_mv: bool
Kyle Siefring's avatar
Kyle Siefring committed
962
) {
Luca Barbato's avatar
Luca Barbato committed
963 964 965 966 967
    let search_range_y = (y_lo..=y_hi).step_by(step);
    let search_range_x = (x_lo..=x_hi).step_by(step);
    let search_area = search_range_y.flat_map(|y| { search_range_x.clone().map(move |x| (y, x)) });

    let (cost, mv) = search_area.map(|(y, x)| {
Romain Vimont's avatar
Romain Vimont committed
968 969
      let plane_org = p_org.region(Area::StartingAt { x: po.x, y: po.y });
      let plane_ref = p_ref.region(Area::StartingAt { x, y });
970
      let sad = get_sad(&plane_org, &plane_ref, blk_w, blk_h, bit_depth);
971

Frank Bossen's avatar
Frank Bossen committed
972 973 974 975 976 977 978 979
      let mv = MotionVector {
        row: 8 * (y as i16 - po.y as i16),
        col: 8 * (x as i16 - po.x as i16)
      };

      let rate1 = get_mv_rate(mv, pmv[0], allow_high_precision_mv);
      let rate2 = get_mv_rate(mv, pmv[1], allow_high_precision_mv);
      let rate = rate1.min(rate2 + 1);
980
      let cost = 256 * sad as u64 + rate as u64 * lambda as u64;
Frank Bossen's avatar
Frank Bossen committed
981

Luca Barbato's avatar
Luca Barbato committed
982 983 984 985 986
      (cost, mv)
  }).min_by_key(|(c, _)| *c).unwrap();

    *lowest_cost = cost;
    *best_mv = mv;
987 988
}

989 990
// Adjust block offset such that entire block lies within boundaries.
//
// Each coordinate is capped at `mi_width - blk_w / 4` (resp.
// `mi_height - blk_h / 4`) and then floored at zero, so a block larger than
// the available area ends up at offset 0.
// NOTE(review): the divisor 4 presumably converts a block size in samples to
// block-offset units (MI_SIZE) - confirm.
fn adjust_bo(bo: BlockOffset, mi_width: usize, mi_height: usize, blk_w: usize, blk_h: usize) -> BlockOffset {
  // Signed arithmetic: the upper limit goes negative when the block does not fit.
  let clamp_axis = |pos: usize, mi_limit: usize, blk_len: usize| -> usize {
    let last_start = mi_limit as isize - blk_len as isize / 4;
    (pos as isize).min(last_start).max(0) as usize
  };

  BlockOffset {
    x: clamp_axis(bo.x, mi_width, blk_w),
    y: clamp_axis(bo.y, mi_height, blk_h)
  }
}

997
#[inline(always)]
Frank Bossen's avatar
Frank Bossen committed
998
fn get_mv_rate(a: MotionVector, b: MotionVector, allow_high_precision_mv: bool) -> u32 {
999
  #[inline(always)]
Frank Bossen's avatar
Frank Bossen committed
1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011
  fn diff_to_rate(diff: i16, allow_high_precision_mv: bool) -> u32 {
    let d = if allow_high_precision_mv { diff } else { diff >> 1 };
    if d == 0 {
      0
    } else {
      2 * (16 - d.abs().leading_zeros())
    }
  }

  diff_to_rate(a.row - b.row, allow_high_precision_mv) + diff_to_rate(a.col - b.col, allow_high_precision_mv)
}

1012
pub fn estimate_motion_ss4<T: Pixel>(
1013
  fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, bsize: BlockSize, ref_idx: usize,