me.rs 26.8 KB
Newer Older
1 2 3 4 5 6 7 8 9
// Copyright (c) 2017-2018, The rav1e contributors. All rights reserved
//
// This source code is subject to the terms of the BSD 2 Clause License and
// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
// was not distributed with this source code in the LICENSE file, you can
// obtain it at www.aomedia.org/license/software. If the Alliance for Open
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.

Raphaël Zumer's avatar
Raphaël Zumer committed
10 11 12 13
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
pub use self::nasm::get_sad;
#[cfg(any(not(target_arch = "x86_64"), windows, not(feature = "nasm")))]
pub use self::native::get_sad;
Raphaël Zumer's avatar
Raphaël Zumer committed
14
use crate::context::{BlockOffset, BLOCK_TO_PLANE_SHIFT, MI_SIZE};
15
use crate::encoder::ReferenceFrame;
Raphaël Zumer's avatar
Raphaël Zumer committed
16 17 18 19
use crate::FrameInvariants;
use crate::FrameState;
use crate::partition::*;
use crate::plane::*;
20
use crate::util::Pixel;
21 22

use std::sync::Arc;
23

24 25
/// SAD (sum of absolute differences) backed by external assembly kernels.
///
/// Only compiled for x86_64, non-Windows builds with the `nasm` feature.
/// `get_sad` picks a kernel at run time and falls back to the portable
/// implementation in `super::native` when no kernel applies.
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
mod nasm {
  use crate::plane::*;
  use crate::util::*;
  use std::mem;

  use libc;

  // Externally linked SAD kernels. Every kernel takes two pixel pointers,
  // each followed by its row stride in bytes, and returns a `u32`.
  // The names encode the square block size each kernel is meant for.
  extern {
    // Kernels operating on 16-bit samples.
    fn rav1e_sad_4x4_hbd_ssse3(
      src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad_8x8_hbd10_ssse3(
      src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad_16x16_hbd_ssse3(
      src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad_32x32_hbd10_ssse3(
      src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad_64x64_hbd10_ssse3(
      src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad_128x128_hbd10_ssse3(
      src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    // Kernels operating on 8-bit samples.
    fn rav1e_sad4x4_sse2(
      src: *const u8, src_stride: libc::ptrdiff_t, dst: *const u8,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad8x8_sse2(
      src: *const u8, src_stride: libc::ptrdiff_t, dst: *const u8,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad16x16_sse2(
      src: *const u8, src_stride: libc::ptrdiff_t, dst: *const u8,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad32x32_sse2(
      src: *const u8, src_stride: libc::ptrdiff_t, dst: *const u8,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad64x64_sse2(
      src: *const u8, src_stride: libc::ptrdiff_t, dst: *const u8,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad128x128_sse2(
      src: *const u8, src_stride: libc::ptrdiff_t, dst: *const u8,
      dst_stride: libc::ptrdiff_t
    ) -> u32;
  }

  /// SAD of a `blk_w` x `blk_h` block of 16-bit pixels using the SSSE3
  /// kernels.
  ///
  /// The block is tiled with square sub-blocks of side
  /// `min(blk_h, blk_w, limit)`, where `limit` is 128 for bit depths up to
  /// 10 and 4 otherwise; the kernel results are summed.
  ///
  /// # Panics
  ///
  /// Panics if `T` is not two bytes wide or if either block dimension is
  /// smaller than 4.
  ///
  /// # Safety
  ///
  /// The caller must have verified that the CPU supports SSSE3, and both
  /// slices must reference at least `blk_w` x `blk_h` readable pixels.
  #[target_feature(enable = "ssse3")]
  unsafe fn sad_ssse3<T: Pixel>(
    plane_org: &PlaneSlice<'_, T>, plane_ref: &PlaneSlice<'_, T>, blk_h: usize,
    blk_w: usize, bit_depth: usize
  ) -> u32 {
    assert!(mem::size_of::<T>() == 2, "only implemented for u16 for now");
    let mut sum = 0 as u32;
    // The kernels take their strides in bytes, not in pixels.
    let org_stride = (plane_org.plane.cfg.stride * mem::size_of::<T>()) as libc::ptrdiff_t;
    let ref_stride = (plane_ref.plane.cfg.stride * mem::size_of::<T>()) as libc::ptrdiff_t;
    assert!(blk_h >= 4 && blk_w >= 4);
    let step_size =
      blk_h.min(blk_w).min(if bit_depth <= 10 { 128 } else { 4 });
    // The match keys are `ilog()` of the tile side (3 for a side of 4, and
    // so on up to 8 for a side of 128).
    let func = match step_size.ilog() {
      3 => rav1e_sad_4x4_hbd_ssse3,
      4 => rav1e_sad_8x8_hbd10_ssse3,
      5 => rav1e_sad_16x16_hbd_ssse3,
      6 => rav1e_sad_32x32_hbd10_ssse3,
      7 => rav1e_sad_64x64_hbd10_ssse3,
      8 => rav1e_sad_128x128_hbd10_ssse3,
      _ => rav1e_sad_128x128_hbd10_ssse3
    };
    for r in (0..blk_h).step_by(step_size) {
      for c in (0..blk_w).step_by(step_size) {
        let org_slice = plane_org.subslice(c, r);
        let ref_slice = plane_ref.subslice(c, r);
        // FIXME for now, T == u16
        let org_ptr = org_slice.as_ptr() as *const u16;
        let ref_ptr = ref_slice.as_ptr() as *const u16;
        sum += func(org_ptr, org_stride, ref_ptr, ref_stride);
      }
    }
    sum
  }

  /// SAD of a `blk_w` x `blk_h` block of 8-bit pixels using the SSE2
  /// kernels.
  ///
  /// The tile side is additionally limited by the alignment of the source
  /// pointer (but never below 8, since the 8x8 kernel is the largest one
  /// that tolerates unaligned input).
  ///
  /// # Panics
  ///
  /// Panics if `T` is not one byte wide or if either block dimension is
  /// smaller than 4.
  ///
  /// # Safety
  ///
  /// The caller must have verified that the CPU supports SSE2, and both
  /// slices must reference at least `blk_w` x `blk_h` readable pixels.
  #[target_feature(enable = "sse2")]
  unsafe fn sad_sse2<T: Pixel>(
    plane_org: &PlaneSlice<'_, T>, plane_ref: &PlaneSlice<'_, T>, blk_h: usize,
    blk_w: usize
  ) -> u32 {
    assert!(mem::size_of::<T>() == 1, "only implemented for u8 for now");
    // FIXME unaligned blocks coming from hres/qres ME search
    let ptr_align_log2 = (plane_org.as_ptr() as usize).trailing_zeros() as usize;
    // The largest unaligned-safe function is for 8x8
    let ptr_align = 1 << ptr_align_log2.max(3);
    let mut sum = 0 as u32;
    // The kernels take their strides in bytes, not in pixels.
    let org_stride = (plane_org.plane.cfg.stride * mem::size_of::<T>()) as libc::ptrdiff_t;
    let ref_stride = (plane_ref.plane.cfg.stride * mem::size_of::<T>()) as libc::ptrdiff_t;
    assert!(blk_h >= 4 && blk_w >= 4);
    let step_size = blk_h.min(blk_w).min(ptr_align);
    let func = match step_size.ilog() {
      3 => rav1e_sad4x4_sse2,
      4 => rav1e_sad8x8_sse2,
      5 => rav1e_sad16x16_sse2,
      6 => rav1e_sad32x32_sse2,
      7 => rav1e_sad64x64_sse2,
      8 => rav1e_sad128x128_sse2,
      _ => rav1e_sad128x128_sse2
    };
    for r in (0..blk_h).step_by(step_size) {
      for c in (0..blk_w).step_by(step_size) {
        let org_slice = plane_org.subslice(c, r);
        let ref_slice = plane_ref.subslice(c, r);
        // FIXME for now, T == u8
        let org_ptr = org_slice.as_ptr() as *const u8;
        let ref_ptr = ref_slice.as_ptr() as *const u8;
        sum += func(org_ptr, org_stride, ref_ptr, ref_stride);
      }
    }
    sum
  }

  /// Sum of absolute differences between two `blk_w` x `blk_h` blocks.
  ///
  /// Uses the SSSE3 kernels for two-byte pixels and the SSE2 kernels for
  /// one-byte pixels when the CPU supports them and both dimensions are at
  /// least 4; otherwise defers to `super::native::get_sad`.
  #[inline(always)]
  pub fn get_sad<T: Pixel>(
    plane_org: &PlaneSlice<'_, T>, plane_ref: &PlaneSlice<'_, T>, blk_h: usize,
    blk_w: usize, bit_depth: usize
  ) -> u32 {
    // This module is already gated on the x86_64/nasm cfg, so no inner
    // cfg is needed around the dispatch.
    if blk_h >= 4 && blk_w >= 4 {
      match mem::size_of::<T>() {
        2 if is_x86_feature_detected!("ssse3") => {
          // SAFETY: SSSE3 support was just detected, T is two bytes wide
          // and both block dimensions are at least 4.
          return unsafe {
            sad_ssse3(plane_org, plane_ref, blk_h, blk_w, bit_depth)
          };
        }
        1 if is_x86_feature_detected!("sse2") => {
          // SAFETY: SSE2 support was just detected, T is one byte wide
          // and both block dimensions are at least 4.
          return unsafe {
            sad_sse2(plane_org, plane_ref, blk_h, blk_w)
          };
        }
        _ => {}
      }
    }
    super::native::get_sad(plane_org, plane_ref, blk_h, blk_w, bit_depth)
  }
}

mod native {
Raphaël Zumer's avatar
Raphaël Zumer committed
192
  use crate::plane::*;
193
  use crate::util::*;
194

Luca Barbato's avatar
Luca Barbato committed
195
  #[inline(always)]
196 197
  pub fn get_sad<T: Pixel>(
    plane_org: &PlaneSlice<'_, T>, plane_ref: &PlaneSlice<'_, T>, blk_h: usize,
Luca Barbato's avatar
Luca Barbato committed
198 199 200
    blk_w: usize, _bit_depth: usize
  ) -> u32 {
    let mut sum = 0 as u32;
201

Luca Barbato's avatar
Luca Barbato committed
202 203
    let org_iter = plane_org.iter_width(blk_w);
    let ref_iter = plane_ref.iter_width(blk_w);
204

Luca Barbato's avatar
Luca Barbato committed
205
    for (slice_org, slice_ref) in org_iter.take(blk_h).zip(ref_iter) {
206 207 208
      sum += slice_org
        .iter()
        .zip(slice_ref)
209
        .map(|(&a, &b)| (i32::cast_from(a) - i32::cast_from(b)).abs() as u32)
210
        .sum::<u32>();
Luca Barbato's avatar
Luca Barbato committed
211
    }
212

Luca Barbato's avatar
Luca Barbato committed
213 214
    sum
  }
215 216
}

217
fn get_mv_range(
218
  w_in_b: usize, h_in_b: usize, bo: &BlockOffset, blk_w: usize, blk_h: usize
219
) -> (isize, isize, isize, isize) {
220 221 222
  let border_w = 128 + blk_w as isize * 8;
  let border_h = 128 + blk_h as isize * 8;
  let mvx_min = -(bo.x as isize) * (8 * MI_SIZE) as isize - border_w;
223
  let mvx_max = (w_in_b - bo.x - blk_w / MI_SIZE) as isize * (8 * MI_SIZE) as isize + border_w;
224
  let mvy_min = -(bo.y as isize) * (8 * MI_SIZE) as isize - border_h;
225
  let mvy_max = (h_in_b - bo.y - blk_h / MI_SIZE) as isize * (8 * MI_SIZE) as isize + border_h;
226 227 228 229

  (mvx_min, mvx_max, mvy_min, mvy_max)
}

230 231
pub fn get_subset_predictors<T: Pixel>(
  fi: &FrameInvariants<T>, bo: &BlockOffset, cmv: MotionVector,
232
  frame_mvs: &[MotionVector], frame_ref_opt: &Option<Arc<ReferenceFrame<T>>>,
233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253
  ref_slot: usize
) -> (Vec<MotionVector>) {
  let mut predictors = Vec::new();

  // EPZS subset A and B predictors.

  if bo.x > 0 {
    let left = frame_mvs[bo.y * fi.w_in_b + bo.x - 1];
    predictors.push(left);
  }
  if bo.y > 0 {
    let top = frame_mvs[(bo.y - 1) * fi.w_in_b + bo.x];
    predictors.push(top);

    if bo.x < fi.w_in_b - 1 {
      let top_right = frame_mvs[(bo.y - 1) * fi.w_in_b + bo.x + 1];
      predictors.push(top_right);
    }
  }

  if predictors.len() > 0 {
Vladimir Kazakov's avatar
Vladimir Kazakov committed
254
    let mut median_mv = MotionVector::default();
255 256 257 258 259 260 261 262
    for mv in predictors.iter() {
      median_mv = median_mv + *mv;
    }
    median_mv = median_mv / (predictors.len() as i16);

    predictors.push(median_mv.quantize_to_fullpel());
  }

Vladimir Kazakov's avatar
Vladimir Kazakov committed
263
  predictors.push(MotionVector::default());
264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296

  // Coarse motion estimation.

  predictors.push(cmv.quantize_to_fullpel());

  // EPZS subset C predictors.

  if let Some(ref frame_ref) = frame_ref_opt {
    let prev_frame_mvs = &frame_ref.frame_mvs[ref_slot];

    if bo.x > 0 {
      let left = prev_frame_mvs[bo.y * fi.w_in_b + bo.x - 1];
      predictors.push(left);
    }
    if bo.y > 0 {
      let top = prev_frame_mvs[(bo.y - 1) * fi.w_in_b + bo.x];
      predictors.push(top);
    }
    if bo.x < fi.w_in_b - 1 {
      let right = prev_frame_mvs[bo.y * fi.w_in_b + bo.x + 1];
      predictors.push(right);
    }
    if bo.y < fi.h_in_b - 1 {
      let bottom = prev_frame_mvs[(bo.y + 1) * fi.w_in_b + bo.x];
      predictors.push(bottom);
    }

    predictors.push(prev_frame_mvs[bo.y * fi.w_in_b + bo.x]);
  }

  predictors
}

297 298
pub fn motion_estimation<T: Pixel>(
  fi: &FrameInvariants<T>, fs: &FrameState<T>, bsize: BlockSize, bo: &BlockOffset,
299
  ref_frame: usize, cmv: MotionVector, pmv: [MotionVector; 2],
300
  ref_slot: usize
301
) -> MotionVector {
302
  match fi.rec_buffer.frames[fi.ref_frames[ref_frame - LAST_FRAME] as usize] {
303
    Some(ref rec) => {
304

305 306 307 308
      let po = PlaneOffset {
        x: (bo.x as isize) << BLOCK_TO_PLANE_SHIFT,
        y: (bo.y as isize) << BLOCK_TO_PLANE_SHIFT
      };
309 310
      let blk_w = bsize.width();
      let blk_h = bsize.height();
311 312 313 314 315 316
      let (mvx_min, mvx_max, mvy_min, mvy_max) = get_mv_range(fi.w_in_b, fi.h_in_b, bo, blk_w, blk_h);

      // 0.5 is a fudge factor
      let lambda = (fi.me_lambda * 256.0 * 0.5) as u32;

      // Full-pixel motion estimation
317

318
      let mut lowest_cost = std::u64::MAX;
Vladimir Kazakov's avatar
Vladimir Kazakov committed
319
      let mut best_mv = MotionVector::default();
320

321 322 323 324 325 326 327 328 329 330 331 332 333 334
      let frame_mvs = &fs.frame_mvs[ref_slot];
      let frame_ref = &fi.rec_buffer.frames[fi.ref_frames[0] as usize];

      if fi.config.speed_settings.diamond_me {
        let predictors = get_subset_predictors(fi, bo, cmv,
          frame_mvs, frame_ref, ref_slot);

        diamond_me_search(
          fi, &po,
          &fs.input.planes[0], &rec.frame.planes[0],
          &predictors, fi.sequence.bit_depth,
          pmv, lambda,
          mvx_min, mvx_max, mvy_min, mvy_max,
          blk_w, blk_h,
335 336
          &mut best_mv, &mut lowest_cost, &mut None, ref_frame
        );
337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362
      } else {
        let range = 16;
        let x_lo = po.x + ((-range + (cmv.col / 8) as isize).max(mvx_min / 8).min(mvx_max / 8));
        let x_hi = po.x + ((range + (cmv.col / 8) as isize).max(mvx_min / 8).min(mvx_max / 8));
        let y_lo = po.y + ((-range + (cmv.row / 8) as isize).max(mvy_min / 8).min(mvy_max / 8));
        let y_hi = po.y + ((range + (cmv.row / 8) as isize).max(mvy_min / 8).min(mvy_max / 8));

        full_search(
          x_lo,
          x_hi,
          y_lo,
          y_hi,
          blk_h,
          blk_w,
          &fs.input.planes[0],
          &rec.frame.planes[0],
          &mut best_mv,
          &mut lowest_cost,
          &po,
          2,
          fi.sequence.bit_depth,
          lambda,
          pmv,
          fi.allow_high_precision_mv
        );
      }
Frank Bossen's avatar
Frank Bossen committed
363

364
      // Sub-pixel motion estimation
fbossen's avatar
fbossen committed
365
      let mut tmp_plane = Plane::new(blk_w, blk_h, 0, 0, 0, 0);
Frank Bossen's avatar
Frank Bossen committed
366

367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384
      if fi.config.speed_settings.diamond_me {
        let predictors = vec![best_mv];
        diamond_me_search(
          fi, &po,
          &fs.input.planes[0], &rec.frame.planes[0],
          &predictors, fi.sequence.bit_depth,
          pmv, lambda,
          mvx_min, mvx_max, mvy_min, mvy_max,
          blk_w, blk_h,
          &mut best_mv, &mut lowest_cost, &mut Some(tmp_plane), ref_frame
        );
      } else {
        telescopic_subpel_search(
          fi, fs, bsize, &po,
          lambda, ref_frame, pmv,
          mvx_min, mvx_max, mvy_min, mvy_max,
          &mut tmp_plane, &mut best_mv, &mut lowest_cost
        );
Frank Bossen's avatar
Frank Bossen committed
385 386
      }

387
      best_mv
388
    }
389

Vladimir Kazakov's avatar
Vladimir Kazakov committed
390
    None => MotionVector::default()
391 392
  }
}
393

394 395 396
fn get_best_predictor<T: Pixel>(
  fi: &FrameInvariants<T>,
  po: &PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>,
397
  predictors: &[MotionVector],
398
  bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
399 400
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
401 402
  center_mv: &mut MotionVector, center_mv_cost: &mut u64,
  tmp_plane_opt: &mut Option<Plane<T>>, ref_frame: usize) {
Vladimir Kazakov's avatar
Vladimir Kazakov committed
403
  *center_mv = MotionVector::default();
404 405 406 407 408 409
  *center_mv_cost = std::u64::MAX;

  for &init_mv in predictors.iter() {
    let cost = get_mv_rd_cost(
      fi, po, p_org, p_ref, bit_depth,
      pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
410
      blk_w, blk_h, init_mv, tmp_plane_opt, ref_frame);
411 412 413 414 415 416 417 418

    if cost < *center_mv_cost {
      *center_mv = init_mv;
      *center_mv_cost = cost;
    }
  }
}

419 420 421
fn diamond_me_search<T: Pixel>(
  fi: &FrameInvariants<T>,
  po: &PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>,
422
  predictors: &[MotionVector],
423
  bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
424 425
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
426 427
  center_mv: &mut MotionVector, center_mv_cost: &mut u64,
  tmp_plane_opt: &mut Option<Plane<T>>, ref_frame: usize)
428 429
{
  let diamond_pattern = [(1i16, 0i16), (0, 1), (-1, 0), (0, -1)];
430 431 432 433 434 435 436 437 438
  let (mut diamond_radius, diamond_radius_end) = {
    if tmp_plane_opt.is_some() {
      // Sub-pixel motion estimation
      (4i16, if fi.allow_high_precision_mv {1i16} else {2i16})
    } else {
      // Full pixel motion estimation
      (16i16, 8i16)
    }
  };
439 440 441 442

  get_best_predictor(
    fi, po, p_org, p_ref, &predictors,
    bit_depth, pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
443 444
    blk_w, blk_h, center_mv, center_mv_cost,
    tmp_plane_opt, ref_frame);
445 446 447

  loop {
    let mut best_diamond_rd_cost = std::u64::MAX;
Vladimir Kazakov's avatar
Vladimir Kazakov committed
448
    let mut best_diamond_mv = MotionVector::default();
449 450 451 452 453 454 455 456 457 458 459

    for p in diamond_pattern.iter() {

        let cand_mv = MotionVector {
          row: center_mv.row + diamond_radius * p.0,
          col: center_mv.col + diamond_radius * p.1
        };

        let rd_cost = get_mv_rd_cost(
          fi, &po, p_org, p_ref, bit_depth,
          pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
460
          blk_w, blk_h, cand_mv, tmp_plane_opt, ref_frame);
461 462 463 464 465 466 467 468

        if rd_cost < best_diamond_rd_cost {
          best_diamond_rd_cost = rd_cost;
          best_diamond_mv = cand_mv;
        }
    }

    if *center_mv_cost <= best_diamond_rd_cost {
469
      if diamond_radius == diamond_radius_end {
470 471 472 473 474 475 476 477 478 479 480 481 482 483
        break;
      } else {
        diamond_radius /= 2;
      }
    }
    else {
      *center_mv = best_diamond_mv;
      *center_mv_cost = best_diamond_rd_cost;
    }
  }

  assert!(*center_mv_cost < std::u64::MAX);
}

484 485 486
fn get_mv_rd_cost<T: Pixel>(
  fi: &FrameInvariants<T>,
  po: &PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>, bit_depth: usize,
487
  pmv: [MotionVector; 2], lambda: u32,
488 489
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
490 491
  cand_mv: MotionVector, tmp_plane_opt: &mut Option<Plane<T>>,
  ref_frame: usize) -> u64
492 493 494 495 496 497 498 499 500 501
{
  if (cand_mv.col as isize) < mvx_min || (cand_mv.col as isize) > mvx_max {
    return std::u64::MAX;
  }
  if (cand_mv.row as isize) < mvy_min || (cand_mv.row as isize) > mvy_max {
    return std::u64::MAX;
  }

  let plane_org = p_org.slice(po);

502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538
  if let Some(ref mut tmp_plane) = tmp_plane_opt {
    let mut tmp_slice = &mut tmp_plane.mut_slice(&PlaneOffset { x: 0, y: 0 });
    PredictionMode::NEWMV.predict_inter(
      fi,
      0,
      &po,
      &mut tmp_slice,
      blk_w,
      blk_h,
      [ref_frame, NONE_FRAME],
      [cand_mv, MotionVector { row: 0, col: 0 }]
    );
    let plane_ref = tmp_plane.slice(&PlaneOffset { x: 0, y: 0 });
    compute_mv_rd_cost(
      fi, pmv, lambda, bit_depth, blk_w, blk_h, cand_mv,
      &plane_org, &plane_ref
    )
  } else {
    // Full pixel motion vector
    let plane_ref = p_ref.slice(&PlaneOffset {
      x: po.x + (cand_mv.col / 8) as isize,
      y: po.y + (cand_mv.row / 8) as isize
    });
    compute_mv_rd_cost(
      fi, pmv, lambda, bit_depth, blk_w, blk_h, cand_mv,
      &plane_org, &plane_ref
    )
  }
}

fn compute_mv_rd_cost<T: Pixel>(
  fi: &FrameInvariants<T>,
  pmv: [MotionVector; 2], lambda: u32,
  bit_depth: usize, blk_w: usize, blk_h: usize, cand_mv: MotionVector,
  plane_org: &PlaneSlice<T>, plane_ref: &PlaneSlice<T>
) -> u64
{
539 540 541 542 543 544 545 546 547
  let sad = get_sad(&plane_org, &plane_ref, blk_h, blk_w, bit_depth);

  let rate1 = get_mv_rate(cand_mv, pmv[0], fi.allow_high_precision_mv);
  let rate2 = get_mv_rate(cand_mv, pmv[1], fi.allow_high_precision_mv);
  let rate = rate1.min(rate2 + 1);

  256 * sad as u64 + rate as u64 * lambda as u64
}

548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619
fn telescopic_subpel_search<T: Pixel>(
  fi: &FrameInvariants<T>, fs: &FrameState<T>, bsize: BlockSize, po: &PlaneOffset,
  lambda: u32, ref_frame: usize, pmv: [MotionVector; 2],
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  tmp_plane: &mut Plane<T>, best_mv: &mut MotionVector, lowest_cost: &mut u64
) {
  let blk_w = bsize.width();
  let blk_h = bsize.height();

  let mode = PredictionMode::NEWMV;

  let mut steps = vec![8, 4, 2];
  if fi.allow_high_precision_mv {
    steps.push(1);
  }

  for step in steps {
    let center_mv_h = *best_mv;
    for i in 0..3 {
      for j in 0..3 {
        // Skip the center point that was already tested
        if i == 1 && j == 1 {
          continue;
        }

        let cand_mv = MotionVector {
          row: center_mv_h.row + step * (i as i16 - 1),
          col: center_mv_h.col + step * (j as i16 - 1)
        };

        if (cand_mv.col as isize) < mvx_min || (cand_mv.col as isize) > mvx_max {
          continue;
        }
        if (cand_mv.row as isize) < mvy_min || (cand_mv.row as isize) > mvy_max {
          continue;
        }

        {
          let tmp_slice =
            &mut tmp_plane.mut_slice(&PlaneOffset { x: 0, y: 0 });

          mode.predict_inter(
            fi,
            0,
            &po,
            tmp_slice,
            blk_w,
            blk_h,
            [ref_frame, NONE_FRAME],
            [cand_mv, MotionVector { row: 0, col: 0 }]
          );
        }

        let plane_org = fs.input.planes[0].slice(&po);
        let plane_ref = tmp_plane.slice(&PlaneOffset { x: 0, y: 0 });

        let sad = get_sad(&plane_org, &plane_ref, blk_h, blk_w, fi.sequence.bit_depth);

        let rate1 = get_mv_rate(cand_mv, pmv[0], fi.allow_high_precision_mv);
        let rate2 = get_mv_rate(cand_mv, pmv[1], fi.allow_high_precision_mv);
        let rate = rate1.min(rate2 + 1);
        let cost = 256 * sad as u64 + rate as u64 * lambda as u64;

        if cost < *lowest_cost {
          *lowest_cost = cost;
          *best_mv = cand_mv;
        }
      }
    }
  }
}

620
fn full_search<T: Pixel>(
Kyle Siefring's avatar
Kyle Siefring committed
621
  x_lo: isize, x_hi: isize, y_lo: isize, y_hi: isize, blk_h: usize,
622
  blk_w: usize, p_org: &Plane<T>, p_ref: &Plane<T>, best_mv: &mut MotionVector,
623
  lowest_cost: &mut u64, po: &PlaneOffset, step: usize, bit_depth: usize,
624
  lambda: u32, pmv: [MotionVector; 2], allow_high_precision_mv: bool
Kyle Siefring's avatar
Kyle Siefring committed
625
) {
Luca Barbato's avatar
Luca Barbato committed
626 627 628 629 630
    let search_range_y = (y_lo..=y_hi).step_by(step);
    let search_range_x = (x_lo..=x_hi).step_by(step);
    let search_area = search_range_y.flat_map(|y| { search_range_x.clone().map(move |x| (y, x)) });

    let (cost, mv) = search_area.map(|(y, x)| {
631 632 633
      let plane_org = p_org.slice(po);
      let plane_ref = p_ref.slice(&PlaneOffset { x, y });

Kyle Siefring's avatar
Kyle Siefring committed
634
      let sad = get_sad(&plane_org, &plane_ref, blk_h, blk_w, bit_depth);
635

Frank Bossen's avatar
Frank Bossen committed
636 637 638 639 640 641 642 643
      let mv = MotionVector {
        row: 8 * (y as i16 - po.y as i16),
        col: 8 * (x as i16 - po.x as i16)
      };

      let rate1 = get_mv_rate(mv, pmv[0], allow_high_precision_mv);
      let rate2 = get_mv_rate(mv, pmv[1], allow_high_precision_mv);
      let rate = rate1.min(rate2 + 1);
644
      let cost = 256 * sad as u64 + rate as u64 * lambda as u64;
Frank Bossen's avatar
Frank Bossen committed
645

Luca Barbato's avatar
Luca Barbato committed
646 647 648 649 650
      (cost, mv)
  }).min_by_key(|(c, _)| *c).unwrap();

    *lowest_cost = cost;
    *best_mv = mv;
651 652 653
}

// Adjust block offset such that entire block lies within frame boundaries
654
fn adjust_bo<T: Pixel>(bo: &BlockOffset, fi: &FrameInvariants<T>, blk_w: usize, blk_h: usize) -> BlockOffset {
655 656 657 658 659 660
  BlockOffset {
    x: (bo.x as isize).min(fi.w_in_b as isize - blk_w as isize / 4).max(0) as usize,
    y: (bo.y as isize).min(fi.h_in_b as isize - blk_h as isize / 4).max(0) as usize
  }
}

Frank Bossen's avatar
Frank Bossen committed
661 662 663 664 665 666 667 668 669 670 671 672 673
/// Approximate cost of coding motion vector `a` relative to predictor `b`.
///
/// The row and column differences are rated independently and summed. A
/// component costs twice the number of significant bits of its absolute
/// difference (0 for no difference); without high-precision motion vectors
/// the difference is first shifted right by one bit.
fn get_mv_rate(a: MotionVector, b: MotionVector, allow_high_precision_mv: bool) -> u32 {
  fn diff_to_rate(diff: i16, allow_high_precision_mv: bool) -> u32 {
    let scaled = match allow_high_precision_mv {
      true => diff,
      false => diff >> 1
    };
    match scaled {
      0 => 0,
      // 16 - leading_zeros is the bit length of the magnitude.
      nonzero => 2 * (16 - nonzero.abs().leading_zeros())
    }
  }

  let row_rate = diff_to_rate(a.row - b.row, allow_high_precision_mv);
  let col_rate = diff_to_rate(a.col - b.col, allow_high_precision_mv);
  row_rate + col_rate
}

674 675
pub fn estimate_motion_ss4<T: Pixel>(
  fi: &FrameInvariants<T>, fs: &FrameState<T>, bsize: BlockSize, ref_idx: usize,
676
  bo: &BlockOffset
677 678 679 680 681 682 683 684 685
) -> Option<MotionVector> {
  if let Some(ref rec) = fi.rec_buffer.frames[ref_idx] {
    let blk_w = bsize.width();
    let blk_h = bsize.height();
    let bo_adj = adjust_bo(bo, fi, blk_w, blk_h);
    let po = PlaneOffset {
      x: (bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 2,
      y: (bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 2
    };
686

687 688
    let range_x = 192 * fi.me_range_scale as isize;
    let range_y = 64 * fi.me_range_scale as isize;
689
    let (mvx_min, mvx_max, mvy_min, mvy_max) = get_mv_range(fi.w_in_b, fi.h_in_b, &bo_adj, blk_w, blk_h);
690 691 692 693
    let x_lo = po.x + (((-range_x).max(mvx_min / 8)) >> 2);
    let x_hi = po.x + (((range_x).min(mvx_max / 8)) >> 2);
    let y_lo = po.y + (((-range_y).max(mvy_min / 8)) >> 2);
    let y_hi = po.y + (((range_y).min(mvy_max / 8)) >> 2);
694

695
    let mut lowest_cost = std::u64::MAX;
Vladimir Kazakov's avatar
Vladimir Kazakov committed
696
    let mut best_mv = MotionVector::default();
697

Frank Bossen's avatar
Frank Bossen committed
698
    // Divide by 16 to account for subsampling, 0.125 is a fudge factor
699
    let lambda = (fi.me_lambda * 256.0 / 16.0 * 0.125) as u32;
Frank Bossen's avatar
Frank Bossen committed
700

701
    full_search(
Kyle Siefring's avatar
Kyle Siefring committed
702 703 704 705 706 707 708 709 710
      x_lo,
      x_hi,
      y_lo,
      y_hi,
      blk_h >> 2,
      blk_w >> 2,
      &fs.input_qres,
      &rec.input_qres,
      &mut best_mv,
Frank Bossen's avatar
Frank Bossen committed
711
      &mut lowest_cost,
Kyle Siefring's avatar
Kyle Siefring committed
712 713
      &po,
      1,
714
      fi.sequence.bit_depth,
Frank Bossen's avatar
Frank Bossen committed
715
      lambda,
Vladimir Kazakov's avatar
Vladimir Kazakov committed
716
      [MotionVector::default(); 2],
Frank Bossen's avatar
Frank Bossen committed
717
      fi.allow_high_precision_mv
718 719 720 721 722 723 724 725
    );

    Some(MotionVector { row: best_mv.row * 4, col: best_mv.col * 4 })
  } else {
    None
  }
}

726 727
pub fn estimate_motion_ss2<T: Pixel>(
  fi: &FrameInvariants<T>, fs: &FrameState<T>, bsize: BlockSize, ref_idx: usize,
728
  bo: &BlockOffset, pmvs: &[Option<MotionVector>; 3]
729 730 731 732 733 734 735 736 737 738
) -> Option<MotionVector> {
  if let Some(ref rec) = fi.rec_buffer.frames[ref_idx] {
    let blk_w = bsize.width();
    let blk_h = bsize.height();
    let bo_adj = adjust_bo(bo, fi, blk_w, blk_h);
    let po = PlaneOffset {
      x: (bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
      y: (bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 1
    };
    let range = 16;
739
    let (mvx_min, mvx_max, mvy_min, mvy_max) = get_mv_range(fi.w_in_b, fi.h_in_b, &bo_adj, blk_w, blk_h);
740

741
    let mut lowest_cost = std::u64::MAX;
Vladimir Kazakov's avatar
Vladimir Kazakov committed
742
    let mut best_mv = MotionVector::default();
743

Frank Bossen's avatar
Frank Bossen committed
744
    // Divide by 4 to account for subsampling, 0.125 is a fudge factor
745
    let lambda = (fi.me_lambda * 256.0 / 4.0 * 0.125) as u32;
Frank Bossen's avatar
Frank Bossen committed
746

747 748
    for omv in pmvs.iter() {
      if let Some(pmv) = omv {
749 750 751 752
        let x_lo = po.x + (((pmv.col as isize / 8 - range).max(mvx_min / 8).min(mvx_max / 8)) >> 1);
        let x_hi = po.x + (((pmv.col as isize / 8 + range).max(mvx_min / 8).min(mvx_max / 8)) >> 1);
        let y_lo = po.y + (((pmv.row as isize / 8 - range).max(mvy_min / 8).min(mvy_max / 8)) >> 1);
        let y_hi = po.y + (((pmv.row as isize / 8 + range).max(mvy_min / 8).min(mvy_max / 8)) >> 1);
753 754

        full_search(
Kyle Siefring's avatar
Kyle Siefring committed
755 756 757 758 759 760 761 762 763
          x_lo,
          x_hi,
          y_lo,
          y_hi,
          blk_h >> 1,
          blk_w >> 1,
          &fs.input_hres,
          &rec.input_hres,
          &mut best_mv,
Frank Bossen's avatar
Frank Bossen committed
764
          &mut lowest_cost,
Kyle Siefring's avatar
Kyle Siefring committed
765 766
          &po,
          1,
767
          fi.sequence.bit_depth,
Frank Bossen's avatar
Frank Bossen committed
768
          lambda,
Vladimir Kazakov's avatar
Vladimir Kazakov committed
769
          [MotionVector::default(); 2],
Frank Bossen's avatar
Frank Bossen committed
770
          fi.allow_high_precision_mv
771 772 773 774 775 776 777 778 779 780
        );
      }
    }

    Some(MotionVector { row: best_mv.row * 2, col: best_mv.col * 2 })
  } else {
    None
  }
}

781 782 783
#[cfg(test)]
pub mod test {
  use super::*;
  use crate::partition::BlockSize;
  use crate::partition::BlockSize::*;

  // Generate plane data for get_sad_same()
  //
  // Returns an input plane and a reference plane of identical geometry.
  // Pixel (row i, column j) is ((j + i) - xpad_off) & 255 in the input
  // plane and ((j - i) - xpad_off) & 255 in the reference plane.
  fn setup_sad<T: Pixel>() -> (Plane<T>, Plane<T>) {
    let mut input_plane = Plane::new(640, 480, 0, 0, 128 + 8, 128 + 8);
    let mut rec_plane = input_plane.clone();
    // Make the test pattern robust to data alignment
    let xpad_off = (input_plane.cfg.xorigin - input_plane.cfg.xpad) as i32 - 8i32;

    for (i, row) in input_plane.data.chunks_mut(input_plane.cfg.stride).enumerate() {
      for (j, pixel) in row.iter_mut().enumerate() {
        // `-` binds tighter than `&`; the parentheses only make it explicit.
        let val = ((j + i) as i32 - xpad_off) & 255i32;
        assert!(val >= u8::min_value().into() &&
            val <= u8::max_value().into());
        *pixel = T::cast_from(val);
      }
    }

    for (i, row) in rec_plane.data.chunks_mut(rec_plane.cfg.stride).enumerate() {
      for (j, pixel) in row.iter_mut().enumerate() {
        let val = (j as i32 - i as i32 - xpad_off) & 255i32;
        assert!(val >= u8::min_value().into() &&
            val <= u8::max_value().into());
        *pixel = T::cast_from(val);
      }
    }

    (input_plane, rec_plane)
  }

  // Regression and validation test for SAD computation
  fn get_sad_same_inner<T: Pixel>() {
    // Block size paired with the SAD expected at plane offset (32, 40).
    let blocks: Vec<(BlockSize, u32)> = vec![
      (BLOCK_4X4, 1912),
      (BLOCK_4X8, 3496),
      (BLOCK_8X4, 4296),
      (BLOCK_8X8, 7824),
      (BLOCK_8X16, 14416),
      (BLOCK_16X8, 16592),
      (BLOCK_16X16, 31136),
      (BLOCK_16X32, 59552),
      (BLOCK_32X16, 60064),
      (BLOCK_32X32, 120128),
      (BLOCK_32X64, 250176),
      (BLOCK_64X32, 186688),
      (BLOCK_64X64, 438912),
      (BLOCK_64X128, 1016768),
      (BLOCK_128X64, 654272),
      (BLOCK_128X128, 1689792),
      (BLOCK_4X16, 6664),
      (BLOCK_16X4, 8680),
      (BLOCK_8X32, 27600),
      (BLOCK_32X8, 31056),
      (BLOCK_16X64, 116384),
      (BLOCK_64X16, 93344),
    ];

    let bit_depth: usize = 8;
    let (input_plane, rec_plane) = setup_sad::<T>();

    for block in blocks {
      let bsw = block.0.width();
      let bsh = block.0.height();
      let po = PlaneOffset { x: 32, y: 40 };

      let input_slice = input_plane.slice(&po);
      let rec_slice = rec_plane.slice(&po);

      // NOTE(review): get_sad declares (blk_h, blk_w) but receives
      // (bsw, bsh) here. The expected values above were recorded with this
      // order, so it is kept as is - confirm before swapping.
      assert_eq!(
        block.1,
        get_sad(&input_slice, &rec_slice, bsw, bsh, bit_depth)
      );
    }
  }

  #[test]
  fn get_sad_same_u8() {
    get_sad_same_inner::<u8>();
  }

  #[test]
  fn get_sad_same_u16() {
    get_sad_same_inner::<u16>();
  }
}