me.rs 30.4 KB
Newer Older
1 2 3 4 5 6 7 8 9
// Copyright (c) 2017-2018, The rav1e contributors. All rights reserved
//
// This source code is subject to the terms of the BSD 2 Clause License and
// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
// was not distributed with this source code in the LICENSE file, you can
// obtain it at www.aomedia.org/license/software. If the Alliance for Open
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.

10
#[cfg(all(target_arch = "x86_64", feature = "nasm"))]
Raphaël Zumer's avatar
Raphaël Zumer committed
11
pub use self::nasm::get_sad;
12
#[cfg(any(not(target_arch = "x86_64"), not(feature = "nasm")))]
Raphaël Zumer's avatar
Raphaël Zumer committed
13
pub use self::native::get_sad;
Raphaël Zumer's avatar
Raphaël Zumer committed
14
use crate::context::{BlockOffset, BLOCK_TO_PLANE_SHIFT, MI_SIZE};
15
use crate::encoder::ReferenceFrame;
Raphaël Zumer's avatar
Raphaël Zumer committed
16 17 18 19
use crate::FrameInvariants;
use crate::FrameState;
use crate::partition::*;
use crate::plane::*;
20
use crate::util::Pixel;
21

Romain Vimont's avatar
Romain Vimont committed
22
use std::ops::{Index, IndexMut};
23
use std::sync::Arc;
24

25
#[cfg(all(target_arch = "x86_64", feature = "nasm"))]
mod nasm {
  use crate::plane::*;
  use crate::util::*;
  use std::mem;

  use libc;

  // Assembly SAD kernels, one per square block size. Each kernel receives two
  // sample pointers together with their strides and returns a `u32` sum.
  extern {
    // Kernels operating on `u16` samples.
    fn rav1e_sad_4x4_hbd_ssse3(
      src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad_8x8_hbd10_ssse3(
      src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad_16x16_hbd_ssse3(
      src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad_32x32_hbd10_ssse3(
      src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad_64x64_hbd10_ssse3(
      src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad_128x128_hbd10_ssse3(
      src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    // Kernels operating on `u8` samples.
    fn rav1e_sad4x4_sse2(
      src: *const u8, src_stride: libc::ptrdiff_t, dst: *const u8,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad8x8_sse2(
      src: *const u8, src_stride: libc::ptrdiff_t, dst: *const u8,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad16x16_sse2(
      src: *const u8, src_stride: libc::ptrdiff_t, dst: *const u8,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad32x32_sse2(
      src: *const u8, src_stride: libc::ptrdiff_t, dst: *const u8,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad64x64_sse2(
      src: *const u8, src_stride: libc::ptrdiff_t, dst: *const u8,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad128x128_sse2(
      src: *const u8, src_stride: libc::ptrdiff_t, dst: *const u8,
      dst_stride: libc::ptrdiff_t
    ) -> u32;
  }

  /// Sums the results of the `u16` assembly kernels over a `blk_w` x `blk_h`
  /// block by tiling it with square kernels.
  ///
  /// The tile edge is the smaller block dimension, capped at 128 when
  /// `bit_depth <= 10` and at 4 otherwise.
  ///
  /// # Panics
  ///
  /// Panics if either block dimension is smaller than 4.
  ///
  /// # Safety
  ///
  /// The CPU must support SSSE3. The kernels read through raw pointers, so the
  /// block presumably has to lie inside both planes - TODO confirm against
  /// the assembly.
  #[target_feature(enable = "ssse3")]
  unsafe fn sad_hbd_ssse3(
    plane_org: &PlaneSlice<'_, u16>, plane_ref: &PlaneSlice<'_, u16>, blk_h: usize,
    blk_w: usize, bit_depth: usize
  ) -> u32 {
    let mut sum = 0u32;
    // The plane stride is doubled before being handed to the kernels
    // (presumably samples -> bytes for 2-byte samples; TODO confirm).
    let org_stride = (plane_org.plane.cfg.stride * 2) as libc::ptrdiff_t;
    let ref_stride = (plane_ref.plane.cfg.stride * 2) as libc::ptrdiff_t;
    assert!(blk_h >= 4 && blk_w >= 4);
    let step_size =
      blk_h.min(blk_w).min(if bit_depth <= 10 { 128 } else { 4 });
    // `ilog()` of the tile edge selects the kernel; 3 selects the 4x4 kernel.
    let func = match step_size.ilog() {
      3 => rav1e_sad_4x4_hbd_ssse3,
      4 => rav1e_sad_8x8_hbd10_ssse3,
      5 => rav1e_sad_16x16_hbd_ssse3,
      6 => rav1e_sad_32x32_hbd10_ssse3,
      7 => rav1e_sad_64x64_hbd10_ssse3,
      8 => rav1e_sad_128x128_hbd10_ssse3,
      _ => rav1e_sad_128x128_hbd10_ssse3
    };
    for r in (0..blk_h).step_by(step_size) {
      for c in (0..blk_w).step_by(step_size) {
        let org_slice = plane_org.subslice(c, r);
        let ref_slice = plane_ref.subslice(c, r);
        // FIXME for now, T == u16
        let org_ptr = org_slice.as_ptr() as *const u16;
        let ref_ptr = ref_slice.as_ptr() as *const u16;
        sum += func(org_ptr, org_stride, ref_ptr, ref_stride);
      }
    }
    sum
  }

  /// Sums the results of the `u8` assembly kernels over a `blk_w` x `blk_h`
  /// block by tiling it with square kernels.
  ///
  /// The tile edge is the smaller block dimension, further capped by the
  /// alignment of the source pointer (never below 8).
  ///
  /// # Panics
  ///
  /// Panics if either block dimension is smaller than 4.
  ///
  /// # Safety
  ///
  /// The CPU must support SSE2. The kernels read through raw pointers, so the
  /// block presumably has to lie inside both planes - TODO confirm against
  /// the assembly.
  #[target_feature(enable = "sse2")]
  unsafe fn sad_sse2(
    plane_org: &PlaneSlice<'_, u8>, plane_ref: &PlaneSlice<'_, u8>, blk_h: usize,
    blk_w: usize
  ) -> u32 {
    // FIXME unaligned blocks coming from hres/qres ME search
    let ptr_align_log2 = (plane_org.as_ptr() as usize).trailing_zeros() as usize;
    // The largest unaligned-safe function is for 8x8
    let ptr_align = 1 << ptr_align_log2.max(3);
    let mut sum = 0u32;
    let org_stride = plane_org.plane.cfg.stride as libc::ptrdiff_t;
    let ref_stride = plane_ref.plane.cfg.stride as libc::ptrdiff_t;
    assert!(blk_h >= 4 && blk_w >= 4);
    let step_size = blk_h.min(blk_w).min(ptr_align);
    // `ilog()` of the tile edge selects the kernel; 3 selects the 4x4 kernel.
    let func = match step_size.ilog() {
      3 => rav1e_sad4x4_sse2,
      4 => rav1e_sad8x8_sse2,
      5 => rav1e_sad16x16_sse2,
      6 => rav1e_sad32x32_sse2,
      7 => rav1e_sad64x64_sse2,
      8 => rav1e_sad128x128_sse2,
      _ => rav1e_sad128x128_sse2
    };
    for r in (0..blk_h).step_by(step_size) {
      for c in (0..blk_w).step_by(step_size) {
        let org_slice = plane_org.subslice(c, r);
        let ref_slice = plane_ref.subslice(c, r);
        // FIXME for now, T == u8
        let org_ptr = org_slice.as_ptr() as *const u8;
        let ref_ptr = ref_slice.as_ptr() as *const u8;
        sum += func(org_ptr, org_stride, ref_ptr, ref_stride);
      }
    }
    sum
  }

  /// Returns the SAD of a `blk_w` x `blk_h` block, using an assembly kernel
  /// when the sample size, the detected CPU features and the block size allow
  /// it, and the portable implementation in `native` otherwise.
  #[inline(always)]
  pub fn get_sad<T: Pixel>(
    plane_org: &PlaneSlice<'_, T>, plane_ref: &PlaneSlice<'_, T>, blk_h: usize,
    blk_w: usize, bit_depth: usize
  ) -> u32 {
    // This module is only compiled for x86_64 with the `nasm` feature, so no
    // further `cfg` gating is needed here.
    // The assembly kernels start at 4x4.
    if blk_h >= 4 && blk_w >= 4 {
      if mem::size_of::<T>() == 2 && is_x86_feature_detected!("ssse3") {
        // SAFETY: SSSE3 was detected at runtime. The reference casts rely on
        // a 2-byte `T` being `u16` (see the FIXME in `sad_hbd_ssse3`).
        return unsafe {
          let plane_org = &*(plane_org as *const _ as *const PlaneSlice<'_, u16>);
          let plane_ref = &*(plane_ref as *const _ as *const PlaneSlice<'_, u16>);
          sad_hbd_ssse3(plane_org, plane_ref, blk_h, blk_w, bit_depth)
        };
      }
      if mem::size_of::<T>() == 1 && is_x86_feature_detected!("sse2") {
        // SAFETY: SSE2 was detected at runtime. The reference casts rely on
        // a 1-byte `T` being `u8` (see the FIXME in `sad_sse2`).
        return unsafe {
          let plane_org = &*(plane_org as *const _ as *const PlaneSlice<'_, u8>);
          let plane_ref = &*(plane_ref as *const _ as *const PlaneSlice<'_, u8>);
          sad_sse2(plane_org, plane_ref, blk_h, blk_w)
        };
      }
    }
    super::native::get_sad(plane_org, plane_ref, blk_h, blk_w, bit_depth)
  }
}

mod native {
Raphaël Zumer's avatar
Raphaël Zumer committed
195
  use crate::plane::*;
196
  use crate::util::*;
197

Luca Barbato's avatar
Luca Barbato committed
198
  #[inline(always)]
199 200
  pub fn get_sad<T: Pixel>(
    plane_org: &PlaneSlice<'_, T>, plane_ref: &PlaneSlice<'_, T>, blk_h: usize,
Luca Barbato's avatar
Luca Barbato committed
201 202 203
    blk_w: usize, _bit_depth: usize
  ) -> u32 {
    let mut sum = 0 as u32;
204

Luca Barbato's avatar
Luca Barbato committed
205 206
    let org_iter = plane_org.iter_width(blk_w);
    let ref_iter = plane_ref.iter_width(blk_w);
207

Luca Barbato's avatar
Luca Barbato committed
208
    for (slice_org, slice_ref) in org_iter.take(blk_h).zip(ref_iter) {
209 210 211
      sum += slice_org
        .iter()
        .zip(slice_ref)
212
        .map(|(&a, &b)| (i32::cast_from(a) - i32::cast_from(b)).abs() as u32)
213
        .sum::<u32>();
Luca Barbato's avatar
Luca Barbato committed
214
    }
215

Luca Barbato's avatar
Luca Barbato committed
216 217
    sum
  }
218 219
}

Romain Vimont's avatar
Romain Vimont committed
220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251
/// A `cols` x `rows` grid of motion vectors stored row by row in one
/// contiguous allocation. Indexing with a row number yields that row as a
/// slice of `cols` vectors.
#[derive(Debug, Clone)]
pub struct FrameMotionVectors {
  // Row-major storage; holds `cols * rows` entries.
  mvs: Box<[MotionVector]>,
  /// Number of motion vectors per row.
  pub cols: usize,
  /// Number of rows.
  pub rows: usize,
}

impl FrameMotionVectors {
  /// Creates a `cols` x `rows` grid in which every entry is the default
  /// motion vector.
  pub fn new(cols: usize, rows: usize) -> Self {
    let blank = MotionVector::default();
    let mvs = vec![blank; cols * rows].into_boxed_slice();
    Self { mvs, cols, rows }
  }
}

impl Index<usize> for FrameMotionVectors {
  type Output = [MotionVector];

  /// Returns row `index` of the grid as a slice of `cols` motion vectors.
  #[inline]
  fn index(&self, index: usize) -> &Self::Output {
    let row_start = index * self.cols;
    let row_end = row_start + self.cols;
    &self.mvs[row_start..row_end]
  }
}

impl IndexMut<usize> for FrameMotionVectors {
  /// Returns row `index` of the grid as a mutable slice of `cols` motion
  /// vectors.
  #[inline]
  fn index_mut(&mut self, index: usize) -> &mut Self::Output {
    let row_start = index * self.cols;
    let row_end = row_start + self.cols;
    &mut self.mvs[row_start..row_end]
  }
}

252
fn get_mv_range(
253
  w_in_b: usize, h_in_b: usize, bo: &BlockOffset, blk_w: usize, blk_h: usize
254
) -> (isize, isize, isize, isize) {
255 256 257
  let border_w = 128 + blk_w as isize * 8;
  let border_h = 128 + blk_h as isize * 8;
  let mvx_min = -(bo.x as isize) * (8 * MI_SIZE) as isize - border_w;
258
  let mvx_max = (w_in_b - bo.x - blk_w / MI_SIZE) as isize * (8 * MI_SIZE) as isize + border_w;
259
  let mvy_min = -(bo.y as isize) * (8 * MI_SIZE) as isize - border_h;
260
  let mvy_max = (h_in_b - bo.y - blk_h / MI_SIZE) as isize * (8 * MI_SIZE) as isize + border_h;
261 262 263 264

  (mvx_min, mvx_max, mvy_min, mvy_max)
}

265 266
pub fn get_subset_predictors<T: Pixel>(
  fi: &FrameInvariants<T>, bo: &BlockOffset, cmv: MotionVector,
Romain Vimont's avatar
Romain Vimont committed
267
  frame_mvs: &FrameMotionVectors, frame_ref_opt: &Option<Arc<ReferenceFrame<T>>>,
268 269 270 271 272 273 274
  ref_slot: usize
) -> (Vec<MotionVector>) {
  let mut predictors = Vec::new();

  // EPZS subset A and B predictors.

  if bo.x > 0 {
Romain Vimont's avatar
Romain Vimont committed
275
    let left = frame_mvs[bo.y][bo.x - 1];
276 277 278
    predictors.push(left);
  }
  if bo.y > 0 {
Romain Vimont's avatar
Romain Vimont committed
279
    let top = frame_mvs[bo.y - 1][bo.x];
280 281 282
    predictors.push(top);

    if bo.x < fi.w_in_b - 1 {
Romain Vimont's avatar
Romain Vimont committed
283
      let top_right = frame_mvs[bo.y - 1][bo.x + 1];
284 285 286 287
      predictors.push(top_right);
    }
  }

288
  if !predictors.is_empty() {
Vladimir Kazakov's avatar
Vladimir Kazakov committed
289
    let mut median_mv = MotionVector::default();
290 291 292 293 294 295 296 297
    for mv in predictors.iter() {
      median_mv = median_mv + *mv;
    }
    median_mv = median_mv / (predictors.len() as i16);

    predictors.push(median_mv.quantize_to_fullpel());
  }

Vladimir Kazakov's avatar
Vladimir Kazakov committed
298
  predictors.push(MotionVector::default());
299 300 301 302 303 304 305 306 307 308 309

  // Coarse motion estimation.

  predictors.push(cmv.quantize_to_fullpel());

  // EPZS subset C predictors.

  if let Some(ref frame_ref) = frame_ref_opt {
    let prev_frame_mvs = &frame_ref.frame_mvs[ref_slot];

    if bo.x > 0 {
Romain Vimont's avatar
Romain Vimont committed
310
      let left = prev_frame_mvs[bo.y][bo.x - 1];
311 312 313
      predictors.push(left);
    }
    if bo.y > 0 {
Romain Vimont's avatar
Romain Vimont committed
314
      let top = prev_frame_mvs[bo.y - 1][bo.x];
315 316 317
      predictors.push(top);
    }
    if bo.x < fi.w_in_b - 1 {
Romain Vimont's avatar
Romain Vimont committed
318
      let right = prev_frame_mvs[bo.y][bo.x + 1];
319 320 321
      predictors.push(right);
    }
    if bo.y < fi.h_in_b - 1 {
Romain Vimont's avatar
Romain Vimont committed
322
      let bottom = prev_frame_mvs[bo.y + 1][bo.x];
323 324 325
      predictors.push(bottom);
    }

Romain Vimont's avatar
Romain Vimont committed
326
    predictors.push(prev_frame_mvs[bo.y][bo.x]);
327 328 329 330 331
  }

  predictors
}

332
pub trait MotionEstimation {
333 334 335 336 337 338 339
  fn full_pixel_me<T: Pixel>(
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &Arc<ReferenceFrame<T>>, po: &PlaneOffset,
    bo: &BlockOffset, lambda: u32,
    ref_slot: usize, cmv: MotionVector, pmv: [MotionVector; 2],
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize, best_mv: &mut MotionVector,
    lowest_cost: &mut u64, ref_frame: usize
340
  );
341

342 343 344 345 346 347 348 349 350
  fn sub_pixel_me<T: Pixel>(
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &Arc<ReferenceFrame<T>>, po: &PlaneOffset,
    bo: &BlockOffset, lambda: u32, pmv: [MotionVector; 2],
    mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
    blk_w: usize, blk_h: usize, best_mv: &mut MotionVector,
    lowest_cost: &mut u64, ref_frame: usize,
    tmp_plane: Plane<T>, bsize: BlockSize
  );

351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375
  fn motion_estimation<T: Pixel> (
    fi: &FrameInvariants<T>, fs: &FrameState<T>, bsize: BlockSize,
    bo: &BlockOffset, ref_frame: usize, cmv: MotionVector,
    pmv: [MotionVector; 2], ref_slot: usize
  ) -> MotionVector {
    match fi.rec_buffer.frames[fi.ref_frames[ref_frame - LAST_FRAME] as usize]
    {
      Some(ref rec) => {
        let po = PlaneOffset {
          x: (bo.x as isize) << BLOCK_TO_PLANE_SHIFT,
          y: (bo.y as isize) << BLOCK_TO_PLANE_SHIFT
        };
        let blk_w = bsize.width();
        let blk_h = bsize.height();
        let (mvx_min, mvx_max, mvy_min, mvy_max) =
          get_mv_range(fi.w_in_b, fi.h_in_b, bo, blk_w, blk_h);

        // 0.5 is a fudge factor
        let lambda = (fi.me_lambda * 256.0 * 0.5) as u32;

        // Full-pixel motion estimation

        let mut lowest_cost = std::u64::MAX;
        let mut best_mv = MotionVector::default();

376 377 378
        Self::full_pixel_me(fi, fs, rec, &po, bo, lambda, ref_slot, cmv, pmv,
                           mvx_min, mvx_max, mvy_min, mvy_max, blk_w, blk_h,
                           &mut best_mv, &mut lowest_cost, ref_frame);
379

380 381 382 383 384
        let tmp_plane = Plane::new(blk_w, blk_h, 0, 0, 0, 0);
        Self::sub_pixel_me(fi, fs, rec, &po, bo, lambda, pmv,
                           mvx_min, mvx_max, mvy_min, mvy_max, blk_w, blk_h,
                           &mut best_mv, &mut lowest_cost, ref_frame,
                           tmp_plane, bsize);
385 386

        best_mv
Frank Bossen's avatar
Frank Bossen committed
387 388
      }

389
      None => MotionVector::default()
390
    }
391 392
  }
}
393

394 395 396
/// Motion estimation strategy whose `MotionEstimation` implementation runs
/// `diamond_me_search` for both the full-pixel and the sub-pixel stage.
pub struct DiamondSearch {}
/// Motion estimation strategy whose `MotionEstimation` implementation runs
/// `full_search` for the full-pixel stage and `telescopic_subpel_search` for
/// the sub-pixel stage.
pub struct FullSearch {}

397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430
impl MotionEstimation for DiamondSearch {
  fn full_pixel_me<T: Pixel>(
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &Arc<ReferenceFrame<T>>,
    po: &PlaneOffset, bo: &BlockOffset, lambda: u32, ref_slot: usize,
    cmv: MotionVector, pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
    best_mv: &mut MotionVector, lowest_cost: &mut u64, ref_frame: usize
  ) {
    let frame_mvs = &fs.frame_mvs[ref_slot];
    let frame_ref = &fi.rec_buffer.frames[fi.ref_frames[0] as usize];
    let predictors =
      get_subset_predictors(fi, bo, cmv, frame_mvs, frame_ref, ref_slot);

    diamond_me_search(
      fi,
      &po,
      &fs.input.planes[0],
      &rec.frame.planes[0],
      &predictors,
      fi.sequence.bit_depth,
      pmv,
      lambda,
      mvx_min,
      mvx_max,
      mvy_min,
      mvy_max,
      blk_w,
      blk_h,
      best_mv,
      lowest_cost,
      &mut None,
      ref_frame
    );
  }
431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462

  fn sub_pixel_me<T: Pixel>(
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &Arc<ReferenceFrame<T>>,
    po: &PlaneOffset, _bo: &BlockOffset, lambda: u32,
    pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
    best_mv: &mut MotionVector, lowest_cost: &mut u64, ref_frame: usize,
    tmp_plane: Plane<T>, _bsize: BlockSize
  )
  {
    let predictors = vec![*best_mv];
    diamond_me_search(
      fi,
      &po,
      &fs.input.planes[0],
      &rec.frame.planes[0],
      &predictors,
      fi.sequence.bit_depth,
      pmv,
      lambda,
      mvx_min,
      mvx_max,
      mvy_min,
      mvy_max,
      blk_w,
      blk_h,
      best_mv,
      lowest_cost,
      &mut Some(tmp_plane),
      ref_frame
    );
  }
463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501
}

impl MotionEstimation for FullSearch {
  fn full_pixel_me<T: Pixel>(
    fi: &FrameInvariants<T>, fs: &FrameState<T>, rec: &Arc<ReferenceFrame<T>>,
    po: &PlaneOffset, _bo: &BlockOffset, lambda: u32, _ref_slot: usize,
    cmv: MotionVector, pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
    mvy_min: isize, mvy_max: isize, blk_w: usize, blk_h: usize,
    best_mv: &mut MotionVector, lowest_cost: &mut u64, _ref_frame: usize
  ) {
    let range = 16;
    let x_lo = po.x
      + ((-range + (cmv.col / 8) as isize).max(mvx_min / 8).min(mvx_max / 8));
    let x_hi = po.x
      + ((range + (cmv.col / 8) as isize).max(mvx_min / 8).min(mvx_max / 8));
    let y_lo = po.y
      + ((-range + (cmv.row / 8) as isize).max(mvy_min / 8).min(mvy_max / 8));
    let y_hi = po.y
      + ((range + (cmv.row / 8) as isize).max(mvy_min / 8).min(mvy_max / 8));

    full_search(
      x_lo,
      x_hi,
      y_lo,
      y_hi,
      blk_h,
      blk_w,
      &fs.input.planes[0],
      &rec.frame.planes[0],
      best_mv,
      lowest_cost,
      &po,
      2,
      fi.sequence.bit_depth,
      lambda,
      pmv,
      fi.allow_high_precision_mv
    );
  }
502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528

  fn sub_pixel_me<T: Pixel>(
    fi: &FrameInvariants<T>, fs: &FrameState<T>, _rec: &Arc<ReferenceFrame<T>>,
    po: &PlaneOffset, _bo: &BlockOffset, lambda: u32,
    pmv: [MotionVector; 2], mvx_min: isize, mvx_max: isize,
    mvy_min: isize, mvy_max: isize, _blk_w: usize, _blk_h: usize,
    best_mv: &mut MotionVector, lowest_cost: &mut u64, ref_frame: usize,
    mut tmp_plane: Plane<T>, bsize: BlockSize
  )
  {
    telescopic_subpel_search(
      fi,
      fs,
      bsize,
      &po,
      lambda,
      ref_frame,
      pmv,
      mvx_min,
      mvx_max,
      mvy_min,
      mvy_max,
      &mut tmp_plane,
      best_mv,
      lowest_cost
    );
  }
529
}
530

531 532 533
fn get_best_predictor<T: Pixel>(
  fi: &FrameInvariants<T>,
  po: &PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>,
534
  predictors: &[MotionVector],
535
  bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
536 537
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
538 539
  center_mv: &mut MotionVector, center_mv_cost: &mut u64,
  tmp_plane_opt: &mut Option<Plane<T>>, ref_frame: usize) {
Vladimir Kazakov's avatar
Vladimir Kazakov committed
540
  *center_mv = MotionVector::default();
541 542 543 544 545 546
  *center_mv_cost = std::u64::MAX;

  for &init_mv in predictors.iter() {
    let cost = get_mv_rd_cost(
      fi, po, p_org, p_ref, bit_depth,
      pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
547
      blk_w, blk_h, init_mv, tmp_plane_opt, ref_frame);
548 549 550 551 552 553 554 555

    if cost < *center_mv_cost {
      *center_mv = init_mv;
      *center_mv_cost = cost;
    }
  }
}

556 557 558
fn diamond_me_search<T: Pixel>(
  fi: &FrameInvariants<T>,
  po: &PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>,
559
  predictors: &[MotionVector],
560
  bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
561 562
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
563 564
  center_mv: &mut MotionVector, center_mv_cost: &mut u64,
  tmp_plane_opt: &mut Option<Plane<T>>, ref_frame: usize)
565 566
{
  let diamond_pattern = [(1i16, 0i16), (0, 1), (-1, 0), (0, -1)];
567 568 569 570 571 572 573 574 575
  let (mut diamond_radius, diamond_radius_end) = {
    if tmp_plane_opt.is_some() {
      // Sub-pixel motion estimation
      (4i16, if fi.allow_high_precision_mv {1i16} else {2i16})
    } else {
      // Full pixel motion estimation
      (16i16, 8i16)
    }
  };
576 577 578 579

  get_best_predictor(
    fi, po, p_org, p_ref, &predictors,
    bit_depth, pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
580 581
    blk_w, blk_h, center_mv, center_mv_cost,
    tmp_plane_opt, ref_frame);
582 583 584

  loop {
    let mut best_diamond_rd_cost = std::u64::MAX;
Vladimir Kazakov's avatar
Vladimir Kazakov committed
585
    let mut best_diamond_mv = MotionVector::default();
586 587 588 589 590 591 592 593 594 595 596

    for p in diamond_pattern.iter() {

        let cand_mv = MotionVector {
          row: center_mv.row + diamond_radius * p.0,
          col: center_mv.col + diamond_radius * p.1
        };

        let rd_cost = get_mv_rd_cost(
          fi, &po, p_org, p_ref, bit_depth,
          pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
597
          blk_w, blk_h, cand_mv, tmp_plane_opt, ref_frame);
598 599 600 601 602 603 604 605

        if rd_cost < best_diamond_rd_cost {
          best_diamond_rd_cost = rd_cost;
          best_diamond_mv = cand_mv;
        }
    }

    if *center_mv_cost <= best_diamond_rd_cost {
606
      if diamond_radius == diamond_radius_end {
607 608 609 610 611 612 613 614 615 616 617 618 619 620
        break;
      } else {
        diamond_radius /= 2;
      }
    }
    else {
      *center_mv = best_diamond_mv;
      *center_mv_cost = best_diamond_rd_cost;
    }
  }

  assert!(*center_mv_cost < std::u64::MAX);
}

621 622 623
fn get_mv_rd_cost<T: Pixel>(
  fi: &FrameInvariants<T>,
  po: &PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>, bit_depth: usize,
624
  pmv: [MotionVector; 2], lambda: u32,
625 626
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
627 628
  cand_mv: MotionVector, tmp_plane_opt: &mut Option<Plane<T>>,
  ref_frame: usize) -> u64
629 630 631 632 633 634 635 636 637 638
{
  if (cand_mv.col as isize) < mvx_min || (cand_mv.col as isize) > mvx_max {
    return std::u64::MAX;
  }
  if (cand_mv.row as isize) < mvy_min || (cand_mv.row as isize) > mvy_max {
    return std::u64::MAX;
  }

  let plane_org = p_org.slice(po);

639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675
  if let Some(ref mut tmp_plane) = tmp_plane_opt {
    let mut tmp_slice = &mut tmp_plane.mut_slice(&PlaneOffset { x: 0, y: 0 });
    PredictionMode::NEWMV.predict_inter(
      fi,
      0,
      &po,
      &mut tmp_slice,
      blk_w,
      blk_h,
      [ref_frame, NONE_FRAME],
      [cand_mv, MotionVector { row: 0, col: 0 }]
    );
    let plane_ref = tmp_plane.slice(&PlaneOffset { x: 0, y: 0 });
    compute_mv_rd_cost(
      fi, pmv, lambda, bit_depth, blk_w, blk_h, cand_mv,
      &plane_org, &plane_ref
    )
  } else {
    // Full pixel motion vector
    let plane_ref = p_ref.slice(&PlaneOffset {
      x: po.x + (cand_mv.col / 8) as isize,
      y: po.y + (cand_mv.row / 8) as isize
    });
    compute_mv_rd_cost(
      fi, pmv, lambda, bit_depth, blk_w, blk_h, cand_mv,
      &plane_org, &plane_ref
    )
  }
}

fn compute_mv_rd_cost<T: Pixel>(
  fi: &FrameInvariants<T>,
  pmv: [MotionVector; 2], lambda: u32,
  bit_depth: usize, blk_w: usize, blk_h: usize, cand_mv: MotionVector,
  plane_org: &PlaneSlice<T>, plane_ref: &PlaneSlice<T>
) -> u64
{
676 677 678 679 680 681 682 683 684
  let sad = get_sad(&plane_org, &plane_ref, blk_h, blk_w, bit_depth);

  let rate1 = get_mv_rate(cand_mv, pmv[0], fi.allow_high_precision_mv);
  let rate2 = get_mv_rate(cand_mv, pmv[1], fi.allow_high_precision_mv);
  let rate = rate1.min(rate2 + 1);

  256 * sad as u64 + rate as u64 * lambda as u64
}

685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756
fn telescopic_subpel_search<T: Pixel>(
  fi: &FrameInvariants<T>, fs: &FrameState<T>, bsize: BlockSize, po: &PlaneOffset,
  lambda: u32, ref_frame: usize, pmv: [MotionVector; 2],
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  tmp_plane: &mut Plane<T>, best_mv: &mut MotionVector, lowest_cost: &mut u64
) {
  let blk_w = bsize.width();
  let blk_h = bsize.height();

  let mode = PredictionMode::NEWMV;

  let mut steps = vec![8, 4, 2];
  if fi.allow_high_precision_mv {
    steps.push(1);
  }

  for step in steps {
    let center_mv_h = *best_mv;
    for i in 0..3 {
      for j in 0..3 {
        // Skip the center point that was already tested
        if i == 1 && j == 1 {
          continue;
        }

        let cand_mv = MotionVector {
          row: center_mv_h.row + step * (i as i16 - 1),
          col: center_mv_h.col + step * (j as i16 - 1)
        };

        if (cand_mv.col as isize) < mvx_min || (cand_mv.col as isize) > mvx_max {
          continue;
        }
        if (cand_mv.row as isize) < mvy_min || (cand_mv.row as isize) > mvy_max {
          continue;
        }

        {
          let tmp_slice =
            &mut tmp_plane.mut_slice(&PlaneOffset { x: 0, y: 0 });

          mode.predict_inter(
            fi,
            0,
            &po,
            tmp_slice,
            blk_w,
            blk_h,
            [ref_frame, NONE_FRAME],
            [cand_mv, MotionVector { row: 0, col: 0 }]
          );
        }

        let plane_org = fs.input.planes[0].slice(&po);
        let plane_ref = tmp_plane.slice(&PlaneOffset { x: 0, y: 0 });

        let sad = get_sad(&plane_org, &plane_ref, blk_h, blk_w, fi.sequence.bit_depth);

        let rate1 = get_mv_rate(cand_mv, pmv[0], fi.allow_high_precision_mv);
        let rate2 = get_mv_rate(cand_mv, pmv[1], fi.allow_high_precision_mv);
        let rate = rate1.min(rate2 + 1);
        let cost = 256 * sad as u64 + rate as u64 * lambda as u64;

        if cost < *lowest_cost {
          *lowest_cost = cost;
          *best_mv = cand_mv;
        }
      }
    }
  }
}

757
fn full_search<T: Pixel>(
Kyle Siefring's avatar
Kyle Siefring committed
758
  x_lo: isize, x_hi: isize, y_lo: isize, y_hi: isize, blk_h: usize,
759
  blk_w: usize, p_org: &Plane<T>, p_ref: &Plane<T>, best_mv: &mut MotionVector,
760
  lowest_cost: &mut u64, po: &PlaneOffset, step: usize, bit_depth: usize,
761
  lambda: u32, pmv: [MotionVector; 2], allow_high_precision_mv: bool
Kyle Siefring's avatar
Kyle Siefring committed
762
) {
Luca Barbato's avatar
Luca Barbato committed
763 764 765 766 767
    let search_range_y = (y_lo..=y_hi).step_by(step);
    let search_range_x = (x_lo..=x_hi).step_by(step);
    let search_area = search_range_y.flat_map(|y| { search_range_x.clone().map(move |x| (y, x)) });

    let (cost, mv) = search_area.map(|(y, x)| {
768 769 770
      let plane_org = p_org.slice(po);
      let plane_ref = p_ref.slice(&PlaneOffset { x, y });

Kyle Siefring's avatar
Kyle Siefring committed
771
      let sad = get_sad(&plane_org, &plane_ref, blk_h, blk_w, bit_depth);
772

Frank Bossen's avatar
Frank Bossen committed
773 774 775 776 777 778 779 780
      let mv = MotionVector {
        row: 8 * (y as i16 - po.y as i16),
        col: 8 * (x as i16 - po.x as i16)
      };

      let rate1 = get_mv_rate(mv, pmv[0], allow_high_precision_mv);
      let rate2 = get_mv_rate(mv, pmv[1], allow_high_precision_mv);
      let rate = rate1.min(rate2 + 1);
781
      let cost = 256 * sad as u64 + rate as u64 * lambda as u64;
Frank Bossen's avatar
Frank Bossen committed
782

Luca Barbato's avatar
Luca Barbato committed
783 784 785 786 787
      (cost, mv)
  }).min_by_key(|(c, _)| *c).unwrap();

    *lowest_cost = cost;
    *best_mv = mv;
788 789 790
}

// Adjust block offset such that entire block lies within frame boundaries
791
fn adjust_bo<T: Pixel>(bo: &BlockOffset, fi: &FrameInvariants<T>, blk_w: usize, blk_h: usize) -> BlockOffset {
792 793 794 795 796 797
  BlockOffset {
    x: (bo.x as isize).min(fi.w_in_b as isize - blk_w as isize / 4).max(0) as usize,
    y: (bo.y as isize).min(fi.h_in_b as isize - blk_h as isize / 4).max(0) as usize
  }
}

798
#[inline(always)]
Frank Bossen's avatar
Frank Bossen committed
799
fn get_mv_rate(a: MotionVector, b: MotionVector, allow_high_precision_mv: bool) -> u32 {
800
  #[inline(always)]
Frank Bossen's avatar
Frank Bossen committed
801 802 803 804 805 806 807 808 809 810 811 812
  fn diff_to_rate(diff: i16, allow_high_precision_mv: bool) -> u32 {
    let d = if allow_high_precision_mv { diff } else { diff >> 1 };
    if d == 0 {
      0
    } else {
      2 * (16 - d.abs().leading_zeros())
    }
  }

  diff_to_rate(a.row - b.row, allow_high_precision_mv) + diff_to_rate(a.col - b.col, allow_high_precision_mv)
}

813 814
pub fn estimate_motion_ss4<T: Pixel>(
  fi: &FrameInvariants<T>, fs: &FrameState<T>, bsize: BlockSize, ref_idx: usize,
815
  bo: &BlockOffset
816 817 818 819 820 821 822 823 824
) -> Option<MotionVector> {
  if let Some(ref rec) = fi.rec_buffer.frames[ref_idx] {
    let blk_w = bsize.width();
    let blk_h = bsize.height();
    let bo_adj = adjust_bo(bo, fi, blk_w, blk_h);
    let po = PlaneOffset {
      x: (bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 2,
      y: (bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 2
    };
825

826 827
    let range_x = 192 * fi.me_range_scale as isize;
    let range_y = 64 * fi.me_range_scale as isize;
828
    let (mvx_min, mvx_max, mvy_min, mvy_max) = get_mv_range(fi.w_in_b, fi.h_in_b, &bo_adj, blk_w, blk_h);
829 830 831 832
    let x_lo = po.x + (((-range_x).max(mvx_min / 8)) >> 2);
    let x_hi = po.x + (((range_x).min(mvx_max / 8)) >> 2);
    let y_lo = po.y + (((-range_y).max(mvy_min / 8)) >> 2);
    let y_hi = po.y + (((range_y).min(mvy_max / 8)) >> 2);
833

834
    let mut lowest_cost = std::u64::MAX;
Vladimir Kazakov's avatar
Vladimir Kazakov committed
835
    let mut best_mv = MotionVector::default();
836

Frank Bossen's avatar
Frank Bossen committed
837
    // Divide by 16 to account for subsampling, 0.125 is a fudge factor
838
    let lambda = (fi.me_lambda * 256.0 / 16.0 * 0.125) as u32;
Frank Bossen's avatar
Frank Bossen committed
839

840
    full_search(
Kyle Siefring's avatar
Kyle Siefring committed
841 842 843 844 845 846 847 848 849
      x_lo,
      x_hi,
      y_lo,
      y_hi,
      blk_h >> 2,
      blk_w >> 2,
      &fs.input_qres,
      &rec.input_qres,
      &mut best_mv,
Frank Bossen's avatar
Frank Bossen committed
850
      &mut lowest_cost,
Kyle Siefring's avatar
Kyle Siefring committed
851 852
      &po,
      1,
853
      fi.sequence.bit_depth,
Frank Bossen's avatar
Frank Bossen committed
854
      lambda,
Vladimir Kazakov's avatar
Vladimir Kazakov committed
855
      [MotionVector::default(); 2],
Frank Bossen's avatar
Frank Bossen committed
856
      fi.allow_high_precision_mv
857 858 859 860 861 862 863 864
    );

    Some(MotionVector { row: best_mv.row * 4, col: best_mv.col * 4 })
  } else {
    None
  }
}

865 866
pub fn estimate_motion_ss2<T: Pixel>(
  fi: &FrameInvariants<T>, fs: &FrameState<T>, bsize: BlockSize, ref_idx: usize,
867
  bo: &BlockOffset, pmvs: &[Option<MotionVector>; 3]
868 869 870 871 872 873 874 875 876 877
) -> Option<MotionVector> {
  if let Some(ref rec) = fi.rec_buffer.frames[ref_idx] {
    let blk_w = bsize.width();
    let blk_h = bsize.height();
    let bo_adj = adjust_bo(bo, fi, blk_w, blk_h);
    let po = PlaneOffset {
      x: (bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
      y: (bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 1
    };
    let range = 16;
878
    let (mvx_min, mvx_max, mvy_min, mvy_max) = get_mv_range(fi.w_in_b, fi.h_in_b, &bo_adj, blk_w, blk_h);
879

880
    let mut lowest_cost = std::u64::MAX;
Vladimir Kazakov's avatar
Vladimir Kazakov committed
881
    let mut best_mv = MotionVector::default();
882

Frank Bossen's avatar
Frank Bossen committed
883
    // Divide by 4 to account for subsampling, 0.125 is a fudge factor
884
    let lambda = (fi.me_lambda * 256.0 / 4.0 * 0.125) as u32;
Frank Bossen's avatar
Frank Bossen committed
885

886 887
    for omv in pmvs.iter() {
      if let Some(pmv) = omv {
888 889 890 891
        let x_lo = po.x + (((pmv.col as isize / 8 - range).max(mvx_min / 8).min(mvx_max / 8)) >> 1);
        let x_hi = po.x + (((pmv.col as isize / 8 + range).max(mvx_min / 8).min(mvx_max / 8)) >> 1);
        let y_lo = po.y + (((pmv.row as isize / 8 - range).max(mvy_min / 8).min(mvy_max / 8)) >> 1);
        let y_hi = po.y + (((pmv.row as isize / 8 + range).max(mvy_min / 8).min(mvy_max / 8)) >> 1);
892 893

        full_search(
Kyle Siefring's avatar
Kyle Siefring committed
894 895 896 897 898 899 900 901 902
          x_lo,
          x_hi,
          y_lo,
          y_hi,
          blk_h >> 1,
          blk_w >> 1,
          &fs.input_hres,
          &rec.input_hres,
          &mut best_mv,
Frank Bossen's avatar
Frank Bossen committed
903
          &mut lowest_cost,
Kyle Siefring's avatar
Kyle Siefring committed
904 905
          &po,
          1,
906
          fi.sequence.bit_depth,
Frank Bossen's avatar
Frank Bossen committed
907
          lambda,
Vladimir Kazakov's avatar
Vladimir Kazakov committed
908
          [MotionVector::default(); 2],
Frank Bossen's avatar
Frank Bossen committed
909
          fi.allow_high_precision_mv
910 911 912 913 914 915 916 917 918 919
        );
      }
    }

    Some(MotionVector { row: best_mv.row * 2, col: best_mv.col * 2 })
  } else {
    None
  }
}

920 921 922
#[cfg(test)]
pub mod test {
  use super::*;
  use crate::partition::BlockSize;
  use crate::partition::BlockSize::*;

  // Write `pattern(row, column)` into every sample of `plane`, walking the
  // backing storage one stride at a time. Each value is checked to fit in
  // 8 bits before it is converted to the pixel type.
  fn fill_plane<T: Pixel>(plane: &mut Plane<T>, pattern: impl Fn(usize, usize) -> i32) {
    let stride = plane.cfg.stride;

    for (i, row) in plane.data.chunks_mut(stride).enumerate() {
      for (j, pixel) in row.iter_mut().enumerate() {
        let val = pattern(i, j);
        assert!(val >= i32::from(u8::min_value()) &&
            val <= i32::from(u8::max_value()));
        *pixel = T::cast_from(val);
      }
    }
  }

  // Generate plane data for get_sad_same()
  fn setup_sad<T: Pixel>() -> (Plane<T>, Plane<T>) {
    let mut input_plane = Plane::new(640, 480, 0, 0, 128 + 8, 128 + 8);
    let mut rec_plane = input_plane.clone();
    // Make the test pattern robust to data alignment
    let xpad_off = (input_plane.cfg.xorigin - input_plane.cfg.xpad) as i32 - 8i32;

    // Input: values grow along both axes, wrapped to 0..=255.
    fill_plane(&mut input_plane, |i, j| ((j + i) as i32 - xpad_off) & 255i32);
    // Reconstruction: values grow with the column and shrink with the row.
    fill_plane(&mut rec_plane, |i, j| (j as i32 - i as i32 - xpad_off) & 255i32);

    (input_plane, rec_plane)
  }

  // Regression and validation test for SAD computation
  fn get_sad_same_inner<T: Pixel>() {
    // Expected SAD for each block size, measured at one fixed plane offset.
    let blocks: Vec<(BlockSize, u32)> = vec![
      (BLOCK_4X4, 1912),
      (BLOCK_4X8, 4296),
      (BLOCK_8X4, 3496),
      (BLOCK_8X8, 7824),
      (BLOCK_8X16, 16592),
      (BLOCK_16X8, 14416),
      (BLOCK_16X16, 31136),
      (BLOCK_16X32, 60064),
      (BLOCK_32X16, 59552),
      (BLOCK_32X32, 120128),
      (BLOCK_32X64, 186688),
      (BLOCK_64X32, 250176),
      (BLOCK_64X64, 438912),
      (BLOCK_64X128, 654272),
      (BLOCK_128X64, 1016768),
      (BLOCK_128X128, 1689792),
      (BLOCK_4X16, 8680),
      (BLOCK_16X4, 6664),
      (BLOCK_8X32, 31056),
      (BLOCK_32X8, 27600),
      (BLOCK_16X64, 93344),
      (BLOCK_64X16, 116384),
    ];

    let bit_depth: usize = 8;
    let (input_plane, rec_plane) = setup_sad::<T>();

    for (bsize, expected) in blocks {
      let (bsw, bsh) = (bsize.width(), bsize.height());
      let po = PlaneOffset { x: 32, y: 40 };

      let input_slice = input_plane.slice(&po);
      let rec_slice = rec_plane.slice(&po);

      assert_eq!(
        expected,
        get_sad(&input_slice, &rec_slice, bsh, bsw, bit_depth)
      );
    }
  }

  #[test]
  fn get_sad_same_u8() {
    get_sad_same_inner::<u8>();
  }

  #[test]
  fn get_sad_same_u16() {
    get_sad_same_inner::<u16>();
  }
}