// Copyright (c) 2017-2018, The rav1e contributors. All rights reserved
//
// This source code is subject to the terms of the BSD 2 Clause License and
// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
// was not distributed with this source code in the LICENSE file, you can
// obtain it at www.aomedia.org/license/software. If the Alliance for Open
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.

#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
pub use self::nasm::get_sad;
#[cfg(any(not(target_arch = "x86_64"), windows, not(feature = "nasm")))]
pub use self::native::get_sad;
use crate::context::{BlockOffset, BLOCK_TO_PLANE_SHIFT, MI_SIZE};
use crate::encoder::ReferenceFrame;
use crate::FrameInvariants;
use crate::FrameState;
use crate::partition::*;
use crate::plane::*;
use crate::util::Pixel;

use std::sync::Arc;

#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
mod nasm {
  use crate::plane::*;
  use crate::util::*;
  use std::mem;

  use libc;

  // Externally linked SAD kernels for fixed square block sizes. Every kernel
  // takes two `u16` sample pointers with their strides in bytes and returns
  // the sum of absolute differences over the block.
  // NOTE(review): presumably these are the assembly routines built when the
  // `nasm` feature is enabled -- confirm against the build script.
  extern {
    fn rav1e_sad_4x4_hbd_ssse3(
      src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad_8x8_hbd10_ssse3(
      src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad_16x16_hbd_ssse3(
      src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad_32x32_hbd10_ssse3(
      src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad_64x64_hbd10_ssse3(
      src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad_128x128_hbd10_ssse3(
      src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
      dst_stride: libc::ptrdiff_t
    ) -> u32;
  }

  /// Sums the SAD of a `blk_w` x `blk_h` block by tiling it with square
  /// kernel calls.
  ///
  /// The tile edge is the smaller block dimension, capped at 128 for bit
  /// depths up to 10 and at 4 otherwise.
  ///
  /// # Safety
  ///
  /// The caller must have verified that the CPU supports SSSE3, and both
  /// slices must cover at least `blk_w` x `blk_h` readable samples.
  ///
  /// # Panics
  ///
  /// Panics if `T` is not two bytes wide or if either block dimension is
  /// smaller than 4.
  #[target_feature(enable = "ssse3")]
  unsafe fn sad_ssse3<T: Pixel>(
    plane_org: &PlaneSlice<'_, T>, plane_ref: &PlaneSlice<'_, T>, blk_h: usize,
    blk_w: usize, bit_depth: usize
  ) -> u32 {
    assert!(mem::size_of::<T>() == 2, "only implemented for u16 for now");
    let mut sum = 0 as u32;
    // The kernels expect strides in bytes, not in samples.
    let org_stride =
      (plane_org.plane.cfg.stride * mem::size_of::<T>()) as libc::ptrdiff_t;
    let ref_stride =
      (plane_ref.plane.cfg.stride * mem::size_of::<T>()) as libc::ptrdiff_t;
    assert!(blk_h >= 4 && blk_w >= 4);
    let step_size =
      blk_h.min(blk_w).min(if bit_depth <= 10 { 128 } else { 4 });
    // The keys pair `ilog()` results 3..=8 with the 4x4..128x128 kernels.
    let func = match step_size.ilog() {
      3 => rav1e_sad_4x4_hbd_ssse3,
      4 => rav1e_sad_8x8_hbd10_ssse3,
      5 => rav1e_sad_16x16_hbd_ssse3,
      6 => rav1e_sad_32x32_hbd10_ssse3,
      7 => rav1e_sad_64x64_hbd10_ssse3,
      8 => rav1e_sad_128x128_hbd10_ssse3,
      _ => rav1e_sad_128x128_hbd10_ssse3
    };
    for r in (0..blk_h).step_by(step_size) {
      for c in (0..blk_w).step_by(step_size) {
        let org_slice = plane_org.subslice(c, r);
        let ref_slice = plane_ref.subslice(c, r);
        // FIXME for now, T == u16 (enforced by the assertion above)
        let org_ptr = org_slice.as_ptr() as *const u16;
        let ref_ptr = ref_slice.as_ptr() as *const u16;
        sum += func(org_ptr, org_stride, ref_ptr, ref_stride);
      }
    }
    sum
  }

  /// Returns the sum of absolute differences between two blocks.
  ///
  /// Uses the SSSE3 kernels when the samples are 16 bits wide, the CPU
  /// reports SSSE3 and both dimensions are at least 4; otherwise defers to
  /// the scalar implementation in `native`.
  #[inline(always)]
  pub fn get_sad<T: Pixel>(
    plane_org: &PlaneSlice<'_, T>, plane_ref: &PlaneSlice<'_, T>, blk_h: usize,
    blk_w: usize, bit_depth: usize
  ) -> u32 {
    // The kernels only understand 16-bit samples. Checking the sample size
    // here makes other pixel types use the scalar path instead of tripping
    // the assertion inside `sad_ssse3`.
    let simd_usable = mem::size_of::<T>() == 2
      && is_x86_feature_detected!("ssse3")
      && blk_h >= 4
      && blk_w >= 4;

    if simd_usable {
      // SAFETY: SSSE3 support was detected at run time just above, and the
      // sample-size and minimum-dimension preconditions were checked too.
      return unsafe {
        sad_ssse3(plane_org, plane_ref, blk_h, blk_w, bit_depth)
      };
    }

    super::native::get_sad(plane_org, plane_ref, blk_h, blk_w, bit_depth)
  }
}

mod native {
Raphaël Zumer's avatar
Raphaël Zumer committed
118
  use crate::plane::*;
119
  use crate::util::*;
120

Luca Barbato's avatar
Luca Barbato committed
121
  #[inline(always)]
122 123
  pub fn get_sad<T: Pixel>(
    plane_org: &PlaneSlice<'_, T>, plane_ref: &PlaneSlice<'_, T>, blk_h: usize,
Luca Barbato's avatar
Luca Barbato committed
124 125 126
    blk_w: usize, _bit_depth: usize
  ) -> u32 {
    let mut sum = 0 as u32;
127

Luca Barbato's avatar
Luca Barbato committed
128 129
    let org_iter = plane_org.iter_width(blk_w);
    let ref_iter = plane_ref.iter_width(blk_w);
130

Luca Barbato's avatar
Luca Barbato committed
131
    for (slice_org, slice_ref) in org_iter.take(blk_h).zip(ref_iter) {
132 133 134
      sum += slice_org
        .iter()
        .zip(slice_ref)
135
        .map(|(&a, &b)| (i32::cast_from(a) - i32::cast_from(b)).abs() as u32)
136
        .sum::<u32>();
Luca Barbato's avatar
Luca Barbato committed
137
    }
138

Luca Barbato's avatar
Luca Barbato committed
139 140
    sum
  }
141 142
}

143
fn get_mv_range(
144
  w_in_b: usize, h_in_b: usize, bo: &BlockOffset, blk_w: usize, blk_h: usize
145
) -> (isize, isize, isize, isize) {
146 147 148
  let border_w = 128 + blk_w as isize * 8;
  let border_h = 128 + blk_h as isize * 8;
  let mvx_min = -(bo.x as isize) * (8 * MI_SIZE) as isize - border_w;
149
  let mvx_max = (w_in_b - bo.x - blk_w / MI_SIZE) as isize * (8 * MI_SIZE) as isize + border_w;
150
  let mvy_min = -(bo.y as isize) * (8 * MI_SIZE) as isize - border_h;
151
  let mvy_max = (h_in_b - bo.y - blk_h / MI_SIZE) as isize * (8 * MI_SIZE) as isize + border_h;
152 153 154 155

  (mvx_min, mvx_max, mvy_min, mvy_max)
}

156 157
pub fn get_subset_predictors<T: Pixel>(
  fi: &FrameInvariants<T>, bo: &BlockOffset, cmv: MotionVector,
158
  frame_mvs: &[MotionVector], frame_ref_opt: &Option<Arc<ReferenceFrame<T>>>,
159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
  ref_slot: usize
) -> (Vec<MotionVector>) {
  let mut predictors = Vec::new();

  // EPZS subset A and B predictors.

  if bo.x > 0 {
    let left = frame_mvs[bo.y * fi.w_in_b + bo.x - 1];
    predictors.push(left);
  }
  if bo.y > 0 {
    let top = frame_mvs[(bo.y - 1) * fi.w_in_b + bo.x];
    predictors.push(top);

    if bo.x < fi.w_in_b - 1 {
      let top_right = frame_mvs[(bo.y - 1) * fi.w_in_b + bo.x + 1];
      predictors.push(top_right);
    }
  }

  if predictors.len() > 0 {
Vladimir Kazakov's avatar
Vladimir Kazakov committed
180
    let mut median_mv = MotionVector::default();
181 182 183 184 185 186 187 188
    for mv in predictors.iter() {
      median_mv = median_mv + *mv;
    }
    median_mv = median_mv / (predictors.len() as i16);

    predictors.push(median_mv.quantize_to_fullpel());
  }

Vladimir Kazakov's avatar
Vladimir Kazakov committed
189
  predictors.push(MotionVector::default());
190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222

  // Coarse motion estimation.

  predictors.push(cmv.quantize_to_fullpel());

  // EPZS subset C predictors.

  if let Some(ref frame_ref) = frame_ref_opt {
    let prev_frame_mvs = &frame_ref.frame_mvs[ref_slot];

    if bo.x > 0 {
      let left = prev_frame_mvs[bo.y * fi.w_in_b + bo.x - 1];
      predictors.push(left);
    }
    if bo.y > 0 {
      let top = prev_frame_mvs[(bo.y - 1) * fi.w_in_b + bo.x];
      predictors.push(top);
    }
    if bo.x < fi.w_in_b - 1 {
      let right = prev_frame_mvs[bo.y * fi.w_in_b + bo.x + 1];
      predictors.push(right);
    }
    if bo.y < fi.h_in_b - 1 {
      let bottom = prev_frame_mvs[(bo.y + 1) * fi.w_in_b + bo.x];
      predictors.push(bottom);
    }

    predictors.push(prev_frame_mvs[bo.y * fi.w_in_b + bo.x]);
  }

  predictors
}

223 224
pub fn motion_estimation<T: Pixel>(
  fi: &FrameInvariants<T>, fs: &FrameState<T>, bsize: BlockSize, bo: &BlockOffset,
225
  ref_frame: usize, cmv: MotionVector, pmv: [MotionVector; 2],
226
  ref_slot: usize
227
) -> MotionVector {
228
  match fi.rec_buffer.frames[fi.ref_frames[ref_frame - LAST_FRAME] as usize] {
229
    Some(ref rec) => {
230

231 232 233 234
      let po = PlaneOffset {
        x: (bo.x as isize) << BLOCK_TO_PLANE_SHIFT,
        y: (bo.y as isize) << BLOCK_TO_PLANE_SHIFT
      };
235 236
      let blk_w = bsize.width();
      let blk_h = bsize.height();
237 238 239 240 241 242
      let (mvx_min, mvx_max, mvy_min, mvy_max) = get_mv_range(fi.w_in_b, fi.h_in_b, bo, blk_w, blk_h);

      // 0.5 is a fudge factor
      let lambda = (fi.me_lambda * 256.0 * 0.5) as u32;

      // Full-pixel motion estimation
243

244
      let mut lowest_cost = std::u64::MAX;
Vladimir Kazakov's avatar
Vladimir Kazakov committed
245
      let mut best_mv = MotionVector::default();
246

247 248 249 250 251 252 253 254 255 256 257 258 259 260
      let frame_mvs = &fs.frame_mvs[ref_slot];
      let frame_ref = &fi.rec_buffer.frames[fi.ref_frames[0] as usize];

      if fi.config.speed_settings.diamond_me {
        let predictors = get_subset_predictors(fi, bo, cmv,
          frame_mvs, frame_ref, ref_slot);

        diamond_me_search(
          fi, &po,
          &fs.input.planes[0], &rec.frame.planes[0],
          &predictors, fi.sequence.bit_depth,
          pmv, lambda,
          mvx_min, mvx_max, mvy_min, mvy_max,
          blk_w, blk_h,
261 262
          &mut best_mv, &mut lowest_cost, &mut None, ref_frame
        );
263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288
      } else {
        let range = 16;
        let x_lo = po.x + ((-range + (cmv.col / 8) as isize).max(mvx_min / 8).min(mvx_max / 8));
        let x_hi = po.x + ((range + (cmv.col / 8) as isize).max(mvx_min / 8).min(mvx_max / 8));
        let y_lo = po.y + ((-range + (cmv.row / 8) as isize).max(mvy_min / 8).min(mvy_max / 8));
        let y_hi = po.y + ((range + (cmv.row / 8) as isize).max(mvy_min / 8).min(mvy_max / 8));

        full_search(
          x_lo,
          x_hi,
          y_lo,
          y_hi,
          blk_h,
          blk_w,
          &fs.input.planes[0],
          &rec.frame.planes[0],
          &mut best_mv,
          &mut lowest_cost,
          &po,
          2,
          fi.sequence.bit_depth,
          lambda,
          pmv,
          fi.allow_high_precision_mv
        );
      }
Frank Bossen's avatar
Frank Bossen committed
289

290
      // Sub-pixel motion estimation
fbossen's avatar
fbossen committed
291
      let mut tmp_plane = Plane::new(blk_w, blk_h, 0, 0, 0, 0);
Frank Bossen's avatar
Frank Bossen committed
292

293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310
      if fi.config.speed_settings.diamond_me {
        let predictors = vec![best_mv];
        diamond_me_search(
          fi, &po,
          &fs.input.planes[0], &rec.frame.planes[0],
          &predictors, fi.sequence.bit_depth,
          pmv, lambda,
          mvx_min, mvx_max, mvy_min, mvy_max,
          blk_w, blk_h,
          &mut best_mv, &mut lowest_cost, &mut Some(tmp_plane), ref_frame
        );
      } else {
        telescopic_subpel_search(
          fi, fs, bsize, &po,
          lambda, ref_frame, pmv,
          mvx_min, mvx_max, mvy_min, mvy_max,
          &mut tmp_plane, &mut best_mv, &mut lowest_cost
        );
Frank Bossen's avatar
Frank Bossen committed
311 312
      }

313
      best_mv
314
    }
315

Vladimir Kazakov's avatar
Vladimir Kazakov committed
316
    None => MotionVector::default()
317 318
  }
}
319

320 321 322
fn get_best_predictor<T: Pixel>(
  fi: &FrameInvariants<T>,
  po: &PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>,
323
  predictors: &[MotionVector],
324
  bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
325 326
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
327 328
  center_mv: &mut MotionVector, center_mv_cost: &mut u64,
  tmp_plane_opt: &mut Option<Plane<T>>, ref_frame: usize) {
Vladimir Kazakov's avatar
Vladimir Kazakov committed
329
  *center_mv = MotionVector::default();
330 331 332 333 334 335
  *center_mv_cost = std::u64::MAX;

  for &init_mv in predictors.iter() {
    let cost = get_mv_rd_cost(
      fi, po, p_org, p_ref, bit_depth,
      pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
336
      blk_w, blk_h, init_mv, tmp_plane_opt, ref_frame);
337 338 339 340 341 342 343 344

    if cost < *center_mv_cost {
      *center_mv = init_mv;
      *center_mv_cost = cost;
    }
  }
}

345 346 347
fn diamond_me_search<T: Pixel>(
  fi: &FrameInvariants<T>,
  po: &PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>,
348
  predictors: &[MotionVector],
349
  bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
350 351
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
352 353
  center_mv: &mut MotionVector, center_mv_cost: &mut u64,
  tmp_plane_opt: &mut Option<Plane<T>>, ref_frame: usize)
354 355
{
  let diamond_pattern = [(1i16, 0i16), (0, 1), (-1, 0), (0, -1)];
356 357 358 359 360 361 362 363 364
  let (mut diamond_radius, diamond_radius_end) = {
    if tmp_plane_opt.is_some() {
      // Sub-pixel motion estimation
      (4i16, if fi.allow_high_precision_mv {1i16} else {2i16})
    } else {
      // Full pixel motion estimation
      (16i16, 8i16)
    }
  };
365 366 367 368

  get_best_predictor(
    fi, po, p_org, p_ref, &predictors,
    bit_depth, pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
369 370
    blk_w, blk_h, center_mv, center_mv_cost,
    tmp_plane_opt, ref_frame);
371 372 373

  loop {
    let mut best_diamond_rd_cost = std::u64::MAX;
Vladimir Kazakov's avatar
Vladimir Kazakov committed
374
    let mut best_diamond_mv = MotionVector::default();
375 376 377 378 379 380 381 382 383 384 385

    for p in diamond_pattern.iter() {

        let cand_mv = MotionVector {
          row: center_mv.row + diamond_radius * p.0,
          col: center_mv.col + diamond_radius * p.1
        };

        let rd_cost = get_mv_rd_cost(
          fi, &po, p_org, p_ref, bit_depth,
          pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
386
          blk_w, blk_h, cand_mv, tmp_plane_opt, ref_frame);
387 388 389 390 391 392 393 394

        if rd_cost < best_diamond_rd_cost {
          best_diamond_rd_cost = rd_cost;
          best_diamond_mv = cand_mv;
        }
    }

    if *center_mv_cost <= best_diamond_rd_cost {
395
      if diamond_radius == diamond_radius_end {
396 397 398 399 400 401 402 403 404 405 406 407 408 409
        break;
      } else {
        diamond_radius /= 2;
      }
    }
    else {
      *center_mv = best_diamond_mv;
      *center_mv_cost = best_diamond_rd_cost;
    }
  }

  assert!(*center_mv_cost < std::u64::MAX);
}

410 411 412
fn get_mv_rd_cost<T: Pixel>(
  fi: &FrameInvariants<T>,
  po: &PlaneOffset, p_org: &Plane<T>, p_ref: &Plane<T>, bit_depth: usize,
413
  pmv: [MotionVector; 2], lambda: u32,
414 415
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
416 417
  cand_mv: MotionVector, tmp_plane_opt: &mut Option<Plane<T>>,
  ref_frame: usize) -> u64
418 419 420 421 422 423 424 425 426 427
{
  if (cand_mv.col as isize) < mvx_min || (cand_mv.col as isize) > mvx_max {
    return std::u64::MAX;
  }
  if (cand_mv.row as isize) < mvy_min || (cand_mv.row as isize) > mvy_max {
    return std::u64::MAX;
  }

  let plane_org = p_org.slice(po);

428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464
  if let Some(ref mut tmp_plane) = tmp_plane_opt {
    let mut tmp_slice = &mut tmp_plane.mut_slice(&PlaneOffset { x: 0, y: 0 });
    PredictionMode::NEWMV.predict_inter(
      fi,
      0,
      &po,
      &mut tmp_slice,
      blk_w,
      blk_h,
      [ref_frame, NONE_FRAME],
      [cand_mv, MotionVector { row: 0, col: 0 }]
    );
    let plane_ref = tmp_plane.slice(&PlaneOffset { x: 0, y: 0 });
    compute_mv_rd_cost(
      fi, pmv, lambda, bit_depth, blk_w, blk_h, cand_mv,
      &plane_org, &plane_ref
    )
  } else {
    // Full pixel motion vector
    let plane_ref = p_ref.slice(&PlaneOffset {
      x: po.x + (cand_mv.col / 8) as isize,
      y: po.y + (cand_mv.row / 8) as isize
    });
    compute_mv_rd_cost(
      fi, pmv, lambda, bit_depth, blk_w, blk_h, cand_mv,
      &plane_org, &plane_ref
    )
  }
}

fn compute_mv_rd_cost<T: Pixel>(
  fi: &FrameInvariants<T>,
  pmv: [MotionVector; 2], lambda: u32,
  bit_depth: usize, blk_w: usize, blk_h: usize, cand_mv: MotionVector,
  plane_org: &PlaneSlice<T>, plane_ref: &PlaneSlice<T>
) -> u64
{
465 466 467 468 469 470 471 472 473
  let sad = get_sad(&plane_org, &plane_ref, blk_h, blk_w, bit_depth);

  let rate1 = get_mv_rate(cand_mv, pmv[0], fi.allow_high_precision_mv);
  let rate2 = get_mv_rate(cand_mv, pmv[1], fi.allow_high_precision_mv);
  let rate = rate1.min(rate2 + 1);

  256 * sad as u64 + rate as u64 * lambda as u64
}

474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545
fn telescopic_subpel_search<T: Pixel>(
  fi: &FrameInvariants<T>, fs: &FrameState<T>, bsize: BlockSize, po: &PlaneOffset,
  lambda: u32, ref_frame: usize, pmv: [MotionVector; 2],
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  tmp_plane: &mut Plane<T>, best_mv: &mut MotionVector, lowest_cost: &mut u64
) {
  let blk_w = bsize.width();
  let blk_h = bsize.height();

  let mode = PredictionMode::NEWMV;

  let mut steps = vec![8, 4, 2];
  if fi.allow_high_precision_mv {
    steps.push(1);
  }

  for step in steps {
    let center_mv_h = *best_mv;
    for i in 0..3 {
      for j in 0..3 {
        // Skip the center point that was already tested
        if i == 1 && j == 1 {
          continue;
        }

        let cand_mv = MotionVector {
          row: center_mv_h.row + step * (i as i16 - 1),
          col: center_mv_h.col + step * (j as i16 - 1)
        };

        if (cand_mv.col as isize) < mvx_min || (cand_mv.col as isize) > mvx_max {
          continue;
        }
        if (cand_mv.row as isize) < mvy_min || (cand_mv.row as isize) > mvy_max {
          continue;
        }

        {
          let tmp_slice =
            &mut tmp_plane.mut_slice(&PlaneOffset { x: 0, y: 0 });

          mode.predict_inter(
            fi,
            0,
            &po,
            tmp_slice,
            blk_w,
            blk_h,
            [ref_frame, NONE_FRAME],
            [cand_mv, MotionVector { row: 0, col: 0 }]
          );
        }

        let plane_org = fs.input.planes[0].slice(&po);
        let plane_ref = tmp_plane.slice(&PlaneOffset { x: 0, y: 0 });

        let sad = get_sad(&plane_org, &plane_ref, blk_h, blk_w, fi.sequence.bit_depth);

        let rate1 = get_mv_rate(cand_mv, pmv[0], fi.allow_high_precision_mv);
        let rate2 = get_mv_rate(cand_mv, pmv[1], fi.allow_high_precision_mv);
        let rate = rate1.min(rate2 + 1);
        let cost = 256 * sad as u64 + rate as u64 * lambda as u64;

        if cost < *lowest_cost {
          *lowest_cost = cost;
          *best_mv = cand_mv;
        }
      }
    }
  }
}

546
fn full_search<T: Pixel>(
Kyle Siefring's avatar
Kyle Siefring committed
547
  x_lo: isize, x_hi: isize, y_lo: isize, y_hi: isize, blk_h: usize,
548
  blk_w: usize, p_org: &Plane<T>, p_ref: &Plane<T>, best_mv: &mut MotionVector,
549
  lowest_cost: &mut u64, po: &PlaneOffset, step: usize, bit_depth: usize,
550
  lambda: u32, pmv: [MotionVector; 2], allow_high_precision_mv: bool
Kyle Siefring's avatar
Kyle Siefring committed
551
) {
Luca Barbato's avatar
Luca Barbato committed
552 553 554 555 556
    let search_range_y = (y_lo..=y_hi).step_by(step);
    let search_range_x = (x_lo..=x_hi).step_by(step);
    let search_area = search_range_y.flat_map(|y| { search_range_x.clone().map(move |x| (y, x)) });

    let (cost, mv) = search_area.map(|(y, x)| {
557 558 559
      let plane_org = p_org.slice(po);
      let plane_ref = p_ref.slice(&PlaneOffset { x, y });

Kyle Siefring's avatar
Kyle Siefring committed
560
      let sad = get_sad(&plane_org, &plane_ref, blk_h, blk_w, bit_depth);
561

Frank Bossen's avatar
Frank Bossen committed
562 563 564 565 566 567 568 569
      let mv = MotionVector {
        row: 8 * (y as i16 - po.y as i16),
        col: 8 * (x as i16 - po.x as i16)
      };

      let rate1 = get_mv_rate(mv, pmv[0], allow_high_precision_mv);
      let rate2 = get_mv_rate(mv, pmv[1], allow_high_precision_mv);
      let rate = rate1.min(rate2 + 1);
570
      let cost = 256 * sad as u64 + rate as u64 * lambda as u64;
Frank Bossen's avatar
Frank Bossen committed
571

Luca Barbato's avatar
Luca Barbato committed
572 573 574 575 576
      (cost, mv)
  }).min_by_key(|(c, _)| *c).unwrap();

    *lowest_cost = cost;
    *best_mv = mv;
577 578 579
}

// Adjust block offset such that entire block lies within frame boundaries
580
fn adjust_bo<T: Pixel>(bo: &BlockOffset, fi: &FrameInvariants<T>, blk_w: usize, blk_h: usize) -> BlockOffset {
581 582 583 584 585 586
  BlockOffset {
    x: (bo.x as isize).min(fi.w_in_b as isize - blk_w as isize / 4).max(0) as usize,
    y: (bo.y as isize).min(fi.h_in_b as isize - blk_h as isize / 4).max(0) as usize
  }
}

Frank Bossen's avatar
Frank Bossen committed
587 588 589 590 591 592 593 594 595 596 597 598 599
/// Approximates the cost of coding motion vector `a` relative to `b`.
///
/// Each component difference is halved first when high-precision vectors are
/// disabled. A zero difference is free; otherwise it costs twice the bit
/// length of its magnitude. The row and column costs are summed.
fn get_mv_rate(a: MotionVector, b: MotionVector, allow_high_precision_mv: bool) -> u32 {
  let component_rate = |diff: i16| -> u32 {
    let scaled = if allow_high_precision_mv { diff } else { diff >> 1 };
    match scaled {
      0 => 0,
      // 16 minus the leading zero count is the bit length of the magnitude.
      nonzero => 2 * (16 - nonzero.abs().leading_zeros())
    }
  };

  let row_rate = component_rate(a.row - b.row);
  let col_rate = component_rate(a.col - b.col);

  row_rate + col_rate
}

600 601
pub fn estimate_motion_ss4<T: Pixel>(
  fi: &FrameInvariants<T>, fs: &FrameState<T>, bsize: BlockSize, ref_idx: usize,
602
  bo: &BlockOffset
603 604 605 606 607 608 609 610 611
) -> Option<MotionVector> {
  if let Some(ref rec) = fi.rec_buffer.frames[ref_idx] {
    let blk_w = bsize.width();
    let blk_h = bsize.height();
    let bo_adj = adjust_bo(bo, fi, blk_w, blk_h);
    let po = PlaneOffset {
      x: (bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 2,
      y: (bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 2
    };
612

613 614
    let range_x = 192 * fi.me_range_scale as isize;
    let range_y = 64 * fi.me_range_scale as isize;
615
    let (mvx_min, mvx_max, mvy_min, mvy_max) = get_mv_range(fi.w_in_b, fi.h_in_b, &bo_adj, blk_w, blk_h);
616 617 618 619
    let x_lo = po.x + (((-range_x).max(mvx_min / 8)) >> 2);
    let x_hi = po.x + (((range_x).min(mvx_max / 8)) >> 2);
    let y_lo = po.y + (((-range_y).max(mvy_min / 8)) >> 2);
    let y_hi = po.y + (((range_y).min(mvy_max / 8)) >> 2);
620

621
    let mut lowest_cost = std::u64::MAX;
Vladimir Kazakov's avatar
Vladimir Kazakov committed
622
    let mut best_mv = MotionVector::default();
623

Frank Bossen's avatar
Frank Bossen committed
624
    // Divide by 16 to account for subsampling, 0.125 is a fudge factor
625
    let lambda = (fi.me_lambda * 256.0 / 16.0 * 0.125) as u32;
Frank Bossen's avatar
Frank Bossen committed
626

627
    full_search(
Kyle Siefring's avatar
Kyle Siefring committed
628 629 630 631 632 633 634 635 636
      x_lo,
      x_hi,
      y_lo,
      y_hi,
      blk_h >> 2,
      blk_w >> 2,
      &fs.input_qres,
      &rec.input_qres,
      &mut best_mv,
Frank Bossen's avatar
Frank Bossen committed
637
      &mut lowest_cost,
Kyle Siefring's avatar
Kyle Siefring committed
638 639
      &po,
      1,
640
      fi.sequence.bit_depth,
Frank Bossen's avatar
Frank Bossen committed
641
      lambda,
Vladimir Kazakov's avatar
Vladimir Kazakov committed
642
      [MotionVector::default(); 2],
Frank Bossen's avatar
Frank Bossen committed
643
      fi.allow_high_precision_mv
644 645 646 647 648 649 650 651
    );

    Some(MotionVector { row: best_mv.row * 4, col: best_mv.col * 4 })
  } else {
    None
  }
}

652 653
pub fn estimate_motion_ss2<T: Pixel>(
  fi: &FrameInvariants<T>, fs: &FrameState<T>, bsize: BlockSize, ref_idx: usize,
654
  bo: &BlockOffset, pmvs: &[Option<MotionVector>; 3]
655 656 657 658 659 660 661 662 663 664
) -> Option<MotionVector> {
  if let Some(ref rec) = fi.rec_buffer.frames[ref_idx] {
    let blk_w = bsize.width();
    let blk_h = bsize.height();
    let bo_adj = adjust_bo(bo, fi, blk_w, blk_h);
    let po = PlaneOffset {
      x: (bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
      y: (bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 1
    };
    let range = 16;
665
    let (mvx_min, mvx_max, mvy_min, mvy_max) = get_mv_range(fi.w_in_b, fi.h_in_b, &bo_adj, blk_w, blk_h);
666

667
    let mut lowest_cost = std::u64::MAX;
Vladimir Kazakov's avatar
Vladimir Kazakov committed
668
    let mut best_mv = MotionVector::default();
669

Frank Bossen's avatar
Frank Bossen committed
670
    // Divide by 4 to account for subsampling, 0.125 is a fudge factor
671
    let lambda = (fi.me_lambda * 256.0 / 4.0 * 0.125) as u32;
Frank Bossen's avatar
Frank Bossen committed
672

673 674
    for omv in pmvs.iter() {
      if let Some(pmv) = omv {
675 676 677 678
        let x_lo = po.x + (((pmv.col as isize / 8 - range).max(mvx_min / 8).min(mvx_max / 8)) >> 1);
        let x_hi = po.x + (((pmv.col as isize / 8 + range).max(mvx_min / 8).min(mvx_max / 8)) >> 1);
        let y_lo = po.y + (((pmv.row as isize / 8 - range).max(mvy_min / 8).min(mvy_max / 8)) >> 1);
        let y_hi = po.y + (((pmv.row as isize / 8 + range).max(mvy_min / 8).min(mvy_max / 8)) >> 1);
679 680

        full_search(
Kyle Siefring's avatar
Kyle Siefring committed
681 682 683 684 685 686 687 688 689
          x_lo,
          x_hi,
          y_lo,
          y_hi,
          blk_h >> 1,
          blk_w >> 1,
          &fs.input_hres,
          &rec.input_hres,
          &mut best_mv,
Frank Bossen's avatar
Frank Bossen committed
690
          &mut lowest_cost,
Kyle Siefring's avatar
Kyle Siefring committed
691 692
          &po,
          1,
693
          fi.sequence.bit_depth,
Frank Bossen's avatar
Frank Bossen committed
694
          lambda,
Vladimir Kazakov's avatar
Vladimir Kazakov committed
695
          [MotionVector::default(); 2],
Frank Bossen's avatar
Frank Bossen committed
696
          fi.allow_high_precision_mv
697 698 699 700 701 702 703 704 705 706
        );
      }
    }

    Some(MotionVector { row: best_mv.row * 2, col: best_mv.col * 2 })
  } else {
    None
  }
}

707 708 709
#[cfg(test)]
pub mod test {
  use super::*;
  use crate::partition::BlockSize;
  use crate::partition::BlockSize::*;

  // Generate plane data for get_sad_same()
  //
  // The input plane holds `(col + row) & 255` and the reconstructed plane
  // `(col - row) & 255`, so every sample fits in 8 bits even though the
  // planes store u16.
  fn setup_sad() -> (Plane<u16>, Plane<u16>) {
    let mut input_plane = Plane::new(640, 480, 0, 0, 128 + 8, 128 + 8);
    let mut rec_plane = input_plane.clone();

    for (i, row) in input_plane.data.chunks_mut(input_plane.cfg.stride).enumerate() {
      for (j, pixel) in row.iter_mut().enumerate() {
        let val = ((j + i) as i32 & 255i32) as u16;
        assert!(val >= u8::min_value().into() &&
            val <= u8::max_value().into());
        *pixel = val;
      }
    }

    for (i, row) in rec_plane.data.chunks_mut(rec_plane.cfg.stride).enumerate() {
      for (j, pixel) in row.iter_mut().enumerate() {
        // `-` binds tighter than `&`; the parentheses only make that explicit.
        let val = ((j as i32 - i as i32) & 255i32) as u16;
        assert!(val >= u8::min_value().into() &&
            val <= u8::max_value().into());
        *pixel = val;
      }
    }

    (input_plane, rec_plane)
  }

  // Regression and validation test for SAD computation
  //
  // `get_sad` takes the block height before the width. Each expected value
  // below is the SAD of the block size it is listed against (width x height
  // as named by the enum variant).
  #[test]
  fn get_sad_same() {
    let blocks: Vec<(BlockSize, u32)> = vec![
      (BLOCK_4X4, 1912),
      (BLOCK_4X8, 4296),
      (BLOCK_8X4, 3496),
      (BLOCK_8X8, 7824),
      (BLOCK_8X16, 16592),
      (BLOCK_16X8, 14416),
      (BLOCK_16X16, 31136),
      (BLOCK_16X32, 60064),
      (BLOCK_32X16, 59552),
      (BLOCK_32X32, 120128),
      (BLOCK_32X64, 186688),
      (BLOCK_64X32, 250176),
      (BLOCK_64X64, 438912),
      (BLOCK_64X128, 654272),
      (BLOCK_128X64, 1016768),
      (BLOCK_128X128, 1689792),
      (BLOCK_4X16, 8680),
      (BLOCK_16X4, 6664),
      (BLOCK_8X32, 31056),
      (BLOCK_32X8, 27600),
      (BLOCK_16X64, 93344),
      (BLOCK_64X16, 116384),
    ];

    let bit_depth: usize = 8;
    let (input_plane, rec_plane) = setup_sad();

    for block in blocks {
      let bsw = block.0.width();
      let bsh = block.0.height();
      let po = PlaneOffset { x: 40, y: 40 };

      let input_slice = input_plane.slice(&po);
      let rec_slice = rec_plane.slice(&po);

      assert_eq!(
        block.1,
        get_sad(&input_slice, &rec_slice, bsh, bsw, bit_depth)
      );
    }
  }
}