me.rs 21.3 KB
Newer Older
1 2 3 4 5 6 7 8 9
// Copyright (c) 2017-2018, The rav1e contributors. All rights reserved
//
// This source code is subject to the terms of the BSD 2 Clause License and
// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
// was not distributed with this source code in the LICENSE file, you can
// obtain it at www.aomedia.org/license/software. If the Alliance for Open
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.

Raphaël Zumer's avatar
Raphaël Zumer committed
10 11 12 13
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
pub use self::nasm::get_sad;
#[cfg(any(not(target_arch = "x86_64"), windows, not(feature = "nasm")))]
pub use self::native::get_sad;
Raphaël Zumer's avatar
Raphaël Zumer committed
14 15 16 17 18
use crate::context::{BlockOffset, BLOCK_TO_PLANE_SHIFT, MI_SIZE};
use crate::FrameInvariants;
use crate::FrameState;
use crate::partition::*;
use crate::plane::*;
19 20 21
use crate::encoder::ReferenceFrame;

use std::sync::Arc;
22

23 24
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
mod nasm {
  use crate::plane::*;
  use crate::util::*;

  use libc;

  // External SAD kernels, one per square tile size. Each takes two sample
  // pointers with their strides and returns the SAD of one tile.
  // NOTE(review): presumably these are the crate's assembly routines and the
  // strides are in bytes -- confirm against the assembly sources.
  extern {
    fn rav1e_sad_4x4_hbd_ssse3(
      src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad_8x8_hbd10_ssse3(
      src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad_16x16_hbd_ssse3(
      src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad_32x32_hbd10_ssse3(
      src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad_64x64_hbd10_ssse3(
      src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
      dst_stride: libc::ptrdiff_t
    ) -> u32;

    fn rav1e_sad_128x128_hbd10_ssse3(
      src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
      dst_stride: libc::ptrdiff_t
    ) -> u32;
  }

  /// Computes the SAD of a `blk_w` x `blk_h` block by tiling it with square
  /// kernels and adding up the per-tile results.
  ///
  /// The tile edge is the smaller block dimension, capped at 128 for bit
  /// depths up to 10 and at 4 otherwise.
  ///
  /// # Panics
  /// Panics if either block dimension is below 4.
  ///
  /// # Safety
  /// The CPU must support SSSE3, and both slices must presumably expose at
  /// least `blk_w` x `blk_h` readable samples -- TODO confirm what the kernels
  /// actually read.
  #[target_feature(enable = "ssse3")]
  unsafe fn sad_ssse3(
    plane_org: &PlaneSlice<'_>, plane_ref: &PlaneSlice<'_>, blk_h: usize,
    blk_w: usize, bit_depth: usize
  ) -> u32 {
    // TODO: stride *2??? What is the correct way to do this?
    let stride_org = plane_org.plane.cfg.stride as libc::ptrdiff_t * 2;
    let stride_ref = plane_ref.plane.cfg.stride as libc::ptrdiff_t * 2;
    assert!(blk_h >= 4 && blk_w >= 4);

    let tile_cap = if bit_depth <= 10 { 128 } else { 4 };
    let tile = blk_h.min(blk_w).min(tile_cap);

    // `ilog()` presumably returns the bit length of the tile edge (4 -> 3,
    // 8 -> 4, ...) -- verify against its definition. Anything unexpected
    // falls back to the largest kernel, exactly like an edge of 128.
    let kernel = match tile.ilog() {
      3 => rav1e_sad_4x4_hbd_ssse3,
      4 => rav1e_sad_8x8_hbd10_ssse3,
      5 => rav1e_sad_16x16_hbd_ssse3,
      6 => rav1e_sad_32x32_hbd10_ssse3,
      7 => rav1e_sad_64x64_hbd10_ssse3,
      _ => rav1e_sad_128x128_hbd10_ssse3
    };

    let mut total = 0u32;
    for row in (0..blk_h).step_by(tile) {
      for col in (0..blk_w).step_by(tile) {
        let tile_org = plane_org.subslice(col, row);
        let tile_ref = plane_ref.subslice(col, row);
        total += kernel(
          tile_org.as_slice().as_ptr(),
          stride_org,
          tile_ref.as_slice().as_ptr(),
          stride_ref
        );
      }
    }
    total
  }

  /// Returns the sum of absolute differences between two blocks.
  ///
  /// Uses the SSSE3 kernels when the CPU reports SSSE3 support and both block
  /// dimensions are at least 4; otherwise defers to the portable
  /// implementation in `native`.
  #[inline(always)]
  pub fn get_sad(
    plane_org: &PlaneSlice<'_>, plane_ref: &PlaneSlice<'_>, blk_h: usize,
    blk_w: usize, bit_depth: usize
  ) -> u32 {
    // This module is only compiled for x86_64 / non-Windows / `nasm`, so no
    // further compile-time gating is needed here.
    let use_asm =
      is_x86_feature_detected!("ssse3") && blk_h >= 4 && blk_w >= 4;

    if use_asm {
      // SAFETY: SSSE3 support was checked just above and both dimensions
      // satisfy the >= 4 precondition asserted by `sad_ssse3`.
      unsafe { sad_ssse3(plane_org, plane_ref, blk_h, blk_w, bit_depth) }
    } else {
      super::native::get_sad(plane_org, plane_ref, blk_h, blk_w, bit_depth)
    }
  }
}

mod native {
Raphaël Zumer's avatar
Raphaël Zumer committed
113
  use crate::plane::*;
114

Luca Barbato's avatar
Luca Barbato committed
115 116
  #[inline(always)]
  pub fn get_sad(
117
    plane_org: &PlaneSlice<'_>, plane_ref: &PlaneSlice<'_>, blk_h: usize,
Luca Barbato's avatar
Luca Barbato committed
118 119 120
    blk_w: usize, _bit_depth: usize
  ) -> u32 {
    let mut sum = 0 as u32;
121

Luca Barbato's avatar
Luca Barbato committed
122 123
    let org_iter = plane_org.iter_width(blk_w);
    let ref_iter = plane_ref.iter_width(blk_w);
124

Luca Barbato's avatar
Luca Barbato committed
125
    for (slice_org, slice_ref) in org_iter.take(blk_h).zip(ref_iter) {
126 127 128 129 130
      sum += slice_org
        .iter()
        .zip(slice_ref)
        .map(|(&a, &b)| (a as i32 - b as i32).abs() as u32)
        .sum::<u32>();
Luca Barbato's avatar
Luca Barbato committed
131
    }
132

Luca Barbato's avatar
Luca Barbato committed
133 134
    sum
  }
135 136
}

137
fn get_mv_range(
138
  w_in_b: usize, h_in_b: usize, bo: &BlockOffset, blk_w: usize, blk_h: usize
139
) -> (isize, isize, isize, isize) {
140 141 142
  let border_w = 128 + blk_w as isize * 8;
  let border_h = 128 + blk_h as isize * 8;
  let mvx_min = -(bo.x as isize) * (8 * MI_SIZE) as isize - border_w;
143
  let mvx_max = (w_in_b - bo.x - blk_w / MI_SIZE) as isize * (8 * MI_SIZE) as isize + border_w;
144
  let mvy_min = -(bo.y as isize) * (8 * MI_SIZE) as isize - border_h;
145
  let mvy_max = (h_in_b - bo.y - blk_h / MI_SIZE) as isize * (8 * MI_SIZE) as isize + border_h;
146 147 148 149

  (mvx_min, mvx_max, mvy_min, mvy_max)
}

150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
/// Collects candidate starting vectors for the diamond search at block `bo`.
///
/// In push order the list contains:
/// 1. From `frame_mvs` (current frame): left, top and top-right neighbours,
///    each only when it lies inside the frame.
/// 2. The component-wise average of those neighbours, quantized to full-pel
///    (only when at least one neighbour exists).
/// 3. The zero vector.
/// 4. `cmv`, quantized to full-pel.
/// 5. From `frame_ref_opt` (when present), using its `frame_mvs[ref_slot]`:
///    left, top, right, bottom neighbours that lie inside the frame, then the
///    co-located entry.
///
/// Motion vector tables are indexed row-major with a row length of
/// `fi.w_in_b`.
pub fn get_subset_predictors(
  fi: &FrameInvariants, bo: &BlockOffset, cmv: MotionVector,
  frame_mvs: &Vec<MotionVector>, frame_ref_opt: &Option<Arc<ReferenceFrame>>,
  ref_slot: usize
) -> (Vec<MotionVector>) {
  // Row-major index into a per-block motion vector table.
  let at = |x: usize, y: usize| y * fi.w_in_b + x;
  let zero_mv = MotionVector { row: 0, col: 0 };

  let mut candidates = Vec::new();

  // EPZS subset A and B predictors.

  if bo.x > 0 {
    candidates.push(frame_mvs[at(bo.x - 1, bo.y)]);
  }
  if bo.y > 0 {
    candidates.push(frame_mvs[at(bo.x, bo.y - 1)]);

    if bo.x < fi.w_in_b - 1 {
      candidates.push(frame_mvs[at(bo.x + 1, bo.y - 1)]);
    }
  }

  if !candidates.is_empty() {
    let total = candidates.iter().fold(zero_mv, |acc, &mv| acc + mv);
    let average = total / (candidates.len() as i16);

    candidates.push(average.quantize_to_fullpel());
  }

  candidates.push(zero_mv);

  // Coarse motion estimation.

  candidates.push(cmv.quantize_to_fullpel());

  // EPZS subset C predictors.

  if let Some(frame_ref) = frame_ref_opt.as_ref() {
    let prev_mvs = &frame_ref.frame_mvs[ref_slot];

    if bo.x > 0 {
      candidates.push(prev_mvs[at(bo.x - 1, bo.y)]);
    }
    if bo.y > 0 {
      candidates.push(prev_mvs[at(bo.x, bo.y - 1)]);
    }
    if bo.x < fi.w_in_b - 1 {
      candidates.push(prev_mvs[at(bo.x + 1, bo.y)]);
    }
    if bo.y < fi.h_in_b - 1 {
      candidates.push(prev_mvs[at(bo.x, bo.y + 1)]);
    }

    candidates.push(prev_mvs[at(bo.x, bo.y)]);
  }

  candidates
}

217
pub fn motion_estimation(
Kyle Siefring's avatar
Kyle Siefring committed
218
  fi: &FrameInvariants, fs: &FrameState, bsize: BlockSize, bo: &BlockOffset,
219 220
  ref_frame: usize, cmv: MotionVector, pmv: &[MotionVector; 2],
  ref_slot: usize
221
) -> MotionVector {
222
  match fi.rec_buffer.frames[fi.ref_frames[ref_frame - LAST_FRAME] as usize] {
223
    Some(ref rec) => {
224

225 226 227 228
      let po = PlaneOffset {
        x: (bo.x as isize) << BLOCK_TO_PLANE_SHIFT,
        y: (bo.y as isize) << BLOCK_TO_PLANE_SHIFT
      };
229 230
      let blk_w = bsize.width();
      let blk_h = bsize.height();
231 232 233 234 235 236
      let (mvx_min, mvx_max, mvy_min, mvy_max) = get_mv_range(fi.w_in_b, fi.h_in_b, bo, blk_w, blk_h);

      // 0.5 is a fudge factor
      let lambda = (fi.me_lambda * 256.0 * 0.5) as u32;

      // Full-pixel motion estimation
237

238
      let mut lowest_cost = std::u64::MAX;
239 240
      let mut best_mv = MotionVector { row: 0, col: 0 };

241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281
      let frame_mvs = &fs.frame_mvs[ref_slot];
      let frame_ref = &fi.rec_buffer.frames[fi.ref_frames[0] as usize];

      if fi.config.speed_settings.diamond_me {
        let predictors = get_subset_predictors(fi, bo, cmv,
          frame_mvs, frame_ref, ref_slot);

        diamond_me_search(
          fi, &po,
          &fs.input.planes[0], &rec.frame.planes[0],
          &predictors, fi.sequence.bit_depth,
          pmv, lambda,
          mvx_min, mvx_max, mvy_min, mvy_max,
          blk_w, blk_h,
          &mut best_mv, &mut lowest_cost);
      } else {
        let range = 16;
        let x_lo = po.x + ((-range + (cmv.col / 8) as isize).max(mvx_min / 8).min(mvx_max / 8));
        let x_hi = po.x + ((range + (cmv.col / 8) as isize).max(mvx_min / 8).min(mvx_max / 8));
        let y_lo = po.y + ((-range + (cmv.row / 8) as isize).max(mvy_min / 8).min(mvy_max / 8));
        let y_hi = po.y + ((range + (cmv.row / 8) as isize).max(mvy_min / 8).min(mvy_max / 8));

        full_search(
          x_lo,
          x_hi,
          y_lo,
          y_hi,
          blk_h,
          blk_w,
          &fs.input.planes[0],
          &rec.frame.planes[0],
          &mut best_mv,
          &mut lowest_cost,
          &po,
          2,
          fi.sequence.bit_depth,
          lambda,
          pmv,
          fi.allow_high_precision_mv
        );
      }
Frank Bossen's avatar
Frank Bossen committed
282

283
      // Sub-pixel motion estimation
Frank Bossen's avatar
Frank Bossen committed
284 285

      let mode = PredictionMode::NEWMV;
fbossen's avatar
fbossen committed
286
      let mut tmp_plane = Plane::new(blk_w, blk_h, 0, 0, 0, 0);
Frank Bossen's avatar
Frank Bossen committed
287

288
      let mut steps = vec![8, 4, 2];
289 290 291 292 293
      if fi.allow_high_precision_mv {
        steps.push(1);
      }

      for step in steps {
Frank Bossen's avatar
Frank Bossen committed
294 295 296 297
        let center_mv_h = best_mv;
        for i in 0..3 {
          for j in 0..3 {
            // Skip the center point that was already tested
298 299 300
            if i == 1 && j == 1 {
              continue;
            }
Frank Bossen's avatar
Frank Bossen committed
301

302 303 304 305
            let cand_mv = MotionVector {
              row: center_mv_h.row + step * (i as i16 - 1),
              col: center_mv_h.col + step * (j as i16 - 1)
            };
Frank Bossen's avatar
Frank Bossen committed
306

307
            if (cand_mv.col as isize) < mvx_min || (cand_mv.col as isize) > mvx_max {
308 309
              continue;
            }
310
            if (cand_mv.row as isize) < mvy_min || (cand_mv.row as isize) > mvy_max {
311 312 313
              continue;
            }

Frank Bossen's avatar
Frank Bossen committed
314
            {
315 316
              let tmp_slice =
                &mut tmp_plane.mut_slice(&PlaneOffset { x: 0, y: 0 });
Frank Bossen's avatar
Frank Bossen committed
317

318
              mode.predict_inter(
319 320 321 322 323 324 325
                fi,
                0,
                &po,
                tmp_slice,
                blk_w,
                blk_h,
                [ref_frame, NONE_FRAME],
326
                [cand_mv, MotionVector { row: 0, col: 0 }]
327
              );
Frank Bossen's avatar
Frank Bossen committed
328 329
            }

330 331
            let plane_org = fs.input.planes[0].slice(&po);
            let plane_ref = tmp_plane.slice(&PlaneOffset { x: 0, y: 0 });
Frank Bossen's avatar
Frank Bossen committed
332

333
            let sad = get_sad(&plane_org, &plane_ref, blk_h, blk_w, fi.sequence.bit_depth);
Frank Bossen's avatar
Frank Bossen committed
334

Frank Bossen's avatar
Frank Bossen committed
335 336 337
            let rate1 = get_mv_rate(cand_mv, pmv[0], fi.allow_high_precision_mv);
            let rate2 = get_mv_rate(cand_mv, pmv[1], fi.allow_high_precision_mv);
            let rate = rate1.min(rate2 + 1);
338
            let cost = 256 * sad as u64 + rate as u64 * lambda as u64;
Frank Bossen's avatar
Frank Bossen committed
339 340 341

            if cost < lowest_cost {
              lowest_cost = cost;
Frank Bossen's avatar
Frank Bossen committed
342 343 344 345 346 347
              best_mv = cand_mv;
            }
          }
        }
      }

348
      best_mv
349
    }
350

351
    None => MotionVector { row: 0, col: 0 }
352 353
  }
}
354

355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462
/// Picks the cheapest vector among `predictors` according to
/// `get_mv_rd_cost` and stores it, with its cost, in `center_mv` /
/// `center_mv_cost`.
///
/// Ties keep the earliest predictor. If `predictors` is empty, or every
/// predictor costs `u64::MAX` (out of range), the outputs are the zero vector
/// and `u64::MAX`.
fn get_best_predictor(fi: &FrameInvariants,
  po: &PlaneOffset, p_org: &Plane, p_ref: &Plane,
  predictors: &[MotionVector],
  bit_depth: usize, pmv: &[MotionVector; 2], lambda: u32,
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
  center_mv: &mut MotionVector, center_mv_cost: &mut u64) {
  let start = (std::u64::MAX, MotionVector { row: 0, col: 0 });

  let (winner_cost, winner_mv) =
    predictors.iter().fold(start, |best, &candidate| {
      let candidate_cost = get_mv_rd_cost(
        fi, po, p_org, p_ref, bit_depth,
        pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
        blk_w, blk_h, candidate);

      // Strict comparison: an equally good later predictor does not win.
      if candidate_cost < best.0 {
        (candidate_cost, candidate)
      } else {
        best
      }
    });

  *center_mv = winner_mv;
  *center_mv_cost = winner_cost;
}

/// Diamond-pattern motion search seeded from `predictors`.
///
/// The search centre starts at the cheapest predictor. Each iteration probes
/// the four points at `diamond_radius` above, below, left and right of the
/// centre. If one of them is strictly cheaper than the centre, the centre
/// moves there; otherwise the radius is halved, and the search stops once a
/// radius of 8 brings no improvement. The radius starts at 16.
///
/// On return `center_mv` / `center_mv_cost` hold the best vector and its cost.
///
/// # Panics
/// Panics if no in-range vector was found (final cost still `u64::MAX`).
fn diamond_me_search(
  fi: &FrameInvariants,
  po: &PlaneOffset, p_org: &Plane, p_ref: &Plane,
  predictors: &[MotionVector],
  bit_depth: usize, pmv: &[MotionVector; 2], lambda: u32,
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
  center_mv: &mut MotionVector, center_mv_cost: &mut u64)
{
  // (row, col) unit offsets of the four probe points.
  let offsets = [(1i16, 0i16), (0, 1), (-1, 0), (0, -1)];
  let mut diamond_radius: i16 = 16;

  get_best_predictor(
    fi, po, p_org, p_ref, predictors,
    bit_depth, pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
    blk_w, blk_h, center_mv, center_mv_cost);

  loop {
    let origin = *center_mv;

    // Cheapest of the four probes; the first one wins ties.
    let (probe_cost, probe_mv) = offsets.iter().fold(
      (std::u64::MAX, MotionVector { row: 0, col: 0 }),
      |best, &(d_row, d_col)| {
        let probe = MotionVector {
          row: origin.row + diamond_radius * d_row,
          col: origin.col + diamond_radius * d_col
        };

        let cost = get_mv_rd_cost(
          fi, po, p_org, p_ref, bit_depth,
          pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
          blk_w, blk_h, probe);

        if cost < best.0 { (cost, probe) } else { best }
      }
    );

    if probe_cost < *center_mv_cost {
      // Improvement: recentre and probe again at the same radius.
      *center_mv = probe_mv;
      *center_mv_cost = probe_cost;
      continue;
    }

    if diamond_radius == 8 {
      break;
    }
    diamond_radius /= 2;
  }

  assert!(*center_mv_cost < std::u64::MAX);
}

/// Cost of using `cand_mv` for the block at `po`.
///
/// Returns `u64::MAX` when the vector lies outside
/// `[mvx_min, mvx_max] x [mvy_min, mvy_max]`. Otherwise the cost is
/// `256 * SAD + rate * lambda`, where the SAD compares the source block with
/// the reference block displaced by `cand_mv / 8` whole pixels, and the rate
/// is the cheaper of coding the vector against `pmv[0]` or against `pmv[1]`
/// (the latter with a penalty of 1).
fn get_mv_rd_cost(
  fi: &FrameInvariants,
  po: &PlaneOffset, p_org: &Plane, p_ref: &Plane, bit_depth: usize,
  pmv: &[MotionVector; 2], lambda: u32,
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  blk_w: usize, blk_h: usize,
  cand_mv: MotionVector) -> u64
{
  let col = cand_mv.col as isize;
  let row = cand_mv.row as isize;
  let in_range =
    mvx_min <= col && col <= mvx_max && mvy_min <= row && row <= mvy_max;
  if !in_range {
    return std::u64::MAX;
  }

  let displaced = PlaneOffset {
    x: po.x + (cand_mv.col / 8) as isize,
    y: po.y + (cand_mv.row / 8) as isize
  };
  let plane_org = p_org.slice(po);
  let plane_ref = p_ref.slice(&displaced);

  let sad = get_sad(&plane_org, &plane_ref, blk_h, blk_w, bit_depth);

  let hp = fi.allow_high_precision_mv;
  let rate =
    get_mv_rate(cand_mv, pmv[0], hp).min(get_mv_rate(cand_mv, pmv[1], hp) + 1);

  256 * sad as u64 + rate as u64 * lambda as u64
}

Kyle Siefring's avatar
Kyle Siefring committed
463 464 465
fn full_search(
  x_lo: isize, x_hi: isize, y_lo: isize, y_hi: isize, blk_h: usize,
  blk_w: usize, p_org: &Plane, p_ref: &Plane, best_mv: &mut MotionVector,
466
  lowest_cost: &mut u64, po: &PlaneOffset, step: usize, bit_depth: usize,
Frank Bossen's avatar
Frank Bossen committed
467
  lambda: u32, pmv: &[MotionVector; 2], allow_high_precision_mv: bool
Kyle Siefring's avatar
Kyle Siefring committed
468
) {
Luca Barbato's avatar
Luca Barbato committed
469 470 471 472 473
    let search_range_y = (y_lo..=y_hi).step_by(step);
    let search_range_x = (x_lo..=x_hi).step_by(step);
    let search_area = search_range_y.flat_map(|y| { search_range_x.clone().map(move |x| (y, x)) });

    let (cost, mv) = search_area.map(|(y, x)| {
474 475 476
      let plane_org = p_org.slice(po);
      let plane_ref = p_ref.slice(&PlaneOffset { x, y });

Kyle Siefring's avatar
Kyle Siefring committed
477
      let sad = get_sad(&plane_org, &plane_ref, blk_h, blk_w, bit_depth);
478

Frank Bossen's avatar
Frank Bossen committed
479 480 481 482 483 484 485 486
      let mv = MotionVector {
        row: 8 * (y as i16 - po.y as i16),
        col: 8 * (x as i16 - po.x as i16)
      };

      let rate1 = get_mv_rate(mv, pmv[0], allow_high_precision_mv);
      let rate2 = get_mv_rate(mv, pmv[1], allow_high_precision_mv);
      let rate = rate1.min(rate2 + 1);
487
      let cost = 256 * sad as u64 + rate as u64 * lambda as u64;
Frank Bossen's avatar
Frank Bossen committed
488

Luca Barbato's avatar
Luca Barbato committed
489 490 491 492 493
      (cost, mv)
  }).min_by_key(|(c, _)| *c).unwrap();

    *lowest_cost = cost;
    *best_mv = mv;
494 495 496 497 498 499 500 501 502 503
}

/// Moves a block offset up/left so that the entire block lies within the
/// frame boundaries.
///
/// `blk_w` / `blk_h` are divided by 4 to express the block size in the same
/// units as `bo` and `fi.w_in_b` / `fi.h_in_b`. If the block is larger than
/// the frame along an axis, the offset on that axis becomes 0.
fn adjust_bo(bo: &BlockOffset, fi: &FrameInvariants, blk_w: usize, blk_h: usize) -> BlockOffset {
  // Largest offset at which the block still fits; may be negative.
  let last_x = fi.w_in_b as isize - blk_w as isize / 4;
  let last_y = fi.h_in_b as isize - blk_h as isize / 4;

  let fit = |pos: usize, last: isize| (pos as isize).min(last).max(0) as usize;

  BlockOffset { x: fit(bo.x, last_x), y: fit(bo.y, last_y) }
}

Frank Bossen's avatar
Frank Bossen committed
504 505 506 507 508 509 510 511 512 513 514 515 516
/// Approximate cost of coding motion vector `a` relative to predictor `b`.
///
/// For each component the difference `a - b` is taken (halved with an
/// arithmetic shift unless `allow_high_precision_mv` is set) and charged twice
/// the bit length of its magnitude; a zero difference is free. The result is
/// the sum over the row and column components.
///
/// Differences are computed in `i32`, so vectors at opposite ends of the
/// `i16` range cannot overflow.
fn get_mv_rate(a: MotionVector, b: MotionVector, allow_high_precision_mv: bool) -> u32 {
  fn diff_to_rate(diff: i32, allow_high_precision_mv: bool) -> u32 {
    let d = if allow_high_precision_mv { diff } else { diff >> 1 };
    // `32 - leading_zeros` is the bit length of |d|, which is 0 for d == 0.
    2 * (32 - d.abs().leading_zeros())
  }

  let row_diff = i32::from(a.row) - i32::from(b.row);
  let col_diff = i32::from(a.col) - i32::from(b.col);

  diff_to_rate(row_diff, allow_high_precision_mv) + diff_to_rate(col_diff, allow_high_precision_mv)
}

517
pub fn estimate_motion_ss4(
Kyle Siefring's avatar
Kyle Siefring committed
518
  fi: &FrameInvariants, fs: &FrameState, bsize: BlockSize, ref_idx: usize,
519
  bo: &BlockOffset
520 521 522 523 524 525 526 527 528
) -> Option<MotionVector> {
  if let Some(ref rec) = fi.rec_buffer.frames[ref_idx] {
    let blk_w = bsize.width();
    let blk_h = bsize.height();
    let bo_adj = adjust_bo(bo, fi, blk_w, blk_h);
    let po = PlaneOffset {
      x: (bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 2,
      y: (bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 2
    };
529

530 531
    let range_x = 192 * fi.me_range_scale as isize;
    let range_y = 64 * fi.me_range_scale as isize;
532
    let (mvx_min, mvx_max, mvy_min, mvy_max) = get_mv_range(fi.w_in_b, fi.h_in_b, &bo_adj, blk_w, blk_h);
533 534 535 536
    let x_lo = po.x + (((-range_x).max(mvx_min / 8)) >> 2);
    let x_hi = po.x + (((range_x).min(mvx_max / 8)) >> 2);
    let y_lo = po.y + (((-range_y).max(mvy_min / 8)) >> 2);
    let y_hi = po.y + (((range_y).min(mvy_max / 8)) >> 2);
537

538
    let mut lowest_cost = std::u64::MAX;
539 540
    let mut best_mv = MotionVector { row: 0, col: 0 };

Frank Bossen's avatar
Frank Bossen committed
541
    // Divide by 16 to account for subsampling, 0.125 is a fudge factor
542
    let lambda = (fi.me_lambda * 256.0 / 16.0 * 0.125) as u32;
Frank Bossen's avatar
Frank Bossen committed
543

544
    full_search(
Kyle Siefring's avatar
Kyle Siefring committed
545 546 547 548 549 550 551 552 553
      x_lo,
      x_hi,
      y_lo,
      y_hi,
      blk_h >> 2,
      blk_w >> 2,
      &fs.input_qres,
      &rec.input_qres,
      &mut best_mv,
Frank Bossen's avatar
Frank Bossen committed
554
      &mut lowest_cost,
Kyle Siefring's avatar
Kyle Siefring committed
555 556
      &po,
      1,
557
      fi.sequence.bit_depth,
Frank Bossen's avatar
Frank Bossen committed
558 559 560
      lambda,
      &[MotionVector { row: 0, col: 0 }; 2],
      fi.allow_high_precision_mv
561 562 563 564 565 566 567 568 569
    );

    Some(MotionVector { row: best_mv.row * 4, col: best_mv.col * 4 })
  } else {
    None
  }
}

pub fn estimate_motion_ss2(
Kyle Siefring's avatar
Kyle Siefring committed
570
  fi: &FrameInvariants, fs: &FrameState, bsize: BlockSize, ref_idx: usize,
571
  bo: &BlockOffset, pmvs: &[Option<MotionVector>; 3]
572 573 574 575 576 577 578 579 580 581
) -> Option<MotionVector> {
  if let Some(ref rec) = fi.rec_buffer.frames[ref_idx] {
    let blk_w = bsize.width();
    let blk_h = bsize.height();
    let bo_adj = adjust_bo(bo, fi, blk_w, blk_h);
    let po = PlaneOffset {
      x: (bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
      y: (bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 1
    };
    let range = 16;
582
    let (mvx_min, mvx_max, mvy_min, mvy_max) = get_mv_range(fi.w_in_b, fi.h_in_b, &bo_adj, blk_w, blk_h);
583

584
    let mut lowest_cost = std::u64::MAX;
585 586
    let mut best_mv = MotionVector { row: 0, col: 0 };

Frank Bossen's avatar
Frank Bossen committed
587
    // Divide by 4 to account for subsampling, 0.125 is a fudge factor
588
    let lambda = (fi.me_lambda * 256.0 / 4.0 * 0.125) as u32;
Frank Bossen's avatar
Frank Bossen committed
589

590 591
    for omv in pmvs.iter() {
      if let Some(pmv) = omv {
592 593 594 595
        let x_lo = po.x + (((pmv.col as isize / 8 - range).max(mvx_min / 8).min(mvx_max / 8)) >> 1);
        let x_hi = po.x + (((pmv.col as isize / 8 + range).max(mvx_min / 8).min(mvx_max / 8)) >> 1);
        let y_lo = po.y + (((pmv.row as isize / 8 - range).max(mvy_min / 8).min(mvy_max / 8)) >> 1);
        let y_hi = po.y + (((pmv.row as isize / 8 + range).max(mvy_min / 8).min(mvy_max / 8)) >> 1);
596 597

        full_search(
Kyle Siefring's avatar
Kyle Siefring committed
598 599 600 601 602 603 604 605 606
          x_lo,
          x_hi,
          y_lo,
          y_hi,
          blk_h >> 1,
          blk_w >> 1,
          &fs.input_hres,
          &rec.input_hres,
          &mut best_mv,
Frank Bossen's avatar
Frank Bossen committed
607
          &mut lowest_cost,
Kyle Siefring's avatar
Kyle Siefring committed
608 609
          &po,
          1,
610
          fi.sequence.bit_depth,
Frank Bossen's avatar
Frank Bossen committed
611 612 613
          lambda,
          &[MotionVector { row: 0, col: 0 }; 2],
          fi.allow_high_precision_mv
614 615 616 617 618 619 620 621 622 623
        );
      }
    }

    Some(MotionVector { row: best_mv.row * 2, col: best_mv.col * 2 })
  } else {
    None
  }
}

624 625 626
#[cfg(test)]
pub mod test {
  use super::*;
  use crate::partition::BlockSize;
  use crate::partition::BlockSize::*;

  // Generate plane data for get_sad_same(): the input plane holds
  // (column + row) mod 256 and the reconstructed plane (column - row) mod 256,
  // where row/column index the plane's backing buffer in `stride`-sized rows.
  fn setup_sad() -> (Plane, Plane) {
    let mut input_plane = Plane::new(640, 480, 0, 0, 128 + 8, 128 + 8);
    let mut rec_plane = input_plane.clone();

    for (i, row) in input_plane.data.chunks_mut(input_plane.cfg.stride).enumerate() {
      for (j, pixel) in row.iter_mut().enumerate() {
        let val = ((j + i) as i32 & 255i32) as u16;
        // The pattern must stay within 8-bit sample range.
        assert!(val <= u16::from(u8::max_value()));
        *pixel = val;
      }
    }

    for (i, row) in rec_plane.data.chunks_mut(rec_plane.cfg.stride).enumerate() {
      for (j, pixel) in row.iter_mut().enumerate() {
        let val = ((j as i32 - i as i32) & 255i32) as u16;
        assert!(val <= u16::from(u8::max_value()));
        *pixel = val;
      }
    }

    (input_plane, rec_plane)
  }

  // Regression and validation test for SAD computation
  #[test]
  fn get_sad_same() {
    // Expected SAD per block size, with BLOCK_<W>X<H> measured over a region
    // W samples wide and H rows tall.
    let blocks: Vec<(BlockSize, u32)> = vec![
      (BLOCK_4X4, 1912),
      (BLOCK_4X8, 4296),
      (BLOCK_8X4, 3496),
      (BLOCK_8X8, 7824),
      (BLOCK_8X16, 16592),
      (BLOCK_16X8, 14416),
      (BLOCK_16X16, 31136),
      (BLOCK_16X32, 60064),
      (BLOCK_32X16, 59552),
      (BLOCK_32X32, 120128),
      (BLOCK_32X64, 186688),
      (BLOCK_64X32, 250176),
      (BLOCK_64X64, 438912),
      (BLOCK_64X128, 654272),
      (BLOCK_128X64, 1016768),
      (BLOCK_128X128, 1689792),
      (BLOCK_4X16, 8680),
      (BLOCK_16X4, 6664),
      (BLOCK_8X32, 31056),
      (BLOCK_32X8, 27600),
      (BLOCK_16X64, 93344),
      (BLOCK_64X16, 116384),
    ];

    let bit_depth: usize = 8;
    let (input_plane, rec_plane) = setup_sad();

    for (bsize, expected) in blocks {
      let bsw = bsize.width();
      let bsh = bsize.height();
      let po = PlaneOffset { x: 40, y: 40 };

      let input_slice = input_plane.slice(&po);
      let rec_slice = rec_plane.slice(&po);

      // get_sad takes the block height before the block width.
      assert_eq!(
        expected,
        get_sad(&input_slice, &rec_slice, bsh, bsw, bit_depth)
      );
    }
  }
}