// Copyright (c) 2017-2018, The rav1e contributors. All rights reserved
//
// This source code is subject to the terms of the BSD 2 Clause License and
// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
// was not distributed with this source code in the LICENSE file, you can
// obtain it at www.aomedia.org/license/software. If the Alliance for Open
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.

use context::BlockOffset;
use context::BLOCK_TO_PLANE_SHIFT;
12
use context::MI_SIZE;
13 14 15 16
use partition::*;
use plane::*;
use FrameInvariants;
use FrameState;
17

18 19 20

#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
mod nasm {
Kyle Siefring's avatar
Kyle Siefring committed
21
use libc;
22 23
use util::*;
use plane::*;
Kyle Siefring's avatar
Kyle Siefring committed
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89

// Externally defined `rav1e_sad_*` routines, one per square tile size named in
// the symbol (4x4 up to 128x128). Every routine has the same shape: two
// `*const u16` sample pointers, each followed by its stride, and a `u32`
// result that `sad_ssse3` adds up tile by tile.
// NOTE(review): presumably these are the NASM kernels built when the `nasm`
// feature is enabled, and the strides are expected in bytes (the caller
// doubles the plane stride before passing it) -- confirm against the assembly.
extern {
  // 4x4 tile; the symbol carries `hbd` rather than `hbd10`, and the caller
  // selects it for every bit depth.
  fn rav1e_sad_4x4_hbd_ssse3(
    src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
    dst_stride: libc::ptrdiff_t
  ) -> u32;

  // 8x8 tile; only selected by the caller when bit_depth <= 10.
  fn rav1e_sad_8x8_hbd10_ssse3(
    src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
    dst_stride: libc::ptrdiff_t
  ) -> u32;

  // 16x16 tile; only selected by the caller when bit_depth <= 10.
  fn rav1e_sad_16x16_hbd_ssse3(
    src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
    dst_stride: libc::ptrdiff_t
  ) -> u32;

  // 32x32 tile; only selected by the caller when bit_depth <= 10.
  fn rav1e_sad_32x32_hbd10_ssse3(
    src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
    dst_stride: libc::ptrdiff_t
  ) -> u32;

  // 64x64 tile; only selected by the caller when bit_depth <= 10.
  fn rav1e_sad_64x64_hbd10_ssse3(
    src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
    dst_stride: libc::ptrdiff_t
  ) -> u32;

  // 128x128 tile; only selected by the caller when bit_depth <= 10.
  fn rav1e_sad_128x128_hbd10_ssse3(
    src: *const u16, src_stride: libc::ptrdiff_t, dst: *const u16,
    dst_stride: libc::ptrdiff_t
  ) -> u32;
}

/// Sums the results of the external SSSE3 SAD kernels over a block.
///
/// The block is covered with square `step_size` x `step_size` tiles, where
/// `step_size` is the smaller of `blk_h` and `blk_w`, capped at 128 for bit
/// depths up to 10 and at 4 otherwise. One kernel call is made per tile and
/// the per-tile results are added together.
///
/// # Safety
///
/// The caller must ensure the CPU supports SSSE3 (this function is compiled
/// with `#[target_feature(enable = "ssse3")]`) and that both slices are backed
/// by enough samples for every tile read by the external kernels.
///
/// # Panics
///
/// Panics if `blk_h` or `blk_w` is smaller than 4.
#[target_feature(enable = "ssse3")]
unsafe fn sad_ssse3(
  plane_org: &PlaneSlice, plane_ref: &PlaneSlice, blk_h: usize, blk_w: usize,
  bit_depth: usize
) -> u32 {
  assert!(blk_h >= 4 && blk_w >= 4);

  // TODO: stride *2??? What is the correct way to do this?
  // NOTE(review): the doubling presumably converts a stride counted in u16
  // samples into bytes for the assembly -- confirm.
  let org_stride = plane_org.plane.cfg.stride as libc::ptrdiff_t * 2;
  let ref_stride = plane_ref.plane.cfg.stride as libc::ptrdiff_t * 2;

  let step_size =
    blk_h.min(blk_w).min(if bit_depth <= 10 { 128 } else { 4 });
  let func = match step_size.ilog() {
    3 => rav1e_sad_4x4_hbd_ssse3,
    4 => rav1e_sad_8x8_hbd10_ssse3,
    5 => rav1e_sad_16x16_hbd_ssse3,
    6 => rav1e_sad_32x32_hbd10_ssse3,
    7 => rav1e_sad_64x64_hbd10_ssse3,
    // 8 selects the 128x128 kernel, which is also the fallback for any other
    // value, exactly as before.
    _ => rav1e_sad_128x128_hbd10_ssse3
  };

  let mut sum: u32 = 0;
  for r in (0..blk_h).step_by(step_size) {
    for c in (0..blk_w).step_by(step_size) {
      let org_slice = plane_org.subslice(c, r);
      let ref_slice = plane_ref.subslice(c, r);
      let org_ptr = org_slice.as_slice().as_ptr();
      let ref_ptr = ref_slice.as_slice().as_ptr();
      sum += func(org_ptr, org_stride, ref_ptr, ref_stride);
    }
  }
  sum
}

90
#[inline(always)]
91
pub fn get_sad(
Kyle Siefring's avatar
Kyle Siefring committed
92 93
  plane_org: &PlaneSlice, plane_ref: &PlaneSlice, blk_h: usize, blk_w: usize,
  bit_depth: usize
94
) -> u32 {
Luca Barbato's avatar
Luca Barbato committed
95
  #[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
Kyle Siefring's avatar
Kyle Siefring committed
96 97 98 99 100 101 102
  {
    if is_x86_feature_detected!("ssse3") && blk_h >= 4 && blk_w >= 4 {
      return unsafe {
        sad_ssse3(plane_org, plane_ref, blk_h, blk_w, bit_depth)
      };
    }
  }
103 104 105 106 107 108 109 110 111 112 113 114
  super::native::get_sad(plane_org, plane_ref, blk_h, blk_w, bit_depth)
}
}

mod native {
use plane::*;

#[inline(always)]
pub fn get_sad(
  plane_org: &PlaneSlice, plane_ref: &PlaneSlice, blk_h: usize, blk_w: usize,
  _bit_depth: usize
) -> u32 {
115 116
  let mut sum = 0 as u32;

117 118 119 120 121 122 123 124 125 126 127 128 129
  let org_iter = plane_org.iter_width(blk_w);
  let ref_iter = plane_ref.iter_width(blk_w);

  for (slice_org, slice_ref) in org_iter.take(blk_h).zip(ref_iter) {
      sum += slice_org
        .iter()
        .zip(slice_ref)
        .map(|(&a, &b)| (a as i32 - b as i32).abs() as u32)
        .sum::<u32>();
  }

  sum
}
130 131 132 133 134 135 136
}

#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
pub use self::nasm::get_sad;

#[cfg(any(not(target_arch = "x86_64"), windows, not(feature = "nasm")))]
pub use self::native::get_sad;
137 138 139 140 141 142 143 144 145 146 147 148

/// Computes the allowed motion-vector range for a block.
///
/// Returns `(mvx_min, mvx_max, mvy_min, mvy_max)` in the same units as
/// `MotionVector` components (callers divide by 8 to obtain sample offsets).
/// The range lets the block move up to the frame edges plus a border of
/// `128 + 8 * block dimension` units on each side.
///
/// The remaining distance to the right/bottom edge is computed in signed
/// arithmetic, so a block that overhangs the frame edge yields a smaller
/// (possibly negative) maximum instead of underflowing an unsigned
/// subtraction, which would panic in debug builds.
fn get_mv_range(fi: &FrameInvariants, bo: &BlockOffset, blk_w: usize, blk_h: usize) -> (isize, isize, isize, isize) {
  // Motion-vector units per block-offset unit.
  let mv_per_bo = (8 * MI_SIZE) as isize;

  let border_w = 128 + blk_w as isize * 8;
  let border_h = 128 + blk_h as isize * 8;

  // Signed distance, in block-offset units, from the block's far edge to the
  // frame's right and bottom edges.
  let room_right = fi.w_in_b as isize - bo.x as isize - (blk_w / MI_SIZE) as isize;
  let room_below = fi.h_in_b as isize - bo.y as isize - (blk_h / MI_SIZE) as isize;

  let mvx_min = -(bo.x as isize) * mv_per_bo - border_w;
  let mvx_max = room_right * mv_per_bo + border_w;
  let mvy_min = -(bo.y as isize) * mv_per_bo - border_h;
  let mvy_max = room_below * mv_per_bo + border_h;

  (mvx_min, mvx_max, mvy_min, mvy_max)
}

149
pub fn motion_estimation(
Kyle Siefring's avatar
Kyle Siefring committed
150 151
  fi: &FrameInvariants, fs: &FrameState, bsize: BlockSize, bo: &BlockOffset,
  ref_frame: usize, pmv: MotionVector, bit_depth: usize
152
) -> MotionVector {
153
  match fi.rec_buffer.frames[fi.ref_frames[ref_frame - LAST_FRAME] as usize] {
154
    Some(ref rec) => {
155 156 157 158
      let po = PlaneOffset {
        x: (bo.x as isize) << BLOCK_TO_PLANE_SHIFT,
        y: (bo.y as isize) << BLOCK_TO_PLANE_SHIFT
      };
159
      let range = 16;
160 161
      let blk_w = bsize.width();
      let blk_h = bsize.height();
162
      let (mvx_min, mvx_max, mvy_min, mvy_max) = get_mv_range(fi, bo, blk_w, blk_h);
163 164 165 166
      let x_lo = po.x + ((-range + (pmv.col / 8) as isize).max(mvx_min / 8));
      let x_hi = po.x + ((range + (pmv.col / 8) as isize).min(mvx_max / 8));
      let y_lo = po.y + ((-range + (pmv.row / 8) as isize).max(mvy_min / 8));
      let y_hi = po.y + ((range + (pmv.row / 8) as isize).min(mvy_max / 8));
167

168
      let mut lowest_sad = 128 * 128 * 4096 as u32;
169 170
      let mut best_mv = MotionVector { row: 0, col: 0 };

171
      full_search(
Kyle Siefring's avatar
Kyle Siefring committed
172 173 174 175 176 177 178 179 180 181 182 183 184
        x_lo,
        x_hi,
        y_lo,
        y_hi,
        blk_h,
        blk_w,
        &fs.input.planes[0],
        &rec.frame.planes[0],
        &mut best_mv,
        &mut lowest_sad,
        &po,
        2,
        bit_depth
185
      );
Frank Bossen's avatar
Frank Bossen committed
186 187

      let mode = PredictionMode::NEWMV;
fbossen's avatar
fbossen committed
188
      let mut tmp_plane = Plane::new(blk_w, blk_h, 0, 0, 0, 0);
Frank Bossen's avatar
Frank Bossen committed
189

190
      let mut steps = vec![8, 4, 2];
191 192 193 194 195
      if fi.allow_high_precision_mv {
        steps.push(1);
      }

      for step in steps {
Frank Bossen's avatar
Frank Bossen committed
196 197 198 199
        let center_mv_h = best_mv;
        for i in 0..3 {
          for j in 0..3 {
            // Skip the center point that was already tested
200 201 202
            if i == 1 && j == 1 {
              continue;
            }
Frank Bossen's avatar
Frank Bossen committed
203

204 205 206 207
            let cand_mv = MotionVector {
              row: center_mv_h.row + step * (i as i16 - 1),
              col: center_mv_h.col + step * (j as i16 - 1)
            };
Frank Bossen's avatar
Frank Bossen committed
208

209
            if (cand_mv.col as isize) < mvx_min || (cand_mv.col as isize) > mvx_max {
210 211
              continue;
            }
212
            if (cand_mv.row as isize) < mvy_min || (cand_mv.row as isize) > mvy_max {
213 214 215
              continue;
            }

Frank Bossen's avatar
Frank Bossen committed
216
            {
217 218
              let tmp_slice =
                &mut tmp_plane.mut_slice(&PlaneOffset { x: 0, y: 0 });
Frank Bossen's avatar
Frank Bossen committed
219

220
              mode.predict_inter(
221
                fi, 0, &po, tmp_slice, blk_w, blk_h, [ref_frame, NONE_FRAME],
Josh Holmer's avatar
Josh Holmer committed
222
                [cand_mv, MotionVector{ row: 0, col: 0 }], 8,
223
              );
Frank Bossen's avatar
Frank Bossen committed
224 225
            }

226 227
            let plane_org = fs.input.planes[0].slice(&po);
            let plane_ref = tmp_plane.slice(&PlaneOffset { x: 0, y: 0 });
Frank Bossen's avatar
Frank Bossen committed
228

Kyle Siefring's avatar
Kyle Siefring committed
229
            let sad = get_sad(&plane_org, &plane_ref, blk_h, blk_w, bit_depth);
Frank Bossen's avatar
Frank Bossen committed
230 231 232 233 234 235 236 237 238

            if sad < lowest_sad {
              lowest_sad = sad;
              best_mv = cand_mv;
            }
          }
        }
      }

239
      best_mv
240
    }
241

242
    None => MotionVector { row: 0, col: 0 }
243 244
  }
}
245

Kyle Siefring's avatar
Kyle Siefring committed
246 247 248 249 250
fn full_search(
  x_lo: isize, x_hi: isize, y_lo: isize, y_hi: isize, blk_h: usize,
  blk_w: usize, p_org: &Plane, p_ref: &Plane, best_mv: &mut MotionVector,
  lowest_sad: &mut u32, po: &PlaneOffset, step: usize, bit_depth: usize
) {
251 252 253 254 255
  for y in (y_lo..y_hi).step_by(step) {
    for x in (x_lo..x_hi).step_by(step) {
      let plane_org = p_org.slice(po);
      let plane_ref = p_ref.slice(&PlaneOffset { x, y });

Kyle Siefring's avatar
Kyle Siefring committed
256
      let sad = get_sad(&plane_org, &plane_ref, blk_h, blk_w, bit_depth);
257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277

      if sad < *lowest_sad {
        *lowest_sad = sad;
        *best_mv = MotionVector {
          row: 8 * (y as i16 - po.y as i16),
          col: 8 * (x as i16 - po.x as i16)
        }
      }
    }
  }
}

// Adjust block offset such that entire block lies within frame boundaries.
// Each coordinate is capped at the frame size minus the block size (block
// size divided by 4 to match the block-offset grid) and then floored at 0.
fn adjust_bo(bo: &BlockOffset, fi: &FrameInvariants, blk_w: usize, blk_h: usize) -> BlockOffset {
  let max_x = fi.w_in_b as isize - blk_w as isize / 4;
  let max_y = fi.h_in_b as isize - blk_h as isize / 4;

  let x = (bo.x as isize).min(max_x).max(0) as usize;
  let y = (bo.y as isize).min(max_y).max(0) as usize;

  BlockOffset { x, y }
}

pub fn estimate_motion_ss4(
Kyle Siefring's avatar
Kyle Siefring committed
278 279
  fi: &FrameInvariants, fs: &FrameState, bsize: BlockSize, ref_idx: usize,
  bo: &BlockOffset, bit_depth: usize
280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299
) -> Option<MotionVector> {
  if let Some(ref rec) = fi.rec_buffer.frames[ref_idx] {
    let blk_w = bsize.width();
    let blk_h = bsize.height();
    let bo_adj = adjust_bo(bo, fi, blk_w, blk_h);
    let po = PlaneOffset {
      x: (bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 2,
      y: (bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 2
    };
    let range = 64 * fi.me_range_scale as isize;
    let (mvx_min, mvx_max, mvy_min, mvy_max) = get_mv_range(fi, &bo_adj, blk_w, blk_h);
    let x_lo = po.x + (((-range).max(mvx_min / 8)) >> 2);
    let x_hi = po.x + (((range).min(mvx_max / 8)) >> 2);
    let y_lo = po.y + (((-range).max(mvy_min / 8)) >> 2);
    let y_hi = po.y + (((range).min(mvy_max / 8)) >> 2);

    let mut lowest_sad = ((blk_w >> 2) * (blk_h >> 2) * 4096) as u32;
    let mut best_mv = MotionVector { row: 0, col: 0 };

    full_search(
Kyle Siefring's avatar
Kyle Siefring committed
300 301 302 303 304 305 306 307 308 309 310 311 312
      x_lo,
      x_hi,
      y_lo,
      y_hi,
      blk_h >> 2,
      blk_w >> 2,
      &fs.input_qres,
      &rec.input_qres,
      &mut best_mv,
      &mut lowest_sad,
      &po,
      1,
      bit_depth
313 314 315 316 317 318 319 320 321
    );

    Some(MotionVector { row: best_mv.row * 4, col: best_mv.col * 4 })
  } else {
    None
  }
}

pub fn estimate_motion_ss2(
Kyle Siefring's avatar
Kyle Siefring committed
322 323
  fi: &FrameInvariants, fs: &FrameState, bsize: BlockSize, ref_idx: usize,
  bo: &BlockOffset, pmvs: &[Option<MotionVector>; 3], bit_depth: usize
324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346
) -> Option<MotionVector> {
  if let Some(ref rec) = fi.rec_buffer.frames[ref_idx] {
    let blk_w = bsize.width();
    let blk_h = bsize.height();
    let bo_adj = adjust_bo(bo, fi, blk_w, blk_h);
    let po = PlaneOffset {
      x: (bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
      y: (bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 1
    };
    let range = 16;
    let (mvx_min, mvx_max, mvy_min, mvy_max) = get_mv_range(fi, &bo_adj, blk_w, blk_h);

    let mut lowest_sad = ((blk_w >> 1) * (blk_h >> 1) * 4096) as u32;
    let mut best_mv = MotionVector { row: 0, col: 0 };

    for omv in pmvs.iter() {
      if let Some(pmv) = omv {
        let x_lo = po.x + (((pmv.col as isize / 8 - range).max(mvx_min / 8)) >> 1);
        let x_hi = po.x + (((pmv.col as isize / 8 + range).min(mvx_max / 8)) >> 1);
        let y_lo = po.y + (((pmv.row as isize / 8 - range).max(mvy_min / 8)) >> 1);
        let y_hi = po.y + (((pmv.row as isize / 8 + range).min(mvy_max / 8)) >> 1);

        full_search(
Kyle Siefring's avatar
Kyle Siefring committed
347 348 349 350 351 352 353 354 355 356 357 358 359
          x_lo,
          x_hi,
          y_lo,
          y_hi,
          blk_h >> 1,
          blk_w >> 1,
          &fs.input_hres,
          &rec.input_hres,
          &mut best_mv,
          &mut lowest_sad,
          &po,
          1,
          bit_depth
360 361 362 363 364 365 366 367 368 369
        );
      }
    }

    Some(MotionVector { row: best_mv.row * 2, col: best_mv.col * 2 })
  } else {
    None
  }
}

370 371 372 373 374 375 376 377
#[cfg(test)]
pub mod test {
  use super::*;

  // Generate plane data for get_sad_same()
  //
  // The input plane holds `(col + row) & 255` and the reconstructed plane
  // holds `(col - row) & 255`, where row and col index the plane's `data`
  // buffer in chunks of `cfg.stride`.
  fn setup_sad() -> (Plane, Plane) {
    // Overwrites every sample of `plane.data` with `pattern(row, col) & 255`.
    fn fill<F: Fn(i32, i32) -> i32>(plane: &mut Plane, pattern: F) {
      let stride = plane.cfg.stride;
      for (i, row) in plane.data.chunks_mut(stride).enumerate() {
        for (j, pixel) in row.iter_mut().enumerate() {
          let val = (pattern(i as i32, j as i32) & 255i32) as u16;
          assert!(val >= u8::min_value().into() &&
              val <= u8::max_value().into());
          *pixel = val;
        }
      }
    }

    let mut input_plane = Plane::new(640, 480, 0, 0, 128 + 8, 128 + 8);
    let mut rec_plane = input_plane.clone();

    fill(&mut input_plane, |i, j| j + i);
    fill(&mut rec_plane, |i, j| j - i);

    (input_plane, rec_plane)
  }

  // Regression and validation test for SAD computation
  //
  // `get_sad` takes the block height before the block width. This test used
  // to pass them the other way round, so each non-square expectation was
  // really the SAD of the transposed block. The arguments are now in the
  // right order and the expected values of every transposed pair (4X8/8X4,
  // 8X16/16X8, ...) have been exchanged to match; square sizes are unchanged.
  #[test]
  fn get_sad_same() {
    use partition::BlockSize;
    use partition::BlockSize::*;

    let blocks: Vec<(BlockSize, u32)> = vec![
      (BLOCK_4X4, 1912),
      (BLOCK_4X8, 4296),
      (BLOCK_8X4, 3496),
      (BLOCK_8X8, 7824),
      (BLOCK_8X16, 16592),
      (BLOCK_16X8, 14416),
      (BLOCK_16X16, 31136),
      (BLOCK_16X32, 60064),
      (BLOCK_32X16, 59552),
      (BLOCK_32X32, 120128),
      (BLOCK_32X64, 186688),
      (BLOCK_64X32, 250176),
      (BLOCK_64X64, 438912),
      (BLOCK_64X128, 654272),
      (BLOCK_128X64, 1016768),
      (BLOCK_128X128, 1689792),
      (BLOCK_4X16, 8680),
      (BLOCK_16X4, 6664),
      (BLOCK_8X32, 31056),
      (BLOCK_32X8, 27600),
      (BLOCK_16X64, 93344),
      (BLOCK_64X16, 116384),
    ];

    let bit_depth: usize = 8;
    let (input_plane, rec_plane) = setup_sad();

    for block in blocks {
      let bsw = block.0.width();
      let bsh = block.0.height();
      let po = PlaneOffset { x: 40, y: 40 };

      let input_slice = input_plane.slice(&po);
      let rec_slice = rec_plane.slice(&po);

      // Height first, then width, as declared by `get_sad`.
      assert_eq!(
        block.1,
        get_sad(&input_slice, &rec_slice, bsh, bsw, bit_depth)
      );
    }
  }
}