Commit 3450a2e1 authored by David Michael Barr

Allow unaligned sad_sse2 calls below 16x16

parent 4cf078ea
......@@ -135,14 +135,13 @@ mod nasm {
assert!(mem::size_of::<T>() == 1, "only implemented for u8 for now");
// FIXME unaligned blocks coming from hres/qres ME search
let ptr_align_log2 = (plane_org.as_ptr() as usize).trailing_zeros() as usize;
if ptr_align_log2 < 2 {
return super::native::get_sad(plane_org, plane_ref, blk_h, blk_w, 8);
}
// The largest unaligned-safe function is for 8x8
let ptr_align = 1 << ptr_align_log2.max(3);
let mut sum = 0 as u32;
let org_stride = (plane_org.plane.cfg.stride * mem::size_of::<T>()) as libc::ptrdiff_t;
let ref_stride = (plane_ref.plane.cfg.stride * mem::size_of::<T>()) as libc::ptrdiff_t;
assert!(blk_h >= 4 && blk_w >= 4);
let step_size = blk_h.min(blk_w).min(1 << ptr_align_log2);
let step_size = blk_h.min(blk_w).min(ptr_align);
let func = match step_size.ilog() {
3 => rav1e_sad4x4_sse2,
4 => rav1e_sad8x8_sse2,
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment