Commit 3450a2e1 authored by David Michael Barr

Allow unaligned sad_sse2 calls below 16x16

parent 4cf078ea
...@@ -135,14 +135,13 @@ mod nasm { ...@@ -135,14 +135,13 @@ mod nasm {
assert!(mem::size_of::<T>() == 1, "only implemented for u8 for now"); assert!(mem::size_of::<T>() == 1, "only implemented for u8 for now");
// FIXME unaligned blocks coming from hres/qres ME search // FIXME unaligned blocks coming from hres/qres ME search
let ptr_align_log2 = (plane_org.as_ptr() as usize).trailing_zeros() as usize; let ptr_align_log2 = (plane_org.as_ptr() as usize).trailing_zeros() as usize;
if ptr_align_log2 < 2 { // The largest unaligned-safe function is for 8x8
return super::native::get_sad(plane_org, plane_ref, blk_h, blk_w, 8); let ptr_align = 1 << ptr_align_log2.max(3);
}
let mut sum = 0 as u32; let mut sum = 0 as u32;
let org_stride = (plane_org.plane.cfg.stride * mem::size_of::<T>()) as libc::ptrdiff_t; let org_stride = (plane_org.plane.cfg.stride * mem::size_of::<T>()) as libc::ptrdiff_t;
let ref_stride = (plane_ref.plane.cfg.stride * mem::size_of::<T>()) as libc::ptrdiff_t; let ref_stride = (plane_ref.plane.cfg.stride * mem::size_of::<T>()) as libc::ptrdiff_t;
assert!(blk_h >= 4 && blk_w >= 4); assert!(blk_h >= 4 && blk_w >= 4);
let step_size = blk_h.min(blk_w).min(1 << ptr_align_log2); let step_size = blk_h.min(blk_w).min(ptr_align);
let func = match step_size.ilog() { let func = match step_size.ilog() {
3 => rav1e_sad4x4_sse2, 3 => rav1e_sad4x4_sse2,
4 => rav1e_sad8x8_sse2, 4 => rav1e_sad8x8_sse2,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment