Skip to content
Snippets Groups Projects
Commit 8e6d7a09, authored by David Michael Barr and committed by Luca Barbato
Browse files

Make CfL predictor generic over u8 and u16

parent 186efb07
No related branches found
No related tags found
No related merge requests found
...@@ -432,12 +432,13 @@ where ...@@ -432,12 +432,13 @@ where
#[target_feature(enable = "ssse3")] #[target_feature(enable = "ssse3")]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
unsafe fn pred_cfl_ssse3( unsafe fn pred_cfl_ssse3(
output: &mut [u16], stride: usize, ac: &[i16], alpha: i16, output: &mut [T], stride: usize, ac: &[i16], alpha: i16,
bit_depth: usize bit_depth: usize
) { ) {
let alpha_sign = _mm_set1_epi16(alpha); let alpha_sign = _mm_set1_epi16(alpha);
let alpha_q12 = _mm_slli_epi16(_mm_abs_epi16(alpha_sign), 9); let alpha_q12 = _mm_slli_epi16(_mm_abs_epi16(alpha_sign), 9);
let dc_q0 = _mm_set1_epi16(*output.as_ptr() as i16); let dc_scalar: u32 = (*output.as_ptr()).into();
let dc_q0 = _mm_set1_epi16(dc_scalar as i16);
let max = _mm_set1_epi16((1 << bit_depth) - 1); let max = _mm_set1_epi16((1 << bit_depth) - 1);
for j in 0..Self::H { for j in 0..Self::H {
...@@ -445,6 +446,7 @@ where ...@@ -445,6 +446,7 @@ where
let line = output.as_mut_ptr().offset((stride * j) as isize); let line = output.as_mut_ptr().offset((stride * j) as isize);
let mut i = 0isize; let mut i = 0isize;
let mut last = _mm_setzero_si128();
while (i as usize) < Self::W { while (i as usize) < Self::W {
let ac_q3 = _mm_loadu_si128(luma.offset(i) as *const _); let ac_q3 = _mm_loadu_si128(luma.offset(i) as *const _);
let ac_sign = _mm_sign_epi16(alpha_sign, ac_q3); let ac_sign = _mm_sign_epi16(alpha_sign, ac_q3);
...@@ -452,11 +454,27 @@ where ...@@ -452,11 +454,27 @@ where
_mm_mulhrs_epi16(_mm_abs_epi16(ac_q3), alpha_q12); _mm_mulhrs_epi16(_mm_abs_epi16(ac_q3), alpha_q12);
let scaled_luma_q0 = _mm_sign_epi16(abs_scaled_luma_q0, ac_sign); let scaled_luma_q0 = _mm_sign_epi16(abs_scaled_luma_q0, ac_sign);
let pred = _mm_add_epi16(scaled_luma_q0, dc_q0); let pred = _mm_add_epi16(scaled_luma_q0, dc_q0);
let res = _mm_min_epi16(max, _mm_max_epi16(pred, _mm_setzero_si128())); if size_of::<T>() == 1 {
if Self::W == 4 { if Self::W < 16 {
_mm_storel_epi64(line.offset(i) as *mut _, res); let res = _mm_packus_epi16(pred, pred);
if Self::W == 4 {
*(line.offset(i) as *mut i32) = _mm_cvtsi128_si32(res);
} else {
_mm_storel_epi64(line.offset(i) as *mut _, res);
}
} else if (i & 15) == 0 {
last = pred;
} else {
let res = _mm_packus_epi16(last, pred);
_mm_storeu_si128(line.offset(i - 8) as *mut _, res);
}
} else { } else {
_mm_storeu_si128(line.offset(i) as *mut _, res); let res = _mm_min_epi16(max, _mm_max_epi16(pred, _mm_setzero_si128()));
if Self::W == 4 {
_mm_storel_epi64(line.offset(i) as *mut _, res);
} else {
_mm_storeu_si128(line.offset(i) as *mut _, res);
}
} }
i += 8; i += 8;
} }
...@@ -465,7 +483,7 @@ where ...@@ -465,7 +483,7 @@ where
#[cfg_attr(feature = "comparative_bench", inline(never))] #[cfg_attr(feature = "comparative_bench", inline(never))]
fn pred_cfl( fn pred_cfl(
output: &mut [u16], stride: usize, ac: &[i16], alpha: i16, output: &mut [T], stride: usize, ac: &[i16], alpha: i16,
bit_depth: usize bit_depth: usize
) { ) {
if alpha == 0 { if alpha == 0 {
...@@ -485,14 +503,14 @@ where ...@@ -485,14 +503,14 @@ where
} }
let sample_max = (1 << bit_depth) - 1; let sample_max = (1 << bit_depth) - 1;
let avg = output[0] as i32; let avg: i32 = output[0].into();
for (line, luma) in for (line, luma) in
output.chunks_mut(stride).zip(ac.chunks(32)).take(Self::H) output.chunks_mut(stride).zip(ac.chunks(32)).take(Self::H)
{ {
for (v, &l) in line[..Self::W].iter_mut().zip(luma[..Self::W].iter()) { for (v, &l) in line[..Self::W].iter_mut().zip(luma[..Self::W].iter()) {
*v = *v =
(avg + get_scaled_luma_q0(alpha, l)).max(0).min(sample_max) as u16; (avg + get_scaled_luma_q0(alpha, l)).max(0).min(sample_max).as_();
} }
} }
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment