From eefd26ffb05a77fd8c94f98526bd75ebddc76226 Mon Sep 17 00:00:00 2001
From: Kyle Siefring <kylesiefring@gmail.com>
Date: Mon, 20 Aug 2018 16:32:40 -0400
Subject: [PATCH] Implement idct 4x4 in rust. (#464)

* Implement idct 4x4 in rust.

* Add uncommited changes [squashme]

* Make some changes based on code review.

* More changes.

* For code review
---
Review note: half_btf now accumulates in i64 (the i32 sum of two products
can overflow when the row pass runs with bd = 12) and the new helpers carry
header comments. Hunk offsets and the diffstat were updated to match.

 src/transform.rs | 124 ++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 116 insertions(+), 8 deletions(-)

diff --git a/src/transform.rs b/src/transform.rs
index 427304c9..e4eea8c3 100755
--- a/src/transform.rs
+++ b/src/transform.rs
@@ -9,9 +9,113 @@
 
 extern crate libc;
 
+use std::cmp;
+
 use partition::TxSize;
 use partition::TxType;
 
+use util::*;
+
+// Cosine table for the inverse transforms in Q12 fixed point: entry i is
+// 4096 * cos(i * PI / 128) rounded to the nearest integer.
+static COSPI_INV: [i32; 64] = [
+  4096, 4095, 4091, 4085, 4076, 4065, 4052, 4036, 4017, 3996, 3973, 3948,
+  3920, 3889, 3857, 3822, 3784, 3745, 3703, 3659, 3612, 3564, 3513, 3461,
+  3406, 3349, 3290, 3229, 3166, 3102, 3035, 2967, 2896, 2824, 2751, 2675,
+  2598, 2520, 2440, 2359, 2276, 2191, 2106, 2019, 1931, 1842, 1751, 1660,
+  1567, 1474, 1380, 1285, 1189, 1092, 995, 897, 799, 700, 601, 501, 401, 301,
+  201, 101,
+];
+
+// Performs half a butterfly: (w0 * in0 + w1 * in1) rounded and shifted right
+// by `bit`. The sum is accumulated in i64 so that large coefficients cannot
+// overflow i32 before the shift (an overflow panics in debug builds).
+// `bit` must be at least 1.
+#[inline]
+fn half_btf(w0: i32, in0: i32, w1: i32, in1: i32, bit: usize) -> i32 {
+  let sum = i64::from(w0) * i64::from(in0) + i64::from(w1) * i64::from(in1);
+  // Narrow back to i32; truncates if the shifted value does not fit.
+  ((sum + (1i64 << (bit - 1))) >> bit) as i32
+}
+
+// Divides `value` by 2^bit, rounding to the nearest integer with halves
+// rounded up. `bit` must be at least 1.
+#[inline]
+fn round_shift(value: i32, bit: usize) -> i32 {
+  (value + (1 << (bit - 1))) >> bit
+}
+
+// Clamps `value` to the range of a signed integer that is `bit` bits wide,
+// i.e. [-2^(bit - 1), 2^(bit - 1) - 1]. `bit` must be at least 1.
+#[inline]
+fn clamp_value(value: i32, bit: usize) -> i32 {
+  let max_value: i32 = ((1i64 << (bit - 1)) - 1) as i32;
+  let min_value: i32 = (-(1i64 << (bit - 1))) as i32;
+  clamp(value, min_value, max_value)
+}
+
+// 4-point inverse DCT of `input`. Writes the four results, each clamped to
+// a signed `range`-bit integer, to output[0..4]; panics if `output` has
+// fewer than four elements.
+fn av1_idct4(input: [i32; 4], output: &mut [i32], range: usize) {
+  let cos_bit = 12;
+  // stage 0
+
+  // stage 1
+  let stg1 = [input[0], input[2], input[1], input[3]];
+
+  // stage 2
+  let stg2 = [
+    half_btf(COSPI_INV[32], stg1[0], COSPI_INV[32], stg1[1], cos_bit),
+    half_btf(COSPI_INV[32], stg1[0], -COSPI_INV[32], stg1[1], cos_bit),
+    half_btf(COSPI_INV[48], stg1[2], -COSPI_INV[16], stg1[3], cos_bit),
+    half_btf(COSPI_INV[16], stg1[2], COSPI_INV[48], stg1[3], cos_bit)
+  ];
+
+  // stage 3
+  output[0] = clamp_value(stg2[0] + stg2[3], range);
+  output[1] = clamp_value(stg2[1] + stg2[2], range);
+  output[2] = clamp_value(stg2[1] - stg2[2], range);
+  output[3] = clamp_value(stg2[0] - stg2[3], range);
+}
+
+// Applies the 2-D 4x4 inverse DCT to the first 16 coefficients of `input`
+// (read as four rows of four) and adds the result, rounded by 4 bits, to
+// the 4x4 block of `output` whose rows are `stride` elements apart. Each
+// sample is clamped to [0, (1 << bd) - 1]. Panics if `stride` is 0.
+fn inv_txfm2d_add_4x4_rs(
+  input: &[i32], output: &mut [u16], stride: usize, bd: usize
+) {
+  let mut buffer = [0i32; 4 * 4];
+  // perform inv txfm on every row
+  let range = bd + 8;
+  for (input_slice, buffer_slice) in input.chunks(4).zip(buffer.chunks_mut(4))
+  {
+    let mut temp_in: [i32; 4] = [0; 4];
+    for (raw, clamped) in input_slice.iter().zip(temp_in.iter_mut()) {
+      *clamped = clamp_value(*raw, range);
+    }
+    av1_idct4(temp_in, buffer_slice, range);
+  }
+
+  // perform inv txfm on every col
+  let range = cmp::max(bd + 6, 16);
+  for c in 0..4 {
+    let mut temp_in: [i32; 4] = [0; 4];
+    let mut temp_out: [i32; 4] = [0; 4];
+    for (raw, clamped) in buffer[c..].iter().step_by(4).zip(temp_in.iter_mut())
+    {
+      *clamped = clamp_value(*raw, range);
+    }
+    av1_idct4(temp_in, &mut temp_out, range);
+    for (temp, out) in
+      temp_out.iter().zip(output[c..].iter_mut().step_by(stride).take(4)) {
+      *out =
+        clamp(*out as i32 + round_shift(*temp, 4), 0, (1 << bd) - 1) as u16;
+    }
+  }
+}
+
 // In libaom, functions that have more than one specialization use function
 // pointers, so we need to declare them as static fields and call them
 // indirectly. Otherwise, we call SSE or C variants directly. To fully
@@ -136,14 +240,18 @@ fn iht4x4_add(
 ) {
   // SIMD code may assert for transform types beyond TxType::IDTX.
   if tx_type < TxType::IDTX {
-    unsafe {
-      av1_inv_txfm2d_add_4x4(
-        input.as_ptr(),
-        output.as_mut_ptr(),
-        stride as libc::c_int,
-        tx_type as libc::c_int,
-        bit_depth as libc::c_int
-      );
+    if tx_type == TxType::DCT_DCT {
+      inv_txfm2d_add_4x4_rs(input, output, stride, bit_depth);
+    } else {
+      unsafe {
+        av1_inv_txfm2d_add_4x4(
+          input.as_ptr(),
+          output.as_mut_ptr(),
+          stride as libc::c_int,
+          tx_type as libc::c_int,
+          bit_depth as libc::c_int
+        );
+      }
     }
   } else {
     unsafe {
-- 
GitLab