Commit 8251736b authored by Angie Chiang

Let adst4's precision be adjustable

Change-Id: I6e251328b2934130992dbd355cfdffc3c721d357
parent 06250276
......@@ -742,7 +742,9 @@ void av1_iadst4_new(const int32_t *input, int32_t *output,
(void)cos_bit;
const int32_t size = 4;
int32_t stage = 0;
int64_t s0, s1, s2, s3, s4, s5, s6, s7;
int bit = cos_bit[0];
const int32_t *sinpi = sinpi_arr(bit);
int32_t s0, s1, s2, s3, s4, s5, s6, s7;
// stage 0;
apply_range(stage, input, input, size, stage_range[stage]);
......@@ -756,29 +758,29 @@ void av1_iadst4_new(const int32_t *input, int32_t *output,
return;
}
s0 = sinpi_1_9 * x0;
s1 = sinpi_2_9 * x0;
s2 = sinpi_3_9 * x1;
s3 = sinpi_4_9 * x2;
s4 = sinpi_1_9 * x2;
s5 = sinpi_2_9 * x3;
s6 = sinpi_4_9 * x3;
s0 = sinpi[1] * x0;
s1 = sinpi[2] * x0;
s2 = sinpi[3] * x1;
s3 = sinpi[4] * x2;
s4 = sinpi[1] * x2;
s5 = sinpi[2] * x3;
s6 = sinpi[4] * x3;
s7 = x0 - x2 + x3;
s0 = s0 + s3 + s5;
s1 = s1 - s4 - s6;
s3 = s2;
s2 = sinpi_3_9 * s7;
s2 = sinpi[3] * s7;
// 1-D transform scaling factor is sqrt(2).
// The overall dynamic range is 14b (input) + 14b (multiplication scaling)
// + 1b (addition) = 29b.
// Hence the output bit depth is 15b.
stage = 3;
output[0] = (int32_t)dct_const_round_shift(s0 + s3);
output[1] = (int32_t)dct_const_round_shift(s1 + s3);
output[2] = (int32_t)dct_const_round_shift(s2);
output[3] = (int32_t)dct_const_round_shift(s0 + s1 - s3);
output[0] = round_shift(s0 + s3, bit);
output[1] = round_shift(s1 + s3, bit);
output[2] = round_shift(s2, bit);
output[3] = round_shift(s0 + s1 - s3, bit);
apply_range(stage, input, output, size, stage_range[stage]);
}
......
......@@ -75,10 +75,23 @@ static const int32_t cospi_arr_data[7][64] = {
14359, 12785, 11204, 9616, 8022, 6424, 4821, 3216, 1608 }
};
// sinpi_arr_data[i][j] =
//   (int)round(sqrt(2) * sin(j * Pi / 9) * 2 / 3 * (1 << (cos_bit_min + i)))
// Row i selects the fixed-point scale 1 << (cos_bit_min + i); column j selects
// the angle j * Pi / 9, so column 0 is 0 in every row.
static const int32_t sinpi_arr_data[7][5] = {
  { 0, 330, 621, 836, 951 },
  { 0, 660, 1241, 1672, 1902 },
  { 0, 1321, 2482, 3344, 3803 },
  { 0, 2642, 4965, 6689, 7606 },
  { 0, 5283, 9929, 13377, 15212 },
  { 0, 10566, 19858, 26755, 30425 },
  { 0, 21133, 39717, 53510, 60849 }
};
// Returns the row of cospi_arr_data that holds the cosine constants for a
// fixed-point precision of n bits. Row 0 corresponds to n == cos_bit_min.
// n must map to an existing row of the table; this is checked in debug builds
// only, since an out-of-range n would otherwise read past the table.
static INLINE const int32_t *cospi_arr(int n) {
  const int row = n - cos_bit_min;
  assert(row >= 0 &&
         row < (int)(sizeof(cospi_arr_data) / sizeof(cospi_arr_data[0])));
  return cospi_arr_data[row];
}
// Returns the row of sinpi_arr_data that holds the sine constants for a
// fixed-point precision of n bits. Row 0 corresponds to n == cos_bit_min.
// n must map to an existing row of the table; this is checked in debug builds
// only, since an out-of-range n would otherwise read past the table.
static INLINE const int32_t *sinpi_arr(int n) {
  const int row = n - cos_bit_min;
  assert(row >= 0 &&
         row < (int)(sizeof(sinpi_arr_data) / sizeof(sinpi_arr_data[0])));
  return sinpi_arr_data[row];
}
static INLINE int32_t round_shift(int32_t value, int bit) {
assert(bit >= 1);
return (int32_t)(((int64_t)value + (1ll << (bit - 1))) >> bit);
......
......@@ -72,12 +72,12 @@ static void idct4x4_sse4_1(__m128i *in, int bit) {
}
static void iadst4x4_sse4_1(__m128i *in, int bit) {
bit = 14;
const int32_t *sinpi = sinpi_arr(bit);
const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
const __m128i sinpi1 = _mm_set1_epi32((int)sinpi_1_9);
const __m128i sinpi2 = _mm_set1_epi32((int)sinpi_2_9);
const __m128i sinpi3 = _mm_set1_epi32((int)sinpi_3_9);
const __m128i sinpi4 = _mm_set1_epi32((int)sinpi_4_9);
const __m128i sinpi1 = _mm_set1_epi32((int)sinpi[1]);
const __m128i sinpi2 = _mm_set1_epi32((int)sinpi[2]);
const __m128i sinpi3 = _mm_set1_epi32((int)sinpi[3]);
const __m128i sinpi4 = _mm_set1_epi32((int)sinpi[4]);
__m128i t;
__m128i s0, s1, s2, s3, s4, s5, s6, s7;
__m128i x0, x1, x2, x3;
......
......@@ -694,8 +694,10 @@ void av1_fadst4_new(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range) {
(void)cos_bit;
(void)stage_range;
int64_t x0, x1, x2, x3;
int64_t s0, s1, s2, s3, s4, s5, s6, s7;
int bit = cos_bit[0];
const int32_t *sinpi = sinpi_arr(bit);
int32_t x0, x1, x2, x3;
int32_t s0, s1, s2, s3, s4, s5, s6, s7;
x0 = input[0];
x1 = input[1];
......@@ -707,17 +709,17 @@ void av1_fadst4_new(const int32_t *input, int32_t *output,
return;
}
s0 = sinpi_1_9 * x0;
s1 = sinpi_4_9 * x0;
s2 = sinpi_2_9 * x1;
s3 = sinpi_1_9 * x1;
s4 = sinpi_3_9 * x2;
s5 = sinpi_4_9 * x3;
s6 = sinpi_2_9 * x3;
s0 = sinpi[1] * x0;
s1 = sinpi[4] * x0;
s2 = sinpi[2] * x1;
s3 = sinpi[1] * x1;
s4 = sinpi[3] * x2;
s5 = sinpi[4] * x3;
s6 = sinpi[2] * x3;
s7 = x0 + x1 - x3;
x0 = s0 + s2 + s5;
x1 = sinpi_3_9 * s7;
x1 = sinpi[3] * s7;
x2 = s1 - s3 + s6;
x3 = s4;
......@@ -727,10 +729,10 @@ void av1_fadst4_new(const int32_t *input, int32_t *output,
s3 = x2 - x0 + x3;
// 1-D transform scaling factor is sqrt(2).
output[0] = (int32_t)fdct_round_shift(s0);
output[1] = (int32_t)fdct_round_shift(s1);
output[2] = (int32_t)fdct_round_shift(s2);
output[3] = (int32_t)fdct_round_shift(s3);
output[0] = round_shift(s0, bit);
output[1] = round_shift(s1, bit);
output[2] = round_shift(s2, bit);
output[3] = round_shift(s3, bit);
}
void av1_fadst8_new(const int32_t *input, int32_t *output,
......
......@@ -121,12 +121,12 @@ static INLINE void write_buffer_4x4(__m128i *res, int32_t *output) {
}
static void fadst4x4_sse4_1(__m128i *in, int bit) {
bit = 14;
const int32_t *sinpi = sinpi_arr(bit);
const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
const __m128i sinpi1 = _mm_set1_epi32((int)sinpi_1_9);
const __m128i sinpi2 = _mm_set1_epi32((int)sinpi_2_9);
const __m128i sinpi3 = _mm_set1_epi32((int)sinpi_3_9);
const __m128i sinpi4 = _mm_set1_epi32((int)sinpi_4_9);
const __m128i sinpi1 = _mm_set1_epi32((int)sinpi[1]);
const __m128i sinpi2 = _mm_set1_epi32((int)sinpi[2]);
const __m128i sinpi3 = _mm_set1_epi32((int)sinpi[3]);
const __m128i sinpi4 = _mm_set1_epi32((int)sinpi[4]);
__m128i t;
__m128i s0, s1, s2, s3, s4, s5, s6, s7;
__m128i x0, x1, x2, x3;
......
......@@ -138,7 +138,7 @@ vector<AV1FwdTxfm2dParam> GetTxfm2dParamList() {
vector<AV1FwdTxfm2dParam> param_list;
for (int t = 0; t < TX_TYPES; ++t) {
const TX_TYPE tx_type = static_cast<TX_TYPE>(t);
param_list.push_back(AV1FwdTxfm2dParam(tx_type, TX_4X4, 2, 0.5));
param_list.push_back(AV1FwdTxfm2dParam(tx_type, TX_4X4, 3, 0.5));
param_list.push_back(AV1FwdTxfm2dParam(tx_type, TX_8X8, 5, 0.5));
param_list.push_back(AV1FwdTxfm2dParam(tx_type, TX_16X16, 11, 1.2));
param_list.push_back(AV1FwdTxfm2dParam(tx_type, TX_32X32, 70, 6.1));
......@@ -148,15 +148,15 @@ vector<AV1FwdTxfm2dParam> GetTxfm2dParamList() {
}
#endif // CONFIG_TX64X64
param_list.push_back(AV1FwdTxfm2dParam(tx_type, TX_4X8, 3.2, 0.50));
param_list.push_back(AV1FwdTxfm2dParam(tx_type, TX_8X4, 3.2, 0.64));
param_list.push_back(AV1FwdTxfm2dParam(tx_type, TX_4X8, 3.2, 0.52));
param_list.push_back(AV1FwdTxfm2dParam(tx_type, TX_8X4, 3.6, 0.64));
param_list.push_back(AV1FwdTxfm2dParam(tx_type, TX_8X16, 8, 0.8));
param_list.push_back(AV1FwdTxfm2dParam(tx_type, TX_16X8, 8, 1.1));
param_list.push_back(AV1FwdTxfm2dParam(tx_type, TX_16X32, 29, 3.9));
param_list.push_back(AV1FwdTxfm2dParam(tx_type, TX_32X16, 37, 5.9));
param_list.push_back(AV1FwdTxfm2dParam(tx_type, TX_4X16, 3, 0.6));
param_list.push_back(AV1FwdTxfm2dParam(tx_type, TX_16X4, 5, 0.9));
param_list.push_back(AV1FwdTxfm2dParam(tx_type, TX_4X16, 5, 0.6));
param_list.push_back(AV1FwdTxfm2dParam(tx_type, TX_16X4, 6, 0.9));
param_list.push_back(AV1FwdTxfm2dParam(tx_type, TX_8X32, 21, 1.2));
param_list.push_back(AV1FwdTxfm2dParam(tx_type, TX_32X8, 13, 1.7));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment