Commit a87e8751 authored by Yaowu Xu
Browse files

Replace experimental fdct16 with vp9 version

So that the C and assembly implementations produce matching results.

Change-Id: I3fb5ba193ae7fae792be4a32ac4f0bbece4763c1
parent 16c47bda
......@@ -151,180 +151,148 @@ static void fdct8(const tran_low_t *input, tran_low_t *output) {
range_check(output, 8, 16);
}
/*
 * fdct16 (staged form): transforms the 16 values in input[] into output[].
 *
 * The work is split into numbered stages that alternate between the caller's
 * output[] buffer and the local step[] buffer, so output[] doubles as scratch
 * space until stage 7 stores the final ordering. Products with cospi_*_64
 * constants are accumulated in the tran_high_t `temp` and narrowed back to
 * tran_low_t through fdct_round_shift().
 *
 * Stage 1 writes output[] while it is still reading input[], so calling this
 * with input == output would read values that were already overwritten.
 *
 * range_check() is called on input[] in stage 0 and on the freshly written
 * buffer after stages 1-6.
 * NOTE(review): presumably its last argument is the permitted bit width of
 * the values -- confirm against the definition of range_check().
 */
static void fdct16(const tran_low_t *input, tran_low_t *output) {
tran_high_t temp;
tran_low_t step[16];
// stage 0: check the incoming values only; nothing is computed here
range_check(input, 16, 13);
// stage 1: mirrored sums go to output[0..7], mirrored differences to
// output[8..15]
output[0] = input[0] + input[15];
output[1] = input[1] + input[14];
output[2] = input[2] + input[13];
output[3] = input[3] + input[12];
output[4] = input[4] + input[11];
output[5] = input[5] + input[10];
output[6] = input[6] + input[9];
output[7] = input[7] + input[8];
output[8] = input[7] - input[8];
output[9] = input[6] - input[9];
output[10] = input[5] - input[10];
output[11] = input[4] - input[11];
output[12] = input[3] - input[12];
output[13] = input[2] - input[13];
output[14] = input[1] - input[14];
output[15] = input[0] - input[15];
range_check(output, 16, 14);
// stage 2: sum/difference butterflies on the lower half; in the upper half
// entries 8, 9, 14 and 15 pass through and 10..13 are combined pairwise with
// +/- cospi_16_64
step[0] = output[0] + output[7];
step[1] = output[1] + output[6];
step[2] = output[2] + output[5];
step[3] = output[3] + output[4];
step[4] = output[3] - output[4];
step[5] = output[2] - output[5];
step[6] = output[1] - output[6];
step[7] = output[0] - output[7];
step[8] = output[8];
step[9] = output[9];
temp = output[10] * -cospi_16_64 + output[13] * cospi_16_64;
step[10] = (tran_low_t)fdct_round_shift(temp);
temp = output[11] * -cospi_16_64 + output[12] * cospi_16_64;
step[11] = (tran_low_t)fdct_round_shift(temp);
temp = output[12] * cospi_16_64 + output[11] * cospi_16_64;
step[12] = (tran_low_t)fdct_round_shift(temp);
temp = output[13] * cospi_16_64 + output[10] * cospi_16_64;
step[13] = (tran_low_t)fdct_round_shift(temp);
step[14] = output[14];
step[15] = output[15];
range_check(step, 16, 15);
// stage 3: butterflies on entries 0..3 and on 8..15; entries 5 and 6 are
// combined with +/- cospi_16_64; entries 4 and 7 pass through
output[0] = step[0] + step[3];
output[1] = step[1] + step[2];
output[2] = step[1] - step[2];
output[3] = step[0] - step[3];
output[4] = step[4];
temp = step[5] * -cospi_16_64 + step[6] * cospi_16_64;
output[5] = (tran_low_t)fdct_round_shift(temp);
temp = step[6] * cospi_16_64 + step[5] * cospi_16_64;
output[6] = (tran_low_t)fdct_round_shift(temp);
output[7] = step[7];
output[8] = step[8] + step[11];
output[9] = step[9] + step[10];
output[10] = step[9] - step[10];
output[11] = step[8] - step[11];
output[12] = step[15] - step[12];
output[13] = step[14] - step[13];
output[14] = step[14] + step[13];
output[15] = step[15] + step[12];
range_check(output, 16, 16);
// stage 4: entries 0..3 receive their last multiplications (cospi_16_64 and
// the cospi_8_64 / cospi_24_64 pair); entries 4..7 get butterflies; in the
// upper half the pairs (9, 14) and (10, 13) are combined with
// cospi_8_64 / cospi_24_64 while 8, 11, 12 and 15 pass through
temp = output[0] * cospi_16_64 + output[1] * cospi_16_64;
step[0] = (tran_low_t)fdct_round_shift(temp);
temp = output[1] * -cospi_16_64 + output[0] * cospi_16_64;
step[1] = (tran_low_t)fdct_round_shift(temp);
temp = output[2] * cospi_24_64 + output[3] * cospi_8_64;
step[2] = (tran_low_t)fdct_round_shift(temp);
temp = output[3] * cospi_24_64 + output[2] * -cospi_8_64;
step[3] = (tran_low_t)fdct_round_shift(temp);
step[4] = output[4] + output[5];
step[5] = output[4] - output[5];
step[6] = output[7] - output[6];
step[7] = output[7] + output[6];
step[8] = output[8];
temp = output[9] * -cospi_8_64 + output[14] * cospi_24_64;
step[9] = (tran_low_t)fdct_round_shift(temp);
temp = output[10] * -cospi_24_64 + output[13] * -cospi_8_64;
step[10] = (tran_low_t)fdct_round_shift(temp);
step[11] = output[11];
step[12] = output[12];
temp = output[13] * cospi_24_64 + output[10] * -cospi_8_64;
step[13] = (tran_low_t)fdct_round_shift(temp);
temp = output[14] * cospi_8_64 + output[9] * cospi_24_64;
step[14] = (tran_low_t)fdct_round_shift(temp);
step[15] = output[15];
range_check(step, 16, 16);
// stage 5: entries 0..3 are copied unchanged; entries 4..7 receive their
// last multiplications (cospi_28/4 and cospi_12/20 pairs); entries 8..15
// get butterflies
output[0] = step[0];
output[1] = step[1];
output[2] = step[2];
output[3] = step[3];
temp = step[4] * cospi_28_64 + step[7] * cospi_4_64;
output[4] = (tran_low_t)fdct_round_shift(temp);
temp = step[5] * cospi_12_64 + step[6] * cospi_20_64;
output[5] = (tran_low_t)fdct_round_shift(temp);
temp = step[6] * cospi_12_64 + step[5] * -cospi_20_64;
output[6] = (tran_low_t)fdct_round_shift(temp);
temp = step[7] * cospi_28_64 + step[4] * -cospi_4_64;
output[7] = (tran_low_t)fdct_round_shift(temp);
output[8] = step[8] + step[9];
output[9] = step[8] - step[9];
output[10] = step[11] - step[10];
output[11] = step[11] + step[10];
output[12] = step[12] + step[13];
output[13] = step[12] - step[13];
output[14] = step[15] - step[14];
output[15] = step[15] + step[14];
range_check(output, 16, 16);
// stage 6: entries 0..7 are copied unchanged; each mirrored pair
// (8, 15), (9, 14), (10, 13), (11, 12) of the upper half is combined with
// its own pair of cospi constants
step[0] = output[0];
step[1] = output[1];
step[2] = output[2];
step[3] = output[3];
step[4] = output[4];
step[5] = output[5];
step[6] = output[6];
step[7] = output[7];
temp = output[8] * cospi_30_64 + output[15] * cospi_2_64;
step[8] = (tran_low_t)fdct_round_shift(temp);
temp = output[9] * cospi_14_64 + output[14] * cospi_18_64;
step[9] = (tran_low_t)fdct_round_shift(temp);
temp = output[10] * cospi_22_64 + output[13] * cospi_10_64;
step[10] = (tran_low_t)fdct_round_shift(temp);
temp = output[11] * cospi_6_64 + output[12] * cospi_26_64;
step[11] = (tran_low_t)fdct_round_shift(temp);
temp = output[12] * cospi_6_64 + output[11] * -cospi_26_64;
step[12] = (tran_low_t)fdct_round_shift(temp);
temp = output[13] * cospi_22_64 + output[10] * -cospi_10_64;
step[13] = (tran_low_t)fdct_round_shift(temp);
temp = output[14] * cospi_14_64 + output[9] * -cospi_18_64;
step[14] = (tran_low_t)fdct_round_shift(temp);
temp = output[15] * cospi_30_64 + output[8] * -cospi_2_64;
step[15] = (tran_low_t)fdct_round_shift(temp);
range_check(step, 16, 16);
// stage 7: pure reordering of step[] into the final output positions; no
// arithmetic is performed
output[0] = step[0];
output[1] = step[8];
output[2] = step[4];
output[3] = step[12];
output[4] = step[2];
output[5] = step[10];
output[6] = step[6];
output[7] = step[14];
output[8] = step[1];
output[9] = step[9];
output[10] = step[5];
output[11] = step[13];
output[12] = step[3];
output[13] = step[11];
output[14] = step[7];
output[15] = step[15];
/*
 * fdct16: 16-point forward transform of in[] into out[].
 *
 * The sixteen inputs are folded into eight mirrored sums (input[]) and eight
 * mirrored differences (step1[]). The sums run through an 8-point butterfly
 * network and yield the even-indexed outputs; the differences run through
 * steps 2-6 and yield the odd-indexed outputs.
 *
 * All intermediates are held in tran_high_t. Every value that has been
 * multiplied by a cospi_*_64 constant is passed through fdct_round_shift()
 * before it is used again or stored.
 *
 * in[] is read only in step 1, before the first store to out[].
 *
 * in:  16 input values.
 * out: receives the 16 output values; every element is written exactly once.
 */
static void fdct16(const tran_low_t in[16], tran_low_t out[16]) {
  tran_high_t step1[8];
  tran_high_t step2[8];
  tran_high_t step3[8];
  tran_high_t input[8];
  tran_high_t temp1, temp2;

  // step 1: mirrored sums feed the even half, mirrored differences the odd
  // half.
  input[0] = in[0] + in[15];
  input[1] = in[1] + in[14];
  input[2] = in[2] + in[13];
  input[3] = in[3] + in[12];
  input[4] = in[4] + in[11];
  input[5] = in[5] + in[10];
  input[6] = in[6] + in[9];
  input[7] = in[7] + in[8];

  step1[0] = in[7] - in[8];
  step1[1] = in[6] - in[9];
  step1[2] = in[5] - in[10];
  step1[3] = in[4] - in[11];
  step1[4] = in[3] - in[12];
  step1[5] = in[2] - in[13];
  step1[6] = in[1] - in[14];
  step1[7] = in[0] - in[15];

  // Even half: 8-point network on input[], written out inline. It produces
  // out[0], out[2], ..., out[14].
  {
    tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
    tran_high_t t0, t1, t2, t3;
    tran_high_t x0, x1, x2, x3;

    // stage 1
    s0 = input[0] + input[7];
    s1 = input[1] + input[6];
    s2 = input[2] + input[5];
    s3 = input[3] + input[4];
    s4 = input[3] - input[4];
    s5 = input[2] - input[5];
    s6 = input[1] - input[6];
    s7 = input[0] - input[7];

    // 4-point network on s0..s3, also inline: out[0], out[4], out[8], out[12].
    x0 = s0 + s3;
    x1 = s1 + s2;
    x2 = s1 - s2;
    x3 = s0 - s3;
    t0 = (x0 + x1) * cospi_16_64;
    t1 = (x0 - x1) * cospi_16_64;
    t2 = x3 * cospi_8_64 + x2 * cospi_24_64;
    t3 = x3 * cospi_24_64 - x2 * cospi_8_64;
    out[0] = (tran_low_t)fdct_round_shift(t0);
    out[4] = (tran_low_t)fdct_round_shift(t2);
    out[8] = (tran_low_t)fdct_round_shift(t1);
    out[12] = (tran_low_t)fdct_round_shift(t3);

    // Stage 2: t0..t3 are reused here as scratch for the s4..s7 branch.
    t0 = (s6 - s5) * cospi_16_64;
    t1 = (s6 + s5) * cospi_16_64;
    t2 = fdct_round_shift(t0);
    t3 = fdct_round_shift(t1);

    // Stage 3
    x0 = s4 + t2;
    x1 = s4 - t2;
    x2 = s7 - t3;
    x3 = s7 + t3;

    // Stage 4: out[2], out[6], out[10], out[14].
    t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
    t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
    t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
    t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
    out[2] = (tran_low_t)fdct_round_shift(t0);
    out[6] = (tran_low_t)fdct_round_shift(t2);
    out[10] = (tran_low_t)fdct_round_shift(t1);
    out[14] = (tran_low_t)fdct_round_shift(t3);
  }

  // Odd half: steps 2-6 operate on the differences in step1[].

  // step 2: only step1[2..5] are scaled; step1[0], [1], [6], [7] are used
  // unchanged in step 3.
  temp1 = (step1[5] - step1[2]) * cospi_16_64;
  temp2 = (step1[4] - step1[3]) * cospi_16_64;
  step2[2] = fdct_round_shift(temp1);
  step2[3] = fdct_round_shift(temp2);
  temp1 = (step1[4] + step1[3]) * cospi_16_64;
  temp2 = (step1[5] + step1[2]) * cospi_16_64;
  step2[4] = fdct_round_shift(temp1);
  step2[5] = fdct_round_shift(temp2);

  // step 3
  step3[0] = step1[0] + step2[3];
  step3[1] = step1[1] + step2[2];
  step3[2] = step1[1] - step2[2];
  step3[3] = step1[0] - step2[3];
  step3[4] = step1[7] - step2[4];
  step3[5] = step1[6] - step2[5];
  step3[6] = step1[6] + step2[5];
  step3[7] = step1[7] + step2[4];

  // step 4: overwrites step2[1], [2], [5], [6]; the step 2 values held in
  // step2[2] and step2[5] were already consumed by step 3.
  temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64;
  temp2 = step3[2] * cospi_24_64 + step3[5] * cospi_8_64;
  step2[1] = fdct_round_shift(temp1);
  step2[2] = fdct_round_shift(temp2);
  temp1 = step3[2] * cospi_8_64 - step3[5] * cospi_24_64;
  temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64;
  step2[5] = fdct_round_shift(temp1);
  step2[6] = fdct_round_shift(temp2);

  // step 5: step1[] is reused as the destination; its step 1 contents are no
  // longer needed.
  step1[0] = step3[0] + step2[1];
  step1[1] = step3[0] - step2[1];
  step1[2] = step3[3] + step2[2];
  step1[3] = step3[3] - step2[2];
  step1[4] = step3[4] - step2[5];
  step1[5] = step3[4] + step2[5];
  step1[6] = step3[7] - step2[6];
  step1[7] = step3[7] + step2[6];

  // step 6: final multiplications; stores the odd-indexed outputs.
  temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64;
  temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64;
  out[1] = (tran_low_t)fdct_round_shift(temp1);
  out[9] = (tran_low_t)fdct_round_shift(temp2);

  temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64;
  temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64;
  out[5] = (tran_low_t)fdct_round_shift(temp1);
  out[13] = (tran_low_t)fdct_round_shift(temp2);

  temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64;
  temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64;
  out[3] = (tran_low_t)fdct_round_shift(temp1);
  out[11] = (tran_low_t)fdct_round_shift(temp2);

  temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64;
  temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64;
  out[7] = (tran_low_t)fdct_round_shift(temp1);
  out[15] = (tran_low_t)fdct_round_shift(temp2);
}
/* TODO(angiebird): Unify this with vp10_fwd_txfm.c: vp10_fdct32
static void fdct32(const tran_low_t *input, tran_low_t *output) {
tran_high_t temp;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment