Commit ff417649 authored by Dmitry Kovalev

Removing _1d suffix from transform names.

It is enough to specify (e.g.) idct16; it is obviously different from
idct16x16.

Change-Id: I6b408a37a945de3162429380b59a775b03b95db0
parent 00bfacb7
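
Editor's note for readers skimming the diff: every renamed function is a 1-D butterfly kernel, and the 2-D transforms below are separable, running a 1-D kernel first over rows and then over columns. A minimal sketch of that pattern, simplified from vp9_idct4x4_16_add_c below (the final rounding and add-to-destination steps are elided here):

#include <stdint.h>

typedef void (*transform_1d)(const int16_t *input, int16_t *output);

/* Separable 2-D inverse transform: a 1-D kernel such as idct4 applied
 * to each row, then to each column of the intermediate block. */
static void inverse_transform_4x4(const int16_t *input, int16_t *out,
                                  transform_1d kernel) {
  int16_t rows[4 * 4], temp_in[4], temp_out[4];
  int i, j;

  for (i = 0; i < 4; ++i)            /* row pass */
    kernel(input + 4 * i, rows + 4 * i);

  for (i = 0; i < 4; ++i) {          /* column pass */
    for (j = 0; j < 4; ++j)
      temp_in[j] = rows[j * 4 + i];  /* gather column i */
    kernel(temp_in, temp_out);
    for (j = 0; j < 4; ++j)
      out[j * 4 + i] = temp_out[j];  /* scatter it back */
  }
}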
@@ -96,7 +96,7 @@ void vp9_iwht4x4_1_add_c(const int16_t *in, uint8_t *dest, int dest_stride) {
}
}
-static void idct4_1d(const int16_t *input, int16_t *output) {
+static void idct4(const int16_t *input, int16_t *output) {
int16_t step[4];
int temp1, temp2;
// stage 1
@@ -124,7 +124,7 @@ void vp9_idct4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride) {
// Rows
for (i = 0; i < 4; ++i) {
-idct4_1d(input, outptr);
+idct4(input, outptr);
input += 4;
outptr += 4;
}
@@ -133,7 +133,7 @@ void vp9_idct4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride) {
for (i = 0; i < 4; ++i) {
for (j = 0; j < 4; ++j)
temp_in[j] = out[j * 4 + i];
-idct4_1d(temp_in, temp_out);
+idct4(temp_in, temp_out);
for (j = 0; j < 4; ++j)
dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4)
+ dest[j * stride + i]);
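
The ROUND_POWER_OF_TWO(temp_out[j], 4) above removes the transform's fixed-point scaling in the column pass (the shift is 4 for 4x4, 5 for 8x8, and 6 for 16x16 and 32x32 below), and clip_pixel clamps the reconstruction to 8-bit range. For reference, the two helpers from vp9/common/vp9_common.h reduce, up to cosmetics, to:

/* round-to-nearest right shift, and a clamp to [0, 255] */
#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))

static inline uint8_t clip_pixel(int val) {
  return (val > 255) ? 255 : (val < 0) ? 0 : (uint8_t)val;
}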
@@ -156,7 +156,7 @@ void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride) {
}
}
-static void idct8_1d(const int16_t *input, int16_t *output) {
+static void idct8(const int16_t *input, int16_t *output) {
int16_t step1[8], step2[8];
int temp1, temp2;
// stage 1
@@ -174,7 +174,7 @@ static void idct8_1d(const int16_t *input, int16_t *output) {
step1[6] = dct_const_round_shift(temp2);
// stage 2 & stage 3 - even half
-idct4_1d(step1, step1);
+idct4(step1, step1);
// stage 2 - odd half
step2[4] = step1[4] + step1[5];
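
The dct_const_round_shift calls feeding step1[6] above exist because the cospi_*_64 constants are Q14 fixed point (cos(k*pi/64) scaled by 2^14), so every multiply gains 14 fraction bits that must be rounded back out. Approximately, reconstructed from the helpers in vp9/common/vp9_idct.h:

#define DCT_CONST_BITS 14

static inline int dct_const_round_shift(int input) {
  /* round-to-nearest removal of the Q14 fraction bits */
  return (input + (1 << (DCT_CONST_BITS - 1))) >> DCT_CONST_BITS;
}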
@@ -209,7 +209,7 @@ void vp9_idct8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride) {
// First transform rows
for (i = 0; i < 8; ++i) {
-idct8_1d(input, outptr);
+idct8(input, outptr);
input += 8;
outptr += 8;
}
@@ -218,7 +218,7 @@ void vp9_idct8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride) {
for (i = 0; i < 8; ++i) {
for (j = 0; j < 8; ++j)
temp_in[j] = out[j * 8 + i];
-idct8_1d(temp_in, temp_out);
+idct8(temp_in, temp_out);
for (j = 0; j < 8; ++j)
dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
+ dest[j * stride + i]);
@@ -238,7 +238,7 @@ void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int stride) {
}
}
-static void iadst4_1d(const int16_t *input, int16_t *output) {
+static void iadst4(const int16_t *input, int16_t *output) {
int s0, s1, s2, s3, s4, s5, s6, s7;
int x0 = input[0];
@@ -283,10 +283,10 @@ static void iadst4_1d(const int16_t *input, int16_t *output) {
void vp9_iht4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride,
int tx_type) {
const transform_2d IHT_4[] = {
-{ idct4_1d, idct4_1d }, // DCT_DCT = 0
-{ iadst4_1d, idct4_1d }, // ADST_DCT = 1
-{ idct4_1d, iadst4_1d }, // DCT_ADST = 2
-{ iadst4_1d, iadst4_1d } // ADST_ADST = 3
+{ idct4, idct4 }, // DCT_DCT = 0
+{ iadst4, idct4 }, // ADST_DCT = 1
+{ idct4, iadst4 }, // DCT_ADST = 2
+{ iadst4, iadst4 } // ADST_ADST = 3
};
int i, j;
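
IHT_4 pairs one vertical and one horizontal kernel per tx_type; the names read vertical_horizontal, so ADST_DCT applies iadst4 down the columns and idct4 across the rows. The pair type is declared in this file along the lines of the sketch below (field order and names are from memory of vp9_idct.c, so treat them as an assumption):

typedef void (*transform_1d)(const int16_t *input, int16_t *output);

typedef struct {
  transform_1d cols, rows;  /* vertical, then horizontal */
} transform_2d;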
@@ -311,7 +311,7 @@ void vp9_iht4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride,
+ dest[j * stride + i]);
}
}
-static void iadst8_1d(const int16_t *input, int16_t *output) {
+static void iadst8(const int16_t *input, int16_t *output) {
int s0, s1, s2, s3, s4, s5, s6, s7;
int x0 = input[7];
@@ -389,10 +389,10 @@ static void iadst8_1d(const int16_t *input, int16_t *output) {
}
static const transform_2d IHT_8[] = {
-{ idct8_1d, idct8_1d }, // DCT_DCT = 0
-{ iadst8_1d, idct8_1d }, // ADST_DCT = 1
-{ idct8_1d, iadst8_1d }, // DCT_ADST = 2
-{ iadst8_1d, iadst8_1d } // ADST_ADST = 3
+{ idct8, idct8 }, // DCT_DCT = 0
+{ iadst8, idct8 }, // ADST_DCT = 1
+{ idct8, iadst8 }, // DCT_ADST = 2
+{ iadst8, iadst8 } // ADST_ADST = 3
};
void vp9_iht8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride,
@@ -430,7 +430,7 @@ void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int stride) {
// First transform rows
// only first 4 row has non-zero coefs
for (i = 0; i < 4; ++i) {
-idct8_1d(input, outptr);
+idct8(input, outptr);
input += 8;
outptr += 8;
}
@@ -439,14 +439,14 @@ void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int stride) {
for (i = 0; i < 8; ++i) {
for (j = 0; j < 8; ++j)
temp_in[j] = out[j * 8 + i];
-idct8_1d(temp_in, temp_out);
+idct8(temp_in, temp_out);
for (j = 0; j < 8; ++j)
dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
+ dest[j * stride + i]);
}
}
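
vp9_idct8x8_10_add_c above is one of several shortcut kernels specialized by coefficient count; the _10 and _34 suffixes in this file are end-of-block thresholds, not sizes. A hypothetical caller-side dispatch, just to show how the variants relate (the real decoder selects on eob in much this way, but this exact wrapper is illustrative and assumes the declarations from vp9_idct.h):

void idct8x8_add(const int16_t *input, uint8_t *dest, int stride, int eob) {
  if (eob == 1)
    vp9_idct8x8_1_add_c(input, dest, stride);   /* DC-only block */
  else if (eob <= 10)
    vp9_idct8x8_10_add_c(input, dest, stride);  /* coefs confined to top-left 4x4 */
  else
    vp9_idct8x8_64_add_c(input, dest, stride);  /* full 8x8 */
}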
-static void idct16_1d(const int16_t *input, int16_t *output) {
+static void idct16(const int16_t *input, int16_t *output) {
int16_t step1[16], step2[16];
int temp1, temp2;
@@ -619,7 +619,7 @@ void vp9_idct16x16_256_add_c(const int16_t *input, uint8_t *dest, int stride) {
// First transform rows
for (i = 0; i < 16; ++i) {
-idct16_1d(input, outptr);
+idct16(input, outptr);
input += 16;
outptr += 16;
}
@@ -628,14 +628,14 @@ void vp9_idct16x16_256_add_c(const int16_t *input, uint8_t *dest, int stride) {
for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j)
temp_in[j] = out[j * 16 + i];
-idct16_1d(temp_in, temp_out);
+idct16(temp_in, temp_out);
for (j = 0; j < 16; ++j)
dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
+ dest[j * stride + i]);
}
}
-static void iadst16_1d(const int16_t *input, int16_t *output) {
+static void iadst16(const int16_t *input, int16_t *output) {
int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15;
int x0 = input[15];
@@ -807,10 +807,10 @@ static void iadst16_1d(const int16_t *input, int16_t *output) {
}
static const transform_2d IHT_16[] = {
-{ idct16_1d, idct16_1d }, // DCT_DCT = 0
-{ iadst16_1d, idct16_1d }, // ADST_DCT = 1
-{ idct16_1d, iadst16_1d }, // DCT_ADST = 2
-{ iadst16_1d, iadst16_1d } // ADST_ADST = 3
+{ idct16, idct16 }, // DCT_DCT = 0
+{ iadst16, idct16 }, // ADST_DCT = 1
+{ idct16, iadst16 }, // DCT_ADST = 2
+{ iadst16, iadst16 } // ADST_ADST = 3
};
void vp9_iht16x16_256_add_c(const int16_t *input, uint8_t *dest, int stride,
@@ -848,7 +848,7 @@ void vp9_idct16x16_10_add_c(const int16_t *input, uint8_t *dest, int stride) {
// First transform rows. Since all non-zero dct coefficients are in
// upper-left 4x4 area, we only need to calculate first 4 rows here.
for (i = 0; i < 4; ++i) {
-idct16_1d(input, outptr);
+idct16(input, outptr);
input += 16;
outptr += 16;
}
@@ -857,7 +857,7 @@ void vp9_idct16x16_10_add_c(const int16_t *input, uint8_t *dest, int stride) {
for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j)
temp_in[j] = out[j*16 + i];
-idct16_1d(temp_in, temp_out);
+idct16(temp_in, temp_out);
for (j = 0; j < 16; ++j)
dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
+ dest[j * stride + i]);
@@ -877,7 +877,7 @@ void vp9_idct16x16_1_add_c(const int16_t *input, uint8_t *dest, int stride) {
}
}
-static void idct32_1d(const int16_t *input, int16_t *output) {
+static void idct32(const int16_t *input, int16_t *output) {
int16_t step1[32], step2[32];
int temp1, temp2;
@@ -1263,7 +1263,7 @@ void vp9_idct32x32_1024_add_c(const int16_t *input, uint8_t *dest, int stride) {
zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];
if (zero_coeff[0] | zero_coeff[1])
-idct32_1d(input, outptr);
+idct32(input, outptr);
else
vpx_memset(outptr, 0, sizeof(int16_t) * 32);
input += 32;
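
The zero_coeff logic whose tail is visible above is a row-skip optimization for the sparse 32x32 case: the 32 coefficients of each row are OR-reduced pairwise, and idct32 runs only if something survives. Reconstructed schematically, with the elided reduction stages compressed into a loop (a sketch, not the verbatim source):

int16_t zero_coeff[16];
int j, n;
for (j = 0; j < 16; ++j)
  zero_coeff[j] = input[2 * j] | input[2 * j + 1];            /* 32 -> 16 */
for (n = 8; n >= 2; n >>= 1)                                  /* 16 -> 8 -> 4 -> 2 */
  for (j = 0; j < n; ++j)
    zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];
if (zero_coeff[0] | zero_coeff[1])
  idct32(input, outptr);                        /* at least one nonzero coef */
else
  vpx_memset(outptr, 0, sizeof(int16_t) * 32);  /* all-zero row: just clear */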
@@ -1274,7 +1274,7 @@ void vp9_idct32x32_1024_add_c(const int16_t *input, uint8_t *dest, int stride) {
for (i = 0; i < 32; ++i) {
for (j = 0; j < 32; ++j)
temp_in[j] = out[j * 32 + i];
-idct32_1d(temp_in, temp_out);
+idct32(temp_in, temp_out);
for (j = 0; j < 32; ++j)
dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
+ dest[j * stride + i]);
@@ -1290,7 +1290,7 @@ void vp9_idct32x32_34_add_c(const int16_t *input, uint8_t *dest, int stride) {
// Rows
// only upper-left 8x8 has non-zero coeff
for (i = 0; i < 8; ++i) {
-idct32_1d(input, outptr);
+idct32(input, outptr);
input += 32;
outptr += 32;
}
@@ -1299,7 +1299,7 @@ void vp9_idct32x32_34_add_c(const int16_t *input, uint8_t *dest, int stride) {
for (i = 0; i < 32; ++i) {
for (j = 0; j < 32; ++j)
temp_in[j] = out[j * 32 + i];
-idct32_1d(temp_in, temp_out);
+idct32(temp_in, temp_out);
for (j = 0; j < 32; ++j)
dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
+ dest[j * stride + i]);
......
@@ -180,7 +180,7 @@ static INLINE void transpose_4x4(__m128i *res) {
res[1] = _mm_unpackhi_epi16(tr0_0, tr0_1);
}
-static void idct4_1d_sse2(__m128i *in) {
+static void idct4_sse2(__m128i *in) {
const __m128i k__cospi_p16_p16 = pair_set_epi16(cospi_16_64, cospi_16_64);
const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64);
@@ -216,7 +216,7 @@ static void idct4_1d_sse2(__m128i *in) {
in[1] = _mm_shuffle_epi32(in[1], 0x4E);
}
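
pair_set_epi16 above packs two Q14 cosines into every 32-bit lane so that a single _mm_madd_epi16 evaluates a whole butterfly, a*c0 + b*c1 per lane pair. A minimal standalone rendering of the idiom (an illustration, not code from this file):

#include <emmintrin.h>  /* SSE2 */
#include <stdint.h>

#define DCT_CONST_BITS 14
#define DCT_CONST_ROUNDING (1 << (DCT_CONST_BITS - 1))

/* out[i] = (a[i] * c0 + b[i] * c1 + 2^13) >> 14 for eight int16 lanes */
static __m128i butterfly(__m128i a, __m128i b, int16_t c0, int16_t c1) {
  const __m128i k =
      _mm_set1_epi32((int)((uint16_t)c0 | ((uint32_t)(uint16_t)c1 << 16)));
  const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
  const __m128i lo = _mm_unpacklo_epi16(a, b);  /* a0 b0 a1 b1 ... */
  const __m128i hi = _mm_unpackhi_epi16(a, b);
  __m128i t0 = _mm_madd_epi16(lo, k);           /* 32-bit a*c0 + b*c1 */
  __m128i t1 = _mm_madd_epi16(hi, k);
  t0 = _mm_srai_epi32(_mm_add_epi32(t0, rounding), DCT_CONST_BITS);
  t1 = _mm_srai_epi32(_mm_add_epi32(t1, rounding), DCT_CONST_BITS);
  return _mm_packs_epi32(t0, t1);               /* saturate back to int16 */
}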
-static void iadst4_1d_sse2(__m128i *in) {
+static void iadst4_sse2(__m128i *in) {
const __m128i k__sinpi_p01_p04 = pair_set_epi16(sinpi_1_9, sinpi_4_9);
const __m128i k__sinpi_p03_p02 = pair_set_epi16(sinpi_3_9, sinpi_2_9);
const __m128i k__sinpi_p02_m01 = pair_set_epi16(sinpi_2_9, -sinpi_1_9);
@@ -276,20 +276,20 @@ void vp9_iht4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride,
switch (tx_type) {
case 0: // DCT_DCT
-idct4_1d_sse2(in);
-idct4_1d_sse2(in);
+idct4_sse2(in);
+idct4_sse2(in);
break;
case 1: // ADST_DCT
-idct4_1d_sse2(in);
-iadst4_1d_sse2(in);
+idct4_sse2(in);
+iadst4_sse2(in);
break;
case 2: // DCT_ADST
-iadst4_1d_sse2(in);
-idct4_1d_sse2(in);
+iadst4_sse2(in);
+idct4_sse2(in);
break;
case 3: // ADST_ADST
-iadst4_1d_sse2(in);
-iadst4_1d_sse2(in);
+iadst4_sse2(in);
+iadst4_sse2(in);
break;
default:
assert(0);
@@ -455,7 +455,7 @@ void vp9_iht4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride,
res1 = _mm_packs_epi32(tmp2, tmp3); \
}
-#define IDCT8_1D(in0, in1, in2, in3, in4, in5, in6, in7, \
+#define IDCT8(in0, in1, in2, in3, in4, in5, in6, in7, \
out0, out1, out2, out3, out4, out5, out6, out7) \
{ \
/* Stage1 */ \
@@ -573,7 +573,7 @@ void vp9_idct8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
in0, in1, in2, in3, in4, in5, in6, in7);
// 4-stage 1D idct8x8
-IDCT8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
+IDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
}
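
Note the SIMD structure here: there is no strided column pass. Because one XMM register holds an entire row of eight int16 values, the code transposes the block and reapplies the same row-wise IDCT8, using the identity cols(M) = transpose(rows(transpose(M))). A scalar illustration of the same trick, reusing the idct8 kernel defined earlier in this commit:

static void transpose_8x8_scalar(const int16_t *in, int16_t *out) {
  int r, c;
  for (r = 0; r < 8; ++r)
    for (c = 0; c < 8; ++c)
      out[c * 8 + r] = in[r * 8 + c];
}

static void idct8_cols(const int16_t *in, int16_t *out) {
  int16_t t[64], u[64];
  int i;
  transpose_8x8_scalar(in, t);    /* columns become rows */
  for (i = 0; i < 8; ++i)
    idct8(t + 8 * i, u + 8 * i);  /* plain row-wise 1-D kernel */
  transpose_8x8_scalar(u, out);   /* restore orientation */
}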
@@ -674,7 +674,7 @@ static INLINE void array_transpose_4X8(__m128i *in, __m128i * out) {
out[3] = _mm_unpackhi_epi64(tr1_2, tr1_6);
}
-static void idct8_1d_sse2(__m128i *in) {
+static void idct8_sse2(__m128i *in) {
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
const __m128i stg1_0 = pair_set_epi16(cospi_28_64, -cospi_4_64);
const __m128i stg1_1 = pair_set_epi16(cospi_4_64, cospi_28_64);
@@ -695,11 +695,11 @@ static void idct8_1d_sse2(__m128i *in) {
in0, in1, in2, in3, in4, in5, in6, in7);
// 4-stage 1D idct8x8
-IDCT8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
+IDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7]);
}
-static void iadst8_1d_sse2(__m128i *in) {
+static void iadst8_sse2(__m128i *in) {
const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64);
const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64);
const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64);
@@ -946,20 +946,20 @@ void vp9_iht8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride,
switch (tx_type) {
case 0: // DCT_DCT
-idct8_1d_sse2(in);
-idct8_1d_sse2(in);
+idct8_sse2(in);
+idct8_sse2(in);
break;
case 1: // ADST_DCT
-idct8_1d_sse2(in);
-iadst8_1d_sse2(in);
+idct8_sse2(in);
+iadst8_sse2(in);
break;
case 2: // DCT_ADST
-iadst8_1d_sse2(in);
-idct8_1d_sse2(in);
+iadst8_sse2(in);
+idct8_sse2(in);
break;
case 3: // ADST_ADST
-iadst8_1d_sse2(in);
-iadst8_1d_sse2(in);
+iadst8_sse2(in);
+iadst8_sse2(in);
break;
default:
assert(0);
@@ -1104,7 +1104,7 @@ void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
TRANSPOSE_4X8_10(tmp0, tmp1, tmp2, tmp3, in0, in1, in2, in3)
-IDCT8_1D(in0, in1, in2, in3, zero, zero, zero, zero,
+IDCT8(in0, in1, in2, in3, zero, zero, zero, zero,
in0, in1, in2, in3, in4, in5, in6, in7);
// Final rounding and shift
in0 = _mm_adds_epi16(in0, final_rounding);
@@ -1135,7 +1135,7 @@ void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
RECON_AND_STORE(dest, in7);
}
-#define IDCT16_1D \
+#define IDCT16 \
/* Stage2 */ \
{ \
const __m128i lo_1_15 = _mm_unpacklo_epi16(in[1], in[15]); \
@@ -1264,7 +1264,7 @@ void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
stp2_10, stp2_13, stp2_11, stp2_12) \
}
-#define IDCT16_10_1D \
+#define IDCT16_10 \
/* Stage2 */ \
{ \
const __m128i lo_1_15 = _mm_unpacklo_epi16(in[1], zero); \
@@ -1437,7 +1437,7 @@ void vp9_idct16x16_256_add_sse2(const int16_t *input, uint8_t *dest,
array_transpose_8x8(in, in);
array_transpose_8x8(in+8, in+8);
-IDCT16_1D
+IDCT16
// Stage7
curr1[0] = _mm_add_epi16(stp2_0, stp1_15);
@@ -1465,7 +1465,7 @@ void vp9_idct16x16_256_add_sse2(const int16_t *input, uint8_t *dest,
array_transpose_8x8(l+i*8, in);
array_transpose_8x8(r+i*8, in+8);
-IDCT16_1D
+IDCT16
// 2-D
in[0] = _mm_add_epi16(stp2_0, stp1_15);
@@ -1590,7 +1590,7 @@ static INLINE void array_transpose_16x16(__m128i *res0, __m128i *res1) {
res0[15] = tbuf[7];
}
-static void iadst16_1d_8col(__m128i *in) {
+static void iadst16_8col(__m128i *in) {
// perform 16x16 1-D ADST for 8 columns
__m128i s[16], x[16], u[32], v[32];
const __m128i k__cospi_p01_p31 = pair_set_epi16(cospi_1_64, cospi_31_64);
@@ -2060,7 +2060,7 @@ static void iadst16_1d_8col(__m128i *in) {
in[15] = _mm_sub_epi16(kZero, s[1]);
}
-static void idct16_1d_8col(__m128i *in) {
+static void idct16_8col(__m128i *in) {
const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64);
const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64);
const __m128i k__cospi_p14_m18 = pair_set_epi16(cospi_14_64, -cospi_18_64);
@@ -2404,16 +2404,16 @@ static void idct16_1d_8col(__m128i *in) {
in[15] = _mm_sub_epi16(s[0], s[15]);
}
-static void idct16_1d_sse2(__m128i *in0, __m128i *in1) {
+static void idct16_sse2(__m128i *in0, __m128i *in1) {
array_transpose_16x16(in0, in1);
-idct16_1d_8col(in0);
-idct16_1d_8col(in1);
+idct16_8col(in0);
+idct16_8col(in1);
}
-static void iadst16_1d_sse2(__m128i *in0, __m128i *in1) {
+static void iadst16_sse2(__m128i *in0, __m128i *in1) {
array_transpose_16x16(in0, in1);
-iadst16_1d_8col(in0);
-iadst16_1d_8col(in1);
+iadst16_8col(in0);
+iadst16_8col(in1);
}
static INLINE void load_buffer_8x16(const int16_t *input, __m128i *in) {
@@ -2502,20 +2502,20 @@ void vp9_iht16x16_256_add_sse2(const int16_t *input, uint8_t *dest, int stride,
switch (tx_type) {
case 0: // DCT_DCT
-idct16_1d_sse2(in0, in1);
-idct16_1d_sse2(in0, in1);
+idct16_sse2(in0, in1);
+idct16_sse2(in0, in1);
break;
case 1: // ADST_DCT
-idct16_1d_sse2(in0, in1);
-iadst16_1d_sse2(in0, in1);
+idct16_sse2(in0, in1);
+iadst16_sse2(in0, in1);
break;
case 2: // DCT_ADST
-iadst16_1d_sse2(in0, in1);
-idct16_1d_sse2(in0, in1);
+iadst16_sse2(in0, in1);
+idct16_sse2(in0, in1);
break;
case 3: // ADST_ADST
-iadst16_1d_sse2(in0, in1);
-iadst16_1d_sse2(in0, in1);
+iadst16_sse2(in0, in1);
+iadst16_sse2(in0, in1);
break;
default:
assert(0);
@@ -2732,7 +2732,7 @@ void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest,
for (i = 0; i < 2; i++) {
array_transpose_4X8(l + 8*i, in);
-IDCT16_10_1D
+IDCT16_10
// Stage7
in[0] = _mm_add_epi16(stp2_0, stp1_15);
@@ -2814,7 +2814,7 @@ void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest,
input += 8; \
} \
-#define IDCT32_1D_34 \
+#define IDCT32_34 \
/* Stage1 */ \
{ \
const __m128i zero = _mm_setzero_si128();\
@@ -3115,7 +3115,7 @@ void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest,
}
-#define IDCT32_1D \
+#define IDCT32 \
/* Stage1 */ \
{ \
const __m128i lo_1_31 = _mm_unpacklo_epi16(in[1], in[31]); \
@@ -3554,7 +3554,7 @@ void vp9_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest,
array_transpose_8x8(in+16, in+16);
array_transpose_8x8(in+24, in+24);
-IDCT32_1D
+IDCT32
// 1_D: Store 32 intermediate results for each 8x32 block.
col[0] = _mm_add_epi16(stp1_0, stp1_31);
@@ -3593,7 +3593,7 @@ void vp9_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest,
const __m128i zero = _mm_setzero_si128();
// Transpose 32x8 block to 8x32 block
array_transpose_8x8(col+i*8, in);
-IDCT32_1D_34
+IDCT32_34
// 2_D: Calculate the results and store them to destination.
in[0] = _mm_add_epi16(stp1_0, stp1_31);
@@ -3922,7 +3922,7 @@ void vp9_idct32x32_1024_add_sse2(const int16_t *input, uint8_t *dest,
array_transpose_8x8(in+16, in+16);
array_transpose_8x8(in+24, in+24);
-IDCT32_1D
+IDCT32
// 1_D: Store 32 intermediate results for each 8x32 block.
col[i32 + 0] = _mm_add_epi16(stp1_0, stp1_31);
@@ -3969,7 +3969,7 @@ void vp9_idct32x32_1024_add_sse2(const int16_t *input, uint8_t *dest,
array_transpose_8x8(col+j+64, in+16);
array_transpose_8x8(col+j+96, in+24);
-IDCT32_1D
+IDCT32
// 2_D: Calculate the results and store them to destination.
in[0] = _mm_add_epi16(stp1_0, stp1_31);
......
@@ -997,7 +997,7 @@ static INLINE int half_round_shift(int input) {
return rv;
}
-static void dct32_1d(const int *input, int *output, int round) {
+static void fdct32(const int *input, int *output, int round) {
int step[32];
// Stage 1
step[0] = input[0] + input[(32 - 1)];
@@ -1329,7 +1329,7 @@ void vp9_fdct32x32_c(const int16_t *input, int16_t *out, int stride) {
int temp_in[32], temp_out[32];
for (j = 0; j < 32; ++j)
temp_in[j] = input[j * stride + i] * 4;
-dct32_1d(temp_in, temp_out, 0);
+fdct32(temp_in, temp_out, 0);
for (j = 0; j < 32; ++j)
output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
}
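
The output rounding above, (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2, is a divide-by-4 that rounds to nearest with ties away from zero. A quick check of the arithmetic (mine, not from the source; arithmetic right shift assumed, as everywhere in this code):

/*  x =  6:  (6 + 1 + 1) >> 2 =  2     6/4 =  1.5  ->  2
 *  x = -6: (-6 + 1 + 0) >> 2 = -2    -6/4 = -1.5  -> -2
 *  x =  5:  (5 + 1 + 1) >> 2 =  1     5/4 =  1.25 ->  1
 *  x = -2: (-2 + 1 + 0) >> 2 = -1    -2/4 = -0.5  -> -1
 */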
@@ -1339,13 +1339,13 @@ void vp9_fdct32x32_c(const int16_t *input, int16_t *out, int stride) {
int temp_in[32], temp_out[32];
for (j = 0; j < 32; ++j)
temp_in[j] = output[j + i * 32];
-dct32_1d(temp_in, temp_out, 0);
+fdct32(temp_in, temp_out, 0);
for (j = 0; j < 32; ++j)
out[j + i * 32] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
}
}
-// Note that although we use dct_32_round in dct32_1d computation flow,
+// Note that although we use dct_32_round in dct32 computation flow,
// this 2d fdct32x32 for rate-distortion optimization loop is operating
// within 16 bits precision.
void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *out, int stride) {
@@ -1357,7 +1357,7 @@ void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *out, int stride) {
int temp_in[32], temp_out[32];
for (j = 0; j < 32; ++j)
temp_in[j] = input[j * stride + i] * 4;
-dct32_1d(temp_in, temp_out, 0);
+fdct32(temp_in, temp_out, 0);
for (j = 0; j < 32; ++j)
// TODO(cd): see quality impact of only doing
// output[j * 32 + i] = (temp_out[j] + 1) >> 2;
@@ -1370,7 +1370,7 @@ void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *out, int stride) {
int temp_in[32], temp_out[32];
for (j = 0; j < 32; ++j)
temp_in[j] = output[j + i * 32];
-dct32_1d(temp_in, temp_out, 1);
+fdct32(temp_in, temp_out, 1);
for (j = 0; j < 32; ++j)
out[j + i * 32] = temp_out[j];
}
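
fdct32 carries a round flag: this _rd variant passes 1 in the column pass above, which swaps an intermediate rounding stage over to half_round_shift so values stay within 16 bits for the rate-distortion loop, trading a little precision for range (see the comment before vp9_fdct32x32_rd_c). The helper whose closing lines appear in this file's first hunk reads roughly as follows (reproduced from memory; treat the exact expression as approximate):

static INLINE int half_round_shift(int input) {
  /* divide by 4 with a small sign-dependent bias */
  int rv = (input + 1 + (input < 0)) >> 2;
  return rv;
}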
......
@@ -163,7 +163,7 @@ static INLINE void transpose_4x4_avx2(__m128i *res) {
res[3] = _mm_unpackhi_epi64(res[2], res[2]);
}
-void fdct4_1d_avx2(__m128i *in) {
+void fdct4_avx2(__m128i *in) {
const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
@@ -196,7 +196,7 @@ void fdct4_1d_avx2(__m128i *in) {
transpose_4x4_avx2(in);
}
-void fadst4_1d_avx2(__m128i *in) {
+void fadst4_avx2(__m128i *in) {
const __m128i k__sinpi_p01_p02 = pair_set_epi16(sinpi_1_9, sinpi_2_9);
const __m128i k__sinpi_p04_m01 = pair_set_epi16(sinpi_4_9, -sinpi_1_9);
const __m128i k__sinpi_p03_p04 = pair_set_epi16(sinpi_3_9, sinpi_4_9);
@@ -250,20 +250,20 @@ void vp9_short_fht4x4_avx2(const int16_t *input, int16_t *output,
load_buffer_4x4_avx2(input, in, stride);
switch (tx_type) {
case 0: // DCT_DCT
-fdct4_1d_avx2(in);
-fdct4_1d_avx2(in);
+fdct4_avx2(in);
+fdct4_avx2(in);
break;
case 1: // ADST_DCT
-fadst4_1d_avx2(in);
-fdct4_1d_avx2(in);
+fadst4_avx2(in);
+fdct4_avx2(in);
break;
case 2: // DCT_ADST
-fdct4_1d_avx2(in);
-fadst4_1d_avx2(in);
+fdct4_avx2(in);
+fadst4_avx2(in);
break;
case 3: // ADST_ADST
-fadst4_1d_avx2(in);
-fadst4_1d_avx2(in);
+fadst4_avx2(in);
+fadst4_avx2(in);
break;
default:
assert(0);
@@ -658,7 +658,7 @@ static INLINE void array_transpose_8x8_avx2(__m128i *in, __m128i *res) {