Commit 72081457 authored by Debargha Mukherjee's avatar Debargha Mukherjee

Adding 8x16/16x8/32x16/16x32 transforms

Adds forward and inverse transforms, and scan orders.

Change-Id: Iab6994f4b0ef65e660b714d111b79b1c8172d6a8
parent 814986b8
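
For orientation before the diff: every rectangular transform added here follows one template. A length-w 1-D transform is applied to each of the h rows, each row output is scaled by sqrt(2) in fixed point so the 2-D gain stays in line with the square transforms, the result is transposed, and a length-h 1-D transform is applied to each column. A floating-point sketch of that scheme (editor's illustration with hypothetical names, not code from this commit):

    #include <math.h>

    /* Hypothetical 1-D transform signature; the real code uses
     * function-pointer pairs such as { idct16_c, idct8_c }. */
    typedef void (*tx1d_fn)(const double *in, double *out);

    /* Inverse 2-D transform of a w x h block: rows, sqrt(2) scale,
     * transpose, then columns, mirroring the integer code below. */
    static void rect_inv_transform_sketch(const double *in, double *out, int w,
                                          int h, tx1d_fn row_tx, tx1d_fn col_tx) {
      double tmp[32 * 32]; /* transposed intermediate, column-major */
      double rowbuf[32];
      int i, j;
      for (i = 0; i < h; ++i) {
        row_tx(in + i * w, rowbuf); /* length-w transform on row i */
        for (j = 0; j < w; ++j)
          tmp[j * h + i] = rowbuf[j] * sqrt(2.0); /* scale and transpose */
      }
      for (j = 0; j < w; ++j)
        col_tx(tmp + j * h, tmp + j * h); /* length-h transform, in place */
      for (i = 0; i < h; ++i) /* undo the transpose */
        for (j = 0; j < w; ++j) out[i * w + j] = tmp[j * h + i];
    }
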
@@ -60,41 +60,41 @@ static const uint8_t num_16x16_blocks_high_lookup[BLOCK_SIZES] = {
1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 4, 2, 4, IF_EXT_PARTITION(8, 4, 8)
};
-static const uint8_t num_4x4_blocks_txsize_lookup[TX_SIZES_ALL] = { 1, 4,
-16, 64,
+static const uint8_t num_4x4_blocks_txsize_lookup[TX_SIZES_ALL] = {
+1, 4, 16, 64,
#if CONFIG_EXT_TX
-2, 2
+2, 2, 8, 8, 32, 32
#endif // CONFIG_EXT_TX
};
-static const uint8_t num_4x4_blocks_wide_txsize_lookup[TX_SIZES_ALL] = { 1, 2,
-4, 8,
+static const uint8_t num_4x4_blocks_wide_txsize_lookup[TX_SIZES_ALL] = {
+1, 2, 4, 8,
#if CONFIG_EXT_TX
-1, 2
+1, 2, 2, 4, 4, 8
#endif // CONFIG_EXT_TX
};
-static const uint8_t num_4x4_blocks_high_txsize_lookup[TX_SIZES_ALL] = { 1, 2,
-4, 8,
+static const uint8_t num_4x4_blocks_high_txsize_lookup[TX_SIZES_ALL] = {
+1, 2, 4, 8,
#if CONFIG_EXT_TX
-2, 1
+2, 1, 4, 2, 8, 4
#endif // CONFIG_EXT_TX
};
-static const uint8_t num_4x4_blocks_txsize_log2_lookup[TX_SIZES_ALL] = { 0, 2,
-4, 6,
+static const uint8_t num_4x4_blocks_txsize_log2_lookup[TX_SIZES_ALL] = {
+0, 2, 4, 6,
#if CONFIG_EXT_TX
-1, 1
+1, 1, 3, 3, 5, 5
#endif // CONFIG_EXT_TX
};
static const uint8_t num_4x4_blocks_wide_txsize_log2_lookup[TX_SIZES_ALL] = {
0, 1, 2, 3,
#if CONFIG_EXT_TX
-0, 1
+0, 1, 1, 2, 2, 3
#endif // CONFIG_EXT_TX
};
static const uint8_t num_4x4_blocks_high_txsize_log2_lookup[TX_SIZES_ALL] = {
0, 1, 2, 3,
#if CONFIG_EXT_TX
-1, 0
+1, 0, 2, 1, 3, 2
#endif // CONFIG_EXT_TX
};
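
The extended entries preserve the tables' invariants: the 4x4-block count equals wide times high, and the log2 tables are exact logarithms (e.g. TX_16X32: 4 * 8 = 32 blocks, log2 lookup 2 + 3 = 5). A hypothetical self-check against the tables above (editor's sketch, not in the commit):

    #include <assert.h>

    static void check_txsize_tables(void) {
      TX_SIZE t;
      for (t = 0; t < TX_SIZES_ALL; ++t) {
        /* count == wide * high, in 4x4 units */
        assert(num_4x4_blocks_txsize_lookup[t] ==
               num_4x4_blocks_wide_txsize_lookup[t] *
                   num_4x4_blocks_high_txsize_lookup[t]);
        /* the log2 tables match their count tables exactly */
        assert((1 << num_4x4_blocks_txsize_log2_lookup[t]) ==
               num_4x4_blocks_txsize_lookup[t]);
        assert(num_4x4_blocks_txsize_log2_lookup[t] ==
               num_4x4_blocks_wide_txsize_log2_lookup[t] +
                   num_4x4_blocks_high_txsize_log2_lookup[t]);
      }
    }
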
@@ -374,9 +374,13 @@ static const BLOCK_SIZE txsize_to_bsize[TX_SIZES_ALL] = {
BLOCK_16X16, // TX_16X16
BLOCK_32X32, // TX_32X32
#if CONFIG_EXT_TX
BLOCK_4X8, // TX_4X8
BLOCK_8X4, // TX_8X4
+BLOCK_8X16, // TX_8X16
+BLOCK_16X8, // TX_16X8
+BLOCK_16X32, // TX_16X32
+BLOCK_32X16, // TX_32X16
#endif // CONFIG_EXT_TX
};
static const TX_SIZE txsize_sqr_map[TX_SIZES_ALL] = {
@@ -385,9 +389,13 @@ static const TX_SIZE txsize_sqr_map[TX_SIZES_ALL] = {
TX_16X16, // TX_16X16
TX_32X32, // TX_32X32
#if CONFIG_EXT_TX
TX_4X4, // TX_4X8
TX_4X4, // TX_8X4
+TX_8X8, // TX_8X16
+TX_8X8, // TX_16X8
+TX_16X16, // TX_16X32
+TX_16X16, // TX_32X16
#endif // CONFIG_EXT_TX
};
static const TX_SIZE txsize_sqr_up_map[TX_SIZES_ALL] = {
@@ -396,9 +404,13 @@ static const TX_SIZE txsize_sqr_up_map[TX_SIZES_ALL] = {
TX_16X16, // TX_16X16
TX_32X32, // TX_32X32
#if CONFIG_EXT_TX
TX_8X8, // TX_4X8
TX_8X8, // TX_8X4
+TX_16X16, // TX_8X16
+TX_16X16, // TX_16X8
+TX_32X32, // TX_16X32
+TX_32X32, // TX_32X16
#endif // CONFIG_EXT_TX
};
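
These maps give each (possibly rectangular) transform size its same-shape block size, the largest square transform fitting inside it (txsize_sqr_map), and the smallest square transform containing it (txsize_sqr_up_map), so code with square-only tables can collapse a rectangular size in either direction. An illustrative use (editor's sketch, not in the commit):

    /* TX_16X32 rounds down to TX_16X16 for square-indexed tables,
     * or up to TX_32X32 when a covering square size is required. */
    static INLINE TX_SIZE square_tx_inside(TX_SIZE tx_size) {
      return txsize_sqr_map[tx_size];
    }
    static INLINE TX_SIZE square_tx_covering(TX_SIZE tx_size) {
      return txsize_sqr_up_map[tx_size];
    }
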
static const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES] = {
......
@@ -59,7 +59,12 @@ const uint16_t band_count_table[TX_SIZES_ALL][8] = {
{ 1, 2, 3, 4, 3, 16 - 13, 0 }, { 1, 2, 3, 4, 11, 64 - 21, 0 },
{ 1, 2, 3, 4, 11, 256 - 21, 0 }, { 1, 2, 3, 4, 11, 1024 - 21, 0 },
#if CONFIG_EXT_TX
-{ 1, 2, 3, 4, 8, 32 - 18, 0 }, { 1, 2, 3, 4, 8, 32 - 18, 0 },
+{ 1, 2, 3, 4, 8, 32 - 18, 0 },
+{ 1, 2, 3, 4, 8, 32 - 18, 0 },
+{ 1, 2, 3, 4, 11, 128 - 21, 0 },
+{ 1, 2, 3, 4, 11, 128 - 21, 0 },
+{ 1, 2, 3, 4, 11, 512 - 21, 0 },
+{ 1, 2, 3, 4, 11, 512 - 21, 0 },
#endif // CONFIG_EXT_TX
};
@@ -67,7 +72,12 @@ const uint16_t band_cum_count_table[TX_SIZES_ALL][8] = {
{ 0, 1, 3, 6, 10, 13, 16, 0 }, { 0, 1, 3, 6, 10, 21, 64, 0 },
{ 0, 1, 3, 6, 10, 21, 256, 0 }, { 0, 1, 3, 6, 10, 21, 1024, 0 },
#if CONFIG_EXT_TX
-{ 0, 1, 3, 6, 10, 18, 32, 0 }, { 0, 1, 3, 6, 10, 18, 32, 0 },
+{ 0, 1, 3, 6, 10, 18, 32, 0 },
+{ 0, 1, 3, 6, 10, 18, 32, 0 },
+{ 0, 1, 3, 6, 10, 21, 128, 0 },
+{ 0, 1, 3, 6, 10, 21, 128, 0 },
+{ 0, 1, 3, 6, 10, 21, 512, 0 },
+{ 0, 1, 3, 6, 10, 21, 512, 0 },
#endif // CONFIG_EXT_TX
};
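
Each band_count_table row partitions a transform's coefficients into bands, so its entries sum to the coefficient count; for the new 8x16/16x8 rows, 1 + 2 + 3 + 4 + 11 + (128 - 21) = 128. band_cum_count_table holds the running prefix sums of the same rows. A hypothetical consistency check (editor's sketch, not in the commit):

    #include <assert.h>

    static void check_band_tables(void) {
      int t, b;
      for (t = 0; t < TX_SIZES_ALL; ++t) {
        int sum = 0;
        for (b = 0; b < 7; ++b) {
          /* cum[b] is the number of coefficients in bands 0..b-1 */
          assert(band_cum_count_table[t][b] == sum);
          sum += band_count_table[t][b];
        }
      }
    }
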
@@ -116,7 +126,7 @@ const uint8_t vp10_coefband_trans_8x8plus[1024] = {
};
#if CONFIG_EXT_TX
-const uint8_t vp10_coefband_trans_8x4_4x8[32] = {
+const uint8_t vp10_coefband_trans_4x8_8x4[32] = {
0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4,
4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
};
......
@@ -156,7 +156,7 @@ void vp10_partial_adapt_probs(struct VP10Common *cm, int mi_row, int mi_col);
DECLARE_ALIGNED(16, extern const uint8_t, vp10_coefband_trans_8x8plus[1024]);
#if CONFIG_EXT_TX
-DECLARE_ALIGNED(16, extern const uint8_t, vp10_coefband_trans_8x4_4x8[32]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_coefband_trans_4x8_8x4[32]);
#endif // CONFIG_EXT_TX
DECLARE_ALIGNED(16, extern const uint8_t, vp10_coefband_trans_4x4[16]);
@@ -169,7 +169,7 @@ static INLINE const uint8_t *get_band_translate(TX_SIZE tx_size) {
case TX_4X4: return vp10_coefband_trans_4x4;
#if CONFIG_EXT_TX
case TX_4X8:
-case TX_8X4: return vp10_coefband_trans_8x4_4x8;
+case TX_8X4: return vp10_coefband_trans_4x8_8x4;
#endif // CONFIG_EXT_TX
default: return vp10_coefband_trans_8x8plus;
}
@@ -228,6 +228,22 @@ static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
above_ec = !!*(const uint16_t *)a;
left_ec = l[0] != 0;
break;
+case TX_8X16:
+above_ec = !!*(const uint16_t *)a;
+left_ec = !!*(const uint32_t *)l;
+break;
+case TX_16X8:
+above_ec = !!*(const uint32_t *)a;
+left_ec = !!*(const uint16_t *)l;
+break;
+case TX_16X32:
+above_ec = !!*(const uint32_t *)a;
+left_ec = !!*(const uint64_t *)l;
+break;
+case TX_32X16:
+above_ec = !!*(const uint64_t *)a;
+left_ec = !!*(const uint32_t *)l;
+break;
#endif // CONFIG_EXT_TX
case TX_8X8:
above_ec = !!*(const uint16_t *)a;
......
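
Background for the new cases: each ENTROPY_CONTEXT byte tracks one 4-sample run along a transform edge, so a w-sample edge spans w/4 bytes, and the uint16_t/uint32_t/uint64_t loads test 2, 4, or 8 of them at once; the rectangular sizes simply pair different widths for the above and left edges (e.g. TX_16X8: 16/4 = 4 bytes above, 8/4 = 2 bytes left). A plain-loop equivalent (editor's sketch, not in the commit):

    static int edge_has_nonzero(const ENTROPY_CONTEXT *ctx, int edge_samples) {
      int i;
      /* one context byte per 4 samples of edge */
      for (i = 0; i < edge_samples >> 2; ++i)
        if (ctx[i]) return 1;
      return 0;
    }
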
@@ -138,9 +138,13 @@ typedef uint8_t TX_SIZE;
#define TX_SIZES ((TX_SIZE)4)
#if CONFIG_EXT_TX
#define TX_4X8 ((TX_SIZE)4) // 4x8 transform
#define TX_8X4 ((TX_SIZE)5) // 8x4 transform
+#define TX_8X16 ((TX_SIZE)6) // 8x16 transform
+#define TX_16X8 ((TX_SIZE)7) // 16x8 transform
+#define TX_16X32 ((TX_SIZE)8) // 16x32 transform
+#define TX_32X16 ((TX_SIZE)9) // 32x16 transform
-#define TX_SIZES_ALL ((TX_SIZE)6) // Includes rectangular transforms
+#define TX_SIZES_ALL ((TX_SIZE)10) // Includes rectangular transforms
#else
#define TX_SIZES_ALL ((TX_SIZE)4)
#endif // CONFIG_EXT_TX
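
With values 6..9 in play, a transform's dimensions can no longer be derived as 4 << tx_size; that identity only holds for the square sizes 0..3, so code must go through the lookup tables added above. Hypothetical helpers making that explicit (editor's sketch, not in the commit):

    static INLINE int tx_size_wide_px(TX_SIZE tx_size) {
      return 4 * num_4x4_blocks_wide_txsize_lookup[tx_size];
    }
    static INLINE int tx_size_high_px(TX_SIZE tx_size) {
      return 4 * num_4x4_blocks_high_txsize_lookup[tx_size];
    }
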
......
@@ -540,6 +540,7 @@ void vp10_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
}
}
#if CONFIG_EXT_TX
void vp10_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
int tx_type) {
static const transform_2d IHT_4x8[] = {
@@ -547,9 +548,8 @@ void vp10_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
{ iadst8_c, idct4_c }, // ADST_DCT
{ idct8_c, iadst4_c }, // DCT_ADST
{ iadst8_c, iadst4_c }, // ADST_ADST
-#if CONFIG_EXT_TX
{ iadst8_c, idct4_c }, // FLIPADST_DCT
{ idct8_c, iadst4_c }, // DCT_FLIPADST
{ iadst8_c, iadst4_c }, // FLIPADST_FLIPADST
{ iadst8_c, iadst4_c }, // ADST_FLIPADST
{ iadst8_c, iadst4_c }, // FLIPADST_ADST
@@ -560,34 +560,33 @@ void vp10_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
{ iidtx8_c, iadst4_c }, // H_ADST
{ iadst8_c, iidtx4_c }, // V_FLIPADST
{ iidtx8_c, iadst4_c }, // H_FLIPADST
-#endif // CONFIG_EXT_TX
};
+const int n = 4;
+const int n2 = 8;
int i, j;
tran_low_t out[4][8], outtmp[4];
tran_low_t *outp = &out[0][0];
-int outstride = 8;
+int outstride = n2;
// inverse transform row vectors and transpose
-for (i = 0; i < 8; ++i) {
+for (i = 0; i < n2; ++i) {
IHT_4x8[tx_type].rows(input, outtmp);
-for (j = 0; j < 4; ++j)
+for (j = 0; j < n; ++j)
out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
-input += 4;
+input += n;
}
// inverse transform column vectors
-for (i = 0; i < 4; ++i) {
+for (i = 0; i < n; ++i) {
IHT_4x8[tx_type].cols(out[i], out[i]);
}
-#if CONFIG_EXT_TX
-maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 8, 4);
-#endif
+maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
// Sum with the destination
-for (i = 0; i < 8; ++i) {
-for (j = 0; j < 4; ++j) {
+for (i = 0; i < n2; ++i) {
+for (j = 0; j < n; ++j) {
int d = i * stride + j;
int s = j * outstride + i;
dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
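
On the final shift: ROUND_POWER_OF_TWO is libvpx's round-to-nearest right shift, and the 32-coefficient 4x8/8x4 transforms round with a shift of 5, while the four larger sizes added below use 6, matching the square transforms of comparable size (8x8 uses 5; 16x16 and 32x32 use 6):

    /* libvpx definition, for reference: */
    #define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n))
    /* e.g. ROUND_POWER_OF_TWO(47, 5) == (47 + 16) >> 5 == 1 */
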
@@ -602,9 +601,8 @@ void vp10_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
{ iadst4_c, idct8_c }, // ADST_DCT
{ idct4_c, iadst8_c }, // DCT_ADST
{ iadst4_c, iadst8_c }, // ADST_ADST
-#if CONFIG_EXT_TX
{ iadst4_c, idct8_c }, // FLIPADST_DCT
{ idct4_c, iadst8_c }, // DCT_FLIPADST
{ iadst4_c, iadst8_c }, // FLIPADST_FLIPADST
{ iadst4_c, iadst8_c }, // ADST_FLIPADST
{ iadst4_c, iadst8_c }, // FLIPADST_ADST
@@ -615,34 +613,33 @@ void vp10_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
{ iidtx4_c, iadst8_c }, // H_ADST
{ iadst4_c, iidtx8_c }, // V_FLIPADST
{ iidtx4_c, iadst8_c }, // H_FLIPADST
-#endif // CONFIG_EXT_TX
};
+const int n = 4;
+const int n2 = 8;
int i, j;
tran_low_t out[8][4], outtmp[8];
tran_low_t *outp = &out[0][0];
-int outstride = 4;
+int outstride = n;
// inverse transform row vectors and transpose
-for (i = 0; i < 4; ++i) {
+for (i = 0; i < n; ++i) {
IHT_8x4[tx_type].rows(input, outtmp);
-for (j = 0; j < 8; ++j)
+for (j = 0; j < n2; ++j)
out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
-input += 8;
+input += n2;
}
// inverse transform column vectors
-for (i = 0; i < 8; ++i) {
+for (i = 0; i < n2; ++i) {
IHT_8x4[tx_type].cols(out[i], out[i]);
}
-#if CONFIG_EXT_TX
-maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4, 8);
-#endif
+maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
// Sum with the destination
-for (i = 0; i < 4; ++i) {
-for (j = 0; j < 8; ++j) {
+for (i = 0; i < n; ++i) {
+for (j = 0; j < n2; ++j) {
int d = i * stride + j;
int s = j * outstride + i;
dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
@@ -650,6 +647,219 @@ void vp10_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
}
}
+void vp10_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest,
+int stride, int tx_type) {
+static const transform_2d IHT_8x16[] = {
+{ idct16_c, idct8_c }, // DCT_DCT
+{ iadst16_c, idct8_c }, // ADST_DCT
+{ idct16_c, iadst8_c }, // DCT_ADST
+{ iadst16_c, iadst8_c }, // ADST_ADST
+{ iadst16_c, idct8_c }, // FLIPADST_DCT
+{ idct16_c, iadst8_c }, // DCT_FLIPADST
+{ iadst16_c, iadst8_c }, // FLIPADST_FLIPADST
+{ iadst16_c, iadst8_c }, // ADST_FLIPADST
+{ iadst16_c, iadst8_c }, // FLIPADST_ADST
+{ iidtx16_c, iidtx8_c }, // IDTX
+{ idct16_c, iidtx8_c }, // V_DCT
+{ iidtx16_c, idct8_c }, // H_DCT
+{ iadst16_c, iidtx8_c }, // V_ADST
+{ iidtx16_c, iadst8_c }, // H_ADST
+{ iadst16_c, iidtx8_c }, // V_FLIPADST
+{ iidtx16_c, iadst8_c }, // H_FLIPADST
+};
+const int n = 8;
+const int n2 = 16;
+int i, j;
+tran_low_t out[8][16], outtmp[8];
+tran_low_t *outp = &out[0][0];
+int outstride = n2;
+// inverse transform row vectors and transpose
+for (i = 0; i < n2; ++i) {
+IHT_8x16[tx_type].rows(input, outtmp);
+for (j = 0; j < n; ++j)
+out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
+input += n;
+}
+// inverse transform column vectors
+for (i = 0; i < n; ++i) {
+IHT_8x16[tx_type].cols(out[i], out[i]);
+}
+maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
+// Sum with the destination
+for (i = 0; i < n2; ++i) {
+for (j = 0; j < n; ++j) {
+int d = i * stride + j;
+int s = j * outstride + i;
+dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
+}
+}
+}
+void vp10_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest,
+int stride, int tx_type) {
+static const transform_2d IHT_16x8[] = {
+{ idct8_c, idct16_c }, // DCT_DCT
+{ iadst8_c, idct16_c }, // ADST_DCT
+{ idct8_c, iadst16_c }, // DCT_ADST
+{ iadst8_c, iadst16_c }, // ADST_ADST
+{ iadst8_c, idct16_c }, // FLIPADST_DCT
+{ idct8_c, iadst16_c }, // DCT_FLIPADST
+{ iadst8_c, iadst16_c }, // FLIPADST_FLIPADST
+{ iadst8_c, iadst16_c }, // ADST_FLIPADST
+{ iadst8_c, iadst16_c }, // FLIPADST_ADST
+{ iidtx8_c, iidtx16_c }, // IDTX
+{ idct8_c, iidtx16_c }, // V_DCT
+{ iidtx8_c, idct16_c }, // H_DCT
+{ iadst8_c, iidtx16_c }, // V_ADST
+{ iidtx8_c, iadst16_c }, // H_ADST
+{ iadst8_c, iidtx16_c }, // V_FLIPADST
+{ iidtx8_c, iadst16_c }, // H_FLIPADST
+};
+const int n = 8;
+const int n2 = 16;
+int i, j;
+tran_low_t out[16][8], outtmp[16];
+tran_low_t *outp = &out[0][0];
+int outstride = n;
+// inverse transform row vectors and transpose
+for (i = 0; i < n; ++i) {
+IHT_16x8[tx_type].rows(input, outtmp);
+for (j = 0; j < n2; ++j)
+out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
+input += n2;
+}
+// inverse transform column vectors
+for (i = 0; i < n2; ++i) {
+IHT_16x8[tx_type].cols(out[i], out[i]);
+}
+maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
+// Sum with the destination
+for (i = 0; i < n; ++i) {
+for (j = 0; j < n2; ++j) {
+int d = i * stride + j;
+int s = j * outstride + i;
+dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
+}
+}
+}
+void vp10_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest,
+int stride, int tx_type) {
+static const transform_2d IHT_16x32[] = {
+{ idct32_c, idct16_c }, // DCT_DCT
+{ ihalfright32_c, idct16_c }, // ADST_DCT
+{ idct32_c, iadst16_c }, // DCT_ADST
+{ ihalfright32_c, iadst16_c }, // ADST_ADST
+{ ihalfright32_c, idct16_c }, // FLIPADST_DCT
+{ idct32_c, iadst16_c }, // DCT_FLIPADST
+{ ihalfright32_c, iadst16_c }, // FLIPADST_FLIPADST
+{ ihalfright32_c, iadst16_c }, // ADST_FLIPADST
+{ ihalfright32_c, iadst16_c }, // FLIPADST_ADST
+{ iidtx32_c, iidtx16_c }, // IDTX
+{ idct32_c, iidtx16_c }, // V_DCT
+{ iidtx32_c, idct16_c }, // H_DCT
+{ ihalfright32_c, iidtx16_c }, // V_ADST
+{ iidtx32_c, iadst16_c }, // H_ADST
+{ ihalfright32_c, iidtx16_c }, // V_FLIPADST
+{ iidtx32_c, iadst16_c }, // H_FLIPADST
+};
+const int n = 16;
+const int n2 = 32;
+int i, j;
+tran_low_t out[16][32], outtmp[16];
+tran_low_t *outp = &out[0][0];
+int outstride = n2;
+// inverse transform row vectors and transpose
+for (i = 0; i < n2; ++i) {
+IHT_16x32[tx_type].rows(input, outtmp);
+for (j = 0; j < n; ++j)
+out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
+input += n;
+}
+// inverse transform column vectors
+for (i = 0; i < n; ++i) {
+IHT_16x32[tx_type].cols(out[i], out[i]);
+}
+maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
+// Sum with the destination
+for (i = 0; i < n2; ++i) {
+for (j = 0; j < n; ++j) {
+int d = i * stride + j;
+int s = j * outstride + i;
+dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
+}
+}
+}
+void vp10_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest,
+int stride, int tx_type) {
+static const transform_2d IHT_32x16[] = {
+{ idct16_c, idct32_c }, // DCT_DCT
+{ iadst16_c, idct32_c }, // ADST_DCT
+{ idct16_c, ihalfright32_c }, // DCT_ADST
+{ iadst16_c, ihalfright32_c }, // ADST_ADST
+{ iadst16_c, idct32_c }, // FLIPADST_DCT
+{ idct16_c, ihalfright32_c }, // DCT_FLIPADST
+{ iadst16_c, ihalfright32_c }, // FLIPADST_FLIPADST
+{ iadst16_c, ihalfright32_c }, // ADST_FLIPADST
+{ iadst16_c, ihalfright32_c }, // FLIPADST_ADST
+{ iidtx16_c, iidtx32_c }, // IDTX
+{ idct16_c, iidtx32_c }, // V_DCT
+{ iidtx16_c, idct32_c }, // H_DCT
+{ iadst16_c, iidtx32_c }, // V_ADST
+{ iidtx16_c, ihalfright32_c }, // H_ADST
+{ iadst16_c, iidtx32_c }, // V_FLIPADST
+{ iidtx16_c, ihalfright32_c }, // H_FLIPADST
+};
+const int n = 16;
+const int n2 = 32;
+int i, j;
+tran_low_t out[32][16], outtmp[32];
+tran_low_t *outp = &out[0][0];
+int outstride = n;
+// inverse transform row vectors and transpose
+for (i = 0; i < n; ++i) {
+IHT_32x16[tx_type].rows(input, outtmp);
+for (j = 0; j < n2; ++j)
+out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
+input += n2;
+}
+// inverse transform column vectors
+for (i = 0; i < n2; ++i) {
+IHT_32x16[tx_type].cols(out[i], out[i]);
+}
+maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
+// Sum with the destination
+for (i = 0; i < n; ++i) {
+for (j = 0; j < n2; ++j) {
+int d = i * stride + j;
+int s = j * outstride + i;
+dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
+}
+}
+}
#endif // CONFIG_EXT_TX
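
A usage sketch for the new functions (editor's example, not code from this commit): input holds one block's dequantized coefficients in raster order with the width varying fastest, and stride is the pitch of the 8-bit destination buffer the residual is added into.

    /* Invert a DCT_DCT 8x16 block (8 wide, 16 high, 128 coefficients)
     * and add the result into a reconstruction buffer. */
    static void example_inv_8x16(const tran_low_t *coeffs, uint8_t *recon,
                                 int recon_stride) {
      vp10_iht8x16_128_add_c(coeffs, recon, recon_stride, DCT_DCT);
    }
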
void vp10_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
int tx_type) {
static const transform_2d IHT_8[] = {
@@ -930,16 +1140,40 @@ void vp10_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, int stride,
}
#if CONFIG_EXT_TX
-void vp10_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest, int stride,
-int eob, TX_TYPE tx_type) {
-(void)eob;
+void vp10_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest,
+int stride, int eob, TX_TYPE tx_type) {
+(void) eob;
vp10_iht4x8_32_add(input, dest, stride, tx_type);
}
+void vp10_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest,
+int stride, int eob, TX_TYPE tx_type) {
+(void) eob;
+vp10_iht8x4_32_add(input, dest, stride, tx_type);
+}
-void vp10_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest, int stride,
-int eob, TX_TYPE tx_type) {
-(void)eob;
-vp10_iht4x8_32_add(input, dest, stride, tx_type);
+void vp10_inv_txfm_add_8x16(const tran_low_t *input, uint8_t *dest,
+int stride, int eob, TX_TYPE tx_type) {
+(void) eob;
+vp10_iht8x16_128_add(input, dest, stride, tx_type);
+}
+void vp10_inv_txfm_add_16x8(const tran_low_t *input, uint8_t *dest,
+int stride, int eob, TX_TYPE tx_type) {
+(void) eob;
+vp10_iht16x8_128_add(input, dest, stride, tx_type);
+}
+void vp10_inv_txfm_add_16x32(const tran_low_t *input, uint8_t *dest,
+int stride, int eob, TX_TYPE tx_type) {
+(void) eob;
+vp10_iht16x32_512_add(input, dest, stride, tx_type);
+}
+void vp10_inv_txfm_add_32x16(const tran_low_t *input, uint8_t *dest,
+int stride, int eob, TX_TYPE tx_type) {
+(void) eob;
+vp10_iht32x16_512_add(input, dest, stride, tx_type);
+}
#endif // CONFIG_EXT_TX
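
How the wrappers above might be selected once a rectangular tx_size is signalled (editor's sketch; the commit's real call sites are outside this excerpt):

    static void inv_txfm_add_rect_sketch(TX_SIZE tx_size, const tran_low_t *input,
                                         uint8_t *dest, int stride, int eob,
                                         TX_TYPE tx_type) {
      switch (tx_size) {
        case TX_4X8:
          vp10_inv_txfm_add_4x8(input, dest, stride, eob, tx_type); break;
        case TX_8X4:
          vp10_inv_txfm_add_8x4(input, dest, stride, eob, tx_type); break;
        case TX_8X16:
          vp10_inv_txfm_add_8x16(input, dest, stride, eob, tx_type); break;
        case TX_16X8:
          vp10_inv_txfm_add_16x8(input, dest, stride, eob, tx_type); break;
        case TX_16X32:
          vp10_inv_txfm_add_16x32(input, dest, stride, eob, tx_type); break;
        case TX_32X16:
          vp10_inv_txfm_add_32x16(input, dest, stride, eob, tx_type); break;
        default: break; /* square sizes handled elsewhere */
      }
    }
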
@@ -1116,34 +1350,36 @@ void vp10_highbd_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest8,
{ vpx_highbd_iadst8_c, highbd_iidtx4_c }, // V_FLIPADST
{ highbd_iidtx8_c, vpx_highbd_iadst4_c }, // H_FLIPADST
};
+const int n = 4;
+const int n2 = 8;
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
int i, j;
tran_low_t out[4][8], outtmp[4];
tran_low_t *outp = &out[0][0];
-int outstride = 8;
+int outstride = n2;
// inverse transform row vectors, and transpose
-for (i = 0; i < 8; ++i) {
+for (i = 0; i < n2; ++i) {
HIGH_IHT_4x8[tx_type].rows(input, outtmp, bd);
-for (j = 0; j < 4; ++j) {
-out[j][i] =
-HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
+for (j = 0; j < n; ++j) {
+out[j][i] = HIGHBD_WRAPLOW(
+highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
}
-input += 4;
+input += n;
}
// inverse transform column vectors
-for (i = 0; i < 4; ++i) {
+for (i = 0; i < n; ++i) {
HIGH_IHT_4x8[tx_type].cols(out[i], out[i], bd);
}
-maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 8, 4);
+maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n2, n);
// Sum with the destination
-for (i = 0; i < 8; ++i) {
-for (j = 0; j < 4; ++j) {
+for (i = 0; i < n2; ++i) {
+for (j = 0; j < n; ++j) {
int d = i * stride + j;
int s = j * outstride + i;
dest[d] =
@@ -1172,34 +1408,36 @@ void vp10_highbd_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest8,
{ vpx_highbd_iadst4_c, highbd_iidtx8_c }, // V_FLIPADST
{ highbd_iidtx4_c, vpx_highbd_iadst8_c }, // H_FLIPADST
};
+const int n = 4;
+const int n2 = 8;
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
int i, j;
tran_low_t out[8][4], outtmp[8];
tran_low_t *outp = &out[0][0];
-int outstride = 4;
+int outstride = n;
// inverse transform row vectors, and transpose
-for (i = 0; i < 4; ++i) {
+for (i = 0; i < n; ++i) {
HIGH_IHT_8x4[tx_type].rows(input, outtmp, bd);
-for (j = 0; j < 8; ++j) {
-out[j][i] =
-HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
+for (j = 0; j < n2; ++j) {
+out[j][i] = HIGHBD_WRAPLOW(
+highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
}
input += 8;