Commit e5848dea authored by Debargha Mukherjee

Rectangular transforms 4x8 & 8x4

Added a new experiment, rect-tx, to be used in conjunction with
ext-tx. [rect-tx is a temporary config flag and will eventually be
merged into ext-tx once it works correctly with all other
experiments.]

Added 4x8 and 8x4 transforms, used initially with rectangular
sub8x8 y blocks as part of this experiment.

There is about a -0.2% BDRATE improvement on the lowres set;
results on the other sets are pending.

When var-tx is on, rectangular transforms are currently not used;
they will be enabled in a subsequent patch.
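
As an illustrative sketch (not part of the patch; the table and enum names
are the ones this patch introduces), a rectangular sub8x8 y block picks its
transform directly from the new lookup table:

  /* hypothetical helper, for illustration only */
  static TX_SIZE rect_tx_for_block(BLOCK_SIZE bsize) {
    return max_txsize_rect_lookup[bsize];  // BLOCK_4X8 -> TX_4X8, BLOCK_8X4 -> TX_8X4
  }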

Change-Id: Iaf3f88ede2740ffe6a0ffb1ef5fc01a16cd0283a
parent 3c13124e
@@ -267,6 +267,7 @@ EXPERIMENT_LIST="
     fp_mb_stats
     emulate_hardware
     var_tx
+    rect_tx
     ref_mv
     dual_filter
     ext_tx
......
@@ -53,7 +53,9 @@ void vp10_foreach_transformed_block_in_plane(
   const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
   const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
   const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
-  const int step = 1 << (tx_size << 1);
+  const uint8_t num_4x4_tw = num_4x4_blocks_wide_txsize_lookup[tx_size];
+  const uint8_t num_4x4_th = num_4x4_blocks_high_txsize_lookup[tx_size];
+  const int step = num_4x4_tw * num_4x4_th;
   int i = 0, r, c;

   // If mb_to_right_edge is < 0 we are in a situation in which
@@ -63,13 +65,15 @@ void vp10_foreach_transformed_block_in_plane(
       xd->mb_to_right_edge >> (5 + pd->subsampling_x));
   const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 0 :
       xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
-  const int extra_step = ((num_4x4_w - max_blocks_wide) >> tx_size) * step;
+  const int extra_step =
+      ((num_4x4_w - max_blocks_wide) >>
+       num_4x4_blocks_wide_txsize_log2_lookup[tx_size]) * step;

   // Keep track of the row and column of the blocks we use so that we know
   // if we are in the unrestricted motion border.
-  for (r = 0; r < max_blocks_high; r += (1 << tx_size)) {
+  for (r = 0; r < max_blocks_high; r += num_4x4_th) {
     // Skip visiting the sub blocks that are wholly within the UMV.
-    for (c = 0; c < max_blocks_wide; c += (1 << tx_size)) {
+    for (c = 0; c < max_blocks_wide; c += num_4x4_tw) {
       visit(plane, i, r, c, plane_bsize, tx_size, arg);
       i += step;
     }
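
A worked example of the new step arithmetic (a sketch; the values come from
the lookup tables added in common_data.h below): for TX_4X8 the transform
covers one 4x4 unit horizontally and two vertically, so

  num_4x4_tw = 1;                  // num_4x4_blocks_wide_txsize_lookup[TX_4X8]
  num_4x4_th = 2;                  // num_4x4_blocks_high_txsize_lookup[TX_4X8]
  step = num_4x4_tw * num_4x4_th;  // = 2 block indices per visit

whereas the old expression 1 << (tx_size << 1) was only meaningful for
square transform sizes.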
@@ -82,33 +86,33 @@ void vp10_foreach_transformed_block(const MACROBLOCKD* const xd,
                                     foreach_transformed_block_visitor visit,
                                     void *arg) {
   int plane;

   for (plane = 0; plane < MAX_MB_PLANE; ++plane)
     vp10_foreach_transformed_block_in_plane(xd, bsize, plane, visit, arg);
 }

 void vp10_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd,
-                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int has_eob,
-                        int aoff, int loff) {
+                       BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int has_eob,
+                       int aoff, int loff) {
   ENTROPY_CONTEXT *const a = pd->above_context + aoff;
   ENTROPY_CONTEXT *const l = pd->left_context + loff;
-  const int tx_size_in_blocks = 1 << tx_size;
+  const int tx_w_in_blocks = num_4x4_blocks_wide_txsize_lookup[tx_size];
+  const int tx_h_in_blocks = num_4x4_blocks_high_txsize_lookup[tx_size];

   // above
   if (has_eob && xd->mb_to_right_edge < 0) {
     int i;
     const int blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize] +
                             (xd->mb_to_right_edge >> (5 + pd->subsampling_x));
-    int above_contexts = tx_size_in_blocks;
+    int above_contexts = tx_w_in_blocks;
     if (above_contexts + aoff > blocks_wide)
       above_contexts = blocks_wide - aoff;
     for (i = 0; i < above_contexts; ++i)
       a[i] = has_eob;
-    for (i = above_contexts; i < tx_size_in_blocks; ++i)
+    for (i = above_contexts; i < tx_w_in_blocks; ++i)
       a[i] = 0;
   } else {
-    memset(a, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks);
+    memset(a, has_eob, sizeof(ENTROPY_CONTEXT) * tx_w_in_blocks);
   }

   // left
@@ -116,16 +120,16 @@ void vp10_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd,
     int i;
     const int blocks_high = num_4x4_blocks_high_lookup[plane_bsize] +
                             (xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
-    int left_contexts = tx_size_in_blocks;
+    int left_contexts = tx_h_in_blocks;
     if (left_contexts + loff > blocks_high)
       left_contexts = blocks_high - loff;
     for (i = 0; i < left_contexts; ++i)
       l[i] = has_eob;
-    for (i = left_contexts; i < tx_size_in_blocks; ++i)
+    for (i = left_contexts; i < tx_h_in_blocks; ++i)
       l[i] = 0;
   } else {
-    memset(l, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks);
+    memset(l, has_eob, sizeof(ENTROPY_CONTEXT) * tx_h_in_blocks);
   }
 }
......
@@ -422,6 +422,18 @@ static INLINE int supertx_enabled(const MB_MODE_INFO *mbmi) {
 }
 #endif  // CONFIG_SUPERTX

+static INLINE int get_tx1d_width(TX_SIZE tx_size) {
+  return num_4x4_blocks_wide_txsize_lookup[tx_size] << 2;
+}
+
+static INLINE int get_tx1d_height(TX_SIZE tx_size) {
+  return num_4x4_blocks_high_txsize_lookup[tx_size] << 2;
+}
+
+static INLINE int get_tx2d_size(TX_SIZE tx_size) {
+  return num_4x4_blocks_txsize_lookup[tx_size] << 4;
+}
+
 #if CONFIG_EXT_TX
 #define ALLOW_INTRA_EXT_TX 1
 // whether masked transforms are used for 32X32
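
The new helpers convert the 4x4-block-unit tables into pixel dimensions.
Expected values for the rectangular sizes (a sketch, assuming the tables
added in common_data.h below):

  get_tx1d_width(TX_4X8)  == 4;   // 1 << 2
  get_tx1d_height(TX_4X8) == 8;   // 2 << 2
  get_tx2d_size(TX_4X8)   == 32;  // 2 << 4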
@@ -438,6 +450,7 @@ static const int num_ext_tx_set_intra[EXT_TX_SETS_INTRA] = {
 #if EXT_TX_SIZES == 4
 static INLINE int get_ext_tx_set(TX_SIZE tx_size, BLOCK_SIZE bs,
                                  int is_inter) {
+  tx_size = txsize_sqr_map[tx_size];
   if (tx_size > TX_32X32 || bs < BLOCK_8X8) return 0;
 #if USE_REDUCED_TXSET_FOR_16X16
   if (tx_size == TX_32X32)
@@ -468,6 +481,7 @@ static const int use_inter_ext_tx_for_txsize[EXT_TX_SETS_INTER][TX_SIZES] = {
 static INLINE int get_ext_tx_set(TX_SIZE tx_size, BLOCK_SIZE bs,
                                  int is_inter) {
   (void) is_inter;
+  tx_size = txsize_sqr_map[tx_size];
   if (tx_size > TX_32X32 || bs < BLOCK_8X8) return 0;
   if (tx_size == TX_32X32) return 0;
 #if USE_REDUCED_TXSET_FOR_16X16
@@ -622,10 +636,11 @@ static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type,
 #if CONFIG_EXT_TX
 #if EXT_TX_SIZES == 4
-  if (xd->lossless[mbmi->segment_id] || tx_size > TX_32X32 ||
-      (tx_size >= TX_32X32 && !is_inter_block(mbmi)))
+  if (xd->lossless[mbmi->segment_id] ||
+      txsize_sqr_map[tx_size] > TX_32X32 ||
+      (txsize_sqr_map[tx_size] >= TX_32X32 && !is_inter_block(mbmi)))
 #else
-  if (xd->lossless[mbmi->segment_id] || tx_size >= TX_32X32)
+  if (xd->lossless[mbmi->segment_id] || txsize_sqr_map[tx_size] >= TX_32X32)
 #endif
     return DCT_DCT;
   if (mbmi->sb_type >= BLOCK_8X8) {
@@ -637,8 +652,8 @@ static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type,
     }
     if (is_inter_block(mbmi))
       // UV Inter only
-      return (mbmi->tx_type == IDTX && tx_size == TX_32X32 ?
-              DCT_DCT : mbmi->tx_type);
+      return (mbmi->tx_type == IDTX && txsize_sqr_map[tx_size] == TX_32X32) ?
+                 DCT_DCT : mbmi->tx_type;
   }

   // Sub8x8-Inter/Intra OR UV-Intra
@@ -647,10 +662,10 @@ static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type,
   else  // Sub8x8 Intra OR UV-Intra
     return intra_mode_to_tx_type_context[plane_type == PLANE_TYPE_Y ?
                                          get_y_mode(mi, block_idx) : mbmi->uv_mode];
-#else
+#else  // CONFIG_EXT_TX
   (void) block_idx;
   if (plane_type != PLANE_TYPE_Y || xd->lossless[mbmi->segment_id] ||
-      tx_size >= TX_32X32)
+      txsize_sqr_map[tx_size] >= TX_32X32)
     return DCT_DCT;

   return mbmi->tx_type;
 #endif  // CONFIG_EXT_TX
......
@@ -50,6 +50,46 @@ static const uint8_t num_16x16_blocks_wide_lookup[BLOCK_SIZES] =
 static const uint8_t num_16x16_blocks_high_lookup[BLOCK_SIZES] =
   {1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 4, 2, 4, IF_EXT_PARTITION(8, 4, 8)};

+static const uint8_t num_4x4_blocks_txsize_lookup[TX_SIZES_ALL] = {
+  1, 4, 16, 64,
+#if CONFIG_EXT_TX
+  2, 2
+#endif  // CONFIG_EXT_TX
+};
+static const uint8_t num_4x4_blocks_wide_txsize_lookup[TX_SIZES_ALL] = {
+  1, 2, 4, 8,
+#if CONFIG_EXT_TX
+  1, 2
+#endif  // CONFIG_EXT_TX
+};
+static const uint8_t num_4x4_blocks_high_txsize_lookup[TX_SIZES_ALL] = {
+  1, 2, 4, 8,
+#if CONFIG_EXT_TX
+  2, 1
+#endif  // CONFIG_EXT_TX
+};
+static const uint8_t num_4x4_blocks_txsize_log2_lookup[TX_SIZES_ALL] = {
+  0, 2, 4, 6,
+#if CONFIG_EXT_TX
+  1, 1
+#endif  // CONFIG_EXT_TX
+};
+static const uint8_t num_4x4_blocks_wide_txsize_log2_lookup
+    [TX_SIZES_ALL] = {
+  0, 1, 2, 3,
+#if CONFIG_EXT_TX
+  0, 1
+#endif  // CONFIG_EXT_TX
+};
+static const uint8_t num_4x4_blocks_high_txsize_log2_lookup
+    [TX_SIZES_ALL] = {
+  0, 1, 2, 3,
+#if CONFIG_EXT_TX
+  1, 0
+#endif  // CONFIG_EXT_TX
+};
+
 // VPXMIN(3, VPXMIN(b_width_log2(bsize), b_height_log2(bsize)))
 static const uint8_t size_group_lookup[BLOCK_SIZES] =
   {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, IF_EXT_PARTITION(3, 3, 3)};
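
The six tables are mutually consistent; for every tx_size t they satisfy
(a sketch of the invariants, easy to verify against the values above):

  num_4x4_blocks_txsize_lookup[t] ==
      num_4x4_blocks_wide_txsize_lookup[t] *
      num_4x4_blocks_high_txsize_lookup[t];
  num_4x4_blocks_txsize_log2_lookup[t] ==
      num_4x4_blocks_wide_txsize_log2_lookup[t] +
      num_4x4_blocks_high_txsize_log2_lookup[t];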
@@ -297,13 +337,59 @@ static const TX_SIZE max_txsize_lookup[BLOCK_SIZES] = {
 #endif  // CONFIG_EXT_PARTITION
 };

-static const BLOCK_SIZE txsize_to_bsize[TX_SIZES] = {
-  BLOCK_4X4,    // TX_4X4
-  BLOCK_8X8,    // TX_8X8
-  BLOCK_16X16,  // TX_16X16
-  BLOCK_32X32,  // TX_32X32
-};
+#if CONFIG_EXT_TX
+static const TX_SIZE max_txsize_rect_lookup[BLOCK_SIZES] = {
+  // 4X4
+  TX_4X4,
+  // 4X8,    8X4,      8X8
+  TX_4X8,    TX_8X4,   TX_8X8,
+  // 8X16,   16X8,     16X16
+  TX_8X8,    TX_8X8,   TX_16X16,
+  // 16X32,  32X16,    32X32
+  TX_16X16,  TX_16X16, TX_32X32,
+  // 32X64,  64X32,    64X64
+  TX_32X32,  TX_32X32, TX_32X32,
+#if CONFIG_EXT_PARTITION
+  // 64x128, 128x64,   128x128
+  TX_32X32,  TX_32X32, TX_32X32,
+#endif  // CONFIG_EXT_PARTITION
+};
+#endif  // CONFIG_EXT_TX
+
+static const BLOCK_SIZE txsize_to_bsize[TX_SIZES_ALL] = {
+  BLOCK_4X4,    // TX_4X4
+  BLOCK_8X8,    // TX_8X8
+  BLOCK_16X16,  // TX_16X16
+  BLOCK_32X32,  // TX_32X32
+#if CONFIG_EXT_TX
+  BLOCK_4X8,    // TX_4X8
+  BLOCK_8X4,    // TX_8X4
+#endif  // CONFIG_EXT_TX
+};
+
+static const TX_SIZE txsize_sqr_map[TX_SIZES_ALL] = {
+  TX_4X4,    // TX_4X4
+  TX_8X8,    // TX_8X8
+  TX_16X16,  // TX_16X16
+  TX_32X32,  // TX_32X32
+#if CONFIG_EXT_TX
+  TX_4X4,    // TX_4X8
+  TX_4X4,    // TX_8X4
+#endif  // CONFIG_EXT_TX
+};
+
+static const TX_SIZE txsize_sqr_up_map[TX_SIZES_ALL] = {
+  TX_4X4,    // TX_4X4
+  TX_8X8,    // TX_8X8
+  TX_16X16,  // TX_16X16
+  TX_32X32,  // TX_32X32
+#if CONFIG_EXT_TX
+  TX_8X8,    // TX_4X8
+  TX_8X8,    // TX_8X4
+#endif  // CONFIG_EXT_TX
+};

 static const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES] = {
   TX_4X4,  // ONLY_4X4
   TX_8X8,  // ALLOW_8X8
......
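
The two maps let rectangular sizes reuse the square-size rules: txsize_sqr_map
squashes a size down to the square of its shorter side (so TX_4X8 behaves as
TX_4X4 in get_ext_tx_set and get_tx_type above), while txsize_sqr_up_map
rounds up to the enclosing square. A sketch of the values implied:

  txsize_sqr_map[TX_4X8]    == TX_4X4;
  txsize_sqr_up_map[TX_4X8] == TX_8X8;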
@@ -56,11 +56,33 @@ const vpx_prob vp10_cat6_prob_high12[] = {
 };
 #endif

+const uint16_t band_count_table[TX_SIZES_ALL][8] = {
+  { 1, 2, 3, 4,  3,   16 - 13, 0 },
+  { 1, 2, 3, 4, 11,   64 - 21, 0 },
+  { 1, 2, 3, 4, 11,  256 - 21, 0 },
+  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
+#if CONFIG_EXT_TX
+  { 1, 2, 3, 4,  8,   32 - 18, 0 },
+  { 1, 2, 3, 4,  8,   32 - 18, 0 },
+#endif  // CONFIG_EXT_TX
+};
+
+const uint16_t band_cum_count_table[TX_SIZES_ALL][8] = {
+  { 0, 1, 3, 6, 10, 13,   16, 0 },
+  { 0, 1, 3, 6, 10, 21,   64, 0 },
+  { 0, 1, 3, 6, 10, 21,  256, 0 },
+  { 0, 1, 3, 6, 10, 21, 1024, 0 },
+#if CONFIG_EXT_TX
+  { 0, 1, 3, 6, 10, 18,   32, 0 },
+  { 0, 1, 3, 6, 10, 18,   32, 0 },
+#endif  // CONFIG_EXT_TX
+};
+
 const uint8_t vp10_coefband_trans_8x8plus[1024] = {
   0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4,
   4, 4, 4, 4, 4, 5,
   // beyond MAXBAND_INDEX+1 all values are filled as 5
   5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
   5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
   5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
   5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
   5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
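
Each band_count_table row partitions a transform's coefficients into the
coefficient bands, and band_cum_count_table holds the running totals. For the
new 32-coefficient 4x8/8x4 sizes the counts sum correctly (a quick check):

  1 + 2 + 3 + 4 + 8 + (32 - 18) == 32
  cumulative: 0, 1, 3, 6, 10, 18, 32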
@@ -125,6 +147,13 @@ const uint8_t vp10_coefband_trans_8x8plus[1024] = {
   5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 };

+#if CONFIG_EXT_TX
+const uint8_t vp10_coefband_trans_8x4_4x8[32] = {
+  0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4,
+  4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+};
+#endif  // CONFIG_EXT_TX
+
 const uint8_t vp10_coefband_trans_4x4[16] = {
   0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5,
 };
......
@@ -155,11 +155,28 @@ void vp10_partial_adapt_probs(struct VP10Common *cm, int mi_row, int mi_col);
 #define MAXBAND_INDEX 21

 DECLARE_ALIGNED(16, extern const uint8_t, vp10_coefband_trans_8x8plus[1024]);
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_coefband_trans_8x4_4x8[32]);
+#endif  // CONFIG_EXT_TX
 DECLARE_ALIGNED(16, extern const uint8_t, vp10_coefband_trans_4x4[16]);

+DECLARE_ALIGNED(16, extern const uint16_t,
+                band_count_table[TX_SIZES_ALL][8]);
+DECLARE_ALIGNED(16, extern const uint16_t,
+                band_cum_count_table[TX_SIZES_ALL][8]);
+
 static INLINE const uint8_t *get_band_translate(TX_SIZE tx_size) {
-  return tx_size == TX_4X4 ? vp10_coefband_trans_4x4
-                           : vp10_coefband_trans_8x8plus;
+  switch (tx_size) {
+    case TX_4X4:
+      return vp10_coefband_trans_4x4;
+#if CONFIG_EXT_TX
+    case TX_4X8:
+    case TX_8X4:
+      return vp10_coefband_trans_8x4_4x8;
+#endif  // CONFIG_EXT_TX
+    default:
+      return vp10_coefband_trans_8x8plus;
+  }
 }

 // 128 lists of probabilities are stored for the following ONE node probs:
@@ -198,7 +215,8 @@ static INLINE int combine_entropy_contexts(ENTROPY_CONTEXT a,
   return (a != 0) + (b != 0);
 }

-static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
+static INLINE int get_entropy_context(TX_SIZE tx_size,
+                                      const ENTROPY_CONTEXT *a,
                                       const ENTROPY_CONTEXT *l) {
   ENTROPY_CONTEXT above_ec = 0, left_ec = 0;
@@ -207,6 +225,16 @@ static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
       above_ec = a[0] != 0;
       left_ec = l[0] != 0;
       break;
+#if CONFIG_EXT_TX
+    case TX_4X8:
+      above_ec = a[0] != 0;
+      left_ec = !!*(const uint16_t *)l;
+      break;
+    case TX_8X4:
+      above_ec = !!*(const uint16_t *)a;
+      left_ec = l[0] != 0;
+      break;
+#endif  // CONFIG_EXT_TX
     case TX_8X8:
       above_ec = !!*(const uint16_t *)a;
       left_ec = !!*(const uint16_t *)l;
@@ -223,7 +251,6 @@ static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
       assert(0 && "Invalid transform size.");
       break;
   }
-
   return combine_entropy_contexts(above_ec, left_ec);
 }
......
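
Each ENTROPY_CONTEXT byte covers one 4x4 column (above) or one 4x4 row
(left), so a 4x8 transform consumes one above context but two left contexts;
the uint16_t load tests both bytes in a single read. A sketch of the TX_4X8
case:

  above_ec = a[0] != 0;               // 4 pixels wide -> one above byte
  left_ec  = !!*(const uint16_t *)l;  // 8 pixels high -> nonzero iff l[0] or l[1]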
@@ -137,6 +137,14 @@ typedef uint8_t TX_SIZE;
 #define TX_32X32 ((TX_SIZE)3)  // 32x32 transform
 #define TX_SIZES ((TX_SIZE)4)

+#if CONFIG_EXT_TX
+#define TX_4X8 ((TX_SIZE)4)        // 4x8 transform
+#define TX_8X4 ((TX_SIZE)5)        // 8x4 transform
+#define TX_SIZES_ALL ((TX_SIZE)6)  // Includes rectangular transforms
+#else
+#define TX_SIZES_ALL ((TX_SIZE)4)
+#endif  // CONFIG_EXT_TX
+
 #define MAX_TX_SIZE_LOG2 5
 #define MAX_TX_SIZE (1 << MAX_TX_SIZE_LOG2)
 #define MIN_TX_SIZE_LOG2 2
@@ -170,10 +178,10 @@ typedef enum {
 } TX_TYPE_1D;

 typedef enum {
-  DCT_DCT = 0,    // DCT in both horizontal and vertical
-  ADST_DCT = 1,   // ADST in vertical, DCT in horizontal
-  DCT_ADST = 2,   // DCT in vertical, ADST in horizontal
-  ADST_ADST = 3,  // ADST in both directions
+  DCT_DCT = 0,     // DCT in both horizontal and vertical
+  ADST_DCT = 1,    // ADST in vertical, DCT in horizontal
+  DCT_ADST = 2,    // DCT in vertical, ADST in horizontal
+  ADST_ADST = 3,   // ADST in both directions
 #if CONFIG_EXT_TX
   FLIPADST_DCT = 4,
   DCT_FLIPADST = 5,
......
@@ -144,7 +144,7 @@ static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,

 static void maybe_flip_strides(uint8_t **dst, int *dstride,
                                tran_low_t **src, int *sstride,
-                               int tx_type, int size) {
+                               int tx_type, int sizey, int sizex) {
   // Note that the transpose of src will be added to dst. In order to LR
   // flip the addends (in dst coordinates), we UD flip the src. To UD flip
   // the addends, we UD flip the dst.
@@ -163,19 +163,19 @@ static void maybe_flip_strides(uint8_t **dst, int *dstride,
     case FLIPADST_ADST:
     case V_FLIPADST:
       // flip UD
-      FLIPUD_PTR(*dst, *dstride, size);
+      FLIPUD_PTR(*dst, *dstride, sizey);
       break;
     case DCT_FLIPADST:
     case ADST_FLIPADST:
     case H_FLIPADST:
       // flip LR
-      FLIPUD_PTR(*src, *sstride, size);
+      FLIPUD_PTR(*src, *sstride, sizex);
       break;
     case FLIPADST_FLIPADST:
       // flip UD
-      FLIPUD_PTR(*dst, *dstride, size);
+      FLIPUD_PTR(*dst, *dstride, sizey);
       // flip LR
-      FLIPUD_PTR(*src, *sstride, size);
+      FLIPUD_PTR(*src, *sstride, sizex);
       break;
     default:
       assert(0);
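
The size parameter splits into sizey/sizex because the two flips walk
different extents: a UD flip reverses the destination rows (height, sizey),
while an LR flip is realized by UD-flipping the transposed source, whose row
count equals the destination width (sizex). For square transforms the two
were interchangeable; for 4x8/8x4 they are not, e.g. the 4x8 inverse below
calls (as in this patch):

  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 8, 4);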
@@ -445,7 +445,7 @@ static void highbd_inv_idtx_add_c(const tran_low_t *input, uint8_t *dest8,

 static void maybe_flip_strides16(uint16_t **dst, int *dstride,
                                  tran_low_t **src, int *sstride,
-                                 int tx_type, int size) {
+                                 int tx_type, int sizey, int sizex) {
   // Note that the transpose of src will be added to dst. In order to LR
   // flip the addends (in dst coordinates), we UD flip the src. To UD flip
   // the addends, we UD flip the dst.
@@ -464,19 +464,19 @@ static void maybe_flip_strides16(uint16_t **dst, int *dstride,
     case FLIPADST_ADST:
     case V_FLIPADST:
       // flip UD
-      FLIPUD_PTR(*dst, *dstride, size);
+      FLIPUD_PTR(*dst, *dstride, sizey);
       break;
     case DCT_FLIPADST:
     case ADST_FLIPADST:
     case H_FLIPADST:
       // flip LR
-      FLIPUD_PTR(*src, *sstride, size);
+      FLIPUD_PTR(*src, *sstride, sizex);
       break;
     case FLIPADST_FLIPADST:
       // flip UD
-      FLIPUD_PTR(*dst, *dstride, size);
+      FLIPUD_PTR(*dst, *dstride, sizey);
       // flip LR
-      FLIPUD_PTR(*src, *sstride, size);
+      FLIPUD_PTR(*src, *sstride, sizex);
       break;
     default:
       assert(0);
@@ -536,7 +536,7 @@ void vp10_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
   }

 #if CONFIG_EXT_TX
-  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4);
+  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4, 4);
 #endif

   // Sum with the destination
@@ -549,6 +549,116 @@ void vp10_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
     }
   }
 }

+void vp10_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
+                          int tx_type) {
+  static const transform_2d IHT_4x8[] = {
+    { idct8_c, idct4_c },    // DCT_DCT
+    { iadst8_c, idct4_c },   // ADST_DCT
+    { idct8_c, iadst4_c },   // DCT_ADST
+    { iadst8_c, iadst4_c },  // ADST_ADST
+#if CONFIG_EXT_TX
+    { iadst8_c, idct4_c },   // FLIPADST_DCT
+    { idct8_c, iadst4_c },   // DCT_FLIPADST
+    { iadst8_c, iadst4_c },  // FLIPADST_FLIPADST
+    { iadst8_c, iadst4_c },  // ADST_FLIPADST
+    { iadst8_c, iadst4_c },  // FLIPADST_ADST
+    { iidtx8_c, iidtx4_c },  // IDTX
+    { idct8_c, iidtx4_c },   // V_DCT
+    { iidtx8_c, idct4_c },   // H_DCT
+    { iadst8_c, iidtx4_c },  // V_ADST
+    { iidtx8_c, iadst4_c },  // H_ADST
+    { iadst8_c, iidtx4_c },  // V_FLIPADST
+    { iidtx8_c, iadst4_c },  // H_FLIPADST
+#endif  // CONFIG_EXT_TX
+  };
+
+  int i, j;
+  tran_low_t out[4][8], outtmp[4];
+  tran_low_t *outp = &out[0][0];
+  int outstride = 8;
+
+  // inverse transform row vectors and transpose
+  for (i = 0; i < 8; ++i) {
+    IHT_4x8[tx_type].rows(input, outtmp);
+    for (j = 0; j < 4; ++j)
+      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
+    input += 4;
+  }
+
+  // inverse transform column vectors
+  for (i = 0; i < 4; ++i) {
+    IHT_4x8[tx_type].cols(out[i], out[i]);
+  }
+
+#if CONFIG_EXT_TX
+  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 8, 4);
+#endif
+
+  // Sum with the destination
+  for (i = 0; i < 8; ++i) {
+    for (j = 0; j < 4; ++j) {
+      int d = i * stride + j;
+      int s = j * outstride + i;
+      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
+    }
+  }
+}
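
A note on the Sqrt2 factor in the row pass (a sketch of the normalization
argument, not text from the patch): the square inverse transforms add one
bit to the final rounding shift each time both dimensions double (shift 4
for 4x4, 5 for 8x8), i.e. each 1D length doubling contributes a gain of
sqrt(2). A 4x8 block doubles only one dimension, so its natural gain falls a
factor of sqrt(2) short of the 8x8 case; scaling each row output by sqrt(2)
(the Sqrt2 constant, rounded via dct_const_round_shift) restores a
power-of-two total, which is why the add-back loop can reuse the 8x8 shift
in ROUND_POWER_OF_TWO(outp[s], 5).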
+void vp10_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
+                          int tx_type) {
+  static const transform_2d IHT_8x4[] = {
+    { idct4_c, idct8_c },    // DCT_DCT
+    { iadst4_c, idct8_c },   // ADST_DCT
+    { idct4_c, iadst8_c },   // DCT_ADST
+    { iadst4_c, iadst8_c },  // ADST_ADST
+#if CONFIG_EXT_TX
+    { iadst4_c, idct8_c },   // FLIPADST_DCT
+    { idct4_c, iadst8_c },   // DCT_FLIPADST
+    { iadst4_c, iadst8_c },  // FLIPADST_FLIPADST
+    { iadst4_c, iadst8_c },  // ADST_FLIPADST
+    { iadst4_c, iadst8_c },  // FLIPADST_ADST
+    { iidtx4_c, iidtx8_c },  // IDTX
+    { idct4_c, iidtx8_c },   // V_DCT
+    { iidtx4_c, idct8_c },   // H_DCT
+    { iadst4_c, iidtx8_c },  // V_ADST
+    { iidtx4_c, iadst8_c },  // H_ADST
+    { iadst4_c, iidtx8_c },  // V_FLIPADST
+    { iidtx4_c, iadst8_c },  // H_FLIPADST
+#endif  // CONFIG_EXT_TX
+  };
+
+  int i, j;
+  tran_low_t out[8][4], outtmp[8];
+  tran_low_t *outp = &out[0][0];
+  int outstride = 4;
+
+  // inverse transform row vectors and transpose
+  for (i = 0; i < 4; ++i) {
+    IHT_8x4[tx_type].rows(input, outtmp);
+    for (j = 0; j < 8; ++j)
+      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
+    input += 8;
+  }
+
+  // inverse transform column vectors
+  for (i = 0; i < 8; ++i) {
+    IHT_8x4[tx_type].cols(out[i], out[i]);
+  }
+
+#if CONFIG_EXT_TX
+  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4, 8);
+#endif