Commit 570423cf authored by Debargha Mukherjee

Misc fixes for 32x64 and 64x32 transforms

Change-Id: Ic843e99bd9b79cb9a0a26b95e3a48717ff2ec2a5
parent 5e70a114
@@ -91,6 +91,7 @@ static const tran_high_t sinpi_4_9 = 15212;
// 16384 * sqrt(2)
static const tran_high_t Sqrt2 = 23170;
static const tran_high_t InvSqrt2 = 11585;
static INLINE tran_high_t fdct_round_shift(tran_high_t input) {
tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
......
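
For context, Sqrt2 and InvSqrt2 are the same irrational factor expressed in Q14 fixed point, i.e. against a 16384 (2^14) unit, as the "16384 * sqrt(2)" comment notes; the inverse constant is what the rectangular inverse transforms further down need to undo the forward sqrt(2) scaling. A quick derivation sketch:

#include <math.h>
#include <stdio.h>

/* Derivation of the Q14 constants used above (rounded to nearest integer). */
int main(void) {
  printf("16384 * sqrt(2) = %.2f -> 23170 (Sqrt2)\n", 16384.0 * sqrt(2.0));
  printf("16384 / sqrt(2) = %.2f -> 11585 (InvSqrt2)\n", 16384.0 / sqrt(2.0));
  return 0;
}
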
@@ -1547,6 +1547,16 @@ void av1_fidentity32_c(const int32_t *input, int32_t *output,
for (int i = 0; i < 32; ++i) output[i] = input[i] * 4;
range_check(0, input, output, 32, stage_range[0]);
}
#if CONFIG_TX64X64
void av1_fidentity64_c(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range) {
(void)cos_bit;
for (int i = 0; i < 64; ++i)
output[i] = (int32_t)dct_const_round_shift(input[i] * 4 * Sqrt2);
range_check(0, input, output, 64, stage_range[0]);
}
#endif // CONFIG_TX64X64
#endif // CONFIG_EXT_TX
#if CONFIG_TX64X64
......
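
The new 64-point identity kernels (forward here, inverse later in the patch) appear to extend the pattern set by av1_fidentity32_c: doubling the transform length multiplies the per-sample gain by sqrt(2), so identity32 scales by 4 and identity64 by 4 * sqrt(2), with the irrational factor applied in Q14 via Sqrt2 and dct_const_round_shift. A simplified standalone sketch of the same arithmetic, assuming DCT_CONST_BITS == 14 and omitting the library's range checks:

#include <stdint.h>
#include <stdio.h>

#define DCT_CONST_BITS 14
#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))
static const int64_t kSqrt2 = 23170; /* 16384 * sqrt(2) */

/* Simplified mirror of the new 64-point forward identity kernel. */
static void fidentity64_sketch(const int32_t *input, int32_t *output) {
  for (int i = 0; i < 64; ++i) {
    /* Gain of 4 * sqrt(2) ~= 5.657: one sqrt(2) step above identity32's 4. */
    output[i] = (int32_t)ROUND_POWER_OF_TWO((int64_t)input[i] * 4 * kSqrt2,
                                            DCT_CONST_BITS);
  }
}

int main(void) {
  int32_t in[64], out[64];
  for (int i = 0; i < 64; ++i) in[i] = i + 1;
  fidentity64_sketch(in, out);
  printf("in[9]=%d  out[9]=%d  (gain ~= 5.657)\n", in[9], out[9]);
  return 0;
}
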
@@ -26,8 +26,10 @@ void av1_fdct16_new(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
void av1_fdct32_new(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
#if CONFIG_TX64X64
void av1_fdct64_new(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
#endif // CONFIG_TX64X64
void av1_fadst4_new(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
@@ -46,6 +48,10 @@ void av1_fidentity16_c(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
void av1_fidentity32_c(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
#if CONFIG_TX64X64
void av1_fidentity64_c(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
#endif // CONFIG_TX64X64
#endif // CONFIG_EXT_TX
#ifdef __cplusplus
......
@@ -36,6 +36,9 @@ static INLINE TxfmFunc fwd_txfm_type_to_func(TXFM_TYPE txfm_type) {
case TXFM_TYPE_IDENTITY8: return av1_fidentity8_c;
case TXFM_TYPE_IDENTITY16: return av1_fidentity16_c;
case TXFM_TYPE_IDENTITY32: return av1_fidentity32_c;
#if CONFIG_TX64X64
case TXFM_TYPE_IDENTITY64: return av1_fidentity64_c;
#endif // CONFIG_TX64X64
#endif // CONFIG_EXT_TX
default: assert(0); return NULL;
}
......
@@ -1593,6 +1593,16 @@ void av1_iidentity32_c(const int32_t *input, int32_t *output,
for (int i = 0; i < 32; ++i) output[i] = input[i] * 4;
range_check(0, input, output, 32, stage_range[0]);
}
#if CONFIG_TX64X64
void av1_iidentity64_c(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range) {
(void)cos_bit;
for (int i = 0; i < 64; ++i)
output[i] = (int32_t)dct_const_round_shift(input[i] * 4 * Sqrt2);
range_check(0, input, output, 64, stage_range[0]);
}
#endif // CONFIG_TX64X64
#endif // CONFIG_EXT_TX
#if CONFIG_TX64X64
......
@@ -26,8 +26,10 @@ void av1_idct16_new(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
void av1_idct32_new(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
#if CONFIG_TX64X64
void av1_idct64_new(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
#endif // CONFIG_TX64X64
void av1_iadst4_new(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
@@ -46,6 +48,10 @@ void av1_iidentity16_c(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
void av1_iidentity32_c(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
#if CONFIG_TX64X64
void av1_iidentity64_c(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
#endif // CONFIG_TX64X64
#endif // CONFIG_EXT_TX
#ifdef __cplusplus
......
@@ -173,6 +173,7 @@ static const TXFM_1D_CFG inv_txfm_1d_row_cfg_dct_32 = {
TXFM_TYPE_DCT32 // .txfm_type
};
#if CONFIG_TX64X64
// ---------------- row config inv_dct_64 ----------------
static const TXFM_1D_CFG inv_txfm_1d_row_cfg_dct_64 = {
64, // .txfm_size
@@ -182,6 +183,7 @@ static const TXFM_1D_CFG inv_txfm_1d_row_cfg_dct_64 = {
inv_cos_bit_row_dct_64, // .cos_bit
TXFM_TYPE_DCT64, // .txfm_type_col
};
#endif // CONFIG_TX64X64
// ---------------- row config inv_adst_4 ----------------
static const TXFM_1D_CFG inv_txfm_1d_row_cfg_adst_4 = {
@@ -353,5 +355,17 @@ static const TXFM_1D_CFG inv_txfm_1d_cfg_identity_32 = {
NULL, // .cos_bit
TXFM_TYPE_IDENTITY32, // .txfm_type
};
#if CONFIG_TX64X64
// ---------------- row/col config inv_identity_64 ----------------
static const TXFM_1D_CFG inv_txfm_1d_cfg_identity_64 = {
64, // .txfm_size
1, // .stage_num
inv_shift_64, // .shift
inv_stage_range_idx_64, // .stage_range
NULL, // .cos_bit
TXFM_TYPE_IDENTITY64, // .txfm_type
};
#endif // CONFIG_TX64X64
#endif // CONFIG_EXT_TX
#endif // AV1_INV_TXFM2D_CFG_H_
@@ -34,6 +34,9 @@ static INLINE TxfmFunc inv_txfm_type_to_func(TXFM_TYPE txfm_type) {
case TXFM_TYPE_IDENTITY8: return av1_iidentity8_c;
case TXFM_TYPE_IDENTITY16: return av1_iidentity16_c;
case TXFM_TYPE_IDENTITY32: return av1_iidentity32_c;
#if CONFIG_TX64X64
case TXFM_TYPE_IDENTITY64: return av1_iidentity64_c;
#endif // CONFIG_TX64X64
#endif // CONFIG_EXT_TX
default: assert(0); return NULL;
}
@@ -46,14 +49,22 @@ static const TXFM_1D_CFG *inv_txfm_col_cfg_ls[TX_TYPES_1D][TX_SIZES] = {
NULL,
#endif
&inv_txfm_1d_col_cfg_dct_4, &inv_txfm_1d_col_cfg_dct_8,
&inv_txfm_1d_col_cfg_dct_16, &inv_txfm_1d_col_cfg_dct_32 },
&inv_txfm_1d_col_cfg_dct_16, &inv_txfm_1d_col_cfg_dct_32,
#if CONFIG_TX64X64
&inv_txfm_1d_col_cfg_dct_64
#endif // CONFIG_TX64X64
},
// ADST
{
#if CONFIG_CHROMA_2X2
NULL,
#endif
&inv_txfm_1d_col_cfg_adst_4, &inv_txfm_1d_col_cfg_adst_8,
&inv_txfm_1d_col_cfg_adst_16, &inv_txfm_1d_col_cfg_adst_32 },
&inv_txfm_1d_col_cfg_adst_16, &inv_txfm_1d_col_cfg_adst_32,
#if CONFIG_TX64X64
NULL
#endif // CONFIG_TX64X64
},
#if CONFIG_EXT_TX
// FLIPADST
{
@@ -61,14 +72,22 @@ static const TXFM_1D_CFG *inv_txfm_col_cfg_ls[TX_TYPES_1D][TX_SIZES] = {
NULL,
#endif
&inv_txfm_1d_col_cfg_adst_4, &inv_txfm_1d_col_cfg_adst_8,
&inv_txfm_1d_col_cfg_adst_16, &inv_txfm_1d_col_cfg_adst_32 },
&inv_txfm_1d_col_cfg_adst_16, &inv_txfm_1d_col_cfg_adst_32,
#if CONFIG_TX64X64
NULL
#endif // CONFIG_TX64X64
},
// IDENTITY
{
#if CONFIG_CHROMA_2X2
NULL,
#endif
&inv_txfm_1d_cfg_identity_4, &inv_txfm_1d_cfg_identity_8,
&inv_txfm_1d_cfg_identity_16, &inv_txfm_1d_cfg_identity_32 },
&inv_txfm_1d_cfg_identity_16, &inv_txfm_1d_cfg_identity_32,
#if CONFIG_TX64X64
&inv_txfm_1d_cfg_identity_64
#endif // CONFIG_TX64X64
},
#endif // CONFIG_EXT_TX
};
@@ -79,14 +98,22 @@ static const TXFM_1D_CFG *inv_txfm_row_cfg_ls[TX_TYPES_1D][TX_SIZES] = {
NULL,
#endif
&inv_txfm_1d_row_cfg_dct_4, &inv_txfm_1d_row_cfg_dct_8,
&inv_txfm_1d_row_cfg_dct_16, &inv_txfm_1d_row_cfg_dct_32 },
&inv_txfm_1d_row_cfg_dct_16, &inv_txfm_1d_row_cfg_dct_32,
#if CONFIG_TX64X64
&inv_txfm_1d_row_cfg_dct_64,
#endif // CONFIG_TX64X64
},
// ADST
{
#if CONFIG_CHROMA_2X2
NULL,
#endif
&inv_txfm_1d_row_cfg_adst_4, &inv_txfm_1d_row_cfg_adst_8,
&inv_txfm_1d_row_cfg_adst_16, &inv_txfm_1d_row_cfg_adst_32 },
&inv_txfm_1d_row_cfg_adst_16, &inv_txfm_1d_row_cfg_adst_32,
#if CONFIG_TX64X64
NULL
#endif // CONFIG_TX64X64
},
#if CONFIG_EXT_TX
// FLIPADST
{
@@ -94,14 +121,22 @@ static const TXFM_1D_CFG *inv_txfm_row_cfg_ls[TX_TYPES_1D][TX_SIZES] = {
NULL,
#endif
&inv_txfm_1d_row_cfg_adst_4, &inv_txfm_1d_row_cfg_adst_8,
&inv_txfm_1d_row_cfg_adst_16, &inv_txfm_1d_row_cfg_adst_32 },
&inv_txfm_1d_row_cfg_adst_16, &inv_txfm_1d_row_cfg_adst_32,
#if CONFIG_TX64X64
NULL
#endif // CONFIG_TX64X64
},
// IDENTITY
{
#if CONFIG_CHROMA_2X2
NULL,
#endif
&inv_txfm_1d_cfg_identity_4, &inv_txfm_1d_cfg_identity_8,
&inv_txfm_1d_cfg_identity_16, &inv_txfm_1d_cfg_identity_32 },
&inv_txfm_1d_cfg_identity_16, &inv_txfm_1d_cfg_identity_32,
#if CONFIG_TX64X64
&inv_txfm_1d_cfg_identity_64
#endif // CONFIG_TX64X64
},
#endif // CONFIG_EXT_TX
};
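
The row and column config tables are indexed first by 1-D transform family and then by transform size, so TX64X64 support means appending one more size column to every row: real configs for the DCT and IDENTITY rows, and NULL placeholders for the ADST/FLIPADST rows, which have no 64-point kernel. A toy sketch of that lookup shape (the enum and string names below are illustrative stand-ins, not the library's):

#include <stdio.h>

/* Toy [1-D type][size] lookup mirroring the shape of the tables above;
 * names are hypothetical stand-ins for the library's enums and configs. */
typedef enum { TOY_DCT, TOY_ADST, TOY_IDTX, TOY_TYPES } ToyTxType1D;
typedef enum { TOY_S4, TOY_S8, TOY_S16, TOY_S32, TOY_S64, TOY_SIZES } ToyTxSize;

static const char *const toy_cfg[TOY_TYPES][TOY_SIZES] = {
  /* DCT  */ { "dct_4", "dct_8", "dct_16", "dct_32", "dct_64" },
  /* ADST */ { "adst_4", "adst_8", "adst_16", "adst_32", NULL /* no 64-pt ADST */ },
  /* IDTX */ { "idtx_4", "idtx_8", "idtx_16", "idtx_32", "idtx_64" },
};

int main(void) {
  const char *cfg = toy_cfg[TOY_ADST][TOY_S64];
  printf("ADST @ 64: %s\n", cfg ? cfg : "(NULL placeholder)");
  return 0;
}
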
@@ -117,6 +152,7 @@ TXFM_2D_FLIP_CFG av1_get_inv_txfm_cfg(int tx_type, int tx_size) {
return cfg;
}
#if CONFIG_TX64X64
TXFM_2D_FLIP_CFG av1_get_inv_txfm_64x64_cfg(int tx_type) {
TXFM_2D_FLIP_CFG cfg = { 0, 0, NULL, NULL };
switch (tx_type) {
@@ -130,6 +166,33 @@ TXFM_2D_FLIP_CFG av1_get_inv_txfm_64x64_cfg(int tx_type) {
return cfg;
}
TXFM_2D_FLIP_CFG av1_get_inv_txfm_32x64_cfg(int tx_type) {
TXFM_2D_FLIP_CFG cfg = { 0, 0, NULL, NULL };
switch (tx_type) {
case DCT_DCT:
cfg.col_cfg = &inv_txfm_1d_col_cfg_dct_64;
cfg.row_cfg = &inv_txfm_1d_row_cfg_dct_32;
set_flip_cfg(tx_type, &cfg);
break;
default: assert(0);
}
return cfg;
}
TXFM_2D_FLIP_CFG av1_get_inv_txfm_64x32_cfg(int tx_type) {
TXFM_2D_FLIP_CFG cfg = { 0, 0, NULL, NULL };
switch (tx_type) {
case DCT_DCT:
cfg.col_cfg = &inv_txfm_1d_col_cfg_dct_32;
cfg.row_cfg = &inv_txfm_1d_row_cfg_dct_64;
set_flip_cfg(tx_type, &cfg);
break;
default: assert(0);
}
return cfg;
}
#endif // CONFIG_TX64X64
void av1_gen_inv_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
const TXFM_2D_FLIP_CFG *cfg, int8_t fwd_shift,
int bd) {
@@ -353,15 +416,30 @@ void av1_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output,
void av1_inv_txfm2d_add_64x64_c(const int32_t *input, uint16_t *output,
int stride, int tx_type, int bd) {
int txfm_buf[64 * 64 + 64 + 64];
TXFM_2D_FLIP_CFG cfg = av1_get_inv_txfm_64x64_cfg(tx_type);
inv_txfm2d_add_c(input, output, stride, &cfg, txfm_buf, -4, bd);
assert(fwd_shift_sum[TX_64X64] == -4);
inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_64X64, bd);
}
void av1_inv_txfm2d_add_64x32_c(const int32_t *input, uint16_t *output,
int stride, int tx_type, int bd) {
#if CONFIG_TXMG
int txfm_buf[64 * 32 + 64 + 64];
inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X64, bd);
int32_t rinput[64 * 32];
uint16_t routput[64 * 32];
int tx_size = TX_64X32;
int rtx_size = av1_rotate_tx_size(tx_size);
int rtx_type = av1_rotate_tx_type(tx_type);
int w = tx_size_wide[tx_size];
int h = tx_size_high[tx_size];
int rw = h;
int rh = w;
transpose_int32(rinput, rw, input, w, w, h);
transpose_uint16(routput, rw, output, stride, w, h);
inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
transpose_uint16(output, stride, routput, rw, rw, rh);
#else
int txfm_buf[64 * 32 + 64 + 64];
inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_64X32, bd);
#endif
}
void av1_inv_txfm2d_add_32x64_c(const int32_t *input, uint16_t *output,
......
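
Under CONFIG_TXMG the 64x32 inverse is handled by transposition: the input is transposed into the rotated 32x64 layout (av1_rotate_tx_size / av1_rotate_tx_type pick the rotated size and type), the regular facade runs on that rotated block, and the result is transposed back into the caller's destination. A stripped-down sketch of that transpose-transform-transpose pattern, using a hypothetical callback in place of inv_txfm2d_add_facade and plain int32 buffers instead of the uint16 pixel destination:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Transpose an h-row by w-column block of int32 samples. */
static void transpose_i32(int32_t *dst, int dst_stride, const int32_t *src,
                          int src_stride, int w, int h) {
  for (int r = 0; r < h; ++r)
    for (int c = 0; c < w; ++c) dst[c * dst_stride + r] = src[r * src_stride + c];
}

typedef void (*TxfmHW)(const int32_t *in, int32_t *out, int stride, int w, int h);

/* Process a w x h block with a transform that only knows the rotated geometry. */
static void rect_txfm_via_transpose(const int32_t *input, int32_t *output,
                                    int stride, int w, int h, TxfmHW txfm_hw) {
  int32_t rin[64 * 32], rout[64 * 32]; /* large enough for the 64x32 case */
  const int rw = h, rh = w;            /* rotated (transposed) geometry    */
  transpose_i32(rin, rw, input, w, w, h);          /* w x h block -> rw x rh  */
  txfm_hw(rin, rout, rw, rw, rh);                  /* transform rotated block */
  transpose_i32(output, stride, rout, rw, rw, rh); /* rotate result back      */
}

/* Trivial stand-in "transform": a copy, so the round trip reproduces the input. */
static void copy_txfm(const int32_t *in, int32_t *out, int stride, int w, int h) {
  (void)w;
  memcpy(out, in, (size_t)stride * (size_t)h * sizeof(*out));
}

int main(void) {
  static int32_t in[64 * 32], out[64 * 32];
  for (int i = 0; i < 64 * 32; ++i) in[i] = i;
  rect_txfm_via_transpose(in, out, 64, 64, 32, copy_txfm);
  printf("round trip %s\n", memcmp(in, out, sizeof(in)) == 0 ? "ok" : "mismatch");
  return 0;
}
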
@@ -134,6 +134,7 @@ typedef enum TXFM_TYPE {
TXFM_TYPE_IDENTITY8,
TXFM_TYPE_IDENTITY16,
TXFM_TYPE_IDENTITY32,
TXFM_TYPE_IDENTITY64,
} TXFM_TYPE;
typedef struct TXFM_1D_CFG {
......
@@ -988,9 +988,9 @@ static INLINE TxSetType get_ext_tx_set_type(TX_SIZE tx_size, BLOCK_SIZE bs,
const TX_SIZE tx_size_sqr = txsize_sqr_map[tx_size];
#if CONFIG_CB4X4 && USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
(void)bs;
if (tx_size_sqr > TX_32X32) return EXT_TX_SET_DCTONLY;
if (tx_size_sqr_up > TX_32X32) return EXT_TX_SET_DCTONLY;
#else
if (tx_size_sqr > TX_32X32 || bs < BLOCK_8X8) return EXT_TX_SET_DCTONLY;
if (tx_size_sqr_up > TX_32X32 || bs < BLOCK_8X8) return EXT_TX_SET_DCTONLY;
#endif
if (use_reduced_set)
return is_inter ? EXT_TX_SET_DCT_IDTX : EXT_TX_SET_DTT4_IDTX;
......
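
This is the selection-side counterpart of the fix: get_ext_tx_set_type now keys off the square size rounded up rather than down. For the rectangular 32x64 and 64x32 sizes the rounded-down square is only 32x32, so the old test let the extended transform sets through even though one dimension needs a 64-point kernel that exists only for DCT; rounding up to 64x64 forces EXT_TX_SET_DCTONLY. A toy illustration of the two checks (the map values are assumptions mirroring how the sqr / sqr_up maps treat other rectangular sizes):

#include <stdio.h>

/* Toy enum standing in for the relevant TX_SIZE values. */
enum { TOY_TX_32X32 = 0, TOY_TX_64X64 = 1 };

int main(void) {
  /* Assumed behaviour for a 32x64 transform: */
  int sqr = TOY_TX_32X32;    /* txsize_sqr_map rounds the rectangle down */
  int sqr_up = TOY_TX_64X64; /* txsize_sqr_up_map rounds it up           */
  printf("old check (sqr    > 32x32): %d -> extended tx sets allowed\n",
         sqr > TOY_TX_32X32);
  printf("new check (sqr_up > 32x32): %d -> EXT_TX_SET_DCTONLY\n",
         sqr_up > TOY_TX_32X32);
  return 0;
}
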
@@ -665,9 +665,9 @@ static const TX_SIZE max_txsize_rect_lookup[BLOCK_SIZES_ALL] = {
TX_8X16, TX_16X8, TX_16X16,
// 16X32, 32X16, 32X32
TX_16X32, TX_32X16, TX_32X32,
#if CONFIG_TX64X64
// 32X64, 64X32,
TX_32X32, TX_32X32,
#if CONFIG_TX64X64
// 64X64
TX_64X64,
#if CONFIG_EXT_PARTITION
@@ -675,6 +675,8 @@ static const TX_SIZE max_txsize_rect_lookup[BLOCK_SIZES_ALL] = {
TX_64X64, TX_64X64, TX_64X64,
#endif // CONFIG_EXT_PARTITION
#else
// 32X64, 64X32,
TX_32X32, TX_32X32,
// 64X64
TX_32X32,
#if CONFIG_EXT_PARTITION
@@ -775,9 +777,9 @@ static const int32_t intra_tx_size_cat_lookup[BLOCK_SIZES_ALL] = {
TX_16X16 - TX_8X8, TX_16X16 - TX_8X8, TX_16X16 - TX_8X8,
// 16X32, 32X16, 32X32
TX_32X32 - TX_8X8, TX_32X32 - TX_8X8, TX_32X32 - TX_8X8,
// 32X64, 64X32,
TX_32X32 - TX_8X8, TX_32X32 - TX_8X8,
#if CONFIG_TX64X64
// 32X64, 64X32,
TX_64X64 - TX_8X8, TX_64X64 - TX_8X8,
// 64X64
TX_64X64 - TX_8X8,
#if CONFIG_EXT_PARTITION
@@ -785,6 +787,8 @@ static const int32_t intra_tx_size_cat_lookup[BLOCK_SIZES_ALL] = {
TX_64X64 - TX_8X8, TX_64X64 - TX_8X8, TX_64X64 - TX_8X8,
#endif // CONFIG_EXT_PARTITION
#else
// 32X64, 64X32,
TX_32X32 - TX_8X8, TX_32X32 - TX_8X8,
// 64X64
TX_32X32 - TX_8X8,
#if CONFIG_EXT_PARTITION
@@ -818,9 +822,9 @@ static const int32_t intra_tx_size_cat_lookup[BLOCK_SIZES_ALL] = {
TX_8X8 - TX_8X8, TX_8X8 - TX_8X8, TX_16X16 - TX_8X8,
// 16X32, 32X16, 32X32
TX_16X16 - TX_8X8, TX_16X16 - TX_8X8, TX_32X32 - TX_8X8,
// 32X64, 64X32,
TX_32X32 - TX_8X8, TX_32X32 - TX_8X8,
#if CONFIG_TX64X64
// 32X64, 64X32,
TX_64X64 - TX_8X8, TX_64X64 - TX_8X8,
// 64X64
TX_64X64 - TX_8X8,
#if CONFIG_EXT_PARTITION
@@ -828,6 +832,8 @@ static const int32_t intra_tx_size_cat_lookup[BLOCK_SIZES_ALL] = {
TX_64X64 - TX_8X8, TX_64X64 - TX_8X8, TX_64X64 - TX_8X8,
#endif // CONFIG_EXT_PARTITION
#else
// 32X64, 64X32,
TX_32X32 - TX_8X8, TX_32X32 - TX_8X8,
// 64X64
TX_32X32 - TX_8X8,
#if CONFIG_EXT_PARTITION
......
@@ -1566,7 +1566,7 @@ void av1_iht64x32_2048_add_c(const tran_low_t *input, uint8_t *dest, int stride,
for (i = 0; i < n; ++i) {
IHT_64x32[tx_type].rows(input, outtmp);
for (j = 0; j < n2; ++j)
tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * InvSqrt2);
input += n2;
}
@@ -1628,7 +1628,7 @@ void av1_iht32x64_2048_add_c(const tran_low_t *input, uint8_t *dest, int stride,
for (i = 0; i < n2; ++i) {
IHT_32x64[tx_type].rows(input, outtmp);
for (j = 0; j < n; ++j)
tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * InvSqrt2);
input += n;
}
@@ -2107,6 +2107,7 @@ static void inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
static void inv_txfm_add_64x64(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
const TX_TYPE tx_type = txfm_param->tx_type;
assert(tx_type == DCT_DCT);
switch (tx_type) {
#if !CONFIG_DAALA_DCT64
case DCT_DCT: idct64x64_add(input, dest, stride, txfm_param); break;
......
@@ -4572,7 +4572,7 @@ DECLARE_ALIGNED(16, static const int16_t,
1726, 1789, 1789, 1852, 1852, 1915, 1915, 1978, 1978, 2041, 1727, 1790, 1790,
1853, 1853, 1916, 1916, 1979, 1979, 2042, 1791, 1854, 1854, 1917, 1917, 1980,
1980, 2043, 1855, 1918, 1918, 1981, 1981, 2044, 1919, 1982, 1982, 2045, 1983,
2046,
2046, 0, 0
};
DECLARE_ALIGNED(16, static const int16_t,
......