Commit 1158bff7 authored by Debargha Mukherjee's avatar Debargha Mukherjee

Various fixes to scale managed txfms and tests

This patch clears all test failures with coeff range checking
enabled for forward and inverse transforms. Also this
patch ensures that there are no transposes for any of the
rectangular transforms.
Some fine-tuning and refactoring are still pending.
Some of the tests still need to be rewritten.

Change-Id: Ib0e3a4ceccef665ba007d121f536fad7135f38d5
parent f0930dca
......@@ -16,8 +16,13 @@
// sum of fwd_shift_##
#if CONFIG_TX64X64
static const int8_t fwd_shift_sum[TX_SIZES] = { 2, 1, 0, -2, -4 };
#else // CONFIG_TX64X64
static const int8_t inv_start_range[TX_SIZES_ALL] = {
5, 6, 7, 7, 7, 6, 6, 7, 7, 7, 7, 7, 7, 6, 6, 7, 7, 7, 7
};
#else // CONFIG_TX64X64
static const int8_t fwd_shift_sum[TX_SIZES] = { 2, 1, 0, -2 };
static const int8_t inv_start_range[TX_SIZES_ALL] = { 5, 6, 7, 7, 6, 6, 7,
7, 7, 7, 6, 6, 7, 7 };
#endif // CONFIG_TX64X64
// ---------------- 4x4 1D config -----------------------
......@@ -25,7 +30,7 @@ static const int8_t fwd_shift_sum[TX_SIZES] = { 2, 1, 0, -2 };
static const int8_t inv_shift_4[2] = { 0, -4 };
// stage range
static const int8_t inv_stage_range_col_dct_4[4] = { 3, 3, 2, 2 };
static const int8_t inv_stage_range_col_dct_4[4] = { 3, 3, 3, 3 };
static const int8_t inv_stage_range_row_dct_4[4] = { 3, 3, 3, 3 };
static const int8_t inv_stage_range_col_adst_4[6] = { 3, 3, 3, 3, 2, 2 };
static const int8_t inv_stage_range_row_adst_4[6] = { 3, 3, 3, 3, 3, 3 };
......@@ -111,11 +116,18 @@ static const int8_t inv_cos_bit_row_adst_16[10] = { 12, 12, 12, 12, 12,
// ---------------- 8x16 1D constants -----------------------
#define inv_shift_8x16 inv_shift_16
// stage range
// Per-stage intermediate bit-range tables for the inverse 8x16 transform.
// Row tables cover the 8-point 1D transforms (6 stages for DCT8, 8 for
// ADST8 — matching the stage_num of the row cfg structs that reference
// them); col tables cover the 16-point transforms. ARRAYOFFSET8/10
// presumably add the first argument (-2) to each following element —
// confirm against the macro definitions.
static const int8_t inv_stage_range_row_dct_8x16[6] = { 5, 5, 5, 5, 5, 5 };
static const int8_t inv_stage_range_row_adst_8x16[8] = {
5, 5, 5, 5, 5, 5, 5, 5
};
static const int8_t inv_stage_range_col_dct_8x16[8] =
ARRAYOFFSET8(-2, 7, 7, 7, 7, 7, 7, 7, 7);
static const int8_t inv_stage_range_col_adst_8x16[10] =
ARRAYOFFSET10(-2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7);
// cos bit
// Per-stage cos-bit tables for the inverse 8x16 transform (fed to the
// 1D cfg structs' .cos_bit field): 12 for the 8-point row transforms,
// 13 for the 16-point column DCT. Presumably the fixed-point precision
// of the cosine constants at each stage — confirm against the 1D
// transform kernels.
static const int8_t inv_cos_bit_row_dct_8x16[6] = { 12, 12, 12, 12, 12, 12 };
static const int8_t inv_cos_bit_row_adst_8x16[8] = { 12, 12, 12, 12,
12, 12, 12, 12 };
static const int8_t inv_cos_bit_col_dct_8x16[8] = { 13, 13, 13, 13,
13, 13, 13, 13 };
static const int8_t inv_cos_bit_col_adst_8x16[10] = { 13, 13, 13, 13, 13,
......@@ -249,11 +261,18 @@ static const int8_t inv_cos_bit_col_adst_16x4[6] = { 13, 13, 13, 13, 13, 13 };
// ---------------- 8x32 1D constants -----------------------
#define inv_shift_8x32 inv_shift_32
// stage range
// Per-stage intermediate bit-range tables for the inverse 8x32 transform.
// Row tables cover the 8-point 1D transforms (6 stages for DCT8, 8 for
// ADST8); col tables cover the 32-point transforms. ARRAYOFFSET10/12
// presumably add the first argument (-4) to each following element —
// confirm against the macro definitions.
static const int8_t inv_stage_range_row_dct_8x32[6] = { 5, 5, 5, 5, 5, 5 };
static const int8_t inv_stage_range_row_adst_8x32[8] = {
5, 5, 5, 5, 5, 5, 5, 5
};
static const int8_t inv_stage_range_col_dct_8x32[10] =
ARRAYOFFSET10(-4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9);
static const int8_t inv_stage_range_col_adst_8x32[12] =
ARRAYOFFSET12(-4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9);
// cos bit
// Per-stage cos-bit tables for the inverse 8x32 transform: 12 for the
// 8-point row transforms, 13 for the 32-point column DCT.
static const int8_t inv_cos_bit_row_dct_8x32[6] = { 12, 12, 12, 12, 12, 12 };
static const int8_t inv_cos_bit_row_adst_8x32[8] = { 12, 12, 12, 12,
12, 12, 12, 12 };
static const int8_t inv_cos_bit_col_dct_8x32[10] = { 13, 13, 13, 13, 13,
13, 13, 13, 13, 13 };
static const int8_t inv_cos_bit_col_adst_8x32[12] = { 13, 13, 13, 13, 13, 13,
......@@ -692,6 +711,16 @@ static const TXFM_1D_CFG inv_txfm_1d_col_cfg_adst_4x16 = {
TXFM_TYPE_ADST16, // .txfm_type
};
// ---------------- row config inv_dct_8x16 ----------------
// 1D row (horizontal) configuration for the inverse DCT of the 8x16
// transform: an 8-point, 6-stage DCT8 using the dedicated 8x16 row
// stage-range and cos-bit tables (so no transpose is needed for this
// rectangular size).
static const TXFM_1D_CFG inv_txfm_1d_row_cfg_dct_8x16 = {
8, // .txfm_size
6, // .stage_num
inv_shift_8x16, // .shift
inv_stage_range_row_dct_8x16, // .stage_range
inv_cos_bit_row_dct_8x16, // .cos_bit
TXFM_TYPE_DCT8 // .txfm_type
};
// ---------------- col config inv_dct_8x16 ----------------
static const TXFM_1D_CFG inv_txfm_1d_col_cfg_dct_8x16 = {
16, // .txfm_size
......@@ -702,6 +731,16 @@ static const TXFM_1D_CFG inv_txfm_1d_col_cfg_dct_8x16 = {
TXFM_TYPE_DCT16 // .txfm_type
};
// ---------------- row config inv_adst_8x16 ----------------
// 1D row (horizontal) configuration for the inverse ADST of the 8x16
// transform: an 8-point, 8-stage ADST8 using the dedicated 8x16 row
// stage-range and cos-bit tables.
static const TXFM_1D_CFG inv_txfm_1d_row_cfg_adst_8x16 = {
8, // .txfm_size
8, // .stage_num
inv_shift_8x16, // .shift
inv_stage_range_row_adst_8x16, // .stage_range
inv_cos_bit_row_adst_8x16, // .cos_bit
TXFM_TYPE_ADST8, // .txfm_type
};
// ---------------- col config inv_adst_8x16 ----------------
static const TXFM_1D_CFG inv_txfm_1d_col_cfg_adst_8x16 = {
16, // .txfm_size
......@@ -744,6 +783,16 @@ static const TXFM_1D_CFG inv_txfm_1d_col_cfg_dct_64x16 = {
};
#endif // CONFIG_TX64X64
// ---------------- row config inv_dct_8x32 ----------------
// 1D row (horizontal) configuration for the inverse DCT of the 8x32
// transform: an 8-point, 6-stage DCT8 using the dedicated 8x32 row
// stage-range and cos-bit tables.
static const TXFM_1D_CFG inv_txfm_1d_row_cfg_dct_8x32 = {
8, // .txfm_size
6, // .stage_num
inv_shift_8x32, // .shift
inv_stage_range_row_dct_8x32, // .stage_range
inv_cos_bit_row_dct_8x32, // .cos_bit
TXFM_TYPE_DCT8 // .txfm_type
};
// ---------------- col config inv_dct_8x32 ----------------
static const TXFM_1D_CFG inv_txfm_1d_col_cfg_dct_8x32 = {
32, // .txfm_size
......@@ -754,6 +803,16 @@ static const TXFM_1D_CFG inv_txfm_1d_col_cfg_dct_8x32 = {
TXFM_TYPE_DCT32 // .txfm_type
};
// ---------------- row config inv_adst_8x32 ----------------
// 1D row (horizontal) configuration for the inverse ADST of the 8x32
// transform: an 8-point, 8-stage ADST8 using the dedicated 8x32 row
// stage-range and cos-bit tables.
static const TXFM_1D_CFG inv_txfm_1d_row_cfg_adst_8x32 = {
8, // .txfm_size
8, // .stage_num
inv_shift_8x32, // .shift
inv_stage_range_row_adst_8x32, // .stage_range
inv_cos_bit_row_adst_8x32, // .cos_bit
TXFM_TYPE_ADST8, // .txfm_type
};
// ---------------- col config inv_adst_8x32 ----------------
static const TXFM_1D_CFG inv_txfm_1d_col_cfg_adst_8x32 = {
32, // .txfm_size
......
......@@ -17,6 +17,8 @@
#include "av1/common/av1_inv_txfm1d.h"
#include "av1/common/av1_inv_txfm1d_cfg.h"
#define NO_INV_TRANSPOSE 1
static INLINE TxfmFunc inv_txfm_type_to_func(TXFM_TYPE txfm_type) {
switch (txfm_type) {
case TXFM_TYPE_DCT4: return av1_idct4_new;
......@@ -141,21 +143,21 @@ static const TXFM_1D_CFG *inv_txfm_col_cfg_ls[TX_TYPES_1D][TX_SIZES_ALL] = {
static const TXFM_1D_CFG *inv_txfm_row_cfg_ls[TX_TYPES_1D][TX_SIZES_ALL] = {
// DCT
{
&inv_txfm_1d_row_cfg_dct_4, &inv_txfm_1d_row_cfg_dct_8,
&inv_txfm_1d_row_cfg_dct_16, &inv_txfm_1d_row_cfg_dct_32,
&inv_txfm_1d_row_cfg_dct_4, &inv_txfm_1d_row_cfg_dct_8,
&inv_txfm_1d_row_cfg_dct_16, &inv_txfm_1d_row_cfg_dct_32,
#if CONFIG_TX64X64
&inv_txfm_1d_row_cfg_dct_64,
#endif // CONFIG_TX64X64
&inv_txfm_1d_row_cfg_dct_4, &inv_txfm_1d_row_cfg_dct_8,
&inv_txfm_1d_row_cfg_dct_8, &inv_txfm_1d_row_cfg_dct_16,
&inv_txfm_1d_row_cfg_dct_16, &inv_txfm_1d_row_cfg_dct_32,
&inv_txfm_1d_row_cfg_dct_4, &inv_txfm_1d_row_cfg_dct_8,
&inv_txfm_1d_row_cfg_dct_8x16, &inv_txfm_1d_row_cfg_dct_16,
&inv_txfm_1d_row_cfg_dct_16, &inv_txfm_1d_row_cfg_dct_32,
#if CONFIG_TX64X64
&inv_txfm_1d_row_cfg_dct_32, &inv_txfm_1d_row_cfg_dct_64,
&inv_txfm_1d_row_cfg_dct_32, &inv_txfm_1d_row_cfg_dct_64,
#endif // CONFIG_TX64X64
&inv_txfm_1d_row_cfg_dct_4, &inv_txfm_1d_row_cfg_dct_16,
&inv_txfm_1d_row_cfg_dct_8, &inv_txfm_1d_row_cfg_dct_32,
&inv_txfm_1d_row_cfg_dct_4, &inv_txfm_1d_row_cfg_dct_16,
&inv_txfm_1d_row_cfg_dct_8x32, &inv_txfm_1d_row_cfg_dct_32,
#if CONFIG_TX64X64
&inv_txfm_1d_row_cfg_dct_16, &inv_txfm_1d_row_cfg_dct_64,
&inv_txfm_1d_row_cfg_dct_16, &inv_txfm_1d_row_cfg_dct_64,
#endif // CONFIG_TX64X64
},
// ADST
......@@ -169,7 +171,7 @@ static const TXFM_1D_CFG *inv_txfm_row_cfg_ls[TX_TYPES_1D][TX_SIZES_ALL] = {
#endif // CONFIG_TX64X64
&inv_txfm_1d_row_cfg_adst_4,
&inv_txfm_1d_row_cfg_adst_8,
&inv_txfm_1d_row_cfg_adst_8,
&inv_txfm_1d_row_cfg_adst_8x16,
&inv_txfm_1d_row_cfg_adst_16,
&inv_txfm_1d_row_cfg_adst_16,
&inv_txfm_1d_row_cfg_adst_32,
......@@ -179,7 +181,7 @@ static const TXFM_1D_CFG *inv_txfm_row_cfg_ls[TX_TYPES_1D][TX_SIZES_ALL] = {
#endif // CONFIG_TX64X64
&inv_txfm_1d_row_cfg_adst_4,
&inv_txfm_1d_row_cfg_adst_16,
&inv_txfm_1d_row_cfg_adst_8,
&inv_txfm_1d_row_cfg_adst_8x32,
&inv_txfm_1d_row_cfg_adst_32,
#if CONFIG_TX64X64
&inv_txfm_1d_row_cfg_adst_16,
......@@ -197,7 +199,7 @@ static const TXFM_1D_CFG *inv_txfm_row_cfg_ls[TX_TYPES_1D][TX_SIZES_ALL] = {
#endif // CONFIG_TX64X64
&inv_txfm_1d_row_cfg_adst_4,
&inv_txfm_1d_row_cfg_adst_8,
&inv_txfm_1d_row_cfg_adst_8,
&inv_txfm_1d_row_cfg_adst_8x16,
&inv_txfm_1d_row_cfg_adst_16,
&inv_txfm_1d_row_cfg_adst_16,
&inv_txfm_1d_row_cfg_adst_32,
......@@ -207,7 +209,7 @@ static const TXFM_1D_CFG *inv_txfm_row_cfg_ls[TX_TYPES_1D][TX_SIZES_ALL] = {
#endif // CONFIG_TX64X64
&inv_txfm_1d_row_cfg_adst_4,
&inv_txfm_1d_row_cfg_adst_16,
&inv_txfm_1d_row_cfg_adst_8,
&inv_txfm_1d_row_cfg_adst_8x32,
&inv_txfm_1d_row_cfg_adst_32,
#if CONFIG_TX64X64
&inv_txfm_1d_row_cfg_adst_16,
......@@ -246,7 +248,7 @@ void av1_get_inv_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size,
}
void av1_gen_inv_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
const TXFM_2D_FLIP_CFG *cfg, int8_t fwd_shift,
const TXFM_2D_FLIP_CFG *cfg, TX_SIZE tx_size,
int bd) {
// Note when assigning txfm_size_col, we use the txfm_size from the
// row configuration and vice versa. This is intentionally done to
......@@ -254,25 +256,43 @@ void av1_gen_inv_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
// rectangular, the number of columns will be the same as the
// txfm_size stored in the row cfg struct. It will make no difference
// for square transforms.
// const int fwd_shift = fwd_shift_sum[txsize_sqr_up_map[tx_size]];
const int fwd_shift = inv_start_range[tx_size];
const int txfm_size_col = cfg->row_cfg->txfm_size;
const int txfm_size_row = cfg->col_cfg->txfm_size;
const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
if (txfm_size_col == txfm_size_row) assert(rect_type == 0);
int rect_type2_shift = 0;
if (rect_type == 2 || rect_type == -2) {
const int txfm_size_max = AOMMAX(txfm_size_col, txfm_size_row);
// For 16x4 / 4x16 shift 1 bit, for 32x8 / 8x32 / 64x16 / 16x64 no need
// for any additional shift.
rect_type2_shift = (txfm_size_max == 16 ? 1 : 0);
}
// Take the shift from the larger dimension in the rectangular case.
const int8_t *shift = (txfm_size_col > txfm_size_row) ? cfg->row_cfg->shift
: cfg->col_cfg->shift;
int shift1 = shift[1];
while (rect_type2_shift > 0 && shift1 < 0) {
shift1++;
rect_type2_shift--;
}
// i < MAX_TXFM_STAGE_NUM will mute above array bounds warning
for (int i = 0; i < cfg->row_cfg->stage_num && i < MAX_TXFM_STAGE_NUM; ++i) {
stage_range_row[i] = cfg->row_cfg->stage_range[i] + fwd_shift + bd + 1;
stage_range_row[i] = cfg->row_cfg->stage_range[i] + fwd_shift + bd + 1 -
cfg->row_cfg->stage_range[0];
}
// i < MAX_TXFM_STAGE_NUM will mute above array bounds warning
for (int i = 0; i < cfg->col_cfg->stage_num && i < MAX_TXFM_STAGE_NUM; ++i) {
stage_range_col[i] =
cfg->col_cfg->stage_range[i] + fwd_shift + shift[0] + bd + 1;
stage_range_col[i] = cfg->col_cfg->stage_range[i] + fwd_shift + shift[0] +
bd + 1 - cfg->col_cfg->stage_range[0] +
rect_type2_shift;
}
}
static INLINE void inv_txfm2d_add_c(const int32_t *input, uint16_t *output,
int stride, TXFM_2D_FLIP_CFG *cfg,
int32_t *txfm_buf, int8_t fwd_shift,
int32_t *txfm_buf, TX_SIZE tx_size,
int bd) {
// Note when assigning txfm_size_col, we use the txfm_size from the
// row configuration and vice versa. This is intentionally done to
......@@ -282,22 +302,27 @@ static INLINE void inv_txfm2d_add_c(const int32_t *input, uint16_t *output,
// for square transforms.
const int txfm_size_col = cfg->row_cfg->txfm_size;
const int txfm_size_row = cfg->col_cfg->txfm_size;
// Take the shift from the larger dimension in the rectangular case.
const int8_t *shift = (txfm_size_col > txfm_size_row) ? cfg->row_cfg->shift
: cfg->col_cfg->shift;
const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
int rect_type2_shift = 0;
int shift1 = shift[1];
if (rect_type == 2 || rect_type == -2) {
const int txfm_size_max = AOMMAX(txfm_size_col, txfm_size_row);
// For 16x4 / 4x16 shift 1 bit, for 32x8 / 8x32 / 64x16 / 16x64 no need
// for any additional shift.
rect_type2_shift = (txfm_size_max == 16 ? 1 : 0);
while (rect_type2_shift > 0 && shift1 < 0) {
shift1++;
rect_type2_shift--;
}
}
// Take the shift from the larger dimension in the rectangular case.
const int8_t *shift = (txfm_size_col > txfm_size_row) ? cfg->row_cfg->shift
: cfg->col_cfg->shift;
int8_t stage_range_row[MAX_TXFM_STAGE_NUM];
int8_t stage_range_col[MAX_TXFM_STAGE_NUM];
assert(cfg->row_cfg->stage_num <= MAX_TXFM_STAGE_NUM);
assert(cfg->col_cfg->stage_num <= MAX_TXFM_STAGE_NUM);
av1_gen_inv_stage_range(stage_range_col, stage_range_row, cfg, fwd_shift, bd);
av1_gen_inv_stage_range(stage_range_col, stage_range_row, cfg, tx_size, bd);
const int8_t *cos_bit_col = cfg->col_cfg->cos_bit;
const int8_t *cos_bit_row = cfg->row_cfg->cos_bit;
......@@ -340,7 +365,7 @@ static INLINE void inv_txfm2d_add_c(const int32_t *input, uint16_t *output,
temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)];
}
txfm_func_col(temp_in, temp_out, cos_bit_col, stage_range_col);
av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
av1_round_shift_array(temp_out, txfm_size_row, -shift1);
if (cfg->ud_flip == 0) {
for (r = 0; r < txfm_size_row; ++r) {
output[r * stride + c] =
......@@ -364,9 +389,7 @@ static INLINE void inv_txfm2d_add_facade(const int32_t *input, uint16_t *output,
av1_get_inv_txfm_cfg(tx_type, tx_size, &cfg);
// Forward shift sum uses larger square size, to be consistent with what
// av1_gen_inv_stage_range() does for inverse shifts.
const TX_SIZE tx_size_sqr_up = txsize_sqr_up_map[tx_size];
inv_txfm2d_add_c(input, output, stride, &cfg, txfm_buf,
fwd_shift_sum[tx_size_sqr_up], bd);
inv_txfm2d_add_c(input, output, stride, &cfg, txfm_buf, tx_size, bd);
}
void av1_inv_txfm2d_add_4x8_c(const int32_t *input, uint16_t *output,
......@@ -378,6 +401,9 @@ void av1_inv_txfm2d_add_4x8_c(const int32_t *input, uint16_t *output,
void av1_inv_txfm2d_add_8x4_c(const int32_t *input, uint16_t *output,
int stride, TX_TYPE tx_type, int bd) {
DECLARE_ALIGNED(32, int, txfm_buf[8 * 4 + 8 + 8]);
#if NO_INV_TRANSPOSE
inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X4, bd);
#else
int32_t rinput[8 * 4];
uint16_t routput[8 * 4];
TX_SIZE tx_size = TX_8X4;
......@@ -391,6 +417,7 @@ void av1_inv_txfm2d_add_8x4_c(const int32_t *input, uint16_t *output,
transpose_uint16(routput, rw, output, stride, w, h);
inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
transpose_uint16(output, stride, routput, rw, rw, rh);
#endif // NO_INV_TRANSPOSE
}
void av1_inv_txfm2d_add_8x16_c(const int32_t *input, uint16_t *output,
......@@ -402,6 +429,9 @@ void av1_inv_txfm2d_add_8x16_c(const int32_t *input, uint16_t *output,
void av1_inv_txfm2d_add_16x8_c(const int32_t *input, uint16_t *output,
int stride, TX_TYPE tx_type, int bd) {
DECLARE_ALIGNED(32, int, txfm_buf[16 * 8 + 16 + 16]);
#if NO_INV_TRANSPOSE
inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X8, bd);
#else
int32_t rinput[16 * 8];
uint16_t routput[16 * 8];
TX_SIZE tx_size = TX_16X8;
......@@ -415,6 +445,7 @@ void av1_inv_txfm2d_add_16x8_c(const int32_t *input, uint16_t *output,
transpose_uint16(routput, rw, output, stride, w, h);
inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
transpose_uint16(output, stride, routput, rw, rw, rh);
#endif // NO_INV_TRANSPOSE
}
void av1_inv_txfm2d_add_16x32_c(const int32_t *input, uint16_t *output,
......@@ -426,6 +457,9 @@ void av1_inv_txfm2d_add_16x32_c(const int32_t *input, uint16_t *output,
void av1_inv_txfm2d_add_32x16_c(const int32_t *input, uint16_t *output,
int stride, TX_TYPE tx_type, int bd) {
DECLARE_ALIGNED(32, int, txfm_buf[32 * 16 + 32 + 32]);
#if NO_INV_TRANSPOSE
inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X16, bd);
#else
int32_t rinput[32 * 16];
uint16_t routput[32 * 16];
TX_SIZE tx_size = TX_32X16;
......@@ -439,6 +473,7 @@ void av1_inv_txfm2d_add_32x16_c(const int32_t *input, uint16_t *output,
transpose_uint16(routput, rw, output, stride, w, h);
inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
transpose_uint16(output, stride, routput, rw, rw, rh);
#endif // NO_INV_TRANSPOSE
}
void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output,
......@@ -494,6 +529,10 @@ void av1_inv_txfm2d_add_64x32_c(const int32_t *input, uint16_t *output,
memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input));
}
DECLARE_ALIGNED(32, int, txfm_buf[64 * 32 + 64 + 64]);
#if NO_INV_TRANSPOSE
inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_64X32,
bd);
#else
int32_t rinput[64 * 32];
uint16_t routput[64 * 32];
TX_SIZE tx_size = TX_64X32;
......@@ -507,6 +546,7 @@ void av1_inv_txfm2d_add_64x32_c(const int32_t *input, uint16_t *output,
transpose_uint16(routput, rw, output, stride, w, h);
inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
transpose_uint16(output, stride, routput, rw, rw, rh);
#endif // NO_INV_TRANSPOSE
}
void av1_inv_txfm2d_add_32x64_c(const int32_t *input, uint16_t *output,
......@@ -546,6 +586,10 @@ void av1_inv_txfm2d_add_64x16_c(const int32_t *input, uint16_t *output,
memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input));
}
DECLARE_ALIGNED(32, int, txfm_buf[16 * 64 + 64 + 64]);
#if NO_INV_TRANSPOSE
inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_64X16,
bd);
#else
int32_t rinput[16 * 64];
uint16_t routput[16 * 64];
TX_SIZE tx_size = TX_64X16;
......@@ -559,6 +603,7 @@ void av1_inv_txfm2d_add_64x16_c(const int32_t *input, uint16_t *output,
transpose_uint16(routput, rw, output, stride, w, h);
inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
transpose_uint16(output, stride, routput, rw, rw, rh);
#endif // NO_INV_TRANSPOSE
}
#endif // CONFIG_TX64X64
......@@ -571,6 +616,9 @@ void av1_inv_txfm2d_add_4x16_c(const int32_t *input, uint16_t *output,
void av1_inv_txfm2d_add_16x4_c(const int32_t *input, uint16_t *output,
int stride, TX_TYPE tx_type, int bd) {
DECLARE_ALIGNED(32, int, txfm_buf[4 * 16 + 16 + 16]);
#if NO_INV_TRANSPOSE
inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X4, bd);
#else
int32_t rinput[4 * 16];
uint16_t routput[4 * 16];
TX_SIZE tx_size = TX_16X4;
......@@ -584,6 +632,7 @@ void av1_inv_txfm2d_add_16x4_c(const int32_t *input, uint16_t *output,
transpose_uint16(routput, rw, output, stride, w, h);
inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
transpose_uint16(output, stride, routput, rw, rw, rh);
#endif // NO_INV_TRANSPOSE
}
void av1_inv_txfm2d_add_8x32_c(const int32_t *input, uint16_t *output,
......@@ -595,6 +644,9 @@ void av1_inv_txfm2d_add_8x32_c(const int32_t *input, uint16_t *output,
void av1_inv_txfm2d_add_32x8_c(const int32_t *input, uint16_t *output,
int stride, TX_TYPE tx_type, int bd) {
DECLARE_ALIGNED(32, int, txfm_buf[8 * 32 + 32 + 32]);
#if NO_INV_TRANSPOSE
inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X8, bd);
#else
int32_t rinput[8 * 32];
uint16_t routput[8 * 32];
TX_SIZE tx_size = TX_32X8;
......@@ -608,4 +660,5 @@ void av1_inv_txfm2d_add_32x8_c(const int32_t *input, uint16_t *output,
transpose_uint16(routput, rw, output, stride, w, h);
inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
transpose_uint16(output, stride, routput, rw, rw, rh);
#endif // NO_INV_TRANSPOSE
}
......@@ -263,7 +263,7 @@ void av1_gen_fwd_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
const TXFM_2D_FLIP_CFG *cfg, int bd);
void av1_gen_inv_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
const TXFM_2D_FLIP_CFG *cfg, int8_t fwd_shift,
const TXFM_2D_FLIP_CFG *cfg, TX_SIZE tx_size,
int bd);
void av1_get_fwd_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size,
......
......@@ -209,8 +209,8 @@ static const int8_t fwd_stage_range_row_dct_4x16[4] =
ARRAYOFFSET4(4, 0, 1, 2, 2);
static const int8_t fwd_stage_range_row_adst_4x16[6] =
ARRAYOFFSET6(4, 0, 0, 1, 2, 2, 2);
static const int8_t fwd_cos_bit_row_dct_4x16[6] = { 13, 13, 13, 13 };
static const int8_t fwd_cos_bit_row_adst_4x16[6] = { 13, 13, 13, 13, 13, 13 };
static const int8_t fwd_cos_bit_row_dct_4x16[6] = { 12, 12, 12, 12 };
static const int8_t fwd_cos_bit_row_adst_4x16[6] = { 12, 12, 12, 12, 12, 12 };
// ---------------- 16x4 1D constants -----------------------
#define fwd_shift_16x4 fwd_shift_16
......@@ -218,10 +218,10 @@ static const int8_t fwd_stage_range_row_dct_16x4[8] =
ARRAYOFFSET8(2, 0, 1, 2, 3, 4, 4, 4, 4);
static const int8_t fwd_stage_range_row_adst_16x4[10] =
ARRAYOFFSET10(2, 0, 0, 1, 2, 2, 3, 3, 4, 4, 4);
static const int8_t fwd_cos_bit_row_dct_16x4[8] = { 13, 13, 13, 13,
13, 13, 13, 13 };
static const int8_t fwd_cos_bit_row_adst_16x4[10] = { 13, 13, 13, 13, 13,
13, 13, 13, 13, 13 };
static const int8_t fwd_cos_bit_row_dct_16x4[8] = { 12, 12, 12, 12,
12, 12, 12, 12 };
static const int8_t fwd_cos_bit_row_adst_16x4[10] = { 12, 12, 12, 12, 12,
12, 12, 12, 12, 12 };
// ---------------- 8x32 1D constants -----------------------
#define fwd_shift_8x32 fwd_shift_32
......@@ -229,9 +229,9 @@ static const int8_t fwd_stage_range_row_dct_8x32[6] =
ARRAYOFFSET6(5, 0, 1, 2, 3, 3, 3);
static const int8_t fwd_stage_range_row_adst_8x32[8] =
ARRAYOFFSET8(5, 0, 0, 1, 2, 2, 3, 3, 3);
static const int8_t fwd_cos_bit_row_dct_8x32[6] = { 13, 13, 13, 13, 13, 13 };
static const int8_t fwd_cos_bit_row_adst_8x32[8] = { 13, 13, 13, 13,
13, 13, 13, 13 };
static const int8_t fwd_cos_bit_row_dct_8x32[6] = { 12, 12, 11, 11, 11, 11 };
static const int8_t fwd_cos_bit_row_adst_8x32[8] = { 12, 12, 12, 12,
11, 11, 11, 11 };
// ---------------- 32x8 1D constants -----------------------
#define fwd_shift_32x8 fwd_shift_32
......@@ -239,17 +239,17 @@ static const int8_t fwd_stage_range_row_dct_32x8[10] =
ARRAYOFFSET10(3, 0, 1, 2, 3, 4, 5, 5, 5, 5, 5);
static const int8_t fwd_stage_range_row_adst_32x8[12] =
ARRAYOFFSET12(3, 0, 0, 1, 2, 2, 3, 3, 4, 4, 5, 5, 5);
static const int8_t fwd_cos_bit_row_dct_32x8[10] = { 12, 12, 12, 12, 12,
12, 12, 12, 12, 12 };
static const int8_t fwd_cos_bit_row_dct_32x8[10] = { 12, 12, 12, 12, 11,
11, 11, 11, 11, 11 };
static const int8_t fwd_cos_bit_row_adst_32x8[12] = { 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12 };
12, 11, 11, 11, 11, 11 };
// ---------------- 16x64 1D constants -----------------------
#define fwd_shift_16x64 fwd_shift_64
static const int8_t fwd_stage_range_row_dct_16x64[8] =
ARRAYOFFSET8(6, 0, 1, 2, 3, 4, 4, 4, 4);
static const int8_t fwd_cos_bit_row_dct_16x64[8] = { 12, 12, 12, 11,
11, 11, 11, 11 };
static const int8_t fwd_cos_bit_row_dct_16x64[8] = { 12, 11, 10, 10,
10, 10, 10, 10 };
// ---------------- 64x16 1D constants -----------------------
#define fwd_shift_64x16 fwd_shift_64
......
......@@ -19,6 +19,8 @@
#include "av1/encoder/av1_fwd_txfm1d.h"
#include "av1/encoder/av1_fwd_txfm1d_cfg.h"
#define NO_FWD_TRANSPOSE 1
static INLINE TxfmFunc fwd_txfm_type_to_func(TXFM_TYPE txfm_type) {
switch (txfm_type) {
case TXFM_TYPE_DCT4: return av1_fdct4_new;
......@@ -61,10 +63,23 @@ void av1_gen_fwd_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
stage_range_col[i] = cfg->col_cfg->stage_range[i] + shift[0] + bd + 1;
}
const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
int rect_shift = 0;
int shift2 = shift[2];
if (rect_type == 2 || rect_type == -2) {
const int txfm_size_max = AOMMAX(txfm_size_col, txfm_size_row);
// For 64x16 / 16x64 / 32x8 / 8x32 shift 2 bits, and
// For 16x4 / 4x16 shift by 1 bit.
rect_shift = (txfm_size_max >= 32) ? 2 : 1;
}
while (rect_shift > 0 && shift2 < 0) {
shift2++;
rect_shift--;
}
// i < MAX_TXFM_STAGE_NUM will mute above array bounds warning
for (int i = 0; i < cfg->row_cfg->stage_num && i < MAX_TXFM_STAGE_NUM; ++i) {
stage_range_row[i] =
cfg->row_cfg->stage_range[i] + shift[0] + shift[1] + bd + 1;
stage_range_row[i] = cfg->row_cfg->stage_range[i] + shift[0] + shift[1] +
bd + 1 + rect_shift;
}
}
......@@ -80,6 +95,10 @@ static INLINE void fwd_txfm2d_c(const int16_t *input, int32_t *output,
// for square transforms.
const int txfm_size_col = cfg->row_cfg->txfm_size;
const int txfm_size_row = cfg->col_cfg->txfm_size;
// Take the shift from the larger dimension in the rectangular case.
const int8_t *shift = (txfm_size_col > txfm_size_row) ? cfg->row_cfg->shift
: cfg->col_cfg->shift;
int shift2 = shift[2];
const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
int rect_type2_shift = 0;
if (rect_type == 2 || rect_type == -2) {
......@@ -87,10 +106,11 @@ static INLINE void fwd_txfm2d_c(const int16_t *input, int32_t *output,
// For 64x16 / 16x64 / 32x8 / 8x32 shift 2 bits, and
// For 16x4 / 4x16 shift by 1 bit.
rect_type2_shift = (txfm_size_max >= 32) ? 2 : 1;
while (rect_type2_shift > 0 && shift2 < 0) {
shift2++;
rect_type2_shift--;
}
}
// Take the shift from the larger dimension in the rectangular case.
const int8_t *shift = (txfm_size_col > txfm_size_row) ? cfg->row_cfg->shift
: cfg->col_cfg->shift;
int8_t stage_range_col[MAX_TXFM_STAGE_NUM];
int8_t stage_range_row[MAX_TXFM_STAGE_NUM];
assert(cfg->col_cfg->stage_num <= MAX_TXFM_STAGE_NUM);
......@@ -116,17 +136,17 @@ static INLINE void fwd_txfm2d_c(const int16_t *input, int32_t *output,
temp_in[r] = input[(txfm_size_row - r - 1) * stride + c];
}
av1_round_shift_array(temp_in, txfm_size_row, -shift[0]);
txfm_func_col(temp_in, temp_out, cos_bit_col, stage_range_col);
// Multiply everything by Sqrt2 on the larger dimension if the
// transform is rectangular and the size difference is a factor of 2.
// If the size difference is a factor of 4, multiply by
// 2^rect_type_2_extra_shift.
if (rect_type == 1) {
for (r = 0; r < txfm_size_row; ++r)
temp_in[r] = (int32_t)fdct_round_shift(temp_in[r] * Sqrt2);
temp_out[r] = (int32_t)fdct_round_shift(temp_out[r] * Sqrt2);
} else if (rect_type == 2) {
av1_round_shift_array(temp_in, txfm_size_row, -rect_type2_shift);
av1_round_shift_array(temp_out, txfm_size_row, -rect_type2_shift);
}
txfm_func_col(temp_in, temp_out, cos_bit_col, stage_range_col);
av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
if (cfg->lr_flip == 0) {
for (r = 0; r < txfm_size_row; ++r)
......@@ -154,13 +174,18 @@ static INLINE void fwd_txfm2d_c(const int16_t *input, int32_t *output,
}
txfm_func_row(buf + r * txfm_size_col, output + r * txfm_size_col,
cos_bit_row, stage_range_row);
av1_round_shift_array(output + r * txfm_size_col, txfm_size_col, -shift[2]);
av1_round_shift_array(output + r * txfm_size_col, txfm_size_col, -shift2);
}
}
void av1_fwd_txfm2d_4x8_c(const int16_t *input, int32_t *output, int stride,
TX_TYPE tx_type, int bd) {
DECLARE_ALIGNED(32, int32_t, txfm_buf[4 * 8]);
TXFM_2D_FLIP_CFG cfg;
#if NO_FWD_TRANSPOSE
av1_get_fwd_txfm_cfg(tx_type, TX_4X8, &cfg);
fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
#else
int16_t rinput[4 * 8];
TX_SIZE tx_size = TX_4X8;
TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
......@@ -170,10 +195,10 @@ void av1_fwd_txfm2d_4x8_c(const int16_t *input, int32_t *output, int stride,
int rw = h;
int rh = w;
transpose_int16(rinput, rw, input, stride, w, h);
TXFM_2D_FLIP_CFG cfg;
av1_get_fwd_txfm_cfg(rtx_type, rtx_size, &cfg);
fwd_txfm2d_c(rinput, txfm_buf, rw, &cfg, output, bd);
transpose_int32(output, w, txfm_buf, rw, rw, rh);
#endif // NO_FWD_TRANSPOSE
}
void av1_fwd_txfm2d_8x4_c(const int16_t *input, int32_t *output, int stride,
......@@ -187,6 +212,11 @@ void av1_fwd_txfm2d_8x4_c(const int16_t *input, int32_t *output, int stride,
void av1_fwd_txfm2d_8x16_c(const int16_t *input, int32_t *output, int stride,
TX_TYPE tx_type, int bd) {
DECLARE_ALIGNED(32, int32_t, txfm_buf[8 * 16]);
TXFM_2D_FLIP_CFG cfg;
#if NO_FWD_TRANSPOSE
av1_get_fwd_txfm_cfg(