Commit eec47e65 authored by Sarah Parker

Refactor hbd txfm configurations to be 1D

The hbd transform configurations were originally written out for every
possible 2D transform. Now that EXT_TX and RECT_TX add many more 2D
transforms, it is simpler to write configurations for the four 1D
transform types and compose them to build every possible 2D transform
type. This will make it easier to integrate the identity transform for
EXT_TX and rectangular transforms for RECT_TX into the current hbd
transform codepath, and will facilitate the removal of obsolete transforms.
This has no impact on performance.

BUG=aomedia:524

Change-Id: I1e217bcd217fd637b1df94fae62d9c59a0523c1a
parent bb6e1343
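The description above boils down to this: a 2D transform configuration is now
a pair of pointers to 1D configurations, one for the column (vertical) pass
and one for the row (horizontal) pass. Below is a minimal sketch of that
composition; the field names mirror TXFM_1D_CFG and TXFM_2D_FLIP_CFG from the
av1_txfm.h hunk further down, but the enums, tables and helper are simplified
placeholders rather than the real libaom definitions.

/* Sketch only: simplified stand-ins for the TXFM_1D_CFG / TXFM_2D_FLIP_CFG
 * composition introduced by this patch. Enum values, tables and the helper
 * below are illustrative, not the real libaom definitions. */
#include <stdint.h>
#include <stdio.h>

typedef enum { DCT_1D, ADST_1D, FLIPADST_1D, IDTX_1D, TX_TYPES_1D } TX_TYPE_1D;
typedef enum { TX_4X4, TX_8X8, TX_16X16, TX_32X32, TX_SIZES } TX_SIZE;

typedef struct {
  int txfm_size;             /* number of points in the 1D transform */
  const int8_t *shift;       /* per-stage shifts */
  const int8_t *stage_range; /* per-stage bit ranges */
  const int8_t *cos_bit;     /* per-stage cosine-table precision */
} TXFM_1D_CFG;

typedef struct {
  int ud_flip;                /* flip upside down */
  int lr_flip;                /* flip left to right */
  const TXFM_1D_CFG *col_cfg; /* vertical (column) pass */
  const TXFM_1D_CFG *row_cfg; /* horizontal (row) pass */
} TXFM_2D_FLIP_CFG;

/* Placeholder 8-point configs; the real tables hold entries such as
 * &fwd_txfm_1d_col_cfg_dct_8. */
static const TXFM_1D_CFG dummy_dct_8 = { 8, NULL, NULL, NULL };
static const TXFM_1D_CFG dummy_adst_8 = { 8, NULL, NULL, NULL };

static const TXFM_1D_CFG *col_cfg_ls[TX_TYPES_1D][TX_SIZES] = {
  [DCT_1D] = { NULL, &dummy_dct_8, NULL, NULL },
  [ADST_1D] = { NULL, &dummy_adst_8, NULL, NULL },
};
static const TXFM_1D_CFG *row_cfg_ls[TX_TYPES_1D][TX_SIZES] = {
  [DCT_1D] = { NULL, &dummy_dct_8, NULL, NULL },
  [ADST_1D] = { NULL, &dummy_adst_8, NULL, NULL },
};

/* Compose one column cfg and one row cfg into a 2D cfg, mirroring the shape
 * of av1_get_fwd_txfm_cfg() in the av1_fwd_txfm2d.c hunk below. */
static TXFM_2D_FLIP_CFG get_txfm_cfg(TX_TYPE_1D col_type, TX_TYPE_1D row_type,
                                     TX_SIZE tx_size) {
  TXFM_2D_FLIP_CFG cfg = { 0, 0, NULL, NULL };
  cfg.col_cfg = col_cfg_ls[col_type][tx_size];
  cfg.row_cfg = row_cfg_ls[row_type][tx_size];
  return cfg;
}

int main(void) {
  /* An 8x8 "ADST_DCT"-style transform: ADST columns composed with DCT rows. */
  TXFM_2D_FLIP_CFG cfg = get_txfm_cfg(ADST_1D, DCT_1D, TX_8X8);
  printf("col size %d, row size %d\n", cfg.col_cfg->txfm_size,
         cfg.row_cfg->txfm_size);
  return 0;
}

The payoff of this shape is that adding a new 1D type (identity for EXT_TX) or
a non-square pairing (for RECT_TX) only needs new 1D entries, instead of a new
2D config for every combination of row and column transform.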
@@ -16,11 +16,11 @@ set(AOM_AV1_COMMON_SOURCES
"${AOM_ROOT}/av1/common/av1_fwd_txfm1d.c"
"${AOM_ROOT}/av1/common/av1_fwd_txfm1d.h"
"${AOM_ROOT}/av1/common/av1_fwd_txfm2d.c"
"${AOM_ROOT}/av1/common/av1_fwd_txfm2d_cfg.h"
"${AOM_ROOT}/av1/common/av1_fwd_txfm1d_cfg.h"
"${AOM_ROOT}/av1/common/av1_inv_txfm1d.c"
"${AOM_ROOT}/av1/common/av1_inv_txfm1d.h"
"${AOM_ROOT}/av1/common/av1_inv_txfm2d.c"
"${AOM_ROOT}/av1/common/av1_inv_txfm2d_cfg.h"
"${AOM_ROOT}/av1/common/av1_inv_txfm1d_cfg.h"
"${AOM_ROOT}/av1/common/av1_loopfilter.c"
"${AOM_ROOT}/av1/common/av1_loopfilter.h"
"${AOM_ROOT}/av1/common/av1_txfm.h"
@@ -69,9 +69,9 @@ AV1_COMMON_SRCS-yes += common/av1_fwd_txfm1d.c
AV1_COMMON_SRCS-yes += common/av1_inv_txfm1d.h
AV1_COMMON_SRCS-yes += common/av1_inv_txfm1d.c
AV1_COMMON_SRCS-yes += common/av1_fwd_txfm2d.c
AV1_COMMON_SRCS-yes += common/av1_fwd_txfm2d_cfg.h
AV1_COMMON_SRCS-yes += common/av1_fwd_txfm1d_cfg.h
AV1_COMMON_SRCS-yes += common/av1_inv_txfm2d.c
AV1_COMMON_SRCS-yes += common/av1_inv_txfm2d_cfg.h
AV1_COMMON_SRCS-yes += common/av1_inv_txfm1d_cfg.h
AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/av1_convolve_ssse3.c
ifeq ($(CONFIG_HIGHBITDEPTH),yes)
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/av1_highbd_convolve_sse4.c
@@ -14,7 +14,7 @@
#include "./av1_rtcd.h"
#include "av1/common/enums.h"
#include "av1/common/av1_fwd_txfm1d.h"
#include "av1/common/av1_fwd_txfm2d_cfg.h"
#include "av1/common/av1_fwd_txfm1d_cfg.h"
#include "av1/common/av1_txfm.h"
static INLINE TxfmFunc fwd_txfm_type_to_func(TXFM_TYPE txfm_type) {
@@ -35,14 +35,15 @@ static INLINE void fwd_txfm2d_c(const int16_t *input, int32_t *output,
const int stride, const TXFM_2D_FLIP_CFG *cfg,
int32_t *buf) {
int c, r;
const int txfm_size = cfg->cfg->txfm_size;
const int8_t *shift = cfg->cfg->shift;
const int8_t *stage_range_col = cfg->cfg->stage_range_col;
const int8_t *stage_range_row = cfg->cfg->stage_range_row;
const int8_t *cos_bit_col = cfg->cfg->cos_bit_col;
const int8_t *cos_bit_row = cfg->cfg->cos_bit_row;
const TxfmFunc txfm_func_col = fwd_txfm_type_to_func(cfg->cfg->txfm_type_col);
const TxfmFunc txfm_func_row = fwd_txfm_type_to_func(cfg->cfg->txfm_type_row);
// TODO(sarahparker) must correct for rectangular transforms in follow up
const int txfm_size = cfg->row_cfg->txfm_size;
const int8_t *shift = cfg->row_cfg->shift;
const int8_t *stage_range_col = cfg->col_cfg->stage_range;
const int8_t *stage_range_row = cfg->row_cfg->stage_range;
const int8_t *cos_bit_col = cfg->col_cfg->cos_bit;
const int8_t *cos_bit_row = cfg->row_cfg->cos_bit;
const TxfmFunc txfm_func_col = fwd_txfm_type_to_func(cfg->col_cfg->txfm_type);
const TxfmFunc txfm_func_row = fwd_txfm_type_to_func(cfg->row_cfg->txfm_type);
// use output buffer as temp buffer
int32_t *temp_in = output;
@@ -117,69 +118,79 @@ void av1_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output, int stride,
fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf);
}
static const TXFM_2D_CFG *fwd_txfm_cfg_ls[TX_TYPES][TX_SIZES] = {
static const TXFM_1D_CFG *fwd_txfm_col_cfg_ls[TX_TYPES_1D][TX_SIZES] = {
// DCT
{
#if CONFIG_CB4X4
NULL,
#endif
&fwd_txfm_2d_cfg_dct_dct_4, &fwd_txfm_2d_cfg_dct_dct_8,
&fwd_txfm_2d_cfg_dct_dct_16, &fwd_txfm_2d_cfg_dct_dct_32 },
&fwd_txfm_1d_col_cfg_dct_4, &fwd_txfm_1d_col_cfg_dct_8,
&fwd_txfm_1d_col_cfg_dct_16, &fwd_txfm_1d_col_cfg_dct_32 },
// ADST
{
#if CONFIG_CB4X4
NULL,
#endif
&fwd_txfm_2d_cfg_adst_dct_4, &fwd_txfm_2d_cfg_adst_dct_8,
&fwd_txfm_2d_cfg_adst_dct_16, &fwd_txfm_2d_cfg_adst_dct_32 },
{
#if CONFIG_CB4X4
NULL,
#endif
&fwd_txfm_2d_cfg_dct_adst_4, &fwd_txfm_2d_cfg_dct_adst_8,
&fwd_txfm_2d_cfg_dct_adst_16, &fwd_txfm_2d_cfg_dct_adst_32 },
&fwd_txfm_1d_col_cfg_adst_4, &fwd_txfm_1d_col_cfg_adst_8,
&fwd_txfm_1d_col_cfg_adst_16, &fwd_txfm_1d_col_cfg_adst_32 },
#if CONFIG_EXT_TX
// FLIPADST
{
#if CONFIG_CB4X4
NULL,
#endif
&fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8,
&fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32 },
#if CONFIG_EXT_TX
&fwd_txfm_1d_col_cfg_adst_4, &fwd_txfm_1d_col_cfg_adst_8,
&fwd_txfm_1d_col_cfg_adst_16, &fwd_txfm_1d_col_cfg_adst_32 },
// IDENTITY PLACEHOLDER
{
#if CONFIG_CB4X4
NULL,
#endif
&fwd_txfm_2d_cfg_adst_dct_4, &fwd_txfm_2d_cfg_adst_dct_8,
&fwd_txfm_2d_cfg_adst_dct_16, &fwd_txfm_2d_cfg_adst_dct_32 },
&fwd_txfm_1d_col_cfg_adst_4, &fwd_txfm_1d_col_cfg_adst_8,
&fwd_txfm_1d_col_cfg_adst_16, &fwd_txfm_1d_col_cfg_adst_32 },
#endif // CONFIG_EXT_TX
};
static const TXFM_1D_CFG *fwd_txfm_row_cfg_ls[TX_TYPES_1D][TX_SIZES] = {
// DCT
{
#if CONFIG_CB4X4
NULL,
#endif
&fwd_txfm_2d_cfg_dct_adst_4, &fwd_txfm_2d_cfg_dct_adst_8,
&fwd_txfm_2d_cfg_dct_adst_16, &fwd_txfm_2d_cfg_dct_adst_32 },
&fwd_txfm_1d_row_cfg_dct_4, &fwd_txfm_1d_row_cfg_dct_8,
&fwd_txfm_1d_row_cfg_dct_16, &fwd_txfm_1d_row_cfg_dct_32 },
// ADST
{
#if CONFIG_CB4X4
NULL,
#endif
&fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8,
&fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32 },
&fwd_txfm_1d_row_cfg_adst_4, &fwd_txfm_1d_row_cfg_adst_8,
&fwd_txfm_1d_row_cfg_adst_16, &fwd_txfm_1d_row_cfg_adst_32 },
#if CONFIG_EXT_TX
// FLIPADST
{
#if CONFIG_CB4X4
NULL,
#endif
&fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8,
&fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32 },
&fwd_txfm_1d_row_cfg_adst_4, &fwd_txfm_1d_row_cfg_adst_8,
&fwd_txfm_1d_row_cfg_adst_16, &fwd_txfm_1d_row_cfg_adst_32 },
// IDENTITY PLACEHOLDER
{
#if CONFIG_CB4X4
NULL,
#endif
&fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8,
&fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32 },
&fwd_txfm_1d_row_cfg_adst_4, &fwd_txfm_1d_row_cfg_adst_8,
&fwd_txfm_1d_row_cfg_adst_16, &fwd_txfm_1d_row_cfg_adst_32 },
#endif // CONFIG_EXT_TX
};
TXFM_2D_FLIP_CFG av1_get_fwd_txfm_cfg(int tx_type, int tx_size) {
TXFM_2D_FLIP_CFG cfg;
set_flip_cfg(tx_type, &cfg);
cfg.cfg = fwd_txfm_cfg_ls[tx_type][tx_size];
int tx_type_col = vtx_tab[tx_type];
int tx_type_row = htx_tab[tx_type];
cfg.col_cfg = fwd_txfm_col_cfg_ls[tx_type_col][tx_size];
cfg.row_cfg = fwd_txfm_row_cfg_ls[tx_type_row][tx_size];
return cfg;
}
@@ -187,13 +198,11 @@ TXFM_2D_FLIP_CFG av1_get_fwd_txfm_64x64_cfg(int tx_type) {
TXFM_2D_FLIP_CFG cfg;
switch (tx_type) {
case DCT_DCT:
cfg.cfg = &fwd_txfm_2d_cfg_dct_dct_64;
cfg.col_cfg = &fwd_txfm_1d_col_cfg_dct_64;
cfg.row_cfg = &fwd_txfm_1d_row_cfg_dct_64;
cfg.ud_flip = 0;
cfg.lr_flip = 0;
break;
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
default:
cfg.ud_flip = 0;
cfg.lr_flip = 0;
@@ -13,7 +13,7 @@
#include "av1/common/enums.h"
#include "av1/common/av1_txfm.h"
#include "av1/common/av1_inv_txfm1d.h"
#include "av1/common/av1_inv_txfm2d_cfg.h"
#include "av1/common/av1_inv_txfm1d_cfg.h"
static INLINE TxfmFunc inv_txfm_type_to_func(TXFM_TYPE txfm_type) {
switch (txfm_type) {
@@ -29,77 +29,90 @@ static INLINE TxfmFunc inv_txfm_type_to_func(TXFM_TYPE txfm_type) {
}
}
static const TXFM_2D_CFG *inv_txfm_cfg_ls[TX_TYPES][TX_SIZES] = {
static const TXFM_1D_CFG *inv_txfm_col_cfg_ls[TX_TYPES_1D][TX_SIZES] = {
// DCT
{
#if CONFIG_CB4X4
NULL,
#endif
&inv_txfm_2d_cfg_dct_dct_4, &inv_txfm_2d_cfg_dct_dct_8,
&inv_txfm_2d_cfg_dct_dct_16, &inv_txfm_2d_cfg_dct_dct_32 },
&inv_txfm_1d_col_cfg_dct_4, &inv_txfm_1d_col_cfg_dct_8,
&inv_txfm_1d_col_cfg_dct_16, &inv_txfm_1d_col_cfg_dct_32 },
// ADST
{
#if CONFIG_CB4X4
NULL,
#endif
&inv_txfm_2d_cfg_adst_dct_4, &inv_txfm_2d_cfg_adst_dct_8,
&inv_txfm_2d_cfg_adst_dct_16, &inv_txfm_2d_cfg_adst_dct_32 },
{
#if CONFIG_CB4X4
NULL,
#endif
&inv_txfm_2d_cfg_dct_adst_4, &inv_txfm_2d_cfg_dct_adst_8,
&inv_txfm_2d_cfg_dct_adst_16, &inv_txfm_2d_cfg_dct_adst_32 },
&inv_txfm_1d_col_cfg_adst_4, &inv_txfm_1d_col_cfg_adst_8,
&inv_txfm_1d_col_cfg_adst_16, &inv_txfm_1d_col_cfg_adst_32 },
#if CONFIG_EXT_TX
// FLIPADST
{
#if CONFIG_CB4X4
NULL,
#endif
&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
&inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
#if CONFIG_EXT_TX
&inv_txfm_1d_col_cfg_adst_4, &inv_txfm_1d_col_cfg_adst_8,
&inv_txfm_1d_col_cfg_adst_16, &inv_txfm_1d_col_cfg_adst_32 },
// IDENTITY PLACEHOLDER
{
#if CONFIG_CB4X4
NULL,
#endif
&inv_txfm_2d_cfg_adst_dct_4, &inv_txfm_2d_cfg_adst_dct_8,
&inv_txfm_2d_cfg_adst_dct_16, &inv_txfm_2d_cfg_adst_dct_32 },
&inv_txfm_1d_col_cfg_adst_4, &inv_txfm_1d_col_cfg_adst_8,
&inv_txfm_1d_col_cfg_adst_16, &inv_txfm_1d_col_cfg_adst_32 },
#endif // CONFIG_EXT_TX
};
static const TXFM_1D_CFG *inv_txfm_row_cfg_ls[TX_TYPES_1D][TX_SIZES] = {
// DCT
{
#if CONFIG_CB4X4
NULL,
#endif
&inv_txfm_2d_cfg_dct_adst_4, &inv_txfm_2d_cfg_dct_adst_8,
&inv_txfm_2d_cfg_dct_adst_16, &inv_txfm_2d_cfg_dct_adst_32 },
&inv_txfm_1d_row_cfg_dct_4, &inv_txfm_1d_row_cfg_dct_8,
&inv_txfm_1d_row_cfg_dct_16, &inv_txfm_1d_row_cfg_dct_32 },
// ADST
{
#if CONFIG_CB4X4
NULL,
#endif
&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
&inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
&inv_txfm_1d_row_cfg_adst_4, &inv_txfm_1d_row_cfg_adst_8,
&inv_txfm_1d_row_cfg_adst_16, &inv_txfm_1d_row_cfg_adst_32 },
#if CONFIG_EXT_TX
// FLIPADST
{
#if CONFIG_CB4X4
NULL,
#endif
&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
&inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
&inv_txfm_1d_row_cfg_adst_4, &inv_txfm_1d_row_cfg_adst_8,
&inv_txfm_1d_row_cfg_adst_16, &inv_txfm_1d_row_cfg_adst_32 },
// IDENTITY PLACEHOLDER
{
#if CONFIG_CB4X4
NULL,
#endif
&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
&inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
&inv_txfm_1d_row_cfg_adst_4, &inv_txfm_1d_row_cfg_adst_8,
&inv_txfm_1d_row_cfg_adst_16, &inv_txfm_1d_row_cfg_adst_32 },
#endif // CONFIG_EXT_TX
};
TXFM_2D_FLIP_CFG av1_get_inv_txfm_cfg(int tx_type, int tx_size) {
TXFM_2D_FLIP_CFG cfg;
set_flip_cfg(tx_type, &cfg);
cfg.cfg = inv_txfm_cfg_ls[tx_type][tx_size];
int tx_type_col = vtx_tab[tx_type];
int tx_type_row = htx_tab[tx_type];
// TODO(sarahparker) this is currently only implemented for
// square transforms
cfg.col_cfg = inv_txfm_col_cfg_ls[tx_type_col][tx_size];
cfg.row_cfg = inv_txfm_row_cfg_ls[tx_type_row][tx_size];
return cfg;
}
TXFM_2D_FLIP_CFG av1_get_inv_txfm_64x64_cfg(int tx_type) {
TXFM_2D_FLIP_CFG cfg = { 0, 0, NULL };
TXFM_2D_FLIP_CFG cfg = { 0, 0, NULL, NULL };
switch (tx_type) {
case DCT_DCT:
cfg.cfg = &inv_txfm_2d_cfg_dct_dct_64;
cfg.col_cfg = &inv_txfm_1d_col_cfg_dct_64;
cfg.row_cfg = &inv_txfm_1d_row_cfg_dct_64;
set_flip_cfg(tx_type, &cfg);
break;
default: assert(0);
@@ -110,14 +123,15 @@ TXFM_2D_FLIP_CFG av1_get_inv_txfm_64x64_cfg(int tx_type) {
static INLINE void inv_txfm2d_add_c(const int32_t *input, int16_t *output,
int stride, TXFM_2D_FLIP_CFG *cfg,
int32_t *txfm_buf) {
const int txfm_size = cfg->cfg->txfm_size;
const int8_t *shift = cfg->cfg->shift;
const int8_t *stage_range_col = cfg->cfg->stage_range_col;
const int8_t *stage_range_row = cfg->cfg->stage_range_row;
const int8_t *cos_bit_col = cfg->cfg->cos_bit_col;
const int8_t *cos_bit_row = cfg->cfg->cos_bit_row;
const TxfmFunc txfm_func_col = inv_txfm_type_to_func(cfg->cfg->txfm_type_col);
const TxfmFunc txfm_func_row = inv_txfm_type_to_func(cfg->cfg->txfm_type_row);
// TODO(sarahparker) must correct for rectangular transforms in follow up
const int txfm_size = cfg->row_cfg->txfm_size;
const int8_t *shift = cfg->row_cfg->shift;
const int8_t *stage_range_col = cfg->col_cfg->stage_range;
const int8_t *stage_range_row = cfg->row_cfg->stage_range;
const int8_t *cos_bit_col = cfg->col_cfg->cos_bit;
const int8_t *cos_bit_row = cfg->row_cfg->cos_bit;
const TxfmFunc txfm_func_col = inv_txfm_type_to_func(cfg->col_cfg->txfm_type);
const TxfmFunc txfm_func_row = inv_txfm_type_to_func(cfg->row_cfg->txfm_type);
// txfm_buf's length is txfm_size * txfm_size + 2 * txfm_size
// it is used for intermediate data buffering
@@ -165,7 +179,11 @@ static INLINE void inv_txfm2d_add_facade(const int32_t *input, uint16_t *output,
// int16_t*
TXFM_2D_FLIP_CFG cfg = av1_get_inv_txfm_cfg(tx_type, tx_size);
inv_txfm2d_add_c(input, (int16_t *)output, stride, &cfg, txfm_buf);
clamp_block((int16_t *)output, cfg.cfg->txfm_size, stride, 0, (1 << bd) - 1);
// TODO(sarahparker) just using row_cfg->txfm_size for now because
// we are assuming this is only used for square transforms. This will
// be adjusted in a follow up
clamp_block((int16_t *)output, cfg.row_cfg->txfm_size, stride, 0,
(1 << bd) - 1);
}
void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output,
@@ -145,24 +145,21 @@ typedef enum TXFM_TYPE {
TXFM_TYPE_ADST32,
} TXFM_TYPE;
typedef struct TXFM_2D_CFG {
typedef struct TXFM_1D_CFG {
const int txfm_size;
const int stage_num_col;
const int stage_num_row;
const int stage_num;
const int8_t *shift;
const int8_t *stage_range_col;
const int8_t *stage_range_row;
const int8_t *cos_bit_col;
const int8_t *cos_bit_row;
const TXFM_TYPE txfm_type_col;
const TXFM_TYPE txfm_type_row;
} TXFM_2D_CFG;
const int8_t *stage_range;
const int8_t *cos_bit;
const TXFM_TYPE txfm_type;
} TXFM_1D_CFG;
typedef struct TXFM_2D_FLIP_CFG {
int ud_flip; // flip upside down
int lr_flip; // flip left to right
const TXFM_2D_CFG *cfg;
const TXFM_1D_CFG *col_cfg;
const TXFM_1D_CFG *row_cfg;
} TXFM_2D_FLIP_CFG;
static INLINE void set_flip_cfg(int tx_type, TXFM_2D_FLIP_CFG *cfg) {
@@ -176,10 +173,12 @@ static INLINE void set_flip_cfg(int tx_type, TXFM_2D_FLIP_CFG *cfg) {
break;
#if CONFIG_EXT_TX
case FLIPADST_DCT:
case FLIPADST_ADST:
cfg->ud_flip = 1;
cfg->lr_flip = 0;
break;
case DCT_FLIPADST:
case ADST_FLIPADST:
cfg->ud_flip = 0;
cfg->lr_flip = 1;
break;
@@ -187,14 +186,6 @@ static INLINE void set_flip_cfg(int tx_type, TXFM_2D_FLIP_CFG *cfg) {
cfg->ud_flip = 1;
cfg->lr_flip = 1;
break;
case ADST_FLIPADST:
cfg->ud_flip = 0;
cfg->lr_flip = 1;
break;
case FLIPADST_ADST:
cfg->ud_flip = 1;
cfg->lr_flip = 0;
break;
#endif // CONFIG_EXT_TX
default:
cfg->ud_flip = 0;
@@ -491,6 +491,22 @@ static const TX_SIZE max_txsize_rect_lookup[BLOCK_SIZES] = {
#define max_txsize_rect_lookup max_txsize_lookup
#endif // CONFIG_RECT_TX && (CONFIG_EXT_TX || CONFIG_VAR_TX)
static const TX_TYPE_1D vtx_tab[TX_TYPES] = {
DCT_1D, ADST_1D, DCT_1D, ADST_1D,
#if CONFIG_EXT_TX
FLIPADST_1D, DCT_1D, FLIPADST_1D, ADST_1D, FLIPADST_1D, IDTX_1D,
DCT_1D, IDTX_1D, ADST_1D, IDTX_1D, FLIPADST_1D, IDTX_1D,
#endif // CONFIG_EXT_TX
};
static const TX_TYPE_1D htx_tab[TX_TYPES] = {
DCT_1D, DCT_1D, ADST_1D, ADST_1D,
#if CONFIG_EXT_TX
DCT_1D, FLIPADST_1D, FLIPADST_1D, FLIPADST_1D, ADST_1D, IDTX_1D,
IDTX_1D, DCT_1D, IDTX_1D, ADST_1D, IDTX_1D, FLIPADST_1D,
#endif // CONFIG_EXT_TX
};
#if CONFIG_RECT_TX && (CONFIG_EXT_TX || CONFIG_VAR_TX)
// Same as "max_txsize_lookup[bsize] - TX_8X8", except for rectangular
// block which may use a rectangular transform, in which case it is
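The vtx_tab/htx_tab tables added above (and removed from the encoder-side
header further down) are what make the 2D-to-1D decomposition work: they give
the 1D type of the vertical (column) and horizontal (row) pass for each 2D
transform type. A tiny illustrative check of that mapping, assuming the usual
TX_TYPES ordering (DCT_DCT, ADST_DCT, DCT_ADST, ADST_ADST, ...); this is not
libaom code:

/* Illustrative check only: ADST_DCT applies ADST to the columns (vertical
 * pass) and DCT to the rows (horizontal pass), so av1_get_fwd_txfm_cfg()
 * pairs an ADST column cfg with a DCT row cfg for it. Assumes the usual
 * TX_TYPES ordering. */
#include <assert.h>

typedef enum { DCT_1D, ADST_1D, FLIPADST_1D, IDTX_1D } TX_TYPE_1D;
typedef enum { DCT_DCT, ADST_DCT, DCT_ADST, ADST_ADST, TX_TYPES } TX_TYPE;

static const TX_TYPE_1D vtx_tab[TX_TYPES] = { DCT_1D, ADST_1D, DCT_1D, ADST_1D };
static const TX_TYPE_1D htx_tab[TX_TYPES] = { DCT_1D, DCT_1D, ADST_1D, ADST_1D };

int main(void) {
  assert(vtx_tab[ADST_DCT] == ADST_1D); /* column pass: ADST */
  assert(htx_tab[ADST_DCT] == DCT_1D);  /* row pass: DCT */
  return 0;
}

Hoisting these tables into the common headers is what lets both the forward
and inverse hbd transform paths share the same decomposition.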
@@ -15,7 +15,7 @@
#include "./av1_rtcd.h"
#include "aom_dsp/inv_txfm.h"
#include "aom_ports/mem.h"
#include "av1/common/av1_inv_txfm2d_cfg.h"
#include "av1/common/av1_inv_txfm1d_cfg.h"
#include "av1/common/blockd.h"
#include "av1/common/enums.h"
#include "av1/common/idct.h"
@@ -85,8 +85,7 @@ static void idct64_col_c(const tran_low_t *input, tran_low_t *output) {
int32_t in[64], out[64];
int i;
for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
av1_idct64_new(in, out, inv_cos_bit_col_dct_dct_64,
inv_stage_range_col_dct_dct_64);
av1_idct64_new(in, out, inv_cos_bit_col_dct_64, inv_stage_range_col_dct_64);
for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}
@@ -94,8 +93,7 @@ static void idct64_row_c(const tran_low_t *input, tran_low_t *output) {
int32_t in[64], out[64];
int i;
for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
av1_idct64_new(in, out, inv_cos_bit_row_dct_dct_64,
inv_stage_range_row_dct_dct_64);
av1_idct64_new(in, out, inv_cos_bit_row_dct_64, inv_stage_range_row_dct_64);
for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}
@@ -196,8 +194,7 @@ static void highbd_idct64_col_c(const tran_low_t *input, tran_low_t *output,
int i;
(void)bd;
for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
av1_idct64_new(in, out, inv_cos_bit_col_dct_dct_64,
inv_stage_range_col_dct_dct_64);
av1_idct64_new(in, out, inv_cos_bit_col_dct_64, inv_stage_range_col_dct_64);
for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}
@@ -207,8 +204,7 @@ static void highbd_idct64_row_c(const tran_low_t *input, tran_low_t *output,
int i;
(void)bd;
for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
av1_idct64_new(in, out, inv_cos_bit_row_dct_dct_64,
inv_stage_range_row_dct_dct_64);
av1_idct64_new(in, out, inv_cos_bit_row_dct_64, inv_stage_range_row_dct_64);
for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}
#endif // CONFIG_TX64X64
@@ -37,16 +37,20 @@ static INLINE TxfmFuncSSE2 fwd_txfm_type_to_func(TXFM_TYPE txfm_type) {
}
static INLINE void fwd_txfm2d_sse4_1(const int16_t *input, int32_t *output,
const int stride, const TXFM_2D_CFG *cfg,
const int stride,
const TXFM_2D_FLIP_CFG *cfg,
int32_t *txfm_buf) {
const int txfm_size = cfg->txfm_size;
const int8_t *shift = cfg->shift;
const int8_t *stage_range_col = cfg->stage_range_col;
const int8_t *stage_range_row = cfg->stage_range_row;
const int8_t *cos_bit_col = cfg->cos_bit_col;
const int8_t *cos_bit_row = cfg->cos_bit_row;
const TxfmFuncSSE2 txfm_func_col = fwd_txfm_type_to_func(cfg->txfm_type_col);
const TxfmFuncSSE2 txfm_func_row = fwd_txfm_type_to_func(cfg->txfm_type_row);
// TODO(sarahparker) must correct for rectangular transforms in follow up
const int txfm_size = cfg->row_cfg->txfm_size;
const int8_t *shift = cfg->row_cfg->shift;
const int8_t *stage_range_col = cfg->col_cfg->stage_range;
const int8_t *stage_range_row = cfg->row_cfg->stage_range;
const int8_t *cos_bit_col = cfg->col_cfg->cos_bit;
const int8_t *cos_bit_row = cfg->row_cfg->cos_bit;
const TxfmFuncSSE2 txfm_func_col =
fwd_txfm_type_to_func(cfg->col_cfg->txfm_type);
const TxfmFuncSSE2 txfm_func_row =
fwd_txfm_type_to_func(cfg->row_cfg->txfm_type);
__m128i *buf_128 = (__m128i *)txfm_buf;
__m128i *out_128 = (__m128i *)output;
@@ -69,7 +73,7 @@ void av1_fwd_txfm2d_32x32_sse4_1(const int16_t *input, int32_t *output,
DECLARE_ALIGNED(16, int32_t, txfm_buf[1024]);
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_32X32);
(void)bd;
fwd_txfm2d_sse4_1(input, output, stride, cfg.cfg, txfm_buf);
fwd_txfm2d_sse4_1(input, output, stride, &cfg, txfm_buf);
}
void av1_fwd_txfm2d_64x64_sse4_1(const int16_t *input, int32_t *output,
@@ -77,5 +81,5 @@ void av1_fwd_txfm2d_64x64_sse4_1(const int16_t *input, int32_t *output,
DECLARE_ALIGNED(16, int32_t, txfm_buf[4096]);
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_64x64_cfg(tx_type);
(void)bd;
fwd_txfm2d_sse4_1(input, output, stride, cfg.cfg, txfm_buf);
fwd_txfm2d_sse4_1(input, output, stride, &cfg, txfm_buf);
}
@@ -13,7 +13,7 @@
#include "./av1_rtcd.h"
#include "./aom_config.h"
#include "av1/common/av1_inv_txfm2d_cfg.h"
#include "av1/common/av1_inv_txfm1d_cfg.h"
// Note:
// Total 32x4 registers to represent 32x32 block coefficients.
@@ -601,18 +601,20 @@ static void idct32_avx2(__m256i *in, __m256i *out, int bit) {
void av1_inv_txfm2d_add_32x32_avx2(const int32_t *coeff, uint16_t *output,
int stride, int tx_type, int bd) {
__m256i in[128], out[128];
const TXFM_2D_CFG *cfg = NULL;
const TXFM_1D_CFG *row_cfg = NULL;
const TXFM_1D_CFG *col_cfg = NULL;
switch (tx_type) {
case DCT_DCT:
cfg = &inv_txfm_2d_cfg_dct_dct_32;
row_cfg = &inv_txfm_1d_row_cfg_dct_32;
col_cfg = &inv_txfm_1d_col_cfg_dct_32;
load_buffer_32x32(coeff, in);
transpose_32x32(in, out);
idct32_avx2(out, in, cfg->cos_bit_row[2]);
round_shift_32x32(in, -cfg->shift[0]);
idct32_avx2(out, in, row_cfg->cos_bit[2]);
round_shift_32x32(in, -row_cfg->shift[0]);
transpose_32x32(in, out);
idct32_avx2(out, in, cfg->cos_bit_col[2]);
write_buffer_32x32(in, output, stride, 0, 0, -cfg->shift[1], bd);
idct32_avx2(out, in, col_cfg->cos_bit[2]);
write_buffer_32x32(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
break;
default: assert(0);
}
@@ -19,7 +19,7 @@
#include "aom_ports/mem.h"
#include "av1/common/blockd.h"
#include "av1/common/av1_fwd_txfm1d.h"
#include "av1/common/av1_fwd_txfm2d_cfg.h"
#include "av1/common/av1_fwd_txfm1d_cfg.h"
#include "av1/common/idct.h"
static INLINE void range_check(const tran_low_t *input, const int size,
@@ -2133,8 +2133,7 @@ static void fdct64_col(const tran_low_t *input, tran_low_t *output) {
int32_t in[64], out[64];
int i;
for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
av1_fdct64_new(in, out, fwd_cos_bit_col_dct_dct_64,
fwd_stage_range_col_dct_dct_64);
av1_fdct64_new(in, out, fwd_cos_bit_col_dct_64, fwd_stage_range_col_dct_64);
for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}
@@ -2142,8 +2141,7 @@ static void fdct64_row(const tran_low_t *input, tran_low_t *output) {
int32_t in[64], out[64];
int i;
for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
av1_fdct64_new(in, out, fwd_cos_bit_row_dct_dct_64,
fwd_stage_range_row_dct_dct_64);
av1_fdct64_new(in, out, fwd_cos_bit_row_dct_64, fwd_stage_range_row_dct_64);
for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}
@@ -418,22 +418,6 @@ static INLINE int write_uniform_cost(int n, int v) {
#define FAST_EXT_TX_CORR_MARGIN 0.5
#define FAST_EXT_TX_EDST_MARGIN 0.3
static const TX_TYPE_1D vtx_tab[TX_TYPES] = {
DCT_1D, ADST_1D, DCT_1D, ADST_1D,
#if CONFIG_EXT_TX
FLIPADST_1D, DCT_1D, FLIPADST_1D, ADST_1D, FLIPADST_1D, IDTX_1D,
DCT_1D, IDTX_1D, ADST_1D, IDTX_1D, FLIPADST_1D, IDTX_1D,
#endif // CONFIG_EXT_TX
};
static const TX_TYPE_1D htx_tab[TX_TYPES] = {
DCT_1D, DCT_1D, ADST_1D, ADST_1D,
#if CONFIG_EXT_TX
DCT_1D, FLIPADST_1D, FLIPADST_1D, FLIPADST_1D, ADST_1D, IDTX_1D,
IDTX_1D, DCT_1D, IDTX_1D, ADST_1D, IDTX_1D, FLIPADST_1D,
#endif // CONFIG_EXT_TX
};
#if CONFIG_DAALA_DIST
static int od_compute_var_4x4(od_coeff *x, int stride) {
int sum;
@@ -41,9 +41,11 @@ class AV1FwdTxfm2d : public ::testing::TestWithParam<AV1FwdTxfm2dParam> {
count_ = 500;
TXFM_2D_FLIP_CFG fwd_txfm_flip_cfg =
av1_get_fwd_txfm_cfg(tx_type_, tx_size_);
const TXFM_2D_CFG *fwd_txfm_cfg = fwd_txfm_flip_cfg.cfg;
int amplify_bit = fwd_txfm_cfg->shift[0] + fwd_txfm_cfg->shift[1] +
fwd_txfm_cfg->shift[2];
// TODO(sarahparker) this test will need to be updated when these
// functions are extended to support rectangular transforms
int amplify_bit = fwd_txfm_flip_cfg.row_cfg->shift[0] +
fwd_txfm_flip_cfg.row_cfg->shift[1] +
fwd_txfm_flip_cfg.row_cfg->shift[2];
ud_flip_ = fwd_txfm_flip_cfg.ud_flip;
lr_flip_ = fwd_txfm_flip_cfg.lr_flip;
amplify_factor_ =
@@ -17,7 +17,7 @@
#include "test/acm_random.h"
#include "test/util.h"
#include "test/av1_txfm_test.h"
#include "av1/common/av1_inv_txfm2d_cfg.h"
#include "av1/common/av1_inv_txfm1d_cfg.h"
using libaom_test::ACMRandom;
using libaom_test::input_base;