Commit c502216a authored by Urvang Joshi

Pass TXFM_2D_FLIP_CFG by pointer; don't return it by value.

This avoids copying a whole struct on return. This is important, because
this function is called frequently: once for every block that is transformed.

Overall, aomenc speed seems to be improved slightly: by 0.15% - 0.30%.

Change-Id: If44c351e1141c0fe0458daa2262aeb4afe44de46
parent 57356711
...@@ -171,12 +171,14 @@ void av1_fwd_txfm2d_4x8_c(const int16_t *input, int32_t *output, int stride, ...@@ -171,12 +171,14 @@ void av1_fwd_txfm2d_4x8_c(const int16_t *input, int32_t *output, int stride,
int rw = h; int rw = h;
int rh = w; int rh = w;
transpose_int16(rinput, rw, input, stride, w, h); transpose_int16(rinput, rw, input, stride, w, h);
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(rtx_type, rtx_size); TXFM_2D_FLIP_CFG cfg;
av1_get_fwd_txfm_cfg(rtx_type, rtx_size, &cfg);
fwd_txfm2d_c(rinput, txfm_buf, rw, &cfg, output, bd); fwd_txfm2d_c(rinput, txfm_buf, rw, &cfg, output, bd);
transpose_int32(output, w, txfm_buf, rw, rw, rh); transpose_int32(output, w, txfm_buf, rw, rw, rh);
#else #else
int32_t txfm_buf[4 * 8]; int32_t txfm_buf[4 * 8];
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_4X8); TXFM_2D_FLIP_CFG cfg;
av1_get_fwd_txfm_cfg(tx_type, TX_4X8, &cfg);
fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
#endif #endif
} }
...@@ -184,7 +186,8 @@ void av1_fwd_txfm2d_4x8_c(const int16_t *input, int32_t *output, int stride, ...@@ -184,7 +186,8 @@ void av1_fwd_txfm2d_4x8_c(const int16_t *input, int32_t *output, int stride,
void av1_fwd_txfm2d_8x4_c(const int16_t *input, int32_t *output, int stride, void av1_fwd_txfm2d_8x4_c(const int16_t *input, int32_t *output, int stride,
TX_TYPE tx_type, int bd) { TX_TYPE tx_type, int bd) {
int32_t txfm_buf[8 * 4]; int32_t txfm_buf[8 * 4];
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_8X4); TXFM_2D_FLIP_CFG cfg;
av1_get_fwd_txfm_cfg(tx_type, TX_8X4, &cfg);
fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
} }
...@@ -201,12 +204,14 @@ void av1_fwd_txfm2d_8x16_c(const int16_t *input, int32_t *output, int stride, ...@@ -201,12 +204,14 @@ void av1_fwd_txfm2d_8x16_c(const int16_t *input, int32_t *output, int stride,
int rw = h; int rw = h;
int rh = w; int rh = w;
transpose_int16(rinput, rw, input, stride, w, h); transpose_int16(rinput, rw, input, stride, w, h);
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(rtx_type, rtx_size); TXFM_2D_FLIP_CFG cfg;
av1_get_fwd_txfm_cfg(rtx_type, rtx_size, &cfg);
fwd_txfm2d_c(rinput, txfm_buf, rw, &cfg, output, bd); fwd_txfm2d_c(rinput, txfm_buf, rw, &cfg, output, bd);
transpose_int32(output, w, txfm_buf, rw, rw, rh); transpose_int32(output, w, txfm_buf, rw, rw, rh);
#else #else
int32_t txfm_buf[8 * 16]; int32_t txfm_buf[8 * 16];
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_8X16); TXFM_2D_FLIP_CFG cfg;
av1_get_fwd_txfm_cfg(tx_type, TX_8X16, &cfg);
fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
#endif #endif
} }
...@@ -214,7 +219,8 @@ void av1_fwd_txfm2d_8x16_c(const int16_t *input, int32_t *output, int stride, ...@@ -214,7 +219,8 @@ void av1_fwd_txfm2d_8x16_c(const int16_t *input, int32_t *output, int stride,
void av1_fwd_txfm2d_16x8_c(const int16_t *input, int32_t *output, int stride, void av1_fwd_txfm2d_16x8_c(const int16_t *input, int32_t *output, int stride,
TX_TYPE tx_type, int bd) { TX_TYPE tx_type, int bd) {
int32_t txfm_buf[16 * 8]; int32_t txfm_buf[16 * 8];
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_16X8); TXFM_2D_FLIP_CFG cfg;
av1_get_fwd_txfm_cfg(tx_type, TX_16X8, &cfg);
fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
} }
...@@ -231,12 +237,14 @@ void av1_fwd_txfm2d_16x32_c(const int16_t *input, int32_t *output, int stride, ...@@ -231,12 +237,14 @@ void av1_fwd_txfm2d_16x32_c(const int16_t *input, int32_t *output, int stride,
int rw = h; int rw = h;
int rh = w; int rh = w;
transpose_int16(rinput, rw, input, stride, w, h); transpose_int16(rinput, rw, input, stride, w, h);
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(rtx_type, rtx_size); TXFM_2D_FLIP_CFG cfg;
av1_get_fwd_txfm_cfg(rtx_type, rtx_size, &cfg);
fwd_txfm2d_c(rinput, txfm_buf, rw, &cfg, output, bd); fwd_txfm2d_c(rinput, txfm_buf, rw, &cfg, output, bd);
transpose_int32(output, w, txfm_buf, rw, rw, rh); transpose_int32(output, w, txfm_buf, rw, rw, rh);
#else #else
int32_t txfm_buf[16 * 32]; int32_t txfm_buf[16 * 32];
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_16X32); TXFM_2D_FLIP_CFG cfg;
av1_get_fwd_txfm_cfg(tx_type, TX_16X32, &cfg);
fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
#endif #endif
} }
...@@ -244,7 +252,8 @@ void av1_fwd_txfm2d_16x32_c(const int16_t *input, int32_t *output, int stride, ...@@ -244,7 +252,8 @@ void av1_fwd_txfm2d_16x32_c(const int16_t *input, int32_t *output, int stride,
void av1_fwd_txfm2d_32x16_c(const int16_t *input, int32_t *output, int stride, void av1_fwd_txfm2d_32x16_c(const int16_t *input, int32_t *output, int stride,
TX_TYPE tx_type, int bd) { TX_TYPE tx_type, int bd) {
int32_t txfm_buf[32 * 16]; int32_t txfm_buf[32 * 16];
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_32X16); TXFM_2D_FLIP_CFG cfg;
av1_get_fwd_txfm_cfg(tx_type, TX_32X16, &cfg);
fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
} }
...@@ -262,12 +271,14 @@ void av1_fwd_txfm2d_4x16_c(const int16_t *input, int32_t *output, int stride, ...@@ -262,12 +271,14 @@ void av1_fwd_txfm2d_4x16_c(const int16_t *input, int32_t *output, int stride,
int rw = h; int rw = h;
int rh = w; int rh = w;
transpose_int16(rinput, rw, input, stride, w, h); transpose_int16(rinput, rw, input, stride, w, h);
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(rtx_type, rtx_size); TXFM_2D_FLIP_CFG cfg;
av1_get_fwd_txfm_cfg(rtx_type, rtx_size, &cfg);
fwd_txfm2d_c(rinput, txfm_buf, rw, &cfg, output, bd); fwd_txfm2d_c(rinput, txfm_buf, rw, &cfg, output, bd);
transpose_int32(output, w, txfm_buf, rw, rw, rh); transpose_int32(output, w, txfm_buf, rw, rw, rh);
#else #else
int32_t txfm_buf[4 * 16]; int32_t txfm_buf[4 * 16];
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_4X16); TXFM_2D_FLIP_CFG cfg;
av1_get_fwd_txfm_cfg(tx_type, TX_4X16, &cfg);
fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
#endif #endif
} }
...@@ -275,7 +286,8 @@ void av1_fwd_txfm2d_4x16_c(const int16_t *input, int32_t *output, int stride, ...@@ -275,7 +286,8 @@ void av1_fwd_txfm2d_4x16_c(const int16_t *input, int32_t *output, int stride,
void av1_fwd_txfm2d_16x4_c(const int16_t *input, int32_t *output, int stride, void av1_fwd_txfm2d_16x4_c(const int16_t *input, int32_t *output, int stride,
TX_TYPE tx_type, int bd) { TX_TYPE tx_type, int bd) {
int32_t txfm_buf[16 * 4]; int32_t txfm_buf[16 * 4];
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_16X4); TXFM_2D_FLIP_CFG cfg;
av1_get_fwd_txfm_cfg(tx_type, TX_16X4, &cfg);
fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
} }
...@@ -292,12 +304,14 @@ void av1_fwd_txfm2d_8x32_c(const int16_t *input, int32_t *output, int stride, ...@@ -292,12 +304,14 @@ void av1_fwd_txfm2d_8x32_c(const int16_t *input, int32_t *output, int stride,
int rw = h; int rw = h;
int rh = w; int rh = w;
transpose_int16(rinput, rw, input, stride, w, h); transpose_int16(rinput, rw, input, stride, w, h);
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(rtx_type, rtx_size); TXFM_2D_FLIP_CFG cfg;
av1_get_fwd_txfm_cfg(rtx_type, rtx_size, &cfg);
fwd_txfm2d_c(rinput, txfm_buf, rw, &cfg, output, bd); fwd_txfm2d_c(rinput, txfm_buf, rw, &cfg, output, bd);
transpose_int32(output, w, txfm_buf, rw, rw, rh); transpose_int32(output, w, txfm_buf, rw, rw, rh);
#else #else
int32_t txfm_buf[8 * 32]; int32_t txfm_buf[8 * 32];
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_8X32); TXFM_2D_FLIP_CFG cfg;
av1_get_fwd_txfm_cfg(tx_type, TX_8X32, &cfg);
fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
#endif #endif
} }
...@@ -305,7 +319,8 @@ void av1_fwd_txfm2d_8x32_c(const int16_t *input, int32_t *output, int stride, ...@@ -305,7 +319,8 @@ void av1_fwd_txfm2d_8x32_c(const int16_t *input, int32_t *output, int stride,
void av1_fwd_txfm2d_32x8_c(const int16_t *input, int32_t *output, int stride, void av1_fwd_txfm2d_32x8_c(const int16_t *input, int32_t *output, int stride,
TX_TYPE tx_type, int bd) { TX_TYPE tx_type, int bd) {
int32_t txfm_buf[32 * 8]; int32_t txfm_buf[32 * 8];
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_32X8); TXFM_2D_FLIP_CFG cfg;
av1_get_fwd_txfm_cfg(tx_type, TX_32X8, &cfg);
fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
} }
#endif // CONFIG_EXT_PARTITION_TYPES && CONFIG_RECT_TX_EXT #endif // CONFIG_EXT_PARTITION_TYPES && CONFIG_RECT_TX_EXT
...@@ -313,28 +328,32 @@ void av1_fwd_txfm2d_32x8_c(const int16_t *input, int32_t *output, int stride, ...@@ -313,28 +328,32 @@ void av1_fwd_txfm2d_32x8_c(const int16_t *input, int32_t *output, int stride,
void av1_fwd_txfm2d_4x4_c(const int16_t *input, int32_t *output, int stride, void av1_fwd_txfm2d_4x4_c(const int16_t *input, int32_t *output, int stride,
TX_TYPE tx_type, int bd) { TX_TYPE tx_type, int bd) {
int32_t txfm_buf[4 * 4]; int32_t txfm_buf[4 * 4];
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_4X4); TXFM_2D_FLIP_CFG cfg;
av1_get_fwd_txfm_cfg(tx_type, TX_4X4, &cfg);
fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
} }
void av1_fwd_txfm2d_8x8_c(const int16_t *input, int32_t *output, int stride, void av1_fwd_txfm2d_8x8_c(const int16_t *input, int32_t *output, int stride,
TX_TYPE tx_type, int bd) { TX_TYPE tx_type, int bd) {
int32_t txfm_buf[8 * 8]; int32_t txfm_buf[8 * 8];
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_8X8); TXFM_2D_FLIP_CFG cfg;
av1_get_fwd_txfm_cfg(tx_type, TX_8X8, &cfg);
fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
} }
void av1_fwd_txfm2d_16x16_c(const int16_t *input, int32_t *output, int stride, void av1_fwd_txfm2d_16x16_c(const int16_t *input, int32_t *output, int stride,
TX_TYPE tx_type, int bd) { TX_TYPE tx_type, int bd) {
int32_t txfm_buf[16 * 16]; int32_t txfm_buf[16 * 16];
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_16X16); TXFM_2D_FLIP_CFG cfg;
av1_get_fwd_txfm_cfg(tx_type, TX_16X16, &cfg);
fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
} }
void av1_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output, int stride, void av1_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output, int stride,
TX_TYPE tx_type, int bd) { TX_TYPE tx_type, int bd) {
int32_t txfm_buf[32 * 32]; int32_t txfm_buf[32 * 32];
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_32X32); TXFM_2D_FLIP_CFG cfg;
av1_get_fwd_txfm_cfg(tx_type, TX_32X32, &cfg);
fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
} }
...@@ -342,7 +361,8 @@ void av1_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output, int stride, ...@@ -342,7 +361,8 @@ void av1_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output, int stride,
void av1_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output, int stride, void av1_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output, int stride,
TX_TYPE tx_type, int bd) { TX_TYPE tx_type, int bd) {
int32_t txfm_buf[64 * 64]; int32_t txfm_buf[64 * 64];
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_64X64); TXFM_2D_FLIP_CFG cfg;
av1_get_fwd_txfm_cfg(tx_type, TX_64X64, &cfg);
fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
// Zero out top-right 32x32 area. // Zero out top-right 32x32 area.
...@@ -360,7 +380,8 @@ void av1_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output, int stride, ...@@ -360,7 +380,8 @@ void av1_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output, int stride,
void av1_fwd_txfm2d_32x64_c(const int16_t *input, int32_t *output, int stride, void av1_fwd_txfm2d_32x64_c(const int16_t *input, int32_t *output, int stride,
TX_TYPE tx_type, int bd) { TX_TYPE tx_type, int bd) {
int32_t txfm_buf[32 * 64]; int32_t txfm_buf[32 * 64];
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_32X64); TXFM_2D_FLIP_CFG cfg;
av1_get_fwd_txfm_cfg(tx_type, TX_32X64, &cfg);
fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
// Zero out the bottom 32x32 area. // Zero out the bottom 32x32 area.
...@@ -371,7 +392,8 @@ void av1_fwd_txfm2d_32x64_c(const int16_t *input, int32_t *output, int stride, ...@@ -371,7 +392,8 @@ void av1_fwd_txfm2d_32x64_c(const int16_t *input, int32_t *output, int stride,
void av1_fwd_txfm2d_64x32_c(const int16_t *input, int32_t *output, int stride, void av1_fwd_txfm2d_64x32_c(const int16_t *input, int32_t *output, int stride,
TX_TYPE tx_type, int bd) { TX_TYPE tx_type, int bd) {
int32_t txfm_buf[64 * 32]; int32_t txfm_buf[64 * 32];
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_64X32); TXFM_2D_FLIP_CFG cfg;
av1_get_fwd_txfm_cfg(tx_type, TX_64X32, &cfg);
fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
// Zero out right 32x32 area. // Zero out right 32x32 area.
...@@ -447,14 +469,14 @@ static const TXFM_1D_CFG *fwd_txfm_row_cfg_ls[TX_TYPES_1D][TX_SIZES] = { ...@@ -447,14 +469,14 @@ static const TXFM_1D_CFG *fwd_txfm_row_cfg_ls[TX_TYPES_1D][TX_SIZES] = {
}, },
}; };
TXFM_2D_FLIP_CFG av1_get_fwd_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size) { void av1_get_fwd_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size,
TXFM_2D_FLIP_CFG cfg; TXFM_2D_FLIP_CFG *cfg) {
set_flip_cfg(tx_type, &cfg); assert(cfg != NULL);
set_flip_cfg(tx_type, cfg);
const TX_TYPE_1D tx_type_col = vtx_tab[tx_type]; const TX_TYPE_1D tx_type_col = vtx_tab[tx_type];
const TX_TYPE_1D tx_type_row = htx_tab[tx_type]; const TX_TYPE_1D tx_type_row = htx_tab[tx_type];
const TX_SIZE tx_size_col = txsize_vert_map[tx_size]; const TX_SIZE tx_size_col = txsize_vert_map[tx_size];
const TX_SIZE tx_size_row = txsize_horz_map[tx_size]; const TX_SIZE tx_size_row = txsize_horz_map[tx_size];
cfg.col_cfg = fwd_txfm_col_cfg_ls[tx_type_col][tx_size_col]; cfg->col_cfg = fwd_txfm_col_cfg_ls[tx_type_col][tx_size_col];
cfg.row_cfg = fwd_txfm_row_cfg_ls[tx_type_row][tx_size_row]; cfg->row_cfg = fwd_txfm_row_cfg_ls[tx_type_row][tx_size_row];
return cfg;
} }
...@@ -103,16 +103,16 @@ static const TXFM_1D_CFG *inv_txfm_row_cfg_ls[TX_TYPES_1D][TX_SIZES] = { ...@@ -103,16 +103,16 @@ static const TXFM_1D_CFG *inv_txfm_row_cfg_ls[TX_TYPES_1D][TX_SIZES] = {
}, },
}; };
TXFM_2D_FLIP_CFG av1_get_inv_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size) { void av1_get_inv_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size,
TXFM_2D_FLIP_CFG cfg; TXFM_2D_FLIP_CFG *cfg) {
set_flip_cfg(tx_type, &cfg); assert(cfg != NULL);
set_flip_cfg(tx_type, cfg);
const TX_TYPE_1D tx_type_col = vtx_tab[tx_type]; const TX_TYPE_1D tx_type_col = vtx_tab[tx_type];
const TX_TYPE_1D tx_type_row = htx_tab[tx_type]; const TX_TYPE_1D tx_type_row = htx_tab[tx_type];
const TX_SIZE tx_size_col = txsize_vert_map[tx_size]; const TX_SIZE tx_size_col = txsize_vert_map[tx_size];
const TX_SIZE tx_size_row = txsize_horz_map[tx_size]; const TX_SIZE tx_size_row = txsize_horz_map[tx_size];
cfg.col_cfg = inv_txfm_col_cfg_ls[tx_type_col][tx_size_col]; cfg->col_cfg = inv_txfm_col_cfg_ls[tx_type_col][tx_size_col];
cfg.row_cfg = inv_txfm_row_cfg_ls[tx_type_row][tx_size_row]; cfg->row_cfg = inv_txfm_row_cfg_ls[tx_type_row][tx_size_row];
return cfg;
} }
void av1_gen_inv_stage_range(int8_t *stage_range_col, int8_t *stage_range_row, void av1_gen_inv_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
...@@ -230,7 +230,8 @@ static INLINE void inv_txfm2d_add_facade(const int32_t *input, uint16_t *output, ...@@ -230,7 +230,8 @@ static INLINE void inv_txfm2d_add_facade(const int32_t *input, uint16_t *output,
int stride, int32_t *txfm_buf, int stride, int32_t *txfm_buf,
TX_TYPE tx_type, TX_SIZE tx_size, TX_TYPE tx_type, TX_SIZE tx_size,
int bd) { int bd) {
TXFM_2D_FLIP_CFG cfg = av1_get_inv_txfm_cfg(tx_type, tx_size); TXFM_2D_FLIP_CFG cfg;
av1_get_inv_txfm_cfg(tx_type, tx_size, &cfg);
TX_SIZE tx_size_sqr = txsize_sqr_map[tx_size]; TX_SIZE tx_size_sqr = txsize_sqr_map[tx_size];
inv_txfm2d_add_c(input, output, stride, &cfg, txfm_buf, inv_txfm2d_add_c(input, output, stride, &cfg, txfm_buf,
fwd_shift_sum[tx_size_sqr], bd); fwd_shift_sum[tx_size_sqr], bd);
......
...@@ -258,8 +258,10 @@ void av1_gen_inv_stage_range(int8_t *stage_range_col, int8_t *stage_range_row, ...@@ -258,8 +258,10 @@ void av1_gen_inv_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
const TXFM_2D_FLIP_CFG *cfg, int8_t fwd_shift, const TXFM_2D_FLIP_CFG *cfg, int8_t fwd_shift,
int bd); int bd);
TXFM_2D_FLIP_CFG av1_get_fwd_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size); void av1_get_fwd_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size,
TXFM_2D_FLIP_CFG av1_get_inv_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size); TXFM_2D_FLIP_CFG *cfg);
void av1_get_inv_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size,
TXFM_2D_FLIP_CFG *cfg);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif // __cplusplus #endif // __cplusplus
......
...@@ -76,7 +76,8 @@ static INLINE void fwd_txfm2d_sse4_1(const int16_t *input, int32_t *output, ...@@ -76,7 +76,8 @@ static INLINE void fwd_txfm2d_sse4_1(const int16_t *input, int32_t *output,
void av1_fwd_txfm2d_32x32_sse4_1(const int16_t *input, int32_t *output, void av1_fwd_txfm2d_32x32_sse4_1(const int16_t *input, int32_t *output,
int stride, TX_TYPE tx_type, int bd) { int stride, TX_TYPE tx_type, int bd) {
DECLARE_ALIGNED(16, int32_t, txfm_buf[1024]); DECLARE_ALIGNED(16, int32_t, txfm_buf[1024]);
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_32X32); TXFM_2D_FLIP_CFG cfg;
av1_get_fwd_txfm_cfg(tx_type, TX_32X32, &cfg);
(void)bd; (void)bd;
fwd_txfm2d_sse4_1(input, output, stride, &cfg, txfm_buf); fwd_txfm2d_sse4_1(input, output, stride, &cfg, txfm_buf);
} }
...@@ -39,8 +39,8 @@ class AV1FwdTxfm2d : public ::testing::TestWithParam<AV1FwdTxfm2dParam> { ...@@ -39,8 +39,8 @@ class AV1FwdTxfm2d : public ::testing::TestWithParam<AV1FwdTxfm2dParam> {
max_error_ = GET_PARAM(2); max_error_ = GET_PARAM(2);
max_avg_error_ = GET_PARAM(3); max_avg_error_ = GET_PARAM(3);
count_ = 500; count_ = 500;
TXFM_2D_FLIP_CFG fwd_txfm_flip_cfg = TXFM_2D_FLIP_CFG fwd_txfm_flip_cfg;
av1_get_fwd_txfm_cfg(tx_type_, tx_size_); av1_get_fwd_txfm_cfg(tx_type_, tx_size_, &fwd_txfm_flip_cfg);
// TODO(sarahparker) this test will need to be updated when these // TODO(sarahparker) this test will need to be updated when these
// functions are extended to support rectangular transforms // functions are extended to support rectangular transforms
int amplify_bit = fwd_txfm_flip_cfg.row_cfg->shift[0] + int amplify_bit = fwd_txfm_flip_cfg.row_cfg->shift[0] +
...@@ -186,8 +186,9 @@ TEST(AV1FwdTxfm2d, CfgTest) { ...@@ -186,8 +186,9 @@ TEST(AV1FwdTxfm2d, CfgTest) {
#if CONFIG_TX64X64 #if CONFIG_TX64X64
if (tx_size == TX_64X64 && tx_type != DCT_DCT) continue; if (tx_size == TX_64X64 && tx_type != DCT_DCT) continue;
#endif // CONFIG_TX64X64 #endif // CONFIG_TX64X64
const TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg( TXFM_2D_FLIP_CFG cfg;
static_cast<TX_TYPE>(tx_type), static_cast<TX_SIZE>(tx_size)); av1_get_fwd_txfm_cfg(static_cast<TX_TYPE>(tx_type),
static_cast<TX_SIZE>(tx_size), &cfg);
int8_t stage_range_col[MAX_TXFM_STAGE_NUM]; int8_t stage_range_col[MAX_TXFM_STAGE_NUM];
int8_t stage_range_row[MAX_TXFM_STAGE_NUM]; int8_t stage_range_row[MAX_TXFM_STAGE_NUM];
av1_gen_fwd_stage_range(stage_range_col, stage_range_row, &cfg, bd); av1_gen_fwd_stage_range(stage_range_col, stage_range_row, &cfg, bd);
......
...@@ -193,8 +193,9 @@ TEST(AV1InvTxfm2d, CfgTest) { ...@@ -193,8 +193,9 @@ TEST(AV1InvTxfm2d, CfgTest) {
#if CONFIG_TX64X64 #if CONFIG_TX64X64
if (tx_size == TX_64X64 && tx_type != DCT_DCT) continue; if (tx_size == TX_64X64 && tx_type != DCT_DCT) continue;
#endif // CONFIG_TX64X64 #endif // CONFIG_TX64X64
const TXFM_2D_FLIP_CFG cfg = av1_get_inv_txfm_cfg( TXFM_2D_FLIP_CFG cfg;
static_cast<TX_TYPE>(tx_type), static_cast<TX_SIZE>(tx_size)); av1_get_inv_txfm_cfg(static_cast<TX_TYPE>(tx_type),
static_cast<TX_SIZE>(tx_size), &cfg);
int8_t stage_range_col[MAX_TXFM_STAGE_NUM]; int8_t stage_range_col[MAX_TXFM_STAGE_NUM];
int8_t stage_range_row[MAX_TXFM_STAGE_NUM]; int8_t stage_range_row[MAX_TXFM_STAGE_NUM];
av1_gen_inv_stage_range(stage_range_col, stage_range_row, &cfg, av1_gen_inv_stage_range(stage_range_col, stage_range_row, &cfg,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment