Commit 15b0113b authored by Urvang Joshi, committed by Debargha Mukherjee

Rectangular transforms: smaller dim first always.

This is true independent of the CONFIG_TXMG flag, so there is no need for
the other code path.

BUG=aomedia:1114

Change-Id: I572c5151ca866d9d430460fb353610540c9bf025
parent 7b88ade6
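The reasoning behind the cleanup, in code form: a rectangular transform with the larger dimension first can always be computed by transposing the block, applying the transform whose smaller dimension comes first, and transposing the result back, which is what the retained CONFIG_TXMG path does with transpose_int32/transpose_uint16 and the rotated tx_size/tx_type. Below is a minimal, self-contained C sketch of that pattern. The helpers transpose, transform_4x8, and transform_8x4 are hypothetical names for illustration only (not the libaom API), and an identity kernel stands in for the real 4x8 transform:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Transpose a block of w columns by h rows; dst becomes h columns by w rows.
 * Plays the role that transpose_int32/transpose_uint16 play in the real code. */
static void transpose(int32_t *dst, int dst_stride, const int32_t *src,
                      int src_stride, int w, int h) {
  for (int r = 0; r < h; ++r) {
    for (int c = 0; c < w; ++c) {
      dst[c * dst_stride + r] = src[r * src_stride + c];
    }
  }
}

/* Hypothetical stand-in for a 4-wide x 8-high kernel; identity, so the
 * sketch stays runnable without the real transform tables. */
static void transform_4x8(const int32_t *in, int32_t *out) {
  for (int i = 0; i < 4 * 8; ++i) out[i] = in[i];
}

/* An 8-wide x 4-high transform built the "smaller dim first" way:
 * transpose to 4x8, run the 4x8 kernel, transpose back. */
static void transform_8x4(const int32_t *in, int32_t *out) {
  int32_t rin[4 * 8], rout[4 * 8];
  transpose(rin, /*dst_stride=*/4, in, /*src_stride=*/8, /*w=*/8, /*h=*/4);
  transform_4x8(rin, rout);
  transpose(out, /*dst_stride=*/8, rout, /*src_stride=*/4, /*w=*/4, /*h=*/8);
}

int main(void) {
  int32_t in[8 * 4], out[8 * 4];
  for (int i = 0; i < 8 * 4; ++i) in[i] = i;
  transform_8x4(in, out);
  /* With the identity kernel the round trip must reproduce the input. */
  printf("%s\n", memcmp(in, out, sizeof(in)) == 0 ? "round-trip ok" : "mismatch");
  return 0;
}

Since the rectangular kernels behave this way regardless of the flag (per the commit message), the #else branches deleted in the diff below were redundant.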
@@ -248,7 +248,6 @@ void av1_inv_txfm2d_add_4x8_c(const int32_t *input, uint16_t *output,
 
 void av1_inv_txfm2d_add_8x4_c(const int32_t *input, uint16_t *output,
                               int stride, TX_TYPE tx_type, int bd) {
-#if CONFIG_TXMG
   DECLARE_ALIGNED(32, int, txfm_buf[8 * 4 + 8 + 8]);
   int32_t rinput[8 * 4];
   uint16_t routput[8 * 4];
@@ -263,10 +262,6 @@ void av1_inv_txfm2d_add_8x4_c(const int32_t *input, uint16_t *output,
   transpose_uint16(routput, rw, output, stride, w, h);
   inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
   transpose_uint16(output, stride, routput, rw, rw, rh);
-#else
-  DECLARE_ALIGNED(32, int, txfm_buf[8 * 4 + 4 + 4]);
-  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X4, bd);
-#endif
 }
 
 void av1_inv_txfm2d_add_8x16_c(const int32_t *input, uint16_t *output,
@@ -277,7 +272,6 @@ void av1_inv_txfm2d_add_8x16_c(const int32_t *input, uint16_t *output,
 
 void av1_inv_txfm2d_add_16x8_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
-#if CONFIG_TXMG
   DECLARE_ALIGNED(32, int, txfm_buf[16 * 8 + 16 + 16]);
   int32_t rinput[16 * 8];
   uint16_t routput[16 * 8];
@@ -292,10 +286,6 @@ void av1_inv_txfm2d_add_16x8_c(const int32_t *input, uint16_t *output,
   transpose_uint16(routput, rw, output, stride, w, h);
   inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
   transpose_uint16(output, stride, routput, rw, rw, rh);
-#else
-  DECLARE_ALIGNED(32, int, txfm_buf[16 * 8 + 8 + 8]);
-  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X8, bd);
-#endif
 }
 
 void av1_inv_txfm2d_add_16x32_c(const int32_t *input, uint16_t *output,
@@ -306,7 +296,6 @@ void av1_inv_txfm2d_add_16x32_c(const int32_t *input, uint16_t *output,
 
 void av1_inv_txfm2d_add_32x16_c(const int32_t *input, uint16_t *output,
                                 int stride, TX_TYPE tx_type, int bd) {
-#if CONFIG_TXMG
   DECLARE_ALIGNED(32, int, txfm_buf[32 * 16 + 32 + 32]);
   int32_t rinput[32 * 16];
   uint16_t routput[32 * 16];
@@ -321,10 +310,6 @@ void av1_inv_txfm2d_add_32x16_c(const int32_t *input, uint16_t *output,
   transpose_uint16(routput, rw, output, stride, w, h);
   inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
   transpose_uint16(output, stride, routput, rw, rw, rh);
-#else
-  DECLARE_ALIGNED(32, int, txfm_buf[32 * 16 + 16 + 16]);
-  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X16, bd);
-#endif
 }
 
 void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output,
@@ -379,7 +364,6 @@ void av1_inv_txfm2d_add_64x32_c(const int32_t *input, uint16_t *output,
     memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input));
     memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input));
   }
-#if CONFIG_TXMG
   DECLARE_ALIGNED(32, int, txfm_buf[64 * 32 + 64 + 64]);
   int32_t rinput[64 * 32];
   uint16_t routput[64 * 32];
@@ -394,11 +378,6 @@ void av1_inv_txfm2d_add_64x32_c(const int32_t *input, uint16_t *output,
   transpose_uint16(routput, rw, output, stride, w, h);
   inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
   transpose_uint16(output, stride, routput, rw, rw, rh);
-#else
-  DECLARE_ALIGNED(32, int, txfm_buf[64 * 32 + 64 + 64]);
-  inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_64X32,
-                        bd);
-#endif  // CONFIG_TXMG
 }
 
 void av1_inv_txfm2d_add_32x64_c(const int32_t *input, uint16_t *output,
@@ -437,7 +416,6 @@ void av1_inv_txfm2d_add_64x16_c(const int32_t *input, uint16_t *output,
     memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input));
     memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input));
   }
-#if CONFIG_TXMG
   DECLARE_ALIGNED(32, int, txfm_buf[16 * 64 + 64 + 64]);
   int32_t rinput[16 * 64];
   uint16_t routput[16 * 64];
@@ -452,11 +430,6 @@ void av1_inv_txfm2d_add_64x16_c(const int32_t *input, uint16_t *output,
   transpose_uint16(routput, rw, output, stride, w, h);
   inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
   transpose_uint16(output, stride, routput, rw, rw, rh);
-#else
-  DECLARE_ALIGNED(32, int, txfm_buf[16 * 64 + 64 + 64]);
-  inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_64X16,
-                        bd);
-#endif  // CONFIG_TXMG
 }
 #endif  // CONFIG_TX64X64
 
@@ -468,7 +441,6 @@ void av1_inv_txfm2d_add_4x16_c(const int32_t *input, uint16_t *output,
 
 void av1_inv_txfm2d_add_16x4_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
-#if CONFIG_TXMG
   DECLARE_ALIGNED(32, int, txfm_buf[4 * 16 + 16 + 16]);
   int32_t rinput[4 * 16];
   uint16_t routput[4 * 16];
@@ -483,10 +455,6 @@ void av1_inv_txfm2d_add_16x4_c(const int32_t *input, uint16_t *output,
   transpose_uint16(routput, rw, output, stride, w, h);
   inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
   transpose_uint16(output, stride, routput, rw, rw, rh);
-#else
-  DECLARE_ALIGNED(32, int, txfm_buf[4 * 16 + 16 + 16]);
-  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X4, bd);
-#endif  // CONFIG_TXMG
 }
 
 void av1_inv_txfm2d_add_8x32_c(const int32_t *input, uint16_t *output,
@@ -497,7 +465,6 @@ void av1_inv_txfm2d_add_8x32_c(const int32_t *input, uint16_t *output,
 
 void av1_inv_txfm2d_add_32x8_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
-#if CONFIG_TXMG
   DECLARE_ALIGNED(32, int, txfm_buf[8 * 32 + 32 + 32]);
   int32_t rinput[8 * 32];
   uint16_t routput[8 * 32];
@@ -512,8 +479,4 @@ void av1_inv_txfm2d_add_32x8_c(const int32_t *input, uint16_t *output,
   transpose_uint16(routput, rw, output, stride, w, h);
   inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
   transpose_uint16(output, stride, routput, rw, rw, rh);
-#else
-  DECLARE_ALIGNED(32, int, txfm_buf[8 * 32 + 32 + 32]);
-  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X8, bd);
-#endif  // CONFIG_TXMG
 }
...
@@ -169,7 +169,6 @@ static INLINE void set_flip_cfg(TX_TYPE tx_type, TXFM_2D_FLIP_CFG *cfg) {
   }
 }
 
-#if CONFIG_TXMG
 static INLINE TX_SIZE av1_rotate_tx_size(TX_SIZE tx_size) {
   switch (tx_size) {
     case TX_4X4: return TX_4X4;
@@ -220,7 +219,6 @@ static INLINE TX_TYPE av1_rotate_tx_type(TX_TYPE tx_type) {
     default: assert(0); return TX_TYPES;
   }
 }
-#endif  // CONFIG_TXMG
 
 // Utility function that returns the log of the ratio of the col and row
 // sizes.
...
@@ -161,7 +161,6 @@ static INLINE void fwd_txfm2d_c(const int16_t *input, int32_t *output,
 
 void av1_fwd_txfm2d_4x8_c(const int16_t *input, int32_t *output, int stride,
                           TX_TYPE tx_type, int bd) {
-#if CONFIG_TXMG
   DECLARE_ALIGNED(32, int32_t, txfm_buf[4 * 8]);
   int16_t rinput[4 * 8];
   TX_SIZE tx_size = TX_4X8;
@@ -176,12 +175,6 @@ void av1_fwd_txfm2d_4x8_c(const int16_t *input, int32_t *output, int stride,
   av1_get_fwd_txfm_cfg(rtx_type, rtx_size, &cfg);
   fwd_txfm2d_c(rinput, txfm_buf, rw, &cfg, output, bd);
   transpose_int32(output, w, txfm_buf, rw, rw, rh);
-#else
-  int32_t txfm_buf[4 * 8];
-  TXFM_2D_FLIP_CFG cfg;
-  av1_get_fwd_txfm_cfg(tx_type, TX_4X8, &cfg);
-  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-#endif
 }
 
 void av1_fwd_txfm2d_8x4_c(const int16_t *input, int32_t *output, int stride,
@@ -194,7 +187,6 @@ void av1_fwd_txfm2d_8x4_c(const int16_t *input, int32_t *output, int stride,
 
 void av1_fwd_txfm2d_8x16_c(const int16_t *input, int32_t *output, int stride,
                            TX_TYPE tx_type, int bd) {
-#if CONFIG_TXMG
   DECLARE_ALIGNED(32, int32_t, txfm_buf[8 * 16]);
   int16_t rinput[8 * 16];
   TX_SIZE tx_size = TX_8X16;
@@ -209,12 +201,6 @@ void av1_fwd_txfm2d_8x16_c(const int16_t *input, int32_t *output, int stride,
   av1_get_fwd_txfm_cfg(rtx_type, rtx_size, &cfg);
   fwd_txfm2d_c(rinput, txfm_buf, rw, &cfg, output, bd);
   transpose_int32(output, w, txfm_buf, rw, rw, rh);
-#else
-  int32_t txfm_buf[8 * 16];
-  TXFM_2D_FLIP_CFG cfg;
-  av1_get_fwd_txfm_cfg(tx_type, TX_8X16, &cfg);
-  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-#endif
 }
 
 void av1_fwd_txfm2d_16x8_c(const int16_t *input, int32_t *output, int stride,
@@ -227,7 +213,6 @@ void av1_fwd_txfm2d_16x8_c(const int16_t *input, int32_t *output, int stride,
 
 void av1_fwd_txfm2d_16x32_c(const int16_t *input, int32_t *output, int stride,
                             TX_TYPE tx_type, int bd) {
-#if CONFIG_TXMG
   DECLARE_ALIGNED(32, int32_t, txfm_buf[16 * 32]);
   int16_t rinput[16 * 32];
   TX_SIZE tx_size = TX_16X32;
@@ -242,12 +227,6 @@ void av1_fwd_txfm2d_16x32_c(const int16_t *input, int32_t *output, int stride,
   av1_get_fwd_txfm_cfg(rtx_type, rtx_size, &cfg);
   fwd_txfm2d_c(rinput, txfm_buf, rw, &cfg, output, bd);
   transpose_int32(output, w, txfm_buf, rw, rw, rh);
-#else
-  int32_t txfm_buf[16 * 32];
-  TXFM_2D_FLIP_CFG cfg;
-  av1_get_fwd_txfm_cfg(tx_type, TX_16X32, &cfg);
-  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-#endif
 }
 
 void av1_fwd_txfm2d_32x16_c(const int16_t *input, int32_t *output, int stride,
@@ -260,7 +239,6 @@ void av1_fwd_txfm2d_32x16_c(const int16_t *input, int32_t *output, int stride,
 
 void av1_fwd_txfm2d_4x16_c(const int16_t *input, int32_t *output, int stride,
                            TX_TYPE tx_type, int bd) {
-#if CONFIG_TXMG
   DECLARE_ALIGNED(32, int32_t, txfm_buf[4 * 16]);
   int16_t rinput[4 * 16];
   TX_SIZE tx_size = TX_4X16;
@@ -275,12 +253,6 @@ void av1_fwd_txfm2d_4x16_c(const int16_t *input, int32_t *output, int stride,
   av1_get_fwd_txfm_cfg(rtx_type, rtx_size, &cfg);
   fwd_txfm2d_c(rinput, txfm_buf, rw, &cfg, output, bd);
   transpose_int32(output, w, txfm_buf, rw, rw, rh);
-#else
-  int32_t txfm_buf[4 * 16];
-  TXFM_2D_FLIP_CFG cfg;
-  av1_get_fwd_txfm_cfg(tx_type, TX_4X16, &cfg);
-  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-#endif
 }
 
 void av1_fwd_txfm2d_16x4_c(const int16_t *input, int32_t *output, int stride,
@@ -293,7 +265,6 @@ void av1_fwd_txfm2d_16x4_c(const int16_t *input, int32_t *output, int stride,
 
 void av1_fwd_txfm2d_8x32_c(const int16_t *input, int32_t *output, int stride,
                            TX_TYPE tx_type, int bd) {
-#if CONFIG_TXMG
   DECLARE_ALIGNED(32, int32_t, txfm_buf[32 * 8]);
   int16_t rinput[32 * 8];
   TX_SIZE tx_size = TX_8X32;
@@ -308,12 +279,6 @@ void av1_fwd_txfm2d_8x32_c(const int16_t *input, int32_t *output, int stride,
   av1_get_fwd_txfm_cfg(rtx_type, rtx_size, &cfg);
   fwd_txfm2d_c(rinput, txfm_buf, rw, &cfg, output, bd);
   transpose_int32(output, w, txfm_buf, rw, rw, rh);
-#else
-  int32_t txfm_buf[8 * 32];
-  TXFM_2D_FLIP_CFG cfg;
-  av1_get_fwd_txfm_cfg(tx_type, TX_8X32, &cfg);
-  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-#endif
 }
 
 void av1_fwd_txfm2d_32x8_c(const int16_t *input, int32_t *output, int stride,
@@ -378,7 +343,6 @@ void av1_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output, int stride,
 
 void av1_fwd_txfm2d_32x64_c(const int16_t *input, int32_t *output, int stride,
                             TX_TYPE tx_type, int bd) {
-#if CONFIG_TXMG
   DECLARE_ALIGNED(32, int32_t, txfm_buf[32 * 64]);
   int16_t rinput[64 * 32];
   TX_SIZE tx_size = TX_32X64;
@@ -393,12 +357,6 @@ void av1_fwd_txfm2d_32x64_c(const int16_t *input, int32_t *output, int stride,
   av1_get_fwd_txfm_cfg(rtx_type, rtx_size, &cfg);
   fwd_txfm2d_c(rinput, txfm_buf, rw, &cfg, output, bd);
   transpose_int32(output, w, txfm_buf, rw, rw, rh);
-#else
-  int32_t txfm_buf[32 * 64];
-  TXFM_2D_FLIP_CFG cfg;
-  av1_get_fwd_txfm_cfg(tx_type, TX_32X64, &cfg);
-  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-#endif  // CONFIG_TXMG
   // Zero out the bottom 32x32 area.
   memset(output + 32 * 32, 0, 32 * 32 * sizeof(*output));
   // Note: no repacking needed here.
@@ -424,7 +382,6 @@ void av1_fwd_txfm2d_64x32_c(const int16_t *input, int32_t *output, int stride,
 
 void av1_fwd_txfm2d_16x64_c(const int16_t *input, int32_t *output, int stride,
                             TX_TYPE tx_type, int bd) {
-#if CONFIG_TXMG
   DECLARE_ALIGNED(32, int32_t, txfm_buf[64 * 16]);
   int16_t rinput[64 * 16];
   TX_SIZE tx_size = TX_16X64;
@@ -439,12 +396,6 @@ void av1_fwd_txfm2d_16x64_c(const int16_t *input, int32_t *output, int stride,
   av1_get_fwd_txfm_cfg(rtx_type, rtx_size, &cfg);
   fwd_txfm2d_c(rinput, txfm_buf, rw, &cfg, output, bd);
   transpose_int32(output, w, txfm_buf, rw, rw, rh);
-#else
-  int32_t txfm_buf[16 * 64];
-  TXFM_2D_FLIP_CFG cfg;
-  av1_get_fwd_txfm_cfg(tx_type, TX_16X64, &cfg);
-  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-#endif
   // Zero out the bottom 16x32 area.
   memset(output + 16 * 32, 0, 16 * 32 * sizeof(*output));
   // Note: no repacking needed here.
...