diff --git a/av1/common/idct.c b/av1/common/idct.c index 56af73f4d845fcff51e25b086f7ef476d8da5c88..328f360b18c89a88581edefd87f3f591ffdf31f4 100644 --- a/av1/common/idct.c +++ b/av1/common/idct.c @@ -11,14 +11,14 @@ #include <math.h> -#include "./av1_rtcd.h" #include "./aom_dsp_rtcd.h" +#include "./av1_rtcd.h" +#include "aom_dsp/inv_txfm.h" +#include "aom_ports/mem.h" +#include "av1/common/av1_inv_txfm2d_cfg.h" #include "av1/common/blockd.h" #include "av1/common/enums.h" #include "av1/common/idct.h" -#include "av1/common/av1_inv_txfm2d_cfg.h" -#include "aom_dsp/inv_txfm.h" -#include "aom_ports/mem.h" int get_tx_scale(const MACROBLOCKD *const xd, const TX_TYPE tx_type, const TX_SIZE tx_size) { @@ -179,249 +179,6 @@ static void maybe_flip_strides(uint8_t **dst, int *dstride, tran_low_t **src, } #if CONFIG_AOM_HIGHBITDEPTH -void highbd_idst4_c(const tran_low_t *input, tran_low_t *output, int bd) { - tran_low_t step[4]; - tran_high_t temp1, temp2; - (void)bd; - // stage 1 - temp1 = (input[3] + input[1]) * cospi_16_64; - temp2 = (input[3] - input[1]) * cospi_16_64; - step[0] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); - step[1] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); - temp1 = input[2] * cospi_24_64 - input[0] * cospi_8_64; - temp2 = input[2] * cospi_8_64 + input[0] * cospi_24_64; - step[2] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); - step[3] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); - - // stage 2 - output[0] = HIGHBD_WRAPLOW(step[0] + step[3], bd); - output[1] = HIGHBD_WRAPLOW(-step[1] - step[2], bd); - output[2] = HIGHBD_WRAPLOW(step[1] - step[2], bd); - output[3] = HIGHBD_WRAPLOW(step[3] - step[0], bd); -} - -void highbd_idst8_c(const tran_low_t *input, tran_low_t *output, int bd) { - tran_low_t step1[8], step2[8]; - tran_high_t temp1, temp2; - (void)bd; - // stage 1 - step1[0] = input[7]; - step1[2] = input[3]; - step1[1] = input[5]; - step1[3] = input[1]; - temp1 = input[6] * cospi_28_64 - input[0] * cospi_4_64; - temp2 = input[6] * cospi_4_64 + input[0] * cospi_28_64; - step1[4] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); - step1[7] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); - temp1 = input[2] * cospi_12_64 - input[4] * cospi_20_64; - temp2 = input[2] * cospi_20_64 + input[4] * cospi_12_64; - step1[5] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); - step1[6] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); - - // stage 2 - temp1 = (step1[0] + step1[2]) * cospi_16_64; - temp2 = (step1[0] - step1[2]) * cospi_16_64; - step2[0] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); - step2[1] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); - temp1 = step1[1] * cospi_24_64 - step1[3] * cospi_8_64; - temp2 = step1[1] * cospi_8_64 + step1[3] * cospi_24_64; - step2[2] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); - step2[3] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); - step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd); - step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd); - step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd); - step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd); - - // stage 3 - step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[3], bd); - step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[2], bd); - step1[2] = HIGHBD_WRAPLOW(step2[1] - step2[2], bd); - step1[3] = HIGHBD_WRAPLOW(step2[0] - step2[3], bd); - step1[4] = step2[4]; - temp1 = (step2[6] - step2[5]) * cospi_16_64; - temp2 = (step2[5] + step2[6]) * cospi_16_64; - step1[5] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); - step1[6] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); - step1[7] = step2[7]; - - // stage 4 - output[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd); - output[1] = HIGHBD_WRAPLOW(-step1[1] - step1[6], bd); - output[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd); - output[3] = HIGHBD_WRAPLOW(-step1[3] - step1[4], bd); - output[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd); - output[5] = HIGHBD_WRAPLOW(-step1[2] + step1[5], bd); - output[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd); - output[7] = HIGHBD_WRAPLOW(-step1[0] + step1[7], bd); -} - -void highbd_idst16_c(const tran_low_t *input, tran_low_t *output, int bd) { - // av1_highbd_igentx16(input, output, bd, Tx16); - tran_low_t step1[16], step2[16]; - tran_high_t temp1, temp2; - (void)bd; - - // stage 1 - step1[0] = input[15]; - step1[1] = input[7]; - step1[2] = input[11]; - step1[3] = input[3]; - step1[4] = input[13]; - step1[5] = input[5]; - step1[6] = input[9]; - step1[7] = input[1]; - step1[8] = input[14]; - step1[9] = input[6]; - step1[10] = input[10]; - step1[11] = input[2]; - step1[12] = input[12]; - step1[13] = input[4]; - step1[14] = input[8]; - step1[15] = input[0]; - - // stage 2 - step2[0] = step1[0]; - step2[1] = step1[1]; - step2[2] = step1[2]; - step2[3] = step1[3]; - step2[4] = step1[4]; - step2[5] = step1[5]; - step2[6] = step1[6]; - step2[7] = step1[7]; - - temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; - temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; - step2[8] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); - step2[15] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); - - temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; - temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; - step2[9] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); - step2[14] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); - - temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; - temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; - step2[10] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); - step2[13] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); - - temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; - temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; - step2[11] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); - step2[12] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); - - // stage 3 - step1[0] = step2[0]; - step1[1] = step2[1]; - step1[2] = step2[2]; - step1[3] = step2[3]; - - temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; - temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; - step1[4] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); - step1[7] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); - temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; - temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; - step1[5] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); - step1[6] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); - - step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[9], bd); - step1[9] = HIGHBD_WRAPLOW(step2[8] - step2[9], bd); - step1[10] = HIGHBD_WRAPLOW(-step2[10] + step2[11], bd); - step1[11] = HIGHBD_WRAPLOW(step2[10] + step2[11], bd); - step1[12] = HIGHBD_WRAPLOW(step2[12] + step2[13], bd); - step1[13] = HIGHBD_WRAPLOW(step2[12] - step2[13], bd); - step1[14] = HIGHBD_WRAPLOW(-step2[14] + step2[15], bd); - step1[15] = HIGHBD_WRAPLOW(step2[14] + step2[15], bd); - - // stage 4 - temp1 = (step1[0] + step1[1]) * cospi_16_64; - temp2 = (step1[0] - step1[1]) * cospi_16_64; - step2[0] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); - step2[1] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); - temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; - temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; - step2[2] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); - step2[3] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); - step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd); - step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd); - step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd); - step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd); - - step2[8] = step1[8]; - step2[15] = step1[15]; - temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; - temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; - step2[9] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); - step2[14] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); - temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; - temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; - step2[10] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); - step2[13] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); - step2[11] = step1[11]; - step2[12] = step1[12]; - - // stage 5 - step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[3], bd); - step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[2], bd); - step1[2] = HIGHBD_WRAPLOW(step2[1] - step2[2], bd); - step1[3] = HIGHBD_WRAPLOW(step2[0] - step2[3], bd); - step1[4] = step2[4]; - temp1 = (step2[6] - step2[5]) * cospi_16_64; - temp2 = (step2[5] + step2[6]) * cospi_16_64; - step1[5] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); - step1[6] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); - step1[7] = step2[7]; - - step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[11], bd); - step1[9] = HIGHBD_WRAPLOW(step2[9] + step2[10], bd); - step1[10] = HIGHBD_WRAPLOW(step2[9] - step2[10], bd); - step1[11] = HIGHBD_WRAPLOW(step2[8] - step2[11], bd); - step1[12] = HIGHBD_WRAPLOW(-step2[12] + step2[15], bd); - step1[13] = HIGHBD_WRAPLOW(-step2[13] + step2[14], bd); - step1[14] = HIGHBD_WRAPLOW(step2[13] + step2[14], bd); - step1[15] = HIGHBD_WRAPLOW(step2[12] + step2[15], bd); - - // stage 6 - step2[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd); - step2[1] = HIGHBD_WRAPLOW(step1[1] + step1[6], bd); - step2[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd); - step2[3] = HIGHBD_WRAPLOW(step1[3] + step1[4], bd); - step2[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd); - step2[5] = HIGHBD_WRAPLOW(step1[2] - step1[5], bd); - step2[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd); - step2[7] = HIGHBD_WRAPLOW(step1[0] - step1[7], bd); - step2[8] = step1[8]; - step2[9] = step1[9]; - temp1 = (-step1[10] + step1[13]) * cospi_16_64; - temp2 = (step1[10] + step1[13]) * cospi_16_64; - step2[10] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); - step2[13] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); - temp1 = (-step1[11] + step1[12]) * cospi_16_64; - temp2 = (step1[11] + step1[12]) * cospi_16_64; - step2[11] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); - step2[12] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); - step2[14] = step1[14]; - step2[15] = step1[15]; - - // stage 7 - output[0] = HIGHBD_WRAPLOW(step2[0] + step2[15], bd); - output[1] = HIGHBD_WRAPLOW(-step2[1] - step2[14], bd); - output[2] = HIGHBD_WRAPLOW(step2[2] + step2[13], bd); - output[3] = HIGHBD_WRAPLOW(-step2[3] - step2[12], bd); - output[4] = HIGHBD_WRAPLOW(step2[4] + step2[11], bd); - output[5] = HIGHBD_WRAPLOW(-step2[5] - step2[10], bd); - output[6] = HIGHBD_WRAPLOW(step2[6] + step2[9], bd); - output[7] = HIGHBD_WRAPLOW(-step2[7] - step2[8], bd); - output[8] = HIGHBD_WRAPLOW(step2[7] - step2[8], bd); - output[9] = HIGHBD_WRAPLOW(-step2[6] + step2[9], bd); - output[10] = HIGHBD_WRAPLOW(step2[5] - step2[10], bd); - output[11] = HIGHBD_WRAPLOW(-step2[4] + step2[11], bd); - output[12] = HIGHBD_WRAPLOW(step2[3] - step2[12], bd); - output[13] = HIGHBD_WRAPLOW(-step2[2] + step2[13], bd); - output[14] = HIGHBD_WRAPLOW(step2[1] - step2[14], bd); - output[15] = HIGHBD_WRAPLOW(-step2[0] + step2[15], bd); -} - static void highbd_inv_idtx_add_c(const tran_low_t *input, uint8_t *dest8, int stride, int bs, int tx_type, int bd) { int r, c; diff --git a/av1/encoder/dct.c b/av1/encoder/dct.c index 2cb0e80cd4872b36801ea47633f0b93c12f9e3c0..40f5bdf91a427aca946dec14796b646bd8b62375 100644 --- a/av1/encoder/dct.c +++ b/av1/encoder/dct.c @@ -12,13 +12,13 @@ #include <assert.h> #include <math.h> -#include "./av1_rtcd.h" #include "./aom_config.h" #include "./aom_dsp_rtcd.h" -#include "av1/common/blockd.h" -#include "av1/common/idct.h" +#include "./av1_rtcd.h" #include "aom_dsp/fwd_txfm.h" #include "aom_ports/mem.h" +#include "av1/common/blockd.h" +#include "av1/common/idct.h" static INLINE void range_check(const tran_low_t *input, const int size, const int bit) { @@ -1132,213 +1132,35 @@ static void maybe_flip_input(const int16_t **src, int *src_stride, int l, int w, } #endif // CONFIG_EXT_TX -static const transform_2d FHT_4[] = { - { fdct4, fdct4 }, // DCT_DCT - { fadst4, fdct4 }, // ADST_DCT - { fdct4, fadst4 }, // DCT_ADST - { fadst4, fadst4 }, // ADST_ADST -#if CONFIG_EXT_TX - { fadst4, fdct4 }, // FLIPADST_DCT - { fdct4, fadst4 }, // DCT_FLIPADST - { fadst4, fadst4 }, // FLIPADST_FLIPADST - { fadst4, fadst4 }, // ADST_FLIPADST - { fadst4, fadst4 }, // FLIPADST_ADST - { fidtx4, fidtx4 }, // IDTX - { fdct4, fidtx4 }, // V_DCT - { fidtx4, fdct4 }, // H_DCT - { fadst4, fidtx4 }, // V_ADST - { fidtx4, fadst4 }, // H_ADST - { fadst4, fidtx4 }, // V_FLIPADST - { fidtx4, fadst4 }, // H_FLIPADST -#endif // CONFIG_EXT_TX -}; - -static const transform_2d FHT_8[] = { - { fdct8, fdct8 }, // DCT_DCT - { fadst8, fdct8 }, // ADST_DCT - { fdct8, fadst8 }, // DCT_ADST - { fadst8, fadst8 }, // ADST_ADST -#if CONFIG_EXT_TX - { fadst8, fdct8 }, // FLIPADST_DCT - { fdct8, fadst8 }, // DCT_FLIPADST - { fadst8, fadst8 }, // FLIPADST_FLIPADST - { fadst8, fadst8 }, // ADST_FLIPADST - { fadst8, fadst8 }, // FLIPADST_ADST - { fidtx8, fidtx8 }, // IDTX - { fdct8, fidtx8 }, // V_DCT - { fidtx8, fdct8 }, // H_DCT - { fadst8, fidtx8 }, // V_ADST - { fidtx8, fadst8 }, // H_ADST - { fadst8, fidtx8 }, // V_FLIPADST - { fidtx8, fadst8 }, // H_FLIPADST -#endif // CONFIG_EXT_TX -}; - -static const transform_2d FHT_16[] = { - { fdct16, fdct16 }, // DCT_DCT - { fadst16, fdct16 }, // ADST_DCT - { fdct16, fadst16 }, // DCT_ADST - { fadst16, fadst16 }, // ADST_ADST -#if CONFIG_EXT_TX - { fadst16, fdct16 }, // FLIPADST_DCT - { fdct16, fadst16 }, // DCT_FLIPADST - { fadst16, fadst16 }, // FLIPADST_FLIPADST - { fadst16, fadst16 }, // ADST_FLIPADST - { fadst16, fadst16 }, // FLIPADST_ADST - { fidtx16, fidtx16 }, // IDTX - { fdct16, fidtx16 }, // V_DCT - { fidtx16, fdct16 }, // H_DCT - { fadst16, fidtx16 }, // V_ADST - { fidtx16, fadst16 }, // H_ADST - { fadst16, fidtx16 }, // V_FLIPADST - { fidtx16, fadst16 }, // H_FLIPADST -#endif // CONFIG_EXT_TX -}; - -#if CONFIG_EXT_TX -static const transform_2d FHT_32[] = { - { fdct32, fdct32 }, // DCT_DCT - { fhalfright32, fdct32 }, // ADST_DCT - { fdct32, fhalfright32 }, // DCT_ADST - { fhalfright32, fhalfright32 }, // ADST_ADST - { fhalfright32, fdct32 }, // FLIPADST_DCT - { fdct32, fhalfright32 }, // DCT_FLIPADST - { fhalfright32, fhalfright32 }, // FLIPADST_FLIPADST - { fhalfright32, fhalfright32 }, // ADST_FLIPADST - { fhalfright32, fhalfright32 }, // FLIPADST_ADST - { fidtx32, fidtx32 }, // IDTX - { fdct32, fidtx32 }, // V_DCT - { fidtx32, fdct32 }, // H_DCT - { fhalfright32, fidtx32 }, // V_ADST - { fidtx32, fhalfright32 }, // H_ADST - { fhalfright32, fidtx32 }, // V_FLIPADST - { fidtx32, fhalfright32 }, // H_FLIPADST -}; - -static const transform_2d FHT_4x8[] = { - { fdct8, fdct4 }, // DCT_DCT - { fadst8, fdct4 }, // ADST_DCT - { fdct8, fadst4 }, // DCT_ADST - { fadst8, fadst4 }, // ADST_ADST - { fadst8, fdct4 }, // FLIPADST_DCT - { fdct8, fadst4 }, // DCT_FLIPADST - { fadst8, fadst4 }, // FLIPADST_FLIPADST - { fadst8, fadst4 }, // ADST_FLIPADST - { fadst8, fadst4 }, // FLIPADST_ADST - { fidtx8, fidtx4 }, // IDTX - { fdct8, fidtx4 }, // V_DCT - { fidtx8, fdct4 }, // H_DCT - { fadst8, fidtx4 }, // V_ADST - { fidtx8, fadst4 }, // H_ADST - { fadst8, fidtx4 }, // V_FLIPADST - { fidtx8, fadst4 }, // H_FLIPADST -}; - -static const transform_2d FHT_8x4[] = { - { fdct4, fdct8 }, // DCT_DCT - { fadst4, fdct8 }, // ADST_DCT - { fdct4, fadst8 }, // DCT_ADST - { fadst4, fadst8 }, // ADST_ADST - { fadst4, fdct8 }, // FLIPADST_DCT - { fdct4, fadst8 }, // DCT_FLIPADST - { fadst4, fadst8 }, // FLIPADST_FLIPADST - { fadst4, fadst8 }, // ADST_FLIPADST - { fadst4, fadst8 }, // FLIPADST_ADST - { fidtx4, fidtx8 }, // IDTX - { fdct4, fidtx8 }, // V_DCT - { fidtx4, fdct8 }, // H_DCT - { fadst4, fidtx8 }, // V_ADST - { fidtx4, fadst8 }, // H_ADST - { fadst4, fidtx8 }, // V_FLIPADST - { fidtx4, fadst8 }, // H_FLIPADST -}; - -static const transform_2d FHT_8x16[] = { - { fdct16, fdct8 }, // DCT_DCT - { fadst16, fdct8 }, // ADST_DCT - { fdct16, fadst8 }, // DCT_ADST - { fadst16, fadst8 }, // ADST_ADST - { fadst16, fdct8 }, // FLIPADST_DCT - { fdct16, fadst8 }, // DCT_FLIPADST - { fadst16, fadst8 }, // FLIPADST_FLIPADST - { fadst16, fadst8 }, // ADST_FLIPADST - { fadst16, fadst8 }, // FLIPADST_ADST - { fidtx16, fidtx8 }, // IDTX - { fdct16, fidtx8 }, // V_DCT - { fidtx16, fdct8 }, // H_DCT - { fadst16, fidtx8 }, // V_ADST - { fidtx16, fadst8 }, // H_ADST - { fadst16, fidtx8 }, // V_FLIPADST - { fidtx16, fadst8 }, // H_FLIPADST -}; - -static const transform_2d FHT_16x8[] = { - { fdct8, fdct16 }, // DCT_DCT - { fadst8, fdct16 }, // ADST_DCT - { fdct8, fadst16 }, // DCT_ADST - { fadst8, fadst16 }, // ADST_ADST - { fadst8, fdct16 }, // FLIPADST_DCT - { fdct8, fadst16 }, // DCT_FLIPADST - { fadst8, fadst16 }, // FLIPADST_FLIPADST - { fadst8, fadst16 }, // ADST_FLIPADST - { fadst8, fadst16 }, // FLIPADST_ADST - { fidtx8, fidtx16 }, // IDTX - { fdct8, fidtx16 }, // V_DCT - { fidtx8, fdct16 }, // H_DCT - { fadst8, fidtx16 }, // V_ADST - { fidtx8, fadst16 }, // H_ADST - { fadst8, fidtx16 }, // V_FLIPADST - { fidtx8, fadst16 }, // H_FLIPADST -}; - -static const transform_2d FHT_16x32[] = { - { fdct32, fdct16 }, // DCT_DCT - { fhalfright32, fdct16 }, // ADST_DCT - { fdct32, fadst16 }, // DCT_ADST - { fhalfright32, fadst16 }, // ADST_ADST - { fhalfright32, fdct16 }, // FLIPADST_DCT - { fdct32, fadst16 }, // DCT_FLIPADST - { fhalfright32, fadst16 }, // FLIPADST_FLIPADST - { fhalfright32, fadst16 }, // ADST_FLIPADST - { fhalfright32, fadst16 }, // FLIPADST_ADST - { fidtx32, fidtx16 }, // IDTX - { fdct32, fidtx16 }, // V_DCT - { fidtx32, fdct16 }, // H_DCT - { fhalfright32, fidtx16 }, // V_ADST - { fidtx32, fadst16 }, // H_ADST - { fhalfright32, fidtx16 }, // V_FLIPADST - { fidtx32, fadst16 }, // H_FLIPADST -}; - -static const transform_2d FHT_32x16[] = { - { fdct16, fdct32 }, // DCT_DCT - { fadst16, fdct32 }, // ADST_DCT - { fdct16, fhalfright32 }, // DCT_ADST - { fadst16, fhalfright32 }, // ADST_ADST - { fadst16, fdct32 }, // FLIPADST_DCT - { fdct16, fhalfright32 }, // DCT_FLIPADST - { fadst16, fhalfright32 }, // FLIPADST_FLIPADST - { fadst16, fhalfright32 }, // ADST_FLIPADST - { fadst16, fhalfright32 }, // FLIPADST_ADST - { fidtx16, fidtx32 }, // IDTX - { fdct16, fidtx32 }, // V_DCT - { fidtx16, fdct32 }, // H_DCT - { fadst16, fidtx32 }, // V_ADST - { fidtx16, fhalfright32 }, // H_ADST - { fadst16, fidtx32 }, // V_FLIPADST - { fidtx16, fhalfright32 }, // H_FLIPADST -}; -#endif // CONFIG_EXT_TX - void av1_fht4x4_c(const int16_t *input, tran_low_t *output, int stride, int tx_type) { if (tx_type == DCT_DCT) { aom_fdct4x4_c(input, output, stride); } else { + static const transform_2d FHT[] = { + { fdct4, fdct4 }, // DCT_DCT + { fadst4, fdct4 }, // ADST_DCT + { fdct4, fadst4 }, // DCT_ADST + { fadst4, fadst4 }, // ADST_ADST +#if CONFIG_EXT_TX + { fadst4, fdct4 }, // FLIPADST_DCT + { fdct4, fadst4 }, // DCT_FLIPADST + { fadst4, fadst4 }, // FLIPADST_FLIPADST + { fadst4, fadst4 }, // ADST_FLIPADST + { fadst4, fadst4 }, // FLIPADST_ADST + { fidtx4, fidtx4 }, // IDTX + { fdct4, fidtx4 }, // V_DCT + { fidtx4, fdct4 }, // H_DCT + { fadst4, fidtx4 }, // V_ADST + { fidtx4, fadst4 }, // H_ADST + { fadst4, fidtx4 }, // V_FLIPADST + { fidtx4, fadst4 }, // H_FLIPADST +#endif // CONFIG_EXT_TX + }; + const transform_2d ht = FHT[tx_type]; tran_low_t out[4 * 4]; int i, j; tran_low_t temp_in[4], temp_out[4]; - const transform_2d ht = FHT_4[tx_type]; #if CONFIG_EXT_TX int16_t flipped_input[4 * 4]; @@ -1365,12 +1187,30 @@ void av1_fht4x4_c(const int16_t *input, tran_low_t *output, int stride, #if CONFIG_EXT_TX void av1_fht4x8_c(const int16_t *input, tran_low_t *output, int stride, int tx_type) { + static const transform_2d FHT[] = { + { fdct8, fdct4 }, // DCT_DCT + { fadst8, fdct4 }, // ADST_DCT + { fdct8, fadst4 }, // DCT_ADST + { fadst8, fadst4 }, // ADST_ADST + { fadst8, fdct4 }, // FLIPADST_DCT + { fdct8, fadst4 }, // DCT_FLIPADST + { fadst8, fadst4 }, // FLIPADST_FLIPADST + { fadst8, fadst4 }, // ADST_FLIPADST + { fadst8, fadst4 }, // FLIPADST_ADST + { fidtx8, fidtx4 }, // IDTX + { fdct8, fidtx4 }, // V_DCT + { fidtx8, fdct4 }, // H_DCT + { fadst8, fidtx4 }, // V_ADST + { fidtx8, fadst4 }, // H_ADST + { fadst8, fidtx4 }, // V_FLIPADST + { fidtx8, fadst4 }, // H_FLIPADST + }; + const transform_2d ht = FHT[tx_type]; const int n = 4; const int n2 = 8; tran_low_t out[8 * 4]; tran_low_t temp_in[8], temp_out[8]; int i, j; - const transform_2d ht = FHT_4x8[tx_type]; int16_t flipped_input[8 * 4]; maybe_flip_input(&input, &stride, n2, n, flipped_input, tx_type); @@ -1394,12 +1234,30 @@ void av1_fht4x8_c(const int16_t *input, tran_low_t *output, int stride, void av1_fht8x4_c(const int16_t *input, tran_low_t *output, int stride, int tx_type) { + static const transform_2d FHT[] = { + { fdct4, fdct8 }, // DCT_DCT + { fadst4, fdct8 }, // ADST_DCT + { fdct4, fadst8 }, // DCT_ADST + { fadst4, fadst8 }, // ADST_ADST + { fadst4, fdct8 }, // FLIPADST_DCT + { fdct4, fadst8 }, // DCT_FLIPADST + { fadst4, fadst8 }, // FLIPADST_FLIPADST + { fadst4, fadst8 }, // ADST_FLIPADST + { fadst4, fadst8 }, // FLIPADST_ADST + { fidtx4, fidtx8 }, // IDTX + { fdct4, fidtx8 }, // V_DCT + { fidtx4, fdct8 }, // H_DCT + { fadst4, fidtx8 }, // V_ADST + { fidtx4, fadst8 }, // H_ADST + { fadst4, fidtx8 }, // V_FLIPADST + { fidtx4, fadst8 }, // H_FLIPADST + }; + const transform_2d ht = FHT[tx_type]; const int n = 4; const int n2 = 8; tran_low_t out[8 * 4]; tran_low_t temp_in[8], temp_out[8]; int i, j; - const transform_2d ht = FHT_8x4[tx_type]; int16_t flipped_input[8 * 4]; maybe_flip_input(&input, &stride, n, n2, flipped_input, tx_type); @@ -1423,12 +1281,30 @@ void av1_fht8x4_c(const int16_t *input, tran_low_t *output, int stride, void av1_fht8x16_c(const int16_t *input, tran_low_t *output, int stride, int tx_type) { + static const transform_2d FHT[] = { + { fdct16, fdct8 }, // DCT_DCT + { fadst16, fdct8 }, // ADST_DCT + { fdct16, fadst8 }, // DCT_ADST + { fadst16, fadst8 }, // ADST_ADST + { fadst16, fdct8 }, // FLIPADST_DCT + { fdct16, fadst8 }, // DCT_FLIPADST + { fadst16, fadst8 }, // FLIPADST_FLIPADST + { fadst16, fadst8 }, // ADST_FLIPADST + { fadst16, fadst8 }, // FLIPADST_ADST + { fidtx16, fidtx8 }, // IDTX + { fdct16, fidtx8 }, // V_DCT + { fidtx16, fdct8 }, // H_DCT + { fadst16, fidtx8 }, // V_ADST + { fidtx16, fadst8 }, // H_ADST + { fadst16, fidtx8 }, // V_FLIPADST + { fidtx16, fadst8 }, // H_FLIPADST + }; + const transform_2d ht = FHT[tx_type]; const int n = 8; const int n2 = 16; tran_low_t out[16 * 8]; tran_low_t temp_in[16], temp_out[16]; int i, j; - const transform_2d ht = FHT_8x16[tx_type]; int16_t flipped_input[16 * 8]; maybe_flip_input(&input, &stride, n2, n, flipped_input, tx_type); @@ -1452,12 +1328,30 @@ void av1_fht8x16_c(const int16_t *input, tran_low_t *output, int stride, void av1_fht16x8_c(const int16_t *input, tran_low_t *output, int stride, int tx_type) { + static const transform_2d FHT[] = { + { fdct8, fdct16 }, // DCT_DCT + { fadst8, fdct16 }, // ADST_DCT + { fdct8, fadst16 }, // DCT_ADST + { fadst8, fadst16 }, // ADST_ADST + { fadst8, fdct16 }, // FLIPADST_DCT + { fdct8, fadst16 }, // DCT_FLIPADST + { fadst8, fadst16 }, // FLIPADST_FLIPADST + { fadst8, fadst16 }, // ADST_FLIPADST + { fadst8, fadst16 }, // FLIPADST_ADST + { fidtx8, fidtx16 }, // IDTX + { fdct8, fidtx16 }, // V_DCT + { fidtx8, fdct16 }, // H_DCT + { fadst8, fidtx16 }, // V_ADST + { fidtx8, fadst16 }, // H_ADST + { fadst8, fidtx16 }, // V_FLIPADST + { fidtx8, fadst16 }, // H_FLIPADST + }; + const transform_2d ht = FHT[tx_type]; const int n = 8; const int n2 = 16; tran_low_t out[16 * 8]; tran_low_t temp_in[16], temp_out[16]; int i, j; - const transform_2d ht = FHT_16x8[tx_type]; int16_t flipped_input[16 * 8]; maybe_flip_input(&input, &stride, n, n2, flipped_input, tx_type); @@ -1481,12 +1375,30 @@ void av1_fht16x8_c(const int16_t *input, tran_low_t *output, int stride, void av1_fht16x32_c(const int16_t *input, tran_low_t *output, int stride, int tx_type) { + static const transform_2d FHT[] = { + { fdct32, fdct16 }, // DCT_DCT + { fhalfright32, fdct16 }, // ADST_DCT + { fdct32, fadst16 }, // DCT_ADST + { fhalfright32, fadst16 }, // ADST_ADST + { fhalfright32, fdct16 }, // FLIPADST_DCT + { fdct32, fadst16 }, // DCT_FLIPADST + { fhalfright32, fadst16 }, // FLIPADST_FLIPADST + { fhalfright32, fadst16 }, // ADST_FLIPADST + { fhalfright32, fadst16 }, // FLIPADST_ADST + { fidtx32, fidtx16 }, // IDTX + { fdct32, fidtx16 }, // V_DCT + { fidtx32, fdct16 }, // H_DCT + { fhalfright32, fidtx16 }, // V_ADST + { fidtx32, fadst16 }, // H_ADST + { fhalfright32, fidtx16 }, // V_FLIPADST + { fidtx32, fadst16 }, // H_FLIPADST + }; + const transform_2d ht = FHT[tx_type]; const int n = 16; const int n2 = 32; tran_low_t out[32 * 16]; tran_low_t temp_in[32], temp_out[32]; int i, j; - const transform_2d ht = FHT_16x32[tx_type]; int16_t flipped_input[32 * 16]; maybe_flip_input(&input, &stride, n2, n, flipped_input, tx_type); @@ -1511,12 +1423,30 @@ void av1_fht16x32_c(const int16_t *input, tran_low_t *output, int stride, void av1_fht32x16_c(const int16_t *input, tran_low_t *output, int stride, int tx_type) { + static const transform_2d FHT[] = { + { fdct16, fdct32 }, // DCT_DCT + { fadst16, fdct32 }, // ADST_DCT + { fdct16, fhalfright32 }, // DCT_ADST + { fadst16, fhalfright32 }, // ADST_ADST + { fadst16, fdct32 }, // FLIPADST_DCT + { fdct16, fhalfright32 }, // DCT_FLIPADST + { fadst16, fhalfright32 }, // FLIPADST_FLIPADST + { fadst16, fhalfright32 }, // ADST_FLIPADST + { fadst16, fhalfright32 }, // FLIPADST_ADST + { fidtx16, fidtx32 }, // IDTX + { fdct16, fidtx32 }, // V_DCT + { fidtx16, fdct32 }, // H_DCT + { fadst16, fidtx32 }, // V_ADST + { fidtx16, fhalfright32 }, // H_ADST + { fadst16, fidtx32 }, // V_FLIPADST + { fidtx16, fhalfright32 }, // H_FLIPADST + }; + const transform_2d ht = FHT[tx_type]; const int n = 16; const int n2 = 32; tran_low_t out[32 * 16]; tran_low_t temp_in[32], temp_out[32]; int i, j; - const transform_2d ht = FHT_32x16[tx_type]; int16_t flipped_input[32 * 16]; maybe_flip_input(&input, &stride, n, n2, flipped_input, tx_type); @@ -1672,10 +1602,30 @@ void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride, if (tx_type == DCT_DCT) { aom_fdct8x8_c(input, output, stride); } else { + static const transform_2d FHT[] = { + { fdct8, fdct8 }, // DCT_DCT + { fadst8, fdct8 }, // ADST_DCT + { fdct8, fadst8 }, // DCT_ADST + { fadst8, fadst8 }, // ADST_ADST +#if CONFIG_EXT_TX + { fadst8, fdct8 }, // FLIPADST_DCT + { fdct8, fadst8 }, // DCT_FLIPADST + { fadst8, fadst8 }, // FLIPADST_FLIPADST + { fadst8, fadst8 }, // ADST_FLIPADST + { fadst8, fadst8 }, // FLIPADST_ADST + { fidtx8, fidtx8 }, // IDTX + { fdct8, fidtx8 }, // V_DCT + { fidtx8, fdct8 }, // H_DCT + { fadst8, fidtx8 }, // V_ADST + { fidtx8, fadst8 }, // H_ADST + { fadst8, fidtx8 }, // V_FLIPADST + { fidtx8, fadst8 }, // H_FLIPADST +#endif // CONFIG_EXT_TX + }; + const transform_2d ht = FHT[tx_type]; tran_low_t out[64]; int i, j; tran_low_t temp_in[8], temp_out[8]; - const transform_2d ht = FHT_8[tx_type]; #if CONFIG_EXT_TX int16_t flipped_input[8 * 8]; @@ -1760,10 +1710,30 @@ void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, if (tx_type == DCT_DCT) { aom_fdct16x16_c(input, output, stride); } else { + static const transform_2d FHT[] = { + { fdct16, fdct16 }, // DCT_DCT + { fadst16, fdct16 }, // ADST_DCT + { fdct16, fadst16 }, // DCT_ADST + { fadst16, fadst16 }, // ADST_ADST +#if CONFIG_EXT_TX + { fadst16, fdct16 }, // FLIPADST_DCT + { fdct16, fadst16 }, // DCT_FLIPADST + { fadst16, fadst16 }, // FLIPADST_FLIPADST + { fadst16, fadst16 }, // ADST_FLIPADST + { fadst16, fadst16 }, // FLIPADST_ADST + { fidtx16, fidtx16 }, // IDTX + { fdct16, fidtx16 }, // V_DCT + { fidtx16, fdct16 }, // H_DCT + { fadst16, fidtx16 }, // V_ADST + { fidtx16, fadst16 }, // H_ADST + { fadst16, fidtx16 }, // V_FLIPADST + { fidtx16, fadst16 }, // H_FLIPADST +#endif // CONFIG_EXT_TX + }; + const transform_2d ht = FHT[tx_type]; tran_low_t out[256]; int i, j; tran_low_t temp_in[16], temp_out[16]; - const transform_2d ht = FHT_16[tx_type]; #if CONFIG_EXT_TX int16_t flipped_input[16 * 16]; @@ -1847,10 +1817,28 @@ void av1_fht32x32_c(const int16_t *input, tran_low_t *output, int stride, if (tx_type == DCT_DCT) { aom_fdct32x32_c(input, output, stride); } else { + static const transform_2d FHT[] = { + { fdct32, fdct32 }, // DCT_DCT + { fhalfright32, fdct32 }, // ADST_DCT + { fdct32, fhalfright32 }, // DCT_ADST + { fhalfright32, fhalfright32 }, // ADST_ADST + { fhalfright32, fdct32 }, // FLIPADST_DCT + { fdct32, fhalfright32 }, // DCT_FLIPADST + { fhalfright32, fhalfright32 }, // FLIPADST_FLIPADST + { fhalfright32, fhalfright32 }, // ADST_FLIPADST + { fhalfright32, fhalfright32 }, // FLIPADST_ADST + { fidtx32, fidtx32 }, // IDTX + { fdct32, fidtx32 }, // V_DCT + { fidtx32, fdct32 }, // H_DCT + { fhalfright32, fidtx32 }, // V_ADST + { fidtx32, fhalfright32 }, // H_ADST + { fhalfright32, fidtx32 }, // V_FLIPADST + { fidtx32, fhalfright32 }, // H_FLIPADST + }; + const transform_2d ht = FHT[tx_type]; tran_low_t out[1024]; int i, j; tran_low_t temp_in[32], temp_out[32]; - const transform_2d ht = FHT_32[tx_type]; int16_t flipped_input[32 * 32]; maybe_flip_input(&input, &stride, 32, 32, flipped_input, tx_type);