Commit ad396850 authored by Monty Montgomery, committed by Christopher Montgomery

Add Daala TX to 16x32 and 32x16 transforms

Rectangular 16x32 and 32x16 will now use Daala TX when CONFIG_DAALA_TX16 and
CONFIG_DAALA_TX32 are both enabled.

Change-Id: Iab3737605fa10dc09ceab18856a26165c502e6e5
parent 7eb4454b
@@ -1696,6 +1696,26 @@ void av1_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
assert(tx_type == DCT_DCT);
#endif
static const transform_2d IHT_16x32[] = {
#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
{ daala_idct32, daala_idct16 }, // DCT_DCT = 0
{ daala_idst32, daala_idct16 }, // ADST_DCT = 1
{ daala_idct32, daala_idst16 }, // DCT_ADST = 2
{ daala_idst32, daala_idst16 }, // ADST_ADST = 3
#if CONFIG_EXT_TX
{ daala_idst32, daala_idct16 }, // FLIPADST_DCT
{ daala_idct32, daala_idst16 }, // DCT_FLIPADST
{ daala_idst32, daala_idst16 }, // FLIPADST_FLIPADST
{ daala_idst32, daala_idst16 }, // ADST_FLIPADST
{ daala_idst32, daala_idst16 }, // FLIPADST_ADST
{ daala_idtx32, daala_idtx16 }, // IDTX
{ daala_idct32, daala_idtx16 }, // V_DCT
{ daala_idtx32, daala_idct16 }, // H_DCT
{ daala_idst32, daala_idtx16 }, // V_ADST
{ daala_idtx32, daala_idst16 }, // H_ADST
{ daala_idst32, daala_idtx16 }, // V_FLIPADST
{ daala_idtx32, daala_idst16 }, // H_FLIPADST
#endif
#else
{ aom_idct32_c, aom_idct16_c }, // DCT_DCT
{ ihalfright32_c, aom_idct16_c }, // ADST_DCT
{ aom_idct32_c, aom_iadst16_c }, // DCT_ADST
@@ -1713,6 +1733,7 @@ void av1_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
{ iidtx32_c, aom_iadst16_c }, // H_ADST
{ ihalfright32_c, iidtx16_c }, // V_FLIPADST
{ iidtx32_c, aom_iadst16_c }, // H_FLIPADST
#endif
#endif
};
@@ -1725,9 +1746,16 @@ void av1_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
// inverse transform row vectors and transpose
for (i = 0; i < n2; ++i) {
#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
tran_low_t temp_in[16];
for (j = 0; j < n; j++) temp_in[j] = input[j] * 2;
IHT_16x32[tx_type].rows(temp_in, outtmp);
for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j] * 4;
#else
IHT_16x32[tx_type].rows(input, outtmp);
for (j = 0; j < n; ++j)
tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
#endif
input += n;
}
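A note on the row pass above: the default path applies the rectangular sqrt(2) correction as one fixed-point multiply (Sqrt2 is the Q12 constant 5793 in the av1 helpers, and dct_const_round_shift drops 14 bits), while the Daala path replaces it with plain power-of-two gains, a factor of 2 on input and 4 on output, since the Daala kernels carry the fractional precision internally. A minimal, self-contained sketch of the two scalings; the constants are written to match the av1 fixed-point helpers, but the sample value and the program around them are illustrative only:

```c
#include <stdint.h>
#include <stdio.h>

#define Sqrt2 5793 /* sqrt(2) in Q12, matching the av1 headers */
#define DCT_CONST_BITS 14

/* Rounding shift used by the default (non-Daala) transform glue. */
static int32_t dct_const_round_shift(int64_t x) {
  return (int32_t)((x + (1 << (DCT_CONST_BITS - 1))) >> DCT_CONST_BITS);
}

int main(void) {
  int32_t c = 1024; /* arbitrary row-transform output coefficient */
  /* Default path: one Q14 multiply folds in the sqrt(2) gain. */
  int32_t def = dct_const_round_shift((int64_t)c * Sqrt2);
  /* Daala path: outputs are post-scaled by 4 instead (the matching
   * pre-scale by 2 happens before the .rows() call in the diff). */
  int32_t daala = c * 4;
  printf("sqrt2 rescale: %d, power-of-two rescale: %d\n", def, daala);
  return 0;
}
```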
@@ -1743,7 +1771,11 @@ void av1_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
for (j = 0; j < n; ++j) {
int d = i * stride + j;
int s = j * outstride + i;
#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
#else
dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
#endif
}
}
}
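The final reconstruction stage differs by one bit of downshift (5 instead of 6) because the Daala glue feeds the transforms with more power-of-two gain. The macros behave as in aom_dsp; this standalone sketch, with arbitrary sample values, shows what that one-bit difference means at the add-to-destination step:

```c
#include <stdint.h>
#include <stdio.h>

/* Rounding right shift and saturating pixel add, as in aom_dsp. */
#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))

static uint8_t clip_pixel(int val) {
  return (uint8_t)(val > 255 ? 255 : (val < 0 ? 0 : val));
}
static uint8_t clip_pixel_add(uint8_t dest, int trans) {
  return clip_pixel(dest + trans);
}

int main(void) {
  int residue = 2000;  /* arbitrary column-transform output */
  uint8_t pixel = 100; /* arbitrary predictor pixel */
  /* Daala path shifts by 5; the default path shifts by 6. */
  printf("shift 5 -> %u\n", clip_pixel_add(pixel, ROUND_POWER_OF_TWO(residue, 5)));
  printf("shift 6 -> %u\n", clip_pixel_add(pixel, ROUND_POWER_OF_TWO(residue, 6)));
  return 0;
}
```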
@@ -1758,6 +1790,26 @@ void av1_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
assert(tx_type == DCT_DCT);
#endif
static const transform_2d IHT_32x16[] = {
#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
{ daala_idct16, daala_idct32 }, // DCT_DCT = 0
{ daala_idst16, daala_idct32 }, // ADST_DCT = 1
{ daala_idct16, daala_idst32 }, // DCT_ADST = 2
{ daala_idst16, daala_idst32 }, // ADST_ADST = 3
#if CONFIG_EXT_TX
{ daala_idst16, daala_idct32 }, // FLIPADST_DCT
{ daala_idct16, daala_idst32 }, // DCT_FLIPADST
{ daala_idst16, daala_idst32 }, // FLIPADST_FLIPADST
{ daala_idst16, daala_idst32 }, // ADST_FLIPADST
{ daala_idst16, daala_idst32 }, // FLIPADST_ADST
{ daala_idtx16, daala_idtx32 }, // IDTX
{ daala_idct16, daala_idtx32 }, // V_DCT
{ daala_idtx16, daala_idct32 }, // H_DCT
{ daala_idst16, daala_idtx32 }, // V_ADST
{ daala_idtx16, daala_idst32 }, // H_ADST
{ daala_idst16, daala_idtx32 }, // V_FLIPADST
{ daala_idtx16, daala_idst32 }, // H_FLIPADST
#endif
#else
{ aom_idct16_c, aom_idct32_c }, // DCT_DCT
{ aom_iadst16_c, aom_idct32_c }, // ADST_DCT
{ aom_idct16_c, ihalfright32_c }, // DCT_ADST
@@ -1775,6 +1827,7 @@ void av1_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
{ iidtx16_c, ihalfright32_c }, // H_ADST
{ aom_iadst16_c, iidtx32_c }, // V_FLIPADST
{ iidtx16_c, ihalfright32_c }, // H_FLIPADST
#endif
#endif
};
const int n = 16;
@@ -1787,9 +1840,16 @@ void av1_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
// inverse transform row vectors and transpose
for (i = 0; i < n; ++i) {
#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
tran_low_t temp_in[32];
for (j = 0; j < n2; j++) temp_in[j] = input[j] * 2;
IHT_32x16[tx_type].rows(temp_in, outtmp);
for (j = 0; j < n2; ++j) tmp[j][i] = outtmp[j] * 4;
#else
IHT_32x16[tx_type].rows(input, outtmp);
for (j = 0; j < n2; ++j)
tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
#endif
input += n2;
}
@@ -1805,7 +1865,11 @@ void av1_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
for (j = 0; j < n2; ++j) {
int d = i * stride + j;
int s = j * outstride + i;
#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
#else
dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
#endif
}
}
}
@@ -2623,12 +2687,20 @@ static void inv_txfm_add_16x8(const tran_low_t *input, uint8_t *dest,
static void inv_txfm_add_16x32(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
av1_iht16x32_512_add_c(input, dest, stride, txfm_param);
#else
av1_iht16x32_512_add(input, dest, stride, txfm_param);
#endif
}
static void inv_txfm_add_32x16(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
av1_iht32x16_512_add_c(input, dest, stride, txfm_param);
#else
av1_iht32x16_512_add(input, dest, stride, txfm_param);
#endif
}
#if CONFIG_TX64X64
...
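On the `_c`-suffixed calls in this hunk: the un-suffixed `av1_iht16x32_512_add` and `av1_iht32x16_512_add` names resolve through run-time CPU dispatch to optimized kernels, which presumably implement only the default scaling, so the Daala build pins the C reference versions. Below is a hypothetical, self-contained sketch of that kind of dispatch and why a feature confined to the C kernel must bypass it; every name here is invented for illustration, not aom source:

```c
#include <stdio.h>

/* Hypothetical stand-ins for a portable C kernel and an optimized one. */
static int kernel_c(int x) { return 2 * x; }
static int kernel_simd(int x) { return 2 * x; } /* no Daala support here */

/* Hypothetical rtcd-style selector: picks the fastest available kernel. */
typedef int (*kernel_fn)(int);
static kernel_fn select_kernel(int have_simd) {
  return have_simd ? kernel_simd : kernel_c;
}

int main(void) {
  kernel_fn k = select_kernel(1);
  /* A caller needing C-only behavior (here, the Daala path) calls
   * kernel_c directly instead of going through the selector, as this
   * diff does with the _c-suffixed transform functions. */
  printf("%d %d\n", k(21), kernel_c(21));
  return 0;
}
```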
@@ -2273,6 +2273,26 @@ void av1_fht16x32_c(const int16_t *input, tran_low_t *output, int stride,
assert(tx_type == DCT_DCT);
#endif
static const transform_2d FHT[] = {
#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
{ daala_fdct32, daala_fdct16 }, // DCT_DCT
{ daala_fdst32, daala_fdct16 }, // ADST_DCT
{ daala_fdct32, daala_fdst16 }, // DCT_ADST
{ daala_fdst32, daala_fdst16 }, // ADST_ADST
#if CONFIG_EXT_TX
{ daala_fdst32, daala_fdct16 }, // FLIPADST_DCT
{ daala_fdct32, daala_fdst16 }, // DCT_FLIPADST
{ daala_fdst32, daala_fdst16 }, // FLIPADST_FLIPADST
{ daala_fdst32, daala_fdst16 }, // ADST_FLIPADST
{ daala_fdst32, daala_fdst16 }, // FLIPADST_ADST
{ daala_idtx32, daala_idtx16 }, // IDTX
{ daala_fdct32, daala_idtx16 }, // V_DCT
{ daala_idtx32, daala_fdct16 }, // H_DCT
{ daala_fdst32, daala_idtx16 }, // V_ADST
{ daala_idtx32, daala_fdst16 }, // H_ADST
{ daala_fdst32, daala_idtx16 }, // V_FLIPADST
{ daala_idtx32, daala_fdst16 }, // H_FLIPADST
#endif
#else
{ fdct32, fdct16 }, // DCT_DCT
{ fhalfright32, fdct16 }, // ADST_DCT
{ fdct32, fadst16 }, // DCT_ADST
@@ -2290,6 +2310,7 @@ void av1_fht16x32_c(const int16_t *input, tran_low_t *output, int stride,
{ fidtx32, fadst16 }, // H_ADST
{ fhalfright32, fidtx16 }, // V_FLIPADST
{ fidtx32, fadst16 }, // H_FLIPADST
#endif
#endif
};
const transform_2d ht = FHT[tx_type];
@@ -2305,12 +2326,22 @@ void av1_fht16x32_c(const int16_t *input, tran_low_t *output, int stride,
// Rows
for (i = 0; i < n2; ++i) {
for (j = 0; j < n; ++j) {
#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
temp_in[j] = input[i * stride + j] * 16;
#else
temp_in[j] =
(tran_low_t)fdct_round_shift(input[i * stride + j] * 4 * Sqrt2);
#endif
}
ht.rows(temp_in, temp_out);
for (j = 0; j < n; ++j) {
#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
out[j * n2 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
#else
out[j * n2 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 4);
#endif
}
}
// Columns
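The forward glue mirrors the inverse: the default path pre-scales each residual with `fdct_round_shift(input * 4 * Sqrt2)`, which works out to roughly a sqrt(2) gain after the 14-bit shift, and drops 4 bits on output, while the Daala path uses a plain factor of 16 in and a 2-bit shift out. A small standalone comparison of the two pre-scales; constants are written to match the av1 helpers, and the sample value is arbitrary:

```c
#include <stdint.h>
#include <stdio.h>

#define Sqrt2 5793 /* sqrt(2) in Q12, matching the av1 headers */
#define DCT_CONST_BITS 14

static int32_t fdct_round_shift(int64_t x) {
  return (int32_t)((x + (1 << (DCT_CONST_BITS - 1))) >> DCT_CONST_BITS);
}

int main(void) {
  int16_t sample = 50; /* arbitrary prediction residual */
  /* Default path: fold the rectangular sqrt(2) gain into the pre-scale. */
  int32_t def = fdct_round_shift((int64_t)sample * 4 * Sqrt2);
  /* Daala path: four plain bits of headroom, no irrational factor. */
  int32_t daala = sample * 16;
  printf("default pre-scale: %d, daala pre-scale: %d\n", def, daala);
  return 0;
}
```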
@@ -2332,6 +2363,26 @@ void av1_fht32x16_c(const int16_t *input, tran_low_t *output, int stride,
assert(tx_type == DCT_DCT);
#endif
static const transform_2d FHT[] = {
#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
{ daala_fdct16, daala_fdct32 }, // DCT_DCT
{ daala_fdst16, daala_fdct32 }, // ADST_DCT
{ daala_fdct16, daala_fdst32 }, // DCT_ADST
{ daala_fdst16, daala_fdst32 }, // ADST_ADST
#if CONFIG_EXT_TX
{ daala_fdst16, daala_fdct32 }, // FLIPADST_DCT
{ daala_fdct16, daala_fdst32 }, // DCT_FLIPADST
{ daala_fdst16, daala_fdst32 }, // FLIPADST_FLIPADST
{ daala_fdst16, daala_fdst32 }, // ADST_FLIPADST
{ daala_fdst16, daala_fdst32 }, // FLIPADST_ADST
{ daala_idtx16, daala_idtx32 }, // IDTX
{ daala_fdct16, daala_idtx32 }, // V_DCT
{ daala_idtx16, daala_fdct32 }, // H_DCT
{ daala_fdst16, daala_idtx32 }, // V_ADST
{ daala_idtx16, daala_fdst32 }, // H_ADST
{ daala_fdst16, daala_idtx32 }, // V_FLIPADST
{ daala_idtx16, daala_fdst32 }, // H_FLIPADST
#endif
#else
{ fdct16, fdct32 }, // DCT_DCT
{ fadst16, fdct32 }, // ADST_DCT
{ fdct16, fhalfright32 }, // DCT_ADST
@@ -2349,6 +2400,7 @@ void av1_fht32x16_c(const int16_t *input, tran_low_t *output, int stride,
{ fidtx16, fhalfright32 }, // H_ADST
{ fadst16, fidtx32 }, // V_FLIPADST
{ fidtx16, fhalfright32 }, // H_FLIPADST
#endif
#endif
};
const transform_2d ht = FHT[tx_type];
@@ -2364,12 +2416,22 @@ void av1_fht32x16_c(const int16_t *input, tran_low_t *output, int stride,
// Columns
for (i = 0; i < n2; ++i) {
for (j = 0; j < n; ++j) {
#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
temp_in[j] = input[j * stride + i] * 16;
#else
temp_in[j] =
(tran_low_t)fdct_round_shift(input[j * stride + i] * 4 * Sqrt2);
#endif
}
ht.cols(temp_in, temp_out);
for (j = 0; j < n; ++j) {
#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
out[j * n2 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
#else
out[j * n2 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 4);
#endif
}
}
// Rows
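Both forward paths store results through `ROUND_POWER_OF_TWO_SIGNED`, which rounds magnitudes symmetrically away from zero rather than doing a bare arithmetic shift; the Daala path drops 2 bits here against the default's 4. A minimal sketch of that behavior, with macros written to match the aom_dsp definitions and arbitrary sample values:

```c
#include <stdio.h>

/* Written to match the aom_dsp macros: a rounding right shift and a
 * signed wrapper that rounds magnitudes symmetrically about zero. */
#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))
#define ROUND_POWER_OF_TWO_SIGNED(value, n)           \
  (((value) < 0) ? -ROUND_POWER_OF_TWO(-(value), (n)) \
                 : ROUND_POWER_OF_TWO((value), (n)))

int main(void) {
  /* Daala path drops 2 bits, default path drops 4. */
  printf("%d %d\n", ROUND_POWER_OF_TWO_SIGNED(-21, 2),  /* -> -5 */
         ROUND_POWER_OF_TWO_SIGNED(-21, 4));            /* -> -1 */
  /* A bare arithmetic shift would typically give -21 >> 2 == -6,
   * i.e. rounding toward minus infinity instead. */
  return 0;
}
```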
...
@@ -70,12 +70,20 @@ static void fwd_txfm_16x8(const int16_t *src_diff, tran_low_t *coeff,
static void fwd_txfm_16x32(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TxfmParam *txfm_param) {
#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
av1_fht16x32_c(src_diff, coeff, diff_stride, txfm_param);
#else
av1_fht16x32(src_diff, coeff, diff_stride, txfm_param);
#endif
}
static void fwd_txfm_32x16(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TxfmParam *txfm_param) {
#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
av1_fht32x16_c(src_diff, coeff, diff_stride, txfm_param);
#else
av1_fht32x16(src_diff, coeff, diff_stride, txfm_param);
#endif
}
static void fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
...