Commit 7eb4454b authored by Monty Montgomery, committed by Christopher Montgomery

Add Daala TX to 8x16 and 16x8 transforms

Rectangular 8x16 and 16x8 will now use Daala TX when CONFIG_DAALA_TX8 and
CONFIG_DAALA_TX16 are both enabled.

Change-Id: I777d5433addb8ffd4a99f7e021768d4f8651008f
parent abd94510
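
Each rewritten function below opens with a "Multi-way scaling matrix" comment that budgets, in bits, every scaling stage (input, row transform, transpose/mid, column transform, output) on every configuration path; the paths are interchangeable only because each one nets the same total. As a reading aid, here is a minimal standalone C sketch (not part of the patch; the function name is made up) that re-adds the budgets from the 8x16 inverse table:

#include <assert.h>

/* Hypothetical sanity check: the usable rows of the iht8x16 scaling table
 * below, summed. Every path must net -3 bits, or the #if branches would
 * produce differently scaled residuals. */
static void check_iht8x16_budget(void) {
  assert(0 + 1 + 0.5 + 1.5 - 6 == -3); /* case 1: LGT/AV1 row, AV1 col */
  assert(0 + 1 + 0.0 + 0.0 - 4 == -3); /* case 2: LGT row, Daala col */
  assert(1 + 0 + 0.0 + 0.0 - 4 == -3); /* case 4: Daala row and col */
}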
@@ -1275,6 +1275,26 @@ void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
assert(tx_type == DCT_DCT);
#endif
static const transform_2d IHT_8x16[] = {
#if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16
{ daala_idct16, daala_idct8 }, // DCT_DCT = 0
{ daala_idst16, daala_idct8 }, // ADST_DCT = 1
{ daala_idct16, daala_idst8 }, // DCT_ADST = 2
{ daala_idst16, daala_idst8 }, // ADST_ADST = 3
#if CONFIG_EXT_TX
{ daala_idst16, daala_idct8 }, // FLIPADST_DCT
{ daala_idct16, daala_idst8 }, // DCT_FLIPADST
{ daala_idst16, daala_idst8 }, // FLIPADST_FLIPADST
{ daala_idst16, daala_idst8 }, // ADST_FLIPADST
{ daala_idst16, daala_idst8 }, // FLIPADST_ADST
{ daala_idtx16, daala_idtx8 }, // IDTX
{ daala_idct16, daala_idtx8 }, // V_DCT
{ daala_idtx16, daala_idct8 }, // H_DCT
{ daala_idst16, daala_idtx8 }, // V_ADST
{ daala_idtx16, daala_idst8 }, // H_ADST
{ daala_idst16, daala_idtx8 }, // V_FLIPADST
{ daala_idtx16, daala_idst8 }, // H_FLIPADST
#endif
#else
{ aom_idct16_c, aom_idct8_c }, // DCT_DCT
{ aom_iadst16_c, aom_idct8_c }, // ADST_DCT
{ aom_idct16_c, aom_iadst8_c }, // DCT_ADST
@@ -1292,6 +1312,7 @@ void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
{ iidtx16_c, aom_iadst8_c }, // H_ADST
{ aom_iadst16_c, iidtx8_c }, // V_FLIPADST
{ iidtx16_c, aom_iadst8_c }, // H_FLIPADST
#endif
#endif
};
@@ -1307,20 +1328,56 @@ void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row);
#endif
// Multi-way scaling matrix (bits):
// LGT/AV1 row, AV1 col input+0, rowTX+1, mid+.5, colTX+1.5, out-6 == -3
// LGT row, Daala col input+0, rowTX+1, mid+0, colTX+0, out-4 == -3
// Daala row, LGT col N/A (no 16-point LGT)
// Daala row, col input+1, rowTX+0, mid+0, colTX+0, out-4 == -3
// inverse transform row vectors and transpose
for (i = 0; i < n2; ++i) {
#if CONFIG_LGT
if (use_lgt_row) {
// Scaling cases 1 and 2 above
// No input scaling
// Row transform (LGT; scales up 1 bit)
ilgt8(input, outtmp, lgtmtx_row[0]);
// Transpose and mid scaling
for (j = 0; j < n; ++j) {
#if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16
// Mid scaling case 2
tmp[j][i] = outtmp[j];
#else
// Mid scaling case 1
tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
#endif
}
} else {
#endif
#if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16
tran_low_t temp_in[8];
// Input scaling case 4
for (j = 0; j < n; j++) temp_in[j] = input[j] * 2;
// Row transform (Daala does not scale)
IHT_8x16[tx_type].rows(temp_in, outtmp);
// Transpose (no mid scaling)
for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
#else
// Case 1; no input scaling
// Row transform (AV1 scales up 1 bit)
IHT_8x16[tx_type].rows(input, outtmp);
// Transpose and mid scaling up .5 bits
for (j = 0; j < n; ++j)
tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
#endif
#if CONFIG_LGT
}
#endif
input += n;
}
// inverse transform column vectors
// AV1 column TX scales up by 1.5 bits, Daala does not scale
for (i = 0; i < n; ++i) {
IHT_8x16[tx_type].cols(tmp[i], out[i]);
}
@@ -1334,7 +1391,13 @@ void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
for (j = 0; j < n; ++j) {
int d = i * stride + j;
int s = j * outstride + i;
#if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16
// Output scaling cases 2 and 4
dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
#else
// Output scaling case 1
dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
#endif
}
}
}
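
In the output stage just above, libaom's ROUND_POWER_OF_TWO macro rounds to nearest before shifting, so the Daala and AV1 branches differ only in how many bits they drop (4 versus 6). A minimal sketch of the two branches; the helper names are illustrative, not from the tree:

/* ROUND_POWER_OF_TWO(value, n) is (((value) + (1 << ((n)-1))) >> (n)) in
 * aom_dsp_common.h: add half of the shifted-off range, then shift. */
static int daala_residual(int coeff) { return (coeff + 8) >> 4; }  /* cases 2, 4 */
static int av1_residual(int coeff) { return (coeff + 32) >> 6; }   /* case 1 */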
@@ -1349,6 +1412,26 @@ void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
assert(tx_type == DCT_DCT);
#endif
static const transform_2d IHT_16x8[] = {
#if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16
{ daala_idct8, daala_idct16 }, // DCT_DCT = 0
{ daala_idst8, daala_idct16 }, // ADST_DCT = 1
{ daala_idct8, daala_idst16 }, // DCT_ADST = 2
{ daala_idst8, daala_idst16 }, // ADST_ADST = 3
#if CONFIG_EXT_TX
{ daala_idst8, daala_idct16 }, // FLIPADST_DCT
{ daala_idct8, daala_idst16 }, // DCT_FLIPADST
{ daala_idst8, daala_idst16 }, // FLIPADST_FLIPADST
{ daala_idst8, daala_idst16 }, // ADST_FLIPADST
{ daala_idst8, daala_idst16 }, // FLIPADST_ADST
{ daala_idtx8, daala_idtx16 }, // IDTX
{ daala_idct8, daala_idtx16 }, // V_DCT
{ daala_idtx8, daala_idct16 }, // H_DCT
{ daala_idst8, daala_idtx16 }, // V_ADST
{ daala_idtx8, daala_idst16 }, // H_ADST
{ daala_idst8, daala_idtx16 }, // V_FLIPADST
{ daala_idtx8, daala_idst16 }, // H_FLIPADST
#endif
#else
{ aom_idct8_c, aom_idct16_c }, // DCT_DCT
{ aom_iadst8_c, aom_idct16_c }, // ADST_DCT
{ aom_idct8_c, aom_iadst16_c }, // DCT_ADST
@@ -1366,6 +1449,7 @@ void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
{ iidtx8_c, aom_iadst16_c }, // H_ADST
{ aom_iadst8_c, iidtx16_c }, // V_FLIPADST
{ iidtx8_c, aom_iadst16_c }, // H_FLIPADST
#endif
#endif
};
@@ -1382,15 +1466,43 @@ void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
int use_lgt_col = get_lgt8(txfm_param, 1, lgtmtx_col);
#endif
// Multi-way scaling matrix (bits):
// AV1 row, LGT/AV1 col input+0, rowTX+1.5, mid+.5, colTX+1, out-6 == -3
// LGT row, Daala col N/A (no 16-point LGT)
// Daala row, LGT col input+1, rowTX+0, mid+1, colTX+1, out-6 == -3
// Daala row, col input+1, rowTX+0, mid+0, colTX+0, out-4 == -3
// inverse transform row vectors and transpose
for (i = 0; i < n; ++i) {
#if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16
tran_low_t temp_in[16];
// Input scaling cases 3 and 4
for (j = 0; j < n2; j++) temp_in[j] = input[j] * 2;
// Daala row TX, no scaling
IHT_16x8[tx_type].rows(temp_in, outtmp);
// Transpose and mid scaling
#if CONFIG_LGT
if (use_lgt_col)
// Case 3
for (j = 0; j < n2; ++j) tmp[j][i] = outtmp[j] * 2;
else
#endif
// Case 4
for (j = 0; j < n2; ++j) tmp[j][i] = outtmp[j];
#else
// Case 1
// No input scaling
// Row transform, AV1 scales up by 1.5 bits
IHT_16x8[tx_type].rows(input, outtmp);
// Transpose and mid scaling up .5 bits
for (j = 0; j < n2; ++j)
tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
#endif
input += n2;
}
// inverse transform column vectors
// AV1/LGT scales up by 1 bit, Daala does not scale
for (i = 0; i < n2; ++i) {
#if CONFIG_LGT
if (use_lgt_col)
@@ -1409,7 +1521,20 @@ void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
for (j = 0; j < n2; ++j) {
int d = i * stride + j;
int s = j * outstride + i;
// Output scaling
#if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16
#if CONFIG_LGT
if (use_lgt_col)
// Case 3
dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
else
#endif
// Case 4
dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
#else
// Case 1
dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
#endif
}
}
}
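
Unlike the 8x16 case, the 16x8 inverse can pair a Daala row transform with a runtime-selected LGT column transform (case 3), so the output shift must also be chosen at runtime: the LGT column gains a bit and the explicit mid *2 gains another, so that path sheds 6 bits where the all-Daala path sheds 4. The same style of hedged budget check as before (hypothetical helper, values copied from the table above):

#include <assert.h>

/* Hypothetical check of the iht16x8 scaling table. */
static void check_iht16x8_budget(void) {
  assert(0 + 1.5 + 0.5 + 1 - 6 == -3); /* case 1: AV1 row, LGT/AV1 col */
  assert(1 + 0.0 + 1.0 + 1 - 6 == -3); /* case 3: Daala row, LGT col */
  assert(1 + 0.0 + 0.0 + 0 - 4 == -3); /* case 4: Daala row and col */
}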
@@ -2480,7 +2605,7 @@ static void inv_txfm_add_32x8(const tran_low_t *input, uint8_t *dest,
static void inv_txfm_add_8x16(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
#if CONFIG_LGT || (CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16)
av1_iht8x16_128_add_c(input, dest, stride, txfm_param);
#else
av1_iht8x16_128_add(input, dest, stride, txfm_param);
@@ -2489,7 +2614,7 @@ static void inv_txfm_add_8x16(const tran_low_t *input, uint8_t *dest,
static void inv_txfm_add_16x8(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
#if CONFIG_LGT || (CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16)
av1_iht16x8_128_add_c(input, dest, stride, txfm_param);
#else
av1_iht16x8_128_add(input, dest, stride, txfm_param);
...
@@ -1881,6 +1881,26 @@ void av1_fht8x16_c(const int16_t *input, tran_low_t *output, int stride,
assert(tx_type == DCT_DCT);
#endif
static const transform_2d FHT[] = {
#if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16
{ daala_fdct16, daala_fdct8 }, // DCT_DCT
{ daala_fdst16, daala_fdct8 }, // ADST_DCT
{ daala_fdct16, daala_fdst8 }, // DCT_ADST
{ daala_fdst16, daala_fdst8 }, // ADST_ADST
#if CONFIG_EXT_TX
{ daala_fdst16, daala_fdct8 }, // FLIPADST_DCT
{ daala_fdct16, daala_fdst8 }, // DCT_FLIPADST
{ daala_fdst16, daala_fdst8 }, // FLIPADST_FLIPADST
{ daala_fdst16, daala_fdst8 }, // ADST_FLIPADST
{ daala_fdst16, daala_fdst8 }, // FLIPADST_ADST
{ daala_idtx16, daala_idtx8 }, // IDTX
{ daala_fdct16, daala_idtx8 }, // V_DCT
{ daala_idtx16, daala_fdct8 }, // H_DCT
{ daala_fdst16, daala_idtx8 }, // V_ADST
{ daala_idtx16, daala_fdst8 }, // H_ADST
{ daala_fdst16, daala_idtx8 }, // V_FLIPADST
{ daala_idtx16, daala_fdst8 }, // H_FLIPADST
#endif
#else
{ fdct16, fdct8 }, // DCT_DCT
{ fadst16, fdct8 }, // ADST_DCT
{ fdct16, fadst8 }, // DCT_ADST
@@ -1898,6 +1918,7 @@ void av1_fht8x16_c(const int16_t *input, tran_low_t *output, int stride,
{ fidtx16, fadst8 }, // H_ADST
{ fadst16, fidtx8 }, // V_FLIPADST
{ fidtx16, fadst8 }, // H_FLIPADST
#endif
#endif
};
const transform_2d ht = FHT[tx_type];
@@ -1916,26 +1937,65 @@ void av1_fht8x16_c(const int16_t *input, tran_low_t *output, int stride,
int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row);
#endif
// Multi-way scaling matrix (bits):
// LGT/AV1 row, AV1 col input+2.5, rowTX+1, mid-2, colTX+1.5, out+0 == 3
// LGT row, Daala col input+3, rowTX+1, mid+0, colTX+0, out-1 == 3
// Daala row, LGT col N/A (no 16-point LGT)
// Daala row, col input+4, rowTX+0, mid+0, colTX+0, out-1 == 3
// Rows
for (i = 0; i < n2; ++i) {
// Input scaling
for (j = 0; j < n; ++j) {
#if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16
#if CONFIG_LGT
// Input scaling when LGT might be active (cases 2, 4 above)
temp_in[j] = input[i * stride + j] * (use_lgt_row ? 2 : 4) * 4;
#else
// Input scaling when LGT is not possible, Daala only (case 4 above)
temp_in[j] = input[i * stride + j] * 16;
#endif
#else
// Input scaling when Daala is not possible, LGT/AV1 only (case 1 above)
temp_in[j] =
(tran_low_t)fdct_round_shift(input[i * stride + j] * 4 * Sqrt2);
#endif
}
// Row transform (AV1/LGT scale up 1 bit, Daala does not scale)
#if CONFIG_LGT
if (use_lgt_row)
flgt8(temp_in, temp_out, lgtmtx_row[0]);
else
#endif
ht.rows(temp_in, temp_out);
// Mid scaling
for (j = 0; j < n; ++j) {
#if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16
// Mid scaling: only cases 2 and 4 possible
out[j * n2 + i] = temp_out[j];
#else
// Mid scaling: only case 1 possible
out[j * n2 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
#endif
}
}
// Columns
for (i = 0; i < n; ++i) {
for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
// Column transform (AV1 scales up 1.5 bits, Daala does not scale)
ht.cols(temp_in, temp_out);
for (j = 0; j < n2; ++j) {
#if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16
// Output scaling (cases 2 and 4 above)
output[i + j * n] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
#else
// Output scaling (case 1 above)
output[i + j * n] = temp_out[j];
#endif
}
}
// Note: overall scale factor of transform is 8 times unitary
}
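
The Daala forward paths halve the final coefficients with (temp_out[j] + (temp_out[j] < 0)) >> 1 rather than a bare shift: adding the sign bit first makes the halving round toward zero, so positive and negative coefficients are treated symmetrically. A small demonstration with made-up values:

#include <assert.h>

/* Round-toward-zero halving, as in the Daala output stage above. */
static int halve_toward_zero(int t) { return (t + (t < 0)) >> 1; }

static void demo(void) {
  assert(halve_toward_zero(5) == 2);   /* plain 5 >> 1 also gives 2 */
  assert(halve_toward_zero(-5) == -2); /* plain -5 >> 1 would give -3 */
}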
@@ -1950,6 +2010,26 @@ void av1_fht16x8_c(const int16_t *input, tran_low_t *output, int stride,
assert(tx_type == DCT_DCT);
#endif
static const transform_2d FHT[] = {
#if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16
{ daala_fdct8, daala_fdct16 }, // DCT_DCT
{ daala_fdst8, daala_fdct16 }, // ADST_DCT
{ daala_fdct8, daala_fdst16 }, // DCT_ADST
{ daala_fdst8, daala_fdst16 }, // ADST_ADST
#if CONFIG_EXT_TX
{ daala_fdst8, daala_fdct16 }, // FLIPADST_DCT
{ daala_fdct8, daala_fdst16 }, // DCT_FLIPADST
{ daala_fdst8, daala_fdst16 }, // FLIPADST_FLIPADST
{ daala_fdst8, daala_fdst16 }, // ADST_FLIPADST
{ daala_fdst8, daala_fdst16 }, // FLIPADST_ADST
{ daala_idtx8, daala_idtx16 }, // IDTX
{ daala_fdct8, daala_idtx16 }, // V_DCT
{ daala_idtx8, daala_fdct16 }, // H_DCT
{ daala_fdst8, daala_idtx16 }, // V_ADST
{ daala_idtx8, daala_fdst16 }, // H_ADST
{ daala_fdst8, daala_idtx16 }, // V_FLIPADST
{ daala_idtx8, daala_fdst16 }, // H_FLIPADST
#endif
#else
{ fdct8, fdct16 }, // DCT_DCT
{ fadst8, fdct16 }, // ADST_DCT
{ fdct8, fadst16 }, // DCT_ADST
@@ -1967,6 +2047,7 @@ void av1_fht16x8_c(const int16_t *input, tran_low_t *output, int stride,
{ fidtx8, fadst16 }, // H_ADST
{ fadst8, fidtx16 }, // V_FLIPADST
{ fidtx8, fadst16 }, // H_FLIPADST
#endif
#endif
};
const transform_2d ht = FHT[tx_type];
@@ -1985,26 +2066,65 @@ void av1_fht16x8_c(const int16_t *input, tran_low_t *output, int stride,
int use_lgt_col = get_lgt8(txfm_param, 1, lgtmtx_col);
#endif
// Multi-way scaling matrix (bits):
// LGT/AV1 col, AV1 row input+2.5, colTX+1, mid-2, rowTX+1.5, out+0 == 3
// LGT col, Daala row input+3, colTX+1, mid+0, rowTX+0, out-1 == 3
// Daala col, LGT row N/A (no 16-point LGT)
// Daala col, row input+4, colTX+0, mid+0, rowTX+0, out-1 == 3
// Columns
for (i = 0; i < n2; ++i) {
// Input scaling
for (j = 0; j < n; ++j) {
#if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16
#if CONFIG_LGT
// Input scaling when LGT might be active (cases 2 and 4 above)
temp_in[j] = input[j * stride + i] * 4 * (use_lgt_col ? 2 : 4);
#else
// Input scaling when LGT is not possible, Daala only (case 4 above)
temp_in[j] = input[j * stride + i] * 16;
#endif
#else
// Input scaling when Daala is not possible, AV1/LGT only (case 1 above)
temp_in[j] =
(tran_low_t)fdct_round_shift(input[j * stride + i] * 4 * Sqrt2);
#endif
}
// Column transform (AV1/LGT scale up 1 bit, Daala does not scale)
#if CONFIG_LGT
if (use_lgt_col)
flgt8(temp_in, temp_out, lgtmtx_col[0]);
else
#endif
ht.cols(temp_in, temp_out);
// Mid scaling
for (j = 0; j < n; ++j) {
#if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16
// Scaling cases 2 and 4 above
out[j * n2 + i] = temp_out[j];
#else
// Scaling case 1 above
out[j * n2 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
#endif
}
}
// Rows
for (i = 0; i < n; ++i) {
for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
// Row transform (AV1 scales up 1.5 bits, Daala does not scale)
ht.rows(temp_in, temp_out);
for (j = 0; j < n2; ++j) {
#if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16
// Output scaling cases 2 and 4 above
output[j + i * n2] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
#else
// Output scaling case 1 above
output[j + i * n2] = temp_out[j];
#endif
}
}
// Note: overall scale factor of transform is 8 times unitary
}
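
On the input side, the Daala paths trade the irrational 4 * Sqrt2 pre-scale for exact powers of two, picked at runtime when an LGT column transform is possible: x8 (+3 bits) when LGT will contribute its own bit, x16 (+4 bits) otherwise, matching the input+3 and input+4 rows of the table above. A sketch of just that selection, with use_lgt_col reduced to a plain flag:

/* Input pre-scale for the Daala-enabled 16x8 forward paths (sketch). */
static int input_scale(int use_lgt_col) {
  /* The LGT column transform scales up 1 bit itself, so its input
   * gets one bit less up front. */
  return 4 * (use_lgt_col ? 2 : 4); /* 8 == +3 bits, 16 == +4 bits */
}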
...
@@ -52,7 +52,7 @@ static void fwd_txfm_8x4(const int16_t *src_diff, tran_low_t *coeff,
static void fwd_txfm_8x16(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TxfmParam *txfm_param) {
#if CONFIG_LGT || (CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16)
av1_fht8x16_c(src_diff, coeff, diff_stride, txfm_param);
#else
av1_fht8x16(src_diff, coeff, diff_stride, txfm_param);
@@ -61,7 +61,7 @@ static void fwd_txfm_8x16(const int16_t *src_diff, tran_low_t *coeff,
static void fwd_txfm_16x8(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TxfmParam *txfm_param) {
#if CONFIG_LGT || (CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16)
av1_fht16x8_c(src_diff, coeff, diff_stride, txfm_param);
#else
av1_fht16x8(src_diff, coeff, diff_stride, txfm_param);
...
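
Finally, the forward and inverse dispatch wrappers are widened so that enabling Daala TX, like LGT before it, pins these block sizes to the plain C kernels, presumably because the RTCD-selected optimized versions implement only the AV1 transforms. Note that both CONFIG_DAALA_TX8 and CONFIG_DAALA_TX16 must be set: an 8x16 or 16x8 block runs an 8-point transform in one direction and a 16-point in the other, and a Daala kernel (no internal scaling) cannot share a bit budget with an AV1 kernel (which scales as it goes). Sketched as a feature test (the macro name is a hypothetical convenience, not from the tree):

/* Sketch: the Daala rectangular path exists only when Daala kernels are
 * available at both sizes; otherwise the AV1 tables are compiled instead. */
#if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16
#define HAVE_DAALA_RECT_8X16 1
#else
#define HAVE_DAALA_RECT_8X16 0
#endif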