Commit 6a2a75b6 authored by Monty Montgomery, committed by Christopher Montgomery

Add Daala TX to rectangular 32x64 and 64x32 transforms

This patch adds Daala TX transforms to the 32x64 and 64x32 transform
block sizes using Q3 (up 4, down 1) scaling.
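
For reference, a scalar round trip through the scaling this patch wires
around the Daala kernels. This is a minimal sketch: the daala_fdct /
daala_idct calls are replaced by an identity pass, so it exercises only
the wrapper shifts visible in the diff (up 4 / down 3 on the forward
side, up 3 / down 4 on the inverse side); the remainder of the quoted
"up 4, down 1" Q3 accounting presumably lives inside the Daala kernels
themselves.

  #include <stdio.h>

  /* Same rounding helpers as aom_dsp; reproduced here so the sketch
   * compiles standalone. */
  #define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n))
  #define ROUND_POWER_OF_TWO_SIGNED(value, n)            \
    (((value) < 0) ? -ROUND_POWER_OF_TWO(-(value), (n))  \
                   : ROUND_POWER_OF_TWO((value), (n)))

  int main(void) {
    int residual = 37; /* a Q0 input sample */
    /* Forward (av1_fht64x32_c/av1_fht32x64_c, Daala branch): scale the
     * input up 4 bits going in, shift the output down 3 coming out. */
    int coeff = ROUND_POWER_OF_TWO_SIGNED(residual * 16, 3);
    /* Inverse (av1_iht64x32_2048_add_c/av1_iht32x64_2048_add_c, Daala
     * branch): scale coefficients up 3 bits going in; the final
     * reconstruction shift drops from 5 to 4. */
    int recon = ROUND_POWER_OF_TWO(coeff * 8, 4);
    printf("residual %d -> coeff %d -> recon %d\n", residual, coeff, recon);
    return 0; /* prints: residual 37 -> coeff 74 -> recon 37 */
  }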

subset 1:
monty-daalaTX-fulltest-Daalabaseline-s1@2017-11-07T00:01:46.582Z ->
 monty-daalaTX-LBD-Daala32x64-s1-Z@2017-11-07T06:10:58.523Z

  PSNR | PSNR Cb | PSNR Cr | PSNR HVS |   SSIM | MS SSIM | CIEDE 2000
0.0112 | -0.0769 |  0.0799 |   0.0567 | 0.0099 | -0.0077 |    -0.0446

objective 1 fast:
monty-daalaTX-fulltest-Daalabaseline-o1f4@2017-11-07T05:59:16.553Z ->
 monty-daalaTX-LBD-Daala32x64-o1f4-Z@2017-11-07T06:10:11.519Z

   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
-0.0190 |  0.0926 | -0.0730 |  -0.0516 | -0.0037 | -0.0588 |     0.1310

Change-Id: I6246ecba388ae81deadc7b306dc3404fa7869aab
parent 125c0fca
@@ -1768,6 +1768,24 @@ void av1_iht64x32_2048_add_c(const tran_low_t *input, uint8_t *dest, int stride,
assert(tx_type == DCT_DCT);
#endif
static const transform_2d IHT_64x32[] = {
#if CONFIG_DAALA_TX32 && CONFIG_DAALA_TX64
{ daala_idct32, daala_idct64 }, // DCT_DCT
{ daala_idst32, daala_idct64 }, // ADST_DCT
{ daala_idct32, daala_idst64 }, // DCT_ADST
{ daala_idst32, daala_idst64 }, // ADST_ADST
{ daala_idst32, daala_idct64 }, // FLIPADST_DCT
{ daala_idct32, daala_idst64 }, // DCT_FLIPADST
{ daala_idst32, daala_idst64 }, // FLIPADST_FLIPADST
{ daala_idst32, daala_idst64 }, // ADST_FLIPADST
{ daala_idst32, daala_idst64 }, // FLIPADST_ADST
{ daala_idtx32, daala_idtx64 }, // IDTX
{ daala_idct32, daala_idtx64 }, // V_DCT
{ daala_idtx32, daala_idct64 }, // H_DCT
{ daala_idst32, daala_idtx64 }, // V_ADST
{ daala_idtx32, daala_idst64 }, // H_ADST
{ daala_idst32, daala_idtx64 }, // V_FLIPADST
{ daala_idtx32, daala_idst64 }, // H_FLIPADST
#else
{ aom_idct32_c, idct64_row_c }, // DCT_DCT
{ ihalfright32_c, idct64_row_c }, // ADST_DCT
{ aom_idct32_c, ihalfright64_c }, // DCT_ADST
@@ -1784,6 +1802,7 @@ void av1_iht64x32_2048_add_c(const tran_low_t *input, uint8_t *dest, int stride,
{ iidtx32_c, ihalfright64_c }, // H_ADST
{ ihalfright32_c, iidtx64_c }, // V_FLIPADST
{ iidtx32_c, ihalfright64_c }, // H_FLIPADST
#endif
};
const int n = 32;
const int n2 = 64;
@@ -1795,9 +1814,16 @@ void av1_iht64x32_2048_add_c(const tran_low_t *input, uint8_t *dest, int stride,
// inverse transform row vectors and transpose
for (i = 0; i < n; ++i) {
#if CONFIG_DAALA_TX32 && CONFIG_DAALA_TX64
tran_low_t temp_in[64];
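// Daala path: scale the incoming coefficients up 3 bits (x8).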
for (j = 0; j < n2; j++) temp_in[j] = input[j] * 8;
IHT_64x32[tx_type].rows(temp_in, outtmp);
for (j = 0; j < n2; ++j) tmp[j][i] = outtmp[j];
#else
IHT_64x32[tx_type].rows(input, outtmp);
for (j = 0; j < n2; ++j)
tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * InvSqrt2);
#endif
input += n2;
}
@@ -1811,7 +1837,11 @@ void av1_iht64x32_2048_add_c(const tran_low_t *input, uint8_t *dest, int stride,
for (j = 0; j < n2; ++j) {
int d = i * stride + j;
int s = j * outstride + i;
#if CONFIG_DAALA_TX32 && CONFIG_DAALA_TX64
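// Daala path reconstructs with a final downshift of 4 instead of 5.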
dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
#else
dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
#endif
}
}
}
@@ -1826,6 +1856,24 @@ void av1_iht32x64_2048_add_c(const tran_low_t *input, uint8_t *dest, int stride,
assert(tx_type == DCT_DCT);
#endif
static const transform_2d IHT_32x64[] = {
#if CONFIG_DAALA_TX32 && CONFIG_DAALA_TX64
{ daala_idct64, daala_idct32 }, // DCT_DCT
{ daala_idst64, daala_idct32 }, // ADST_DCT
{ daala_idct64, daala_idst32 }, // DCT_ADST
{ daala_idst64, daala_idst32 }, // ADST_ADST
{ daala_idst64, daala_idct32 }, // FLIPADST_DCT
{ daala_idct64, daala_idst32 }, // DCT_FLIPADST
{ daala_idst64, daala_idst32 }, // FLIPADST_FLIPADST
{ daala_idst64, daala_idst32 }, // ADST_FLIPADST
{ daala_idst64, daala_idst32 }, // FLIPADST_ADST
{ daala_idtx64, daala_idtx32 }, // IDTX
{ daala_idct64, daala_idtx32 }, // V_DCT
{ daala_idtx64, daala_idct32 }, // H_DCT
{ daala_idst64, daala_idtx32 }, // V_ADST
{ daala_idtx64, daala_idst32 }, // H_ADST
{ daala_idst64, daala_idtx32 }, // V_FLIPADST
{ daala_idtx64, daala_idst32 }, // H_FLIPADST
#else
{ idct64_col_c, aom_idct32_c }, // DCT_DCT
{ ihalfright64_c, aom_idct32_c }, // ADST_DCT
{ idct64_col_c, ihalfright32_c }, // DCT_ADST
@@ -1842,6 +1890,7 @@ void av1_iht32x64_2048_add_c(const tran_low_t *input, uint8_t *dest, int stride,
{ iidtx64_c, ihalfright32_c }, // H_ADST
{ ihalfright64_c, iidtx32_c }, // V_FLIPADST
{ iidtx64_c, ihalfright32_c }, // H_FLIPADST
#endif
};
const int n = 32;
@@ -1853,9 +1902,16 @@ void av1_iht32x64_2048_add_c(const tran_low_t *input, uint8_t *dest, int stride,
// inverse transform row vectors and transpose
for (i = 0; i < n2; ++i) {
#if CONFIG_DAALA_TX32 && CONFIG_DAALA_TX64
tran_low_t temp_in[32];
for (j = 0; j < n; j++) temp_in[j] = input[j] * 8;
IHT_32x64[tx_type].rows(temp_in, outtmp);
for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
#else
IHT_32x64[tx_type].rows(input, outtmp);
for (j = 0; j < n; ++j)
tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * InvSqrt2);
#endif
input += n;
}
@@ -1869,7 +1925,11 @@ void av1_iht32x64_2048_add_c(const tran_low_t *input, uint8_t *dest, int stride,
for (j = 0; j < n; ++j) {
int d = i * stride + j;
int s = j * outstride + i;
#if CONFIG_DAALA_TX32 && CONFIG_DAALA_TX64
dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
#else
dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
#endif
}
}
}
@@ -2158,12 +2218,20 @@ static void inv_txfm_add_32x16(const tran_low_t *input, uint8_t *dest,
#if CONFIG_TX64X64
static void inv_txfm_add_32x64(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
#if CONFIG_DAALA_TX64 && CONFIG_DAALA_TX32
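// Call the C version directly; the rtcd-dispatched hook may resolve to
// SIMD code that lacks the Daala path.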
av1_iht32x64_2048_add_c(input, dest, stride, txfm_param);
#else
av1_iht32x64_2048_add(input, dest, stride, txfm_param);
#endif
}
static void inv_txfm_add_64x32(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
#if CONFIG_DAALA_TX64 && CONFIG_DAALA_TX32
av1_iht64x32_2048_add_c(input, dest, stride, txfm_param);
#else
av1_iht64x32_2048_add(input, dest, stride, txfm_param);
#endif
}
#endif // CONFIG_TX64X64
@@ -2713,6 +2713,24 @@ void av1_fht64x32_c(const int16_t *input, tran_low_t *output, int stride,
assert(tx_type == DCT_DCT);
#endif
static const transform_2d FHT[] = {
#if CONFIG_DAALA_TX32 && CONFIG_DAALA_TX64
{ daala_fdct32, daala_fdct64 }, // DCT_DCT
{ daala_fdst32, daala_fdct64 }, // ADST_DCT
{ daala_fdct32, daala_fdst64 }, // DCT_ADST
{ daala_fdst32, daala_fdst64 }, // ADST_ADST
{ daala_fdst32, daala_fdct64 }, // FLIPADST_DCT
{ daala_fdct32, daala_fdst64 }, // DCT_FLIPADST
{ daala_fdst32, daala_fdst64 }, // FLIPADST_FLIPADST
{ daala_fdst32, daala_fdst64 }, // ADST_FLIPADST
{ daala_fdst32, daala_fdst64 }, // FLIPADST_ADST
{ daala_idtx32, daala_idtx64 }, // IDTX
{ daala_fdct32, daala_idtx64 }, // V_DCT
{ daala_idtx32, daala_fdct64 }, // H_DCT
{ daala_fdst32, daala_idtx64 }, // V_ADST
{ daala_idtx32, daala_fdst64 }, // H_ADST
{ daala_fdst32, daala_idtx64 }, // V_FLIPADST
{ daala_idtx32, daala_fdst64 }, // H_FLIPADST
#else
{ fdct32, fdct64_row }, // DCT_DCT
{ fhalfright32, fdct64_row }, // ADST_DCT
{ fdct32, fhalfright64 }, // DCT_ADST
@@ -2729,6 +2747,7 @@ void av1_fht64x32_c(const int16_t *input, tran_low_t *output, int stride,
{ fidtx32, fhalfright64 }, // H_ADST
{ fhalfright32, fidtx64 }, // V_FLIPADST
{ fidtx32, fhalfright64 }, // H_FLIPADST
#endif
};
const transform_2d ht = FHT[tx_type];
tran_low_t out[2048];
@@ -2741,20 +2760,36 @@ void av1_fht64x32_c(const int16_t *input, tran_low_t *output, int stride,
// Columns
for (i = 0; i < n2; ++i) {
for (j = 0; j < n; ++j) {
#if CONFIG_DAALA_TX32 && CONFIG_DAALA_TX64
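// Daala path: scale the input up 4 bits (x16) in place of the Sqrt2
// pre-scale used by the standard path.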
temp_in[j] = input[j * stride + i] * 16;
#else
temp_in[j] = (tran_low_t)fdct_round_shift(input[j * stride + i] * Sqrt2);
#endif
}
ht.cols(temp_in, temp_out);
for (j = 0; j < n; ++j) {
#if CONFIG_DAALA_TX32 && CONFIG_DAALA_TX64
out[j * n2 + i] = temp_out[j];
#else
out[j * n2 + i] = (tran_low_t)ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
#endif
}
}
// Rows
for (i = 0; i < n; ++i) {
for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
ht.rows(temp_in, temp_out);
for (j = 0; j < n2; ++j) {
#if CONFIG_DAALA_TX32 && CONFIG_DAALA_TX64
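// Daala path: shift the output down 3 (the standard path shifts down 2).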
output[j + i * n2] =
(tran_low_t)ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 3);
#else
output[j + i * n2] =
(tran_low_t)ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
#endif
}
}
// Zero out right 32x32 area.
@@ -2773,6 +2808,24 @@ void av1_fht32x64_c(const int16_t *input, tran_low_t *output, int stride,
assert(tx_type == DCT_DCT);
#endif
static const transform_2d FHT[] = {
#if CONFIG_DAALA_TX32 && CONFIG_DAALA_TX64
{ daala_fdct64, daala_fdct32 }, // DCT_DCT
{ daala_fdst64, daala_fdct32 }, // ADST_DCT
{ daala_fdct64, daala_fdst32 }, // DCT_ADST
{ daala_fdst64, daala_fdst32 }, // ADST_ADST
{ daala_fdst64, daala_fdct32 }, // FLIPADST_DCT
{ daala_fdct64, daala_fdst32 }, // DCT_FLIPADST
{ daala_fdst64, daala_fdst32 }, // FLIPADST_FLIPADST
{ daala_fdst64, daala_fdst32 }, // ADST_FLIPADST
{ daala_fdst64, daala_fdst32 }, // FLIPADST_ADST
{ daala_idtx64, daala_idtx32 }, // IDTX
{ daala_fdct64, daala_idtx32 }, // V_DCT
{ daala_idtx64, daala_fdct32 }, // H_DCT
{ daala_fdst64, daala_idtx32 }, // V_ADST
{ daala_idtx64, daala_fdst32 }, // H_ADST
{ daala_fdst64, daala_idtx32 }, // V_FLIPADST
{ daala_idtx64, daala_fdst32 }, // H_FLIPADST
#else
{ fdct64_row, fdct32 }, // DCT_DCT
{ fhalfright64, fdct32 }, // ADST_DCT
{ fdct64_row, fhalfright32 }, // DCT_ADST
@@ -2789,6 +2842,7 @@ void av1_fht32x64_c(const int16_t *input, tran_low_t *output, int stride,
{ fidtx64, fhalfright32 }, // H_ADST
{ fhalfright64, fidtx32 }, // V_FLIPADST
{ fidtx64, fhalfright32 }, // H_FLIPADST
#endif
};
const transform_2d ht = FHT[tx_type];
tran_low_t out[32 * 64];
@@ -2801,19 +2855,34 @@ void av1_fht32x64_c(const int16_t *input, tran_low_t *output, int stride,
// Rows
for (i = 0; i < n2; ++i) {
for (j = 0; j < n; ++j) {
#if CONFIG_DAALA_TX32 && CONFIG_DAALA_TX64
temp_in[j] = input[i * stride + j] * 16;
#else
temp_in[j] = (tran_low_t)fdct_round_shift(input[i * stride + j] * Sqrt2);
#endif
}
ht.rows(temp_in, temp_out);
for (j = 0; j < n; ++j) {
#if CONFIG_DAALA_TX32 && CONFIG_DAALA_TX64
out[j * n2 + i] = temp_out[j];
#else
out[j * n2 + i] = (tran_low_t)ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
#endif
}
}
// Columns
for (i = 0; i < n; ++i) {
for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
ht.cols(temp_in, temp_out);
for (j = 0; j < n2; ++j) {
#if CONFIG_DAALA_TX32 && CONFIG_DAALA_TX64
output[i + j * n] = (tran_low_t)ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 3);
#else
output[i + j * n] = (tran_low_t)ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
#endif
}
}
// Zero out the bottom 32x32 area.