Commit b2f82ebd authored by Nathan E. Egge's avatar Nathan E. Egge Committed by Nathan Egge
Browse files

daala_tx: Unify the asym and ortho DST designs.

This patch refactors the DST transforms so that the orthonormal and
 asymmetric transforms are now nearly identical (up to multiplication
 constants and an extra set of shifts).
This means that the DST designs are now embeddable for every level
 and should address hardware concerns about gate area.

In addition, minor changes were made to improve transform accuracy:

 - all of the transforms now have perfect reconstruction for those
    computations outside the rotations, i.e., all +/- butterfly steps
    are exactly invertible
 - two multiplication constants were reduced below 1.0 (better for
    SIMD and gives slightly improved accuracy)
 - the averaging bias is removed which saves an extra addition for each
    of the averaging steps

Additional averaging steps can be removed from the 8-point Type-IV DST,
 giving a 68% reduction in MSE for the 32-point DCT, but this has not been
 done in case we use it in place of the 8-point Type-VII DST.

subset-1:

master-daala_tx@2017-12-10T22:38:19.651Z ->
 new-daala_tx@2017-12-10T22:37:50.844Z

  PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
0.0057 | -0.0210 | -0.1821 |   0.0085 | -0.0002 |  0.0147 |    -0.0674

Change-Id: Ib124eebf6f2e4b3c51c078d4e8f229fc5ec26171
parent e6579113
......@@ -31,6 +31,7 @@ set(AOM_AV1_COMMON_SOURCES
"${AOM_ROOT}/av1/common/convolve.h"
"${AOM_ROOT}/av1/common/daala_tx.c"
"${AOM_ROOT}/av1/common/daala_tx.h"
"${AOM_ROOT}/av1/common/daala_tx_kernels.h"
"${AOM_ROOT}/av1/common/debugmodes.c"
"${AOM_ROOT}/av1/common/entropy.c"
"${AOM_ROOT}/av1/common/entropy.h"
......
......@@ -26,6 +26,7 @@ AV1_COMMON_SRCS-yes += common/blockd.h
AV1_COMMON_SRCS-yes += common/common.h
AV1_COMMON_SRCS-yes += common/daala_tx.c
AV1_COMMON_SRCS-yes += common/daala_tx.h
AV1_COMMON_SRCS-yes += common/daala_tx_kernels.h
AV1_COMMON_SRCS-yes += common/daala_inv_txfm.c
AV1_COMMON_SRCS-yes += common/daala_inv_txfm.h
AV1_COMMON_SRCS-$(HAVE_AVX2) += common/x86/daala_tx_kernels.h
......
This diff is collapsed.
This diff is collapsed.
......@@ -982,6 +982,7 @@ static void od_col_tx4_add_hbd_avx2(unsigned char *output_pixels,
}
}
#if 0
/* Row-pass wrapper: forwards out, rows and in unchanged to od_row_tx4_avx2,
   selecting od_idct4_kernel8_epi16 as the kernel to apply.
   NOTE(review): in this diff view the function sits after an `#if 0`, so it
   appears to be compiled out by this commit -- confirm against the full file. */
static void od_row_idct4_avx2(int16_t *out, int rows, const tran_low_t *in) {
od_row_tx4_avx2(out, rows, in, od_idct4_kernel8_epi16);
}
......@@ -992,6 +993,7 @@ static void od_col_idct4_add_hbd_avx2(unsigned char *output_pixels,
od_col_tx4_add_hbd_avx2(output_pixels, output_stride, cols, in, bd,
od_idct4_kernel8_epi16);
}
#endif
static void od_row_idst4_avx2(int16_t *out, int rows, const tran_low_t *in) {
od_row_tx4_avx2(out, rows, in, od_idst_vii4_kernel8_epi16);
......@@ -1034,6 +1036,7 @@ typedef void (*od_tx8_mm256_kernel)(__m256i *r0, __m256i *r4, __m256i *r2,
__m256i *r6, __m256i *r1, __m256i *r5,
__m256i *r3, __m256i *r7);
#if 0
static void od_row_tx8_avx2(int16_t *out, int rows, const tran_low_t *in,
od_tx8_kernel8_epi16 kernel8_epi16,
od_tx8_mm256_kernel kernel8_epi32) {
......@@ -1176,6 +1179,7 @@ static void od_col_flip_idst8_add_hbd_avx2(unsigned char *output_pixels,
od_flip_idst8_kernel8_epi16,
od_flip_idst8_kernel16_epi16);
}
#endif
static void od_row_iidtx8_avx2(int16_t *out, int rows, const tran_low_t *in) {
od_row_iidtx_avx2(out, rows * 8, in);
......@@ -1201,6 +1205,7 @@ typedef void (*od_tx16_mm256_kernel)(__m256i *s0, __m256i *s4, __m256i *s2,
__m256i *sc, __m256i *sd, __m256i *se,
__m256i *sf);
#if 0
static void od_row_tx16_avx2(int16_t *out, int rows, const tran_low_t *in,
#if CONFIG_RECT_TX_EXT
od_tx16_kernel8_epi16 kernel8_epi16,
......@@ -1422,6 +1427,7 @@ static void od_col_flip_idst16_add_hbd_avx2(unsigned char *output_pixels,
od_flip_idst16_kernel8_epi16,
od_flip_idst16_kernel16_epi16);
}
#endif
static void od_row_iidtx16_avx2(int16_t *out, int rows, const tran_low_t *in) {
od_row_iidtx_avx2(out, rows * 16, in);
......@@ -1440,19 +1446,11 @@ typedef void (*daala_col_itx_add)(unsigned char *output_pixels,
static const daala_row_itx TX_ROW_MAP[TX_SIZES][TX_TYPES] = {
// 4-point transforms
{ od_row_idct4_avx2, od_row_idst4_avx2, od_row_flip_idst4_avx2,
od_row_iidtx4_avx2 },
{ NULL, od_row_idst4_avx2, od_row_flip_idst4_avx2, od_row_iidtx4_avx2 },
// 8-point transforms
{ od_row_idct8_avx2,
#if CONFIG_DAALA_TX_DST8
NULL, NULL,
#else
od_row_idst8_avx2, od_row_flip_idst8_avx2,
#endif
od_row_iidtx8_avx2 },
{ NULL, NULL, NULL, od_row_iidtx8_avx2 },
// 16-point transforms
{ od_row_idct16_avx2, od_row_idst16_avx2, od_row_flip_idst16_avx2,
od_row_iidtx16_avx2 },
{ NULL, NULL, NULL, od_row_iidtx16_avx2 },
// 32-point transforms
{ NULL, NULL, NULL, NULL },
#if CONFIG_TX64X64
......@@ -1480,19 +1478,12 @@ static const daala_col_itx_add TX_COL_MAP[2][TX_SIZES][TX_TYPES] = {
// High bit depth output
{
// 4-point transforms
{ od_col_idct4_add_hbd_avx2, od_col_idst4_add_hbd_avx2,
od_col_flip_idst4_add_hbd_avx2, od_col_iidtx4_add_hbd_avx2 },
{ NULL, od_col_idst4_add_hbd_avx2, od_col_flip_idst4_add_hbd_avx2,
od_col_iidtx4_add_hbd_avx2 },
// 8-point transforms
{ od_col_idct8_add_hbd_avx2,
#if CONFIG_DAALA_TX_DST8
NULL, NULL,
#else
od_col_idst8_add_hbd_avx2, od_col_flip_idst8_add_hbd_avx2,
#endif
od_col_iidtx8_add_hbd_avx2 },
{ NULL, NULL, NULL, od_col_iidtx8_add_hbd_avx2 },
// 16-point transforms
{ od_col_idct16_add_hbd_avx2, od_col_idst16_add_hbd_avx2,
od_col_flip_idst16_add_hbd_avx2, od_col_iidtx16_add_hbd_avx2 },
{ NULL, NULL, NULL, od_col_iidtx16_add_hbd_avx2 },
// 32-point transforms
{ NULL, NULL, NULL, NULL },
#if CONFIG_TX64X64
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment