Commit 2cb52baf authored by Monty Montgomery, committed by Christopher Montgomery

Add CONFIG_DAALA_DCT32 experiment.

This experiment replaces the 32-point Type-II DCT and 32-point
Type-IV DST scaling vp9 transforms with the 32-point orthonormal
Daala transforms.

subset-1:

    monty-square-baseline-s1-F3@2017-08-02T11:50:51.375Z ->
      monty-square-dct32-s1-F3@2017-08-02T11:50:18.859Z

      PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
    0.0000 |  0.0115 | -0.1044 |  -0.0185 | -0.0069 | -0.0603 |     0.0555

objective-1-fast (4 frames):

    monty-square-baseline-o1f-F3-l4-fine@2017-08-12T02:18:05.560Z ->
      monty-square-dct32-o1f-F3-l4-fine@2017-08-12T02:19:44.461Z

      PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
   -0.0269 | -0.0715 |     N/A |  -0.0547 | -0.0268 | -0.0590 |        N/A

Change-Id: Ib1bad991d82eb67956e94a6216298a84e908b169
parent 2bfa2065
......@@ -14,7 +14,8 @@
#include "./aom_dsp_rtcd.h"
#include "aom_dsp/inv_txfm.h"
#if CONFIG_DAALA_DCT4 || CONFIG_DAALA_DCT8 || CONFIG_DAALA_DCT16
#if CONFIG_DAALA_DCT4 || CONFIG_DAALA_DCT8 || CONFIG_DAALA_DCT16 || \
CONFIG_DAALA_DCT32
#include "av1/common/daala_tx.h"
#endif
......@@ -881,6 +882,18 @@ void aom_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
}
}
#if CONFIG_DAALA_DCT32
/* 32-point inverse DCT, Daala variant (compiled when CONFIG_DAALA_DCT32 is
 * set).  The 32 input coefficients are widened/converted to od_coeff, handed
 * to od_bin_idct32() with an xstride of 1, and the 32 results are converted
 * back to tran_low_t in `output`. */
void aom_idct32_c(const tran_low_t *input, tran_low_t *output) {
  od_coeff samples[32];
  od_coeff coeffs[32];
  int k;

  /* Stage the coefficients in the type the Daala kernel expects. */
  for (k = 0; k != 32; ++k) {
    coeffs[k] = (od_coeff)input[k];
  }

  od_bin_idct32(samples, 1, coeffs);

  /* Hand the reconstructed samples back in the caller's type. */
  for (k = 0; k != 32; ++k) {
    output[k] = (tran_low_t)samples[k];
  }
}
#else
void aom_idct32_c(const tran_low_t *input, tran_low_t *output) {
tran_low_t step1[32], step2[32];
tran_high_t temp1, temp2;
......@@ -1247,6 +1260,7 @@ void aom_idct32_c(const tran_low_t *input, tran_low_t *output) {
output[30] = WRAPLOW(step1[1] - step1[30]);
output[31] = WRAPLOW(step1[0] - step1[31]);
}
#endif
#if CONFIG_MRC_TX
void aom_imrc32x32_1024_add_c(const tran_low_t *input, uint8_t *dest,
......
This diff is collapsed.
......@@ -13,5 +13,7 @@ void od_bin_fdct16(od_coeff y[16], const od_coeff *x, int xstride);
void od_bin_idct16(od_coeff *x, int xstride, const od_coeff y[16]);
void od_bin_fdst16(od_coeff y[16], const od_coeff *x, int xstride);
void od_bin_idst16(od_coeff *x, int xstride, const od_coeff y[16]);
void od_bin_fdct32(od_coeff y[32], const od_coeff *x, int xstride);
void od_bin_idct32(od_coeff *x, int xstride, const od_coeff y[32]);
#endif
......@@ -69,7 +69,13 @@ static void iidtx16_c(const tran_low_t *input, tran_low_t *output) {
// 32-point inverse identity transform: writes each of the 32 inputs to the
// matching output slot. With CONFIG_DAALA_DCT32 the value is copied unscaled;
// otherwise it is multiplied by 4.
static void iidtx32_c(const tran_low_t *input, tran_low_t *output) {
int i;
// NOTE(review): this one-line loop looks like the pre-change form of the loop
// below, shown here by the diff rendering; every output element it writes is
// written again by the following loop. Confirm against the actual file.
for (i = 0; i < 32; ++i) output[i] = input[i] * 4;
for (i = 0; i < 32; ++i) {
#if CONFIG_DAALA_DCT32
// Daala build: no scaling is applied.
output[i] = input[i];
#else
output[i] = input[i] * 4;
#endif
}
}
#if CONFIG_TX64X64
......@@ -82,6 +88,20 @@ static void iidtx64_c(const tran_low_t *input, tran_low_t *output) {
#endif // CONFIG_EXT_TX
// For use in lieu of ADST
#if CONFIG_DAALA_DCT32
/* Inverse "half-right" 32-point transform used in lieu of an ADST when
 * CONFIG_DAALA_DCT32 is set.  The upper 16 inputs are passed straight through
 * to the lower 16 outputs, and the lower 16 inputs are run through
 * aom_idct16_c() into the upper 16 outputs. */
static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
  tran_low_t low_half[16];
  int k;

  /* Daala transforms are orthonormal, so nothing is rescaled in here. */
  k = 0;
  while (k < 16) {
    low_half[k] = input[k];
    ++k;
  }

  /* Pass-through half: input[16..31] -> output[0..15]. */
  k = 0;
  while (k < 16) {
    output[k] = input[k + 16];
    ++k;
  }

  /* Transformed half: IDCT16 of the saved low inputs -> output[16..31]. */
  aom_idct16_c(low_half, &output[16]);
}
#else
static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
int i;
tran_low_t inputhalf[16];
......@@ -95,6 +115,7 @@ static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
aom_idct16_c(inputhalf, output + 16);
// Note overall scaling factor is 4 times orthogonal
}
#endif
#if CONFIG_TX64X64
static void idct64_col_c(const tran_low_t *input, tran_low_t *output) {
......@@ -1279,7 +1300,7 @@ void av1_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
}
}
#if CONFIG_EXT_TX
#if CONFIG_EXT_TX || CONFIG_DAALA_DCT32
void av1_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int stride,
const TxfmParam *txfm_param) {
int tx_type = txfm_param->tx_type;
......@@ -1287,7 +1308,8 @@ void av1_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int stride,
assert(tx_type == DCT_DCT);
#endif
static const transform_2d IHT_32[] = {
{ aom_idct32_c, aom_idct32_c }, // DCT_DCT
{ aom_idct32_c, aom_idct32_c }, // DCT_DCT
#if CONFIG_EXT_TX
{ ihalfright32_c, aom_idct32_c }, // ADST_DCT
{ aom_idct32_c, ihalfright32_c }, // DCT_ADST
{ ihalfright32_c, ihalfright32_c }, // ADST_ADST
......@@ -1303,6 +1325,7 @@ void av1_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int stride,
{ iidtx32_c, ihalfright32_c }, // H_ADST
{ ihalfright32_c, iidtx32_c }, // V_FLIPADST
{ iidtx32_c, ihalfright32_c }, // H_FLIPADST
#endif
};
int i, j;
......@@ -1313,14 +1336,24 @@ void av1_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int stride,
// inverse transform row vectors
for (i = 0; i < 32; ++i) {
#if CONFIG_DAALA_DCT32
tran_low_t temp_in[32];
for (j = 0; j < 32; j++) temp_in[j] = input[j] * 2;
IHT_32[tx_type].rows(temp_in, out[i]);
#else
IHT_32[tx_type].rows(input, out[i]);
#endif
input += 32;
}
// transpose
for (i = 0; i < 32; i++) {
for (j = 0; j < 32; j++) {
#if CONFIG_DAALA_DCT32
tmp[j][i] = out[i][j] * 4;
#else
tmp[j][i] = out[i][j];
#endif
}
}
......@@ -1334,11 +1367,15 @@ void av1_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int stride,
for (j = 0; j < 32; ++j) {
int d = i * stride + j;
int s = j * outstride + i;
#if CONFIG_DAALA_DCT32
dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
#else
dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
#endif
}
}
}
#endif // CONFIG_EXT_TX
#endif // CONFIG_EXT_TX || CONFIG_DAALA_DCT32
#if CONFIG_TX64X64
void av1_iht64x64_4096_add_c(const tran_low_t *input, uint8_t *dest, int stride,
......@@ -1513,6 +1550,7 @@ static void imrc32x32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
}
#endif // CONFIG_MRC_TX
#if !CONFIG_DAALA_DCT32
static void idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
const TxfmParam *txfm_param) {
#if CONFIG_ADAPT_SCAN
......@@ -1535,6 +1573,7 @@ static void idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
else
aom_idct32x32_1024_add(input, dest, stride);
}
#endif
#if CONFIG_TX64X64
static void idct64x64_add(const tran_low_t *input, uint8_t *dest, int stride,
......@@ -1798,7 +1837,13 @@ static void inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
const TX_TYPE tx_type = txfm_param->tx_type;
switch (tx_type) {
#if !CONFIG_DAALA_DCT32
case DCT_DCT: idct32x32_add(input, dest, stride, txfm_param); break;
#else
case DCT_DCT:
av1_iht32x32_1024_add_c(input, dest, stride, txfm_param);
break;
#endif
#if CONFIG_EXT_TX
case ADST_DCT:
case DCT_ADST:
......
......@@ -21,7 +21,8 @@
#include "av1/common/av1_fwd_txfm1d.h"
#include "av1/common/av1_fwd_txfm1d_cfg.h"
#include "av1/common/idct.h"
#if CONFIG_DAALA_DCT4 || CONFIG_DAALA_DCT8 || CONFIG_DAALA_DCT16
#if CONFIG_DAALA_DCT4 || CONFIG_DAALA_DCT8 || CONFIG_DAALA_DCT16 || \
CONFIG_DAALA_DCT32
#include "av1/common/daala_tx.h"
#endif
......@@ -369,6 +370,18 @@ static void fdct16(const tran_low_t *input, tran_low_t *output) {
}
#endif
#if CONFIG_DAALA_DCT32
/* 32-point forward DCT, Daala variant (compiled when CONFIG_DAALA_DCT32 is
 * set).  The 32 input samples are converted to od_coeff, transformed by
 * od_bin_fdct32() with an xstride of 1, and the 32 coefficients are converted
 * back to tran_low_t in `output`. */
static void fdct32(const tran_low_t *input, tran_low_t *output) {
  od_coeff coeffs[32];
  od_coeff samples[32];
  int k;

  /* Stage the samples in the type the Daala kernel expects. */
  for (k = 0; k != 32; ++k) {
    samples[k] = (od_coeff)input[k];
  }

  od_bin_fdct32(coeffs, samples, 1);

  /* Hand the coefficients back in the caller's type. */
  for (k = 0; k != 32; ++k) {
    output[k] = (tran_low_t)coeffs[k];
  }
}
#else
static void fdct32(const tran_low_t *input, tran_low_t *output) {
tran_high_t temp;
tran_low_t step[32];
......@@ -766,6 +779,7 @@ static void fdct32(const tran_low_t *input, tran_low_t *output) {
range_check(output, 32, 18);
}
#endif
#ifndef AV1_DCT_GTEST
......@@ -1075,6 +1089,20 @@ static void fadst16(const tran_low_t *input, tran_low_t *output) {
#endif
// For use in lieu of ADST
#if CONFIG_DAALA_DCT32
/* Forward "half-right" 32-point transform used in lieu of an ADST when
 * CONFIG_DAALA_DCT32 is set.  The lower 16 inputs are passed straight through
 * to the upper 16 outputs, and the upper 16 inputs are run through fdct16()
 * into the lower 16 outputs. */
static void fhalfright32(const tran_low_t *input, tran_low_t *output) {
  tran_low_t high_half[16];
  int k;

  /* Daala transforms are orthonormal, so nothing is rescaled in here. */
  /* Pass-through half: input[0..15] -> output[16..31]. */
  k = 0;
  while (k < 16) {
    output[k + 16] = input[k];
    ++k;
  }

  /* Save input[16..31] before the DCT writes output[0..15]. */
  k = 0;
  while (k < 16) {
    high_half[k] = input[16 + k];
    ++k;
  }

  /* Transformed half: DCT16 of the saved high inputs -> output[0..15]. */
  fdct16(high_half, output);
}
#else
static void fhalfright32(const tran_low_t *input, tran_low_t *output) {
int i;
tran_low_t inputhalf[16];
......@@ -1088,6 +1116,7 @@ static void fhalfright32(const tran_low_t *input, tran_low_t *output) {
fdct16(inputhalf, output);
// Note overall scaling factor is 4 times orthogonal
}
#endif
#if CONFIG_MRC_TX
static void get_masked_residual32(const int16_t **input, int *input_stride,
......@@ -1214,7 +1243,13 @@ static void fidtx16(const tran_low_t *input, tran_low_t *output) {
// 32-point forward identity transform: writes each of the 32 inputs to the
// matching output slot. With CONFIG_DAALA_DCT32 the value is copied unscaled;
// otherwise it is multiplied by 4.
static void fidtx32(const tran_low_t *input, tran_low_t *output) {
int i;
// NOTE(review): this one-line loop looks like the pre-change form of the loop
// below, shown here by the diff rendering; every output element it writes is
// written again by the following loop. Confirm against the actual file.
for (i = 0; i < 32; ++i) output[i] = input[i] * 4;
for (i = 0; i < 32; ++i) {
#if CONFIG_DAALA_DCT32
// Daala build: no scaling is applied.
output[i] = input[i];
#else
output[i] = input[i] * 4;
#endif
}
}
static void copy_block(const int16_t *src, int src_stride, int l, int w,
......@@ -2467,17 +2502,30 @@ void av1_fht32x32_c(const int16_t *input, tran_low_t *output, int stride,
// Columns
for (i = 0; i < 32; ++i) {
for (j = 0; j < 32; ++j) temp_in[j] = input[j * stride + i] * 4;
for (j = 0; j < 32; ++j) {
#if CONFIG_DAALA_DCT32
temp_in[j] = input[j * stride + i] * 16;
#else
temp_in[j] = input[j * stride + i] * 4;
#endif
}
ht.cols(temp_in, temp_out);
for (j = 0; j < 32; ++j)
for (j = 0; j < 32; ++j) {
#if CONFIG_DAALA_DCT32
out[j * 32 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
#else
out[j * 32 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 4);
#endif
}
}
// Rows
for (i = 0; i < 32; ++i) {
for (j = 0; j < 32; ++j) temp_in[j] = out[j + i * 32];
ht.rows(temp_in, temp_out);
for (j = 0; j < 32; ++j) output[j + i * 32] = temp_out[j];
for (j = 0; j < 32; ++j) {
output[j + i * 32] = temp_out[j];
}
}
}
......
......@@ -126,6 +126,7 @@ set(CONFIG_COMPOUND_SINGLEREF 0 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_CONVOLVE_ROUND 1 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_CDEF 0 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_DAALA_DCT16 0 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_DAALA_DCT32 0 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_DAALA_DCT4 0 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_DAALA_DCT8 0 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_DAALA_DIST 0 CACHE NUMBER "AV1 experiment flag.")
......
......@@ -247,7 +247,8 @@ if (CONFIG_DAALA_DCT4 AND NOT CONFIG_DCT_ONLY)
change_config_and_warn(CONFIG_DCT_ONLY 1 CONFIG_DAALA_DCT4)
endif()
if (CONFIG_DAALA_DCT4 OR CONFIG_DAALA_DCT8 OR CONFIG_DAALA_DCT16)
if (CONFIG_DAALA_DCT4 OR CONFIG_DAALA_DCT8 OR CONFIG_DAALA_DCT16 OR
CONFIG_DAALA_DCT32)
if (HAVE_MMX)
change_config_and_warn(HAVE_MMX 0 CONFIG_DAALA_DCTx)
endif()
......
......@@ -296,6 +296,7 @@ EXPERIMENT_LIST="
daala_dct4
daala_dct8
daala_dct16
daala_dct32
cb4x4
chroma_2x2
chroma_sub8x8
......@@ -579,7 +580,10 @@ post_process_cmdline() {
if enabled daala_dct4; then
enable_feature dct_only
fi
if enabled daala_dct4 || enabled daala_dct8 || enabled daala_dct16; then
if enabled daala_dct4 ||
enabled daala_dct8 ||
enabled daala_dct16 ||
enabled daala_dct32; then
disable_feature mmx
disable_feature rect_tx
disable_feature var_tx
......
......@@ -23,7 +23,8 @@
#define CONFIG_COEFFICIENT_RANGE_CHECKING 1
#define AV1_DCT_GTEST
#include "av1/encoder/dct.c"
#if CONFIG_DAALA_DCT4 || CONFIG_DAALA_DCT8 || CONFIG_DAALA_DCT16
#if CONFIG_DAALA_DCT4 || CONFIG_DAALA_DCT8 || CONFIG_DAALA_DCT16 || \
CONFIG_DAALA_DCT32
#include "av1/common/daala_tx.c"
#endif
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment