Commit cb9c1c52 authored by Monty Montgomery, committed by Christopher Montgomery

Add CONFIG_DAALA_DCT16 experiment.

This experiment replaces the 16-point Type-II DCT and 16-point Type-IV
DST scaled vp9 transforms with the 16-point orthonormal Daala
transforms.  These have reduced complexity and provide perfect
reconstruction.  There is currently no net coding performance impact.

subset-1:

  monty-square-baseline-s1-F@2017-07-23T03:43:45.042Z ->
     monty-square-dct16-s1-F@2017-07-23T03:42:29.805Z

   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
-0.0152 | -0.0028 | -0.0929 |  -0.0432 | -0.0457 | -0.0425 |    -0.0237

  objective-1-fast:

  monty-square-baseline-o1f-F@2017-07-23T03:44:19.973Z ->
     monty-square-dct16-o1f-F@2017-07-23T03:43:22.549Z

  PSNR | PSNR Cb | PSNR Cr | PSNR HVS |   SSIM | MS SSIM | CIEDE 2000
0.0305 |  0.0926 | -0.1600 |   0.0471 | 0.0219 | -0.0075 |     0.0135

Change-Id: I54fed26d65fd8450693334bb400b1fafd7e0dacb
parent f6eaa159
......@@ -14,7 +14,7 @@
#include "./aom_dsp_rtcd.h"
#include "aom_dsp/inv_txfm.h"
#if CONFIG_DAALA_DCT4 || CONFIG_DAALA_DCT8
#if CONFIG_DAALA_DCT4 || CONFIG_DAALA_DCT8 || CONFIG_DAALA_DCT16
#include "av1/common/daala_tx.h"
#endif
......@@ -429,6 +429,18 @@ void aom_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
}
}
#if CONFIG_DAALA_DCT16
/* 16-point inverse DCT (Daala variant).
 *
 * Adapts the orthonormal od_bin_idct16() kernel to the aom_idct16_c()
 * interface: widens the input coefficients to od_coeff, runs the Daala
 * inverse transform, and narrows the result back to tran_low_t.
 * input/output may not alias; both hold 16 coefficients. */
void aom_idct16_c(const tran_low_t *input, tran_low_t *output) {
  od_coeff tmp_in[16];
  od_coeff tmp_out[16];
  int n;
  for (n = 0; n < 16; n++) tmp_in[n] = (od_coeff)input[n];
  od_bin_idct16(tmp_out, 1, tmp_in);
  for (n = 0; n < 16; n++) output[n] = (tran_low_t)tmp_out[n];
}
#else
void aom_idct16_c(const tran_low_t *input, tran_low_t *output) {
tran_low_t step1[16], step2[16];
tran_high_t temp1, temp2;
......@@ -593,6 +605,7 @@ void aom_idct16_c(const tran_low_t *input, tran_low_t *output) {
output[14] = WRAPLOW(step2[1] - step2[14]);
output[15] = WRAPLOW(step2[0] - step2[15]);
}
#endif // CONFIG_DAALA_DCT16
void aom_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest,
int stride) {
......@@ -619,6 +632,18 @@ void aom_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest,
}
}
#if CONFIG_DAALA_DCT16
/* 16-point inverse ADST (Daala variant).
 *
 * Thin wrapper exposing the orthonormal od_bin_idst16() kernel through
 * the aom_iadst16_c() interface; converts tran_low_t <-> od_coeff on
 * the way in and out.  input/output may not alias. */
void aom_iadst16_c(const tran_low_t *input, tran_low_t *output) {
  od_coeff tmp_in[16];
  od_coeff tmp_out[16];
  int n;
  for (n = 0; n < 16; n++) tmp_in[n] = (od_coeff)input[n];
  od_bin_idst16(tmp_out, 1, tmp_in);
  for (n = 0; n < 16; n++) output[n] = (tran_low_t)tmp_out[n];
}
#else
void aom_iadst16_c(const tran_low_t *input, tran_low_t *output) {
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8;
tran_high_t s9, s10, s11, s12, s13, s14, s15;
......@@ -789,6 +814,7 @@ void aom_iadst16_c(const tran_low_t *input, tran_low_t *output) {
output[14] = WRAPLOW(x9);
output[15] = WRAPLOW(-x1);
}
#endif
void aom_idct16x16_38_add_c(const tran_low_t *input, uint8_t *dest,
int stride) {
......
This diff is collapsed.
......@@ -9,5 +9,9 @@ void od_bin_fdct8(od_coeff y[8], const od_coeff *x, int xstride);
void od_bin_idct8(od_coeff *x, int xstride, const od_coeff y[8]);
void od_bin_fdst8(od_coeff y[8], const od_coeff *x, int xstride);
void od_bin_idst8(od_coeff *x, int xstride, const od_coeff y[8]);
void od_bin_fdct16(od_coeff y[16], const od_coeff *x, int xstride);
void od_bin_idct16(od_coeff *x, int xstride, const od_coeff y[16]);
void od_bin_fdst16(od_coeff y[16], const od_coeff *x, int xstride);
void od_bin_idst16(od_coeff *x, int xstride, const od_coeff y[16]);
#endif
......@@ -58,8 +58,13 @@ static void iidtx8_c(const tran_low_t *input, tran_low_t *output) {
/* 16-point identity "transform": copies coefficients through with the
 * scale the surrounding transform pipeline expects.
 *
 * Defect fixed: the scraped span contained both the pre- and post-image
 * of the diff (two consecutive `for` lines), which is not valid C; this
 * is the reconstructed post-commit version.
 *
 * With CONFIG_DAALA_DCT16 the paired Daala transforms are orthonormal,
 * so the identity needs no compensating gain; otherwise apply the
 * 2*sqrt(2) scale used by the scaled vp9-style 16-point transforms. */
static void iidtx16_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  for (i = 0; i < 16; ++i) {
#if CONFIG_DAALA_DCT16
    output[i] = input[i];
#else
    output[i] = (tran_low_t)dct_const_round_shift(input[i] * 2 * Sqrt2);
#endif
  }
}
static void iidtx32_c(const tran_low_t *input, tran_low_t *output) {
......@@ -1236,7 +1241,13 @@ void av1_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
// inverse transform row vectors
for (i = 0; i < 16; ++i) {
#if CONFIG_DAALA_DCT16
tran_low_t temp_in[16];
for (j = 0; j < 16; j++) temp_in[j] = input[j] << 1;
IHT_16[tx_type].rows(temp_in, out[i]);
#else
IHT_16[tx_type].rows(input, out[i]);
#endif
input += 16;
}
......@@ -1259,7 +1270,11 @@ void av1_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
for (j = 0; j < 16; ++j) {
int d = i * stride + j;
int s = j * outstride + i;
#if CONFIG_DAALA_DCT16
dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
#else
dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
#endif
}
}
}
......@@ -1440,6 +1455,7 @@ static void idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
}
#endif
#if !CONFIG_DAALA_DCT16
static void idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
const TxfmParam *txfm_param) {
// The calculation can be simplified if there are not many non-zero dct
......@@ -1462,6 +1478,7 @@ static void idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
else
aom_idct16x16_256_add(input, dest, stride);
}
#endif
#if CONFIG_MRC_TX
static void imrc32x32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
......@@ -1740,7 +1757,11 @@ static void inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
const TX_TYPE tx_type = txfm_param->tx_type;
switch (tx_type) {
#if !CONFIG_DAALA_DCT16
case DCT_DCT: idct16x16_add(input, dest, stride, txfm_param); break;
#else
case DCT_DCT:
#endif
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
......
......@@ -21,7 +21,7 @@
#include "av1/common/av1_fwd_txfm1d.h"
#include "av1/common/av1_fwd_txfm1d_cfg.h"
#include "av1/common/idct.h"
#if CONFIG_DAALA_DCT4 || CONFIG_DAALA_DCT8
#if CONFIG_DAALA_DCT4 || CONFIG_DAALA_DCT8 || CONFIG_DAALA_DCT16
#include "av1/common/daala_tx.h"
#endif
......@@ -182,6 +182,18 @@ static void fdct8(const tran_low_t *input, tran_low_t *output) {
}
#endif
#if CONFIG_DAALA_DCT16
/* 16-point forward DCT (Daala variant).
 *
 * Bridges the orthonormal od_bin_fdct16() kernel into the encoder's
 * fdct16() slot: widens input to od_coeff, transforms, narrows the
 * result to tran_low_t.  input/output may not alias. */
static void fdct16(const tran_low_t *input, tran_low_t *output) {
  od_coeff src[16];
  od_coeff dst[16];
  int n;
  for (n = 0; n < 16; n++) src[n] = (od_coeff)input[n];
  od_bin_fdct16(dst, src, 1);
  for (n = 0; n < 16; n++) output[n] = (tran_low_t)dst[n];
}
#else
static void fdct16(const tran_low_t *input, tran_low_t *output) {
tran_high_t temp;
tran_low_t step[16];
......@@ -355,6 +367,7 @@ static void fdct16(const tran_low_t *input, tran_low_t *output) {
range_check(output, 16, 16);
}
#endif
static void fdct32(const tran_low_t *input, tran_low_t *output) {
tran_high_t temp;
......@@ -880,6 +893,18 @@ static void fadst8(const tran_low_t *input, tran_low_t *output) {
}
#endif
#if CONFIG_DAALA_DCT16
/* 16-point forward ADST (Daala variant).
 *
 * Bridges the orthonormal od_bin_fdst16() kernel into the encoder's
 * fadst16() slot, converting tran_low_t <-> od_coeff at the boundary.
 * input/output may not alias. */
static void fadst16(const tran_low_t *input, tran_low_t *output) {
  od_coeff src[16];
  od_coeff dst[16];
  int n;
  for (n = 0; n < 16; n++) src[n] = (od_coeff)input[n];
  od_bin_fdst16(dst, src, 1);
  for (n = 0; n < 16; n++) output[n] = (tran_low_t)dst[n];
}
#else
static void fadst16(const tran_low_t *input, tran_low_t *output) {
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8;
tran_high_t s9, s10, s11, s12, s13, s14, s15;
......@@ -1047,6 +1072,7 @@ static void fadst16(const tran_low_t *input, tran_low_t *output) {
output[14] = (tran_low_t)x9;
output[15] = (tran_low_t)-x1;
}
#endif
// For use in lieu of ADST
static void fhalfright32(const tran_low_t *input, tran_low_t *output) {
......@@ -1169,8 +1195,13 @@ static void fidtx8(const tran_low_t *input, tran_low_t *output) {
/* Forward 16-point identity "transform": copies coefficients through
 * with the scale the surrounding transform pipeline expects.
 *
 * Defect fixed: the scraped span contained both the pre- and post-image
 * of the diff (two consecutive `for` lines), which is not valid C; this
 * is the reconstructed post-commit version.
 *
 * With CONFIG_DAALA_DCT16 the paired Daala transforms are orthonormal,
 * so no compensating gain is applied; otherwise use the 2*sqrt(2) scale
 * matching the scaled vp9-style 16-point transforms. */
static void fidtx16(const tran_low_t *input, tran_low_t *output) {
  int i;
  for (i = 0; i < 16; ++i) {
#if CONFIG_DAALA_DCT16
    output[i] = input[i];
#else
    output[i] = (tran_low_t)fdct_round_shift(input[i] * 2 * Sqrt2);
#endif
  }
}
static void fidtx32(const tran_low_t *input, tran_low_t *output) {
......@@ -2354,17 +2385,34 @@ void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride,
// Columns
for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j) temp_in[j] = input[j * stride + i] * 4;
for (j = 0; j < 16; ++j) {
#if CONFIG_DAALA_DCT16
temp_in[j] = input[j * stride + i] * 16;
#else
temp_in[j] = input[j * stride + i] * 4;
#endif
}
ht.cols(temp_in, temp_out);
for (j = 0; j < 16; ++j)
for (j = 0; j < 16; ++j) {
#if CONFIG_DAALA_DCT16
out[j * 16 + i] = temp_out[j];
#else
out[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
#endif
}
}
// Rows
for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j) temp_in[j] = out[j + i * 16];
ht.rows(temp_in, temp_out);
for (j = 0; j < 16; ++j) output[j + i * 16] = temp_out[j];
for (j = 0; j < 16; ++j) {
#if CONFIG_DAALA_DCT16
output[j + i * 16] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
#else
output[j + i * 16] = temp_out[j];
#endif
}
}
}
......
......@@ -172,6 +172,7 @@ set(CONFIG_ANALYZER 0 CACHE NUMBER "Internal flag.")
set(CONFIG_DCT_ONLY 0 CACHE NUMBER "Internal flag.")
set(CONFIG_DAALA_DCT4 0 CACHE NUMBER "Internal flag.")
set(CONFIG_DAALA_DCT8 0 CACHE NUMBER "Internal flag.")
set(CONFIG_DAALA_DCT16 0 CACHE NUMBER "Internal flag.")
set(CONFIG_GF_GROUPS 0 CACHE NUMBER "Internal flag.")
set(CONFIG_MRC_TX 0 CACHE NUMBER "Internal flag.")
set(CONFIG_INTER_STATS_ONLY 0 CACHE NUMBER "Internal flag.")
......
......@@ -256,7 +256,7 @@ if (CONFIG_DAALA_DCT4)
endif()
endif()
if (CONFIG_DAALA_DCT4 OR CONFIG_DAALA_DCT8)
if (CONFIG_DAALA_DCT4 OR CONFIG_DAALA_DCT8 OR CONFIG_DAALA_DCT16)
if (HAVE_MMX)
message(WARNING
"--- Disabled HAVE_MMX, incompatible with CONFIG_DAALA_DCTx.")
......
......@@ -296,6 +296,7 @@ EXPERIMENT_LIST="
dct_only
daala_dct4
daala_dct8
daala_dct16
cb4x4
chroma_2x2
chroma_sub8x8
......@@ -573,13 +574,8 @@ post_process_cmdline() {
fi
if enabled daala_dct4; then
enable_feature dct_only
disable_feature mmx
disable_feature rect_tx
disable_feature var_tx
disable_feature lgt
enable_feature lowbitdepth
fi
if enabled daala_dct8; then
if enabled daala_dct4 || enabled daala_dct8 || enabled daala_dct16; then
disable_feature mmx
disable_feature rect_tx
disable_feature var_tx
......
......@@ -23,7 +23,7 @@
#define CONFIG_COEFFICIENT_RANGE_CHECKING 1
#define AV1_DCT_GTEST
#include "av1/encoder/dct.c"
#if CONFIG_DAALA_DCT4 || CONFIG_DAALA_DCT8
#if CONFIG_DAALA_DCT4 || CONFIG_DAALA_DCT8 || CONFIG_DAALA_DCT16
#include "av1/common/daala_tx.c"
#endif
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment