Commit a4e245a9 authored by Monty Montgomery, committed by Christopher Montgomery

Add CONFIG_DAALA_DCT64 experiment.

This experiment replaces the scaled vp9-style 64-point Type-II DCT
and related transforms with the 64-point orthonormal Daala
transforms.

subset-1:

    monty-square-baseline-s1-F2@2017-07-28T03:35:45.962Z ->
      monty-square-dct64-s1-F2@2017-07-29T04:50:58.412Z

       PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
    -0.1930 | -0.2037 | -0.0643 |  -0.1917 | -0.2331 | -0.3510 |    -0.1810

objective-1-fast:

    monty-square-baseline-o1f-F2@2017-07-28T03:35:35.533Z ->
      monty-square-dct64-o1f-F2@2017-07-29T04:50:28.542Z

       PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
    -0.2557 | -0.1743 | -0.4900 |  -0.3028 | -0.4147 | -0.5764 |    -0.2864
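
(Both tables are Bjøntegaard-delta rate changes in percent versus the
baseline run; negative numbers mean the experiment achieves the same
quality on that metric with less rate.)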

Change-Id: I1f944df29e44d2e350c42555af274f2d75a62a92
parent ccfdfce1

@@ -15,7 +15,7 @@
 #include "./aom_dsp_rtcd.h"
 #include "aom_dsp/inv_txfm.h"
 #if CONFIG_DAALA_DCT4 || CONFIG_DAALA_DCT8 || CONFIG_DAALA_DCT16 || \
-    CONFIG_DAALA_DCT32
+    CONFIG_DAALA_DCT32 || CONFIG_DAALA_DCT64
 #include "av1/common/daala_tx.h"
 #endif

@@ -1469,6 +1469,17 @@ void aom_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
   }
 }
 
+#if CONFIG_TX64X64 && CONFIG_DAALA_DCT64
+void aom_idct64_c(const tran_low_t *input, tran_low_t *output) {
+  int i;
+  od_coeff x[64];
+  od_coeff y[64];
+  for (i = 0; i < 64; i++) y[i] = (od_coeff)input[i];
+  od_bin_idct64(x, 1, y);
+  for (i = 0; i < 64; i++) output[i] = (tran_low_t)x[i];
+}
+#endif
+
 void aom_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
                                  int stride, int bd) {
   /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
...

@@ -68,6 +68,9 @@ void aom_idct4_c(const tran_low_t *input, tran_low_t *output);
 void aom_idct8_c(const tran_low_t *input, tran_low_t *output);
 void aom_idct16_c(const tran_low_t *input, tran_low_t *output);
 void aom_idct32_c(const tran_low_t *input, tran_low_t *output);
+#if CONFIG_TX64X64 && CONFIG_DAALA_DCT64
+void aom_idct64_c(const tran_low_t *input, tran_low_t *output);
+#endif
 void aom_iadst4_c(const tran_low_t *input, tran_low_t *output);
 void aom_iadst8_c(const tran_low_t *input, tran_low_t *output);
 void aom_iadst16_c(const tran_low_t *input, tran_low_t *output);
...

(A further diff here is collapsed and not shown.)

@@ -15,5 +15,8 @@ void od_bin_fdst16(od_coeff y[16], const od_coeff *x, int xstride);
 void od_bin_idst16(od_coeff *x, int xstride, const od_coeff y[16]);
 void od_bin_fdct32(od_coeff y[32], const od_coeff *x, int xstride);
 void od_bin_idct32(od_coeff *x, int xstride, const od_coeff y[32]);
+#if CONFIG_TX64X64
+void od_bin_fdct64(od_coeff y[64], const od_coeff *x, int xstride);
+void od_bin_idct64(od_coeff *x, int xstride, const od_coeff y[64]);
+#endif
 #endif

@@ -81,8 +81,13 @@ static void iidtx32_c(const tran_low_t *input, tran_low_t *output) {
 #if CONFIG_TX64X64
 static void iidtx64_c(const tran_low_t *input, tran_low_t *output) {
   int i;
-  for (i = 0; i < 64; ++i)
+  for (i = 0; i < 64; ++i) {
+#if CONFIG_DAALA_DCT64
+    output[i] = input[i];
+#else
     output[i] = (tran_low_t)dct_const_round_shift(input[i] * 4 * Sqrt2);
+#endif
+  }
 }
 #endif  // CONFIG_TX64X64
 #endif  // CONFIG_EXT_TX

@@ -118,6 +123,29 @@ static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
 #endif
 
 #if CONFIG_TX64X64
+#if CONFIG_DAALA_DCT64
+static void idct64_col_c(const tran_low_t *input, tran_low_t *output) {
+  aom_idct64_c(input, output);
+}
+
+static void idct64_row_c(const tran_low_t *input, tran_low_t *output) {
+  aom_idct64_c(input, output);
+}
+
+static void ihalfright64_c(const tran_low_t *input, tran_low_t *output) {
+  int i;
+  tran_low_t inputhalf[32];
+  // No scaling within; Daala transforms are all orthonormal
+  for (i = 0; i < 32; ++i) {
+    inputhalf[i] = input[i];
+  }
+  for (i = 0; i < 32; ++i) {
+    output[i] = input[32 + i];
+  }
+  aom_idct32_c(inputhalf, output + 32);
+}
+#else
 static void idct64_col_c(const tran_low_t *input, tran_low_t *output) {
   int32_t in[64], out[64];
   int i;

@@ -148,6 +176,7 @@ static void ihalfright64_c(const tran_low_t *input, tran_low_t *output) {
   aom_idct32_c(inputhalf, output + 32);
   // Note overall scaling factor is 4 * sqrt(2) times orthogonal
 }
+#endif  // CONFIG_DAALA_DCT64
 #endif  // CONFIG_TX64X64
 
 // Inverse identity transform and add.

@@ -1416,8 +1445,15 @@ void av1_iht64x64_4096_add_c(const tran_low_t *input, uint8_t *dest, int stride,
   // inverse transform row vectors
   for (i = 0; i < 64; ++i) {
+#if CONFIG_DAALA_DCT64
+    tran_low_t temp_in[64];
+    for (j = 0; j < 64; j++) temp_in[j] = input[j] * 2;
+    IHT_64[tx_type].rows(temp_in, out[i]);
+    // Do not rescale intermediate for Daala
+#else
     IHT_64[tx_type].rows(input, out[i]);
     for (j = 0; j < 64; ++j) out[i][j] = ROUND_POWER_OF_TWO(out[i][j], 1);
+#endif
     input += 64;
   }

@@ -1440,7 +1476,11 @@ void av1_iht64x64_4096_add_c(const tran_low_t *input, uint8_t *dest, int stride,
     for (j = 0; j < 64; ++j) {
       int d = i * stride + j;
       int s = j * outstride + i;
+#if CONFIG_DAALA_DCT64
+      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 2));
+#else
       dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
+#endif
     }
   }
 }
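
Why the Daala branch shifts by 2 where the vp9-style branch shifts by 5: a
gain-bookkeeping sketch for the Daala path (the 2x coefficient scale is
established by the forward transform in av1_fht64x64_c, later in this patch):

    forward output: coefficients at 2x the orthonormal scale
    inverse rows:   temp_in[j] = input[j] * 2   -> running scale 2 * 2 = 2^2
    inverse cols:   orthonormal, no change
    final add:      ROUND_POWER_OF_TWO(outp[s], 2) removes the 2^2

The vp9-style transforms carry a larger internal scale, which the shift of 5
removes in the same way.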

@@ -1575,13 +1615,13 @@ static void idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
 }
 #endif
 
-#if CONFIG_TX64X64
+#if CONFIG_TX64X64 && !CONFIG_DAALA_DCT64
 static void idct64x64_add(const tran_low_t *input, uint8_t *dest, int stride,
                           const TxfmParam *txfm_param) {
   (void)txfm_param;
   av1_iht64x64_4096_add(input, dest, stride, txfm_param);
 }
-#endif  // CONFIG_TX64X64
+#endif  // CONFIG_TX64X64 && !CONFIG_DAALA_DCT64
 
 #if CONFIG_CHROMA_2X2
 static void inv_txfm_add_2x2(const tran_low_t *input, uint8_t *dest, int stride,

@@ -1875,7 +1915,11 @@ static void inv_txfm_add_64x64(const tran_low_t *input, uint8_t *dest,
                                int stride, const TxfmParam *txfm_param) {
   const TX_TYPE tx_type = txfm_param->tx_type;
   switch (tx_type) {
+#if !CONFIG_DAALA_DCT64
     case DCT_DCT: idct64x64_add(input, dest, stride, txfm_param); break;
+#else
+    case DCT_DCT:
+#endif
 #if CONFIG_EXT_TX
     case ADST_DCT:
     case DCT_ADST:
...

@@ -22,7 +22,7 @@
 #include "av1/common/av1_fwd_txfm1d_cfg.h"
 #include "av1/common/idct.h"
 #if CONFIG_DAALA_DCT4 || CONFIG_DAALA_DCT8 || CONFIG_DAALA_DCT16 || \
-    CONFIG_DAALA_DCT32
+    CONFIG_DAALA_DCT32 || CONFIG_DAALA_DCT64
 #include "av1/common/daala_tx.h"
 #endif

@@ -782,6 +782,16 @@ static void fdct32(const tran_low_t *input, tran_low_t *output) {
 #endif
 
 #ifndef AV1_DCT_GTEST
+#if CONFIG_TX64X64 && CONFIG_DAALA_DCT64
+static void fdct64(const tran_low_t *input, tran_low_t *output) {
+  int i;
+  od_coeff x[64];
+  od_coeff y[64];
+  for (i = 0; i < 64; i++) x[i] = (od_coeff)input[i];
+  od_bin_fdct64(y, x, 1);
+  for (i = 0; i < 64; i++) output[i] = (tran_low_t)y[i];
+}
+#endif
 static void fadst4(const tran_low_t *input, tran_low_t *output) {
   tran_high_t x0, x1, x2, x3;
...

@@ -2530,6 +2540,37 @@ void av1_fht32x32_c(const int16_t *input, tran_low_t *output, int stride,
 }
 
 #if CONFIG_TX64X64
+#if CONFIG_DAALA_DCT64
+#if CONFIG_EXT_TX
+static void fidtx64(const tran_low_t *input, tran_low_t *output) {
+  int i;
+  for (i = 0; i < 64; ++i) output[i] = input[i];
+}
+
+// For use in lieu of ADST
+static void fhalfright64(const tran_low_t *input, tran_low_t *output) {
+  int i;
+  tran_low_t inputhalf[32];
+  // No scaling within; Daala transforms are all orthonormal
+  for (i = 0; i < 32; ++i) {
+    output[32 + i] = input[i];
+  }
+  for (i = 0; i < 32; ++i) {
+    inputhalf[i] = input[i + 32];
+  }
+  fdct32(inputhalf, output);
+  // Note overall scaling factor is 2 times unitary
+}
+#endif  // CONFIG_EXT_TX
+
+static void fdct64_col(const tran_low_t *input, tran_low_t *output) {
+  fdct64(input, output);
+}
+
+static void fdct64_row(const tran_low_t *input, tran_low_t *output) {
+  fdct64(input, output);
+}
+#else
 #if CONFIG_EXT_TX
 static void fidtx64(const tran_low_t *input, tran_low_t *output) {
   int i;

@@ -2568,6 +2609,7 @@ static void fdct64_row(const tran_low_t *input, tran_low_t *output) {
   av1_fdct64_new(in, out, fwd_cos_bit_row_dct_64, fwd_stage_range_row_dct_64);
   for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
 }
+#endif
 
 void av1_fht64x64_c(const int16_t *input, tran_low_t *output, int stride,
                     TxfmParam *txfm_param) {
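
A note on the halfright pair added above: fhalfright64 copies the first 32
inputs unchanged into the upper half of the output (an identity, which is
trivially orthonormal) and runs fdct32 on the remaining 32, while
ihalfright64_c in the inverse-transform hunk earlier undoes exactly that
mapping. The "2 times unitary" factor noted in the comment presumably comes
from the scaling of the fdct32 used on the lower half, not from any Daala
stage.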

@@ -2609,10 +2651,18 @@ void av1_fht64x64_c(const int16_t *input, tran_low_t *output, int stride,
   // Columns
   for (i = 0; i < 64; ++i) {
+#if CONFIG_DAALA_DCT64
+    for (j = 0; j < 64; ++j) temp_in[j] = input[j * stride + i] * 16;
+    ht.cols(temp_in, temp_out);
+    for (j = 0; j < 64; ++j)
+      out[j * 64 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 3;
+#else
     for (j = 0; j < 64; ++j) temp_in[j] = input[j * stride + i];
     ht.cols(temp_in, temp_out);
     for (j = 0; j < 64; ++j)
      out[j * 64 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
+#endif
   }
 
   // Rows

@@ -2620,8 +2670,12 @@ void av1_fht64x64_c(const int16_t *input, tran_low_t *output, int stride,
     for (j = 0; j < 64; ++j) temp_in[j] = out[j + i * 64];
     ht.rows(temp_in, temp_out);
     for (j = 0; j < 64; ++j)
+#if CONFIG_DAALA_DCT64
+      output[j + i * 64] = temp_out[j];
+#else
       output[j + i * 64] =
           (tran_low_t)((temp_out[j] + 1 + (temp_out[j] < 0)) >> 2);
+#endif
   }
 }
 #endif  // CONFIG_TX64X64
...
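
The 2x forward scale referenced in the inverse bookkeeping above originates
in this column pass: multiplying by 16 before the orthonormal column
transform and shifting right by 3 afterwards is a net 16 / 2^3 = 2, with the
temporary headroom limiting rounding error inside the integer transform; the
row pass then writes coefficients through unshifted, so the 2D forward
transform is 2x orthonormal overall.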

@@ -127,6 +127,7 @@ set(CONFIG_CDEF 0 CACHE NUMBER "AV1 experiment flag.")
 set(CONFIG_DAALA_DCT16 0 CACHE NUMBER "AV1 experiment flag.")
 set(CONFIG_DAALA_DCT32 0 CACHE NUMBER "AV1 experiment flag.")
 set(CONFIG_DAALA_DCT4 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_DAALA_DCT64 0 CACHE NUMBER "AV1 experiment flag.")
 set(CONFIG_DAALA_DCT8 0 CACHE NUMBER "AV1 experiment flag.")
 set(CONFIG_DAALA_DIST 0 CACHE NUMBER "AV1 experiment flag.")
 set(CONFIG_DCT_ONLY 0 CACHE NUMBER "AV1 experiment flag.")
...

@@ -247,8 +247,16 @@ if (CONFIG_DAALA_DCT4 AND NOT CONFIG_DCT_ONLY)
   change_config_and_warn(CONFIG_DCT_ONLY 1 CONFIG_DAALA_DCT4)
 endif()
+if (CONFIG_DAALA_DCT64)
+  if (NOT CONFIG_TX64X64)
+    message(WARNING
+            "--- Enabled CONFIG_TX64X64, needed for CONFIG_DAALA_DCT64.")
+    set(CONFIG_TX64X64 1)
+  endif()
+endif()
+
 if (CONFIG_DAALA_DCT4 OR CONFIG_DAALA_DCT8 OR CONFIG_DAALA_DCT16 OR
-    CONFIG_DAALA_DCT32)
+    CONFIG_DAALA_DCT32 OR CONFIG_DAALA_DCT64)
   if (HAVE_MMX)
     change_config_and_warn(HAVE_MMX 0 CONFIG_DAALA_DCTx)
   endif()
...

@@ -295,6 +295,7 @@ EXPERIMENT_LIST="
     daala_dct8
     daala_dct16
     daala_dct32
+    daala_dct64
     cb4x4
     chroma_2x2
     chroma_sub8x8

@@ -576,10 +577,14 @@ post_process_cmdline() {
   if enabled daala_dct4; then
     enable_feature dct_only
   fi
+  if enabled daala_dct64; then
+    enable_feature tx64x64
+  fi
   if enabled daala_dct4 ||
      enabled daala_dct8 ||
      enabled daala_dct16 ||
-     enabled daala_dct32; then
+     enabled daala_dct32 ||
+     enabled daala_dct64; then
     disable_feature mmx
     disable_feature rect_tx
     disable_feature var_tx
...
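
To build with the experiment, both build systems expose the new flag: with
cmake, something like "cmake path/to/aom -DCONFIG_DAALA_DCT64=1"; with the
configure script, enabling the daala_dct64 experiment together with
--enable-experimental (experiment flags follow the script's usual
command-line spelling). The dependency handling above then enables tx64x64
and disables mmx, rect_tx, and var_tx automatically.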