Commit 02078a38 authored by Monty Montgomery's avatar Monty Montgomery Committed by Christopher Montgomery

Add CONFIG_DAALA_DCT4 experiment.

This experiment replaces the 4-point Type-II scaled-output vp9 DCT
 transform with the 4-point Type-II orthonormal Daala DCT transform.
Right now the CONFIG_DAALA_DCT4 experiment depends on CONFIG_DCT_ONLY
 as it does not add an orthonormal 4-point DST.

subset-1:

monty-baseline-dctonly-squaretx-subset1 ->
  monty-dct4-dctonly-squaretx-subset1-rerun

  PSNR | PSNR Cb | PSNR Cr | PSNR HVS |   SSIM | MS SSIM | CIEDE 2000
0.0055 | -0.0132 | -0.0405 |   0.0261 | 0.0005 |  0.0246 |     0.0226

objective-1-fast:

monty-baseline-dctonly-squaretx-o1f ->
  monty-dct4-dctonly-squaretx-o1f

   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
-0.0215 | -0.1573 |     N/A |  -0.0131 | -0.0347 | -0.0390 |    -0.1121

Change-Id: Idef8f6e5525037d5bbb2d0927675c21d1922d69a
parent 77b04c8e
......@@ -14,6 +14,9 @@
#include "./aom_dsp_rtcd.h"
#include "aom_dsp/inv_txfm.h"
#if CONFIG_DAALA_DCT4
#include "av1/common/daala_tx.h"
#endif
void aom_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
/* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
......@@ -93,6 +96,18 @@ void aom_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest, int dest_stride) {
}
}
#if CONFIG_DAALA_DCT4
void aom_idct4_c(const tran_low_t *input, tran_low_t *output) {
int i;
od_coeff x[4];
od_coeff y[4];
for (i = 0; i < 4; i++) y[i] = input[i];
od_bin_idct4(x, 1, y);
for (i = 0; i < 4; i++) output[i] = (tran_low_t)x[i];
}
#else
void aom_idct4_c(const tran_low_t *input, tran_low_t *output) {
tran_low_t step[4];
tran_high_t temp1, temp2;
......@@ -112,6 +127,7 @@ void aom_idct4_c(const tran_low_t *input, tran_low_t *output) {
output[2] = WRAPLOW(step[1] - step[2]);
output[3] = WRAPLOW(step[0] - step[3]);
}
#endif
void aom_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
tran_low_t out[4 * 4];
......
......@@ -33,6 +33,8 @@ set(AOM_AV1_COMMON_SOURCES
"${AOM_ROOT}/av1/common/common_data.h"
"${AOM_ROOT}/av1/common/convolve.c"
"${AOM_ROOT}/av1/common/convolve.h"
"${AOM_ROOT}/av1/common/daala_tx.c"
"${AOM_ROOT}/av1/common/daala_tx.h"
"${AOM_ROOT}/av1/common/debugmodes.c"
"${AOM_ROOT}/av1/common/entropy.c"
"${AOM_ROOT}/av1/common/entropy.h"
......
......@@ -24,6 +24,8 @@ AV1_COMMON_SRCS-yes += common/frame_buffers.h
AV1_COMMON_SRCS-yes += common/alloccommon.h
AV1_COMMON_SRCS-yes += common/blockd.h
AV1_COMMON_SRCS-yes += common/common.h
AV1_COMMON_SRCS-yes += common/daala_tx.c
AV1_COMMON_SRCS-yes += common/daala_tx.h
AV1_COMMON_SRCS-yes += common/entropy.h
AV1_COMMON_SRCS-yes += common/entropymode.h
AV1_COMMON_SRCS-yes += common/entropymv.h
......
#include "av1/common/daala_tx.h"
#include "av1/common/odintrin.h"
/* clang-format off */
# define OD_DCT_RSHIFT(_a, _b) OD_UNBIASED_RSHIFT32(_a, _b)
/* TODO: Daala DCT overflow checks need to be ported as a later test */
# if defined(OD_DCT_CHECK_OVERFLOW)
# else
# define OD_DCT_OVERFLOW_CHECK(val, scale, offset, idx)
# endif
#define OD_FDCT_2_ASYM(p0, p1, p1h) \
/* Embedded 2-point asymmetric Type-II fDCT. */ \
do { \
p0 += p1h; \
p1 = p0 - p1; \
} \
while (0)
#define OD_IDCT_2_ASYM(p0, p1, p1h) \
/* Embedded 2-point asymmetric Type-II iDCT. */ \
do { \
p1 = p0 - p1; \
p1h = OD_DCT_RSHIFT(p1, 1); \
p0 -= p1h; \
} \
while (0)
#define OD_FDST_2_ASYM(p0, p1) \
/* Embedded 2-point asymmetric Type-IV fDST. */ \
do { \
/* 11507/16384 ~= 4*Sin[Pi/8] - 2*Tan[Pi/8] ~= 0.702306604714169 */ \
OD_DCT_OVERFLOW_CHECK(p1, 11507, 8192, 187); \
p0 -= (p1*11507 + 8192) >> 14; \
/* 669/1024 ~= Cos[Pi/8]/Sqrt[2] ~= 0.653281482438188 */ \
OD_DCT_OVERFLOW_CHECK(p0, 669, 512, 188); \
p1 += (p0*669 + 512) >> 10; \
/* 4573/4096 ~= 4*Sin[Pi/8] - Tan[Pi/8] ~= 1.11652016708726 */ \
OD_DCT_OVERFLOW_CHECK(p1, 4573, 2048, 189); \
p0 -= (p1*4573 + 2048) >> 12; \
} \
while (0)
#define OD_IDST_2_ASYM(p0, p1) \
/* Embedded 2-point asymmetric Type-IV iDST. */ \
do { \
/* 4573/4096 ~= 4*Sin[Pi/8] - Tan[Pi/8] ~= 1.11652016708726 */ \
p0 += (p1*4573 + 2048) >> 12; \
/* 669/1024 ~= Cos[Pi/8]/Sqrt[2] ~= 0.653281482438188 */ \
p1 -= (p0*669 + 512) >> 10; \
/* 11507/16384 ~= 4*Sin[Pi/8] - 2*Tan[Pi/8] ~= 0.702306604714169 */ \
p0 += (p1*11507 + 8192) >> 14; \
} \
while (0)
#define OD_FDCT_4(q0, q2, q1, q3) \
/* Embedded 4-point orthonormal Type-II fDCT. */ \
do { \
int q2h; \
int q3h; \
q3 = q0 - q3; \
q3h = OD_DCT_RSHIFT(q3, 1); \
q0 -= q3h; \
q2 += q1; \
q2h = OD_DCT_RSHIFT(q2, 1); \
q1 = q2h - q1; \
OD_FDCT_2_ASYM(q0, q2, q2h); \
OD_FDST_2_ASYM(q3, q1); \
} \
while (0)
#define OD_IDCT_4(q0, q2, q1, q3) \
/* Embedded 4-point orthonormal Type-II iDCT. */ \
do { \
int q1h; \
int q3h; \
OD_IDST_2_ASYM(q3, q2); \
OD_IDCT_2_ASYM(q0, q1, q1h); \
q3h = OD_DCT_RSHIFT(q3, 1); \
q0 += q3h; \
q3 = q0 - q3; \
q2 = q1h - q2; \
q1 -= q2; \
} \
while (0)
void od_bin_fdct4(od_coeff y[4], const od_coeff *x, int xstride) {
int q0;
int q1;
int q2;
int q3;
q0 = x[0*xstride];
q2 = x[1*xstride];
q1 = x[2*xstride];
q3 = x[3*xstride];
OD_FDCT_4(q0, q2, q1, q3);
y[0] = (od_coeff)q0;
y[1] = (od_coeff)q1;
y[2] = (od_coeff)q2;
y[3] = (od_coeff)q3;
}
void od_bin_idct4(od_coeff *x, int xstride, const od_coeff y[4]) {
int q0;
int q1;
int q2;
int q3;
q0 = y[0];
q2 = y[1];
q1 = y[2];
q3 = y[3];
OD_IDCT_4(q0, q2, q1, q3);
x[0*xstride] = q0;
x[1*xstride] = q1;
x[2*xstride] = q2;
x[3*xstride] = q3;
}
#ifndef AOM_DSP_DAALA_TX_H_
#define AOM_DSP_DAALA_TX_H_
#include "av1/common/odintrin.h"
void od_bin_fdct4(od_coeff y[4], const od_coeff *x, int xstride);
void od_bin_idct4(od_coeff *x, int xstride, const od_coeff y[4]);
#endif
......@@ -36,8 +36,13 @@ int av1_get_tx_scale(const TX_SIZE tx_size) {
#if CONFIG_EXT_TX
static void iidtx4_c(const tran_low_t *input, tran_low_t *output) {
int i;
for (i = 0; i < 4; ++i)
for (i = 0; i < 4; ++i) {
#if CONFIG_DAALA_DCT4
output[i] = input[i];
#else
output[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
#endif
}
}
static void iidtx8_c(const tran_low_t *input, tran_low_t *output) {
......@@ -249,10 +254,12 @@ int get_inv_lgt8(transform_1d tx_orig, const INV_TXFM_PARAM *inv_txfm_param,
void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
const INV_TXFM_PARAM *param) {
int tx_type = param->tx_type;
#if !CONFIG_DAALA_DCT4
if (tx_type == DCT_DCT) {
aom_idct4x4_16_add(input, dest, stride);
return;
}
#endif
static const transform_2d IHT_4[] = {
{ aom_idct4_c, aom_idct4_c }, // DCT_DCT = 0
{ aom_iadst4_c, aom_idct4_c }, // ADST_DCT = 1
......@@ -293,12 +300,18 @@ void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
// inverse transform row vectors
for (i = 0; i < 4; ++i) {
#if CONFIG_DAALA_DCT4
tran_low_t temp_in[4];
for (j = 0; j < 4; j++) temp_in[j] = input[j] << 1;
IHT_4[tx_type].rows(temp_in, out[i]);
#else
#if CONFIG_LGT
if (use_lgt_row)
ilgt4(input, out[i], lgtmtx_row[i]);
else
#endif
IHT_4[tx_type].rows(input, out[i]);
#endif
input += 4;
}
......@@ -328,7 +341,11 @@ void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
for (j = 0; j < 4; ++j) {
int d = i * stride + j;
int s = j * outstride + i;
#if CONFIG_DAALA_DCT4
dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
#else
dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
#endif
}
}
}
......@@ -1440,7 +1457,11 @@ static void inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, int stride,
}
switch (tx_type) {
#if !CONFIG_DAALA_DCT4
case DCT_DCT: av1_idct4x4_add(input, dest, stride, param); break;
#else
case DCT_DCT:
#endif
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
......
......@@ -89,6 +89,12 @@ extern "C" {
typedef int od_coeff;
/*This is the strength reduced version of ((_a)/(1 << (_b))).
This will not work for _b == 0, however currently this is only used for
b == 1 anyway.*/
# define OD_UNBIASED_RSHIFT32(_a, _b) \
(((int32_t)(((uint32_t)(_a) >> (32 - (_b))) + (_a))) >> (_b))
#define OD_DIVU_DMAX (1024)
extern uint32_t OD_DIVU_SMALL_CONSTS[OD_DIVU_DMAX][2];
......
......@@ -21,6 +21,9 @@
#include "av1/common/av1_fwd_txfm1d.h"
#include "av1/common/av1_fwd_txfm1d_cfg.h"
#include "av1/common/idct.h"
#if CONFIG_DAALA_DCT4
#include "av1/common/daala_tx.h"
#endif
static INLINE void range_check(const tran_low_t *input, const int size,
const int bit) {
......@@ -39,6 +42,18 @@ static INLINE void range_check(const tran_low_t *input, const int size,
#endif
}
#if CONFIG_DAALA_DCT4
static void fdct4(const tran_low_t *input, tran_low_t *output) {
int i;
od_coeff x[4];
od_coeff y[4];
for (i = 0; i < 4; i++) x[i] = (od_coeff)input[i];
od_bin_fdct4(y, x, 1);
for (i = 0; i < 4; i++) output[i] = (tran_low_t)y[i];
}
#else
static void fdct4(const tran_low_t *input, tran_low_t *output) {
tran_high_t temp;
tran_low_t step[4];
......@@ -74,6 +89,7 @@ static void fdct4(const tran_low_t *input, tran_low_t *output) {
range_check(output, 4, 16);
}
#endif
static void fdct8(const tran_low_t *input, tran_low_t *output) {
tran_high_t temp;
......@@ -1078,8 +1094,13 @@ int get_fwd_lgt8(transform_1d tx_orig, FWD_TXFM_PARAM *fwd_txfm_param,
// being used for square transforms.
static void fidtx4(const tran_low_t *input, tran_low_t *output) {
int i;
for (i = 0; i < 4; ++i)
for (i = 0; i < 4; ++i) {
#if CONFIG_DAALA_DCT4
output[i] = input[i];
#else
output[i] = (tran_low_t)fdct_round_shift(input[i] * Sqrt2);
#endif
}
}
static void fidtx8(const tran_low_t *input, tran_low_t *output) {
......@@ -1199,9 +1220,13 @@ void av1_fht4x4_c(const int16_t *input, tran_low_t *output, int stride,
#if CONFIG_DCT_ONLY
assert(tx_type == DCT_DCT);
#endif
#if !CONFIG_DAALA_DCT4
if (tx_type == DCT_DCT) {
aom_fdct4x4_c(input, output, stride);
} else {
return;
}
#endif
{
static const transform_2d FHT[] = {
{ fdct4, fdct4 }, // DCT_DCT
{ fadst4, fdct4 }, // ADST_DCT
......@@ -1243,8 +1268,11 @@ void av1_fht4x4_c(const int16_t *input, tran_low_t *output, int stride,
// Columns
for (i = 0; i < 4; ++i) {
/* A C99-safe upshift by 4 for both Daala and VPx TX. */
for (j = 0; j < 4; ++j) temp_in[j] = input[j * stride + i] * 16;
#if !CONFIG_DAALA_DCT4
if (i == 0 && temp_in[0]) temp_in[0] += 1;
#endif
#if CONFIG_LGT
if (use_lgt_col)
flgt4(temp_in, temp_out, lgtmtx_col[i]);
......@@ -1263,7 +1291,13 @@ void av1_fht4x4_c(const int16_t *input, tran_low_t *output, int stride,
else
#endif
ht.rows(temp_in, temp_out);
#if CONFIG_DAALA_DCT4
/* Daala TX has orthonormal scaling; shift down by only 1 to achieve
the usual VPx coefficient left-shift of 3. */
for (j = 0; j < 4; ++j) output[j + i * 4] = temp_out[j] >> 1;
#else
for (j = 0; j < 4; ++j) output[j + i * 4] = (temp_out[j] + 1) >> 2;
#endif
}
}
}
......
......@@ -291,6 +291,7 @@ EXPERIMENT_LIST="
cfl
xiphrc
dct_only
daala_dct4
cb4x4
chroma_2x2
chroma_sub8x8
......@@ -559,6 +560,14 @@ post_process_cmdline() {
log_echo "ec_smallmul requires not ans, so disabling ec_smallmul"
disable_feature ec_smallmul
fi
if enabled daala_dct4; then
enable_feature dct_only
disable_feature mmx
disable_feature rect_tx
disable_feature var_tx
disable_feature lgt
enable_feature lowbitdepth
fi
if enabled ext_tile; then
log_echo "ext_tile not compatible with reference_buffer, so"
log_echo "disabling reference_buffer"
......
......@@ -23,6 +23,9 @@
#define CONFIG_COEFFICIENT_RANGE_CHECKING 1
#define AV1_DCT_GTEST
#include "av1/encoder/dct.c"
#if CONFIG_DAALA_DCT4
#include "av1/common/daala_tx.c"
#endif
using libaom_test::ACMRandom;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment