Commit a2d40a39, authored by Monty Montgomery, committed by Christopher Montgomery

Simplify Daala forward TX toplevel for constant shift

Rather than backing out all the LGT-related shifting matrices
throughout the existing TX code, separate out and simplify Daala
forward TX into a single dedicated entry point.  When DAALA_TX is
enabled, CONFIG_HIGHBITDEPTH is also forced, and all of Daala TX
(lowbd and highbd) uses this single TX dispatch.

At present, this should result in no effective functional change;
however, rectangular transforms are now always column-first, which
has minor rounding effects.

subset 1:
monty-daalaTX-fulltest-DaalaRDO-s1@2017-11-07T00:02:56.282Z ->
 monty-daalaTX-fulltest-fwd-s1@2017-11-07T03:08:55.478Z

   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
-0.0576 |     N/A | -0.2646 |  -0.0125 | -0.0439 | -0.0479 |    -0.1798

objective 1 fast:
monty-daalaTX-fulltest-DaalaRDO-o1f4@2017-11-07T05:59:50.180Z ->
 monty-daalaTX-fulltest-fwd-o1f4@2017-11-07T06:00:08.500Z

  PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
0.0036 |  0.0477 |  0.1132 |   0.0863 | -0.0017 |  0.0209 |     0.0240

Change-Id: I182a5c4388c410cbea8810e2f9e36fd37a4a46e5
parent 1aeee2e9
......@@ -109,6 +109,8 @@ set(AOM_AV1_ENCODER_SOURCES
"${AOM_ROOT}/av1/encoder/context_tree.h"
"${AOM_ROOT}/av1/encoder/cost.c"
"${AOM_ROOT}/av1/encoder/cost.h"
"${AOM_ROOT}/av1/encoder/daala_fwd_txfm.c"
"${AOM_ROOT}/av1/encoder/daala_fwd_txfm.h"
"${AOM_ROOT}/av1/encoder/dct.c"
"${AOM_ROOT}/av1/encoder/encodeframe.c"
"${AOM_ROOT}/av1/encoder/encodeframe.h"
......
......@@ -27,6 +27,8 @@ AV1_CX_SRCS-yes += encoder/context_tree.c
AV1_CX_SRCS-yes += encoder/context_tree.h
AV1_CX_SRCS-yes += encoder/cost.h
AV1_CX_SRCS-yes += encoder/cost.c
AV1_CX_SRCS-yes += encoder/daala_fwd_txfm.c
AV1_CX_SRCS-yes += encoder/daala_fwd_txfm.h
AV1_CX_SRCS-yes += encoder/dct.c
AV1_CX_SRCS-yes += encoder/hybrid_fwd_txfm.c
AV1_CX_SRCS-yes += encoder/hybrid_fwd_txfm.h
......
......@@ -5186,6 +5186,40 @@ void od_bin_idct64(od_coeff *x, int xstride, const od_coeff y[64]) {
}
#endif
/* 4-point forward identity transform: copies four input samples, read
   xstride elements apart starting at x, into y[0..3].
   Samples are copied in ascending output order. */
void od_bin_fidtx4(od_coeff y[4], const od_coeff *x, int xstride) {
  int n = 0;
  while (n < 4) {
    const int src_offset = n * xstride;
    y[n] = *(x + src_offset);
    ++n;
  }
}
/* 8-point forward identity transform: copies eight input samples, read
   xstride elements apart starting at x, into y[0..7].
   Samples are copied in ascending output order. */
void od_bin_fidtx8(od_coeff y[8], const od_coeff *x, int xstride) {
  int n = 0;
  while (n < 8) {
    const int src_offset = n * xstride;
    y[n] = *(x + src_offset);
    ++n;
  }
}
/* 16-point forward identity transform: copies sixteen input samples, read
   xstride elements apart starting at x, into y[0..15].
   Samples are copied in ascending output order. */
void od_bin_fidtx16(od_coeff y[16], const od_coeff *x, int xstride) {
  int n = 0;
  while (n < 16) {
    const int src_offset = n * xstride;
    y[n] = *(x + src_offset);
    ++n;
  }
}
/* 32-point forward identity transform: copies thirty-two input samples,
   read xstride elements apart starting at x, into y[0..31].
   Samples are copied in ascending output order. */
void od_bin_fidtx32(od_coeff y[32], const od_coeff *x, int xstride) {
  int n = 0;
  while (n < 32) {
    const int src_offset = n * xstride;
    y[n] = *(x + src_offset);
    ++n;
  }
}
#if CONFIG_TX64X64
/* 64-point forward identity transform: copies sixty-four input samples,
   read xstride elements apart starting at x, into y[0..63].
   Samples are copied in ascending output order. */
void od_bin_fidtx64(od_coeff y[64], const od_coeff *x, int xstride) {
  int n = 0;
  while (n < 64) {
    const int src_offset = n * xstride;
    y[n] = *(x + src_offset);
    ++n;
  }
}
#endif
// Below are intermediate wrappers that handle the case when
// tran_low_t is a smaller type than od_coeff
void daala_fdct4(const tran_low_t *input, tran_low_t *output) {
int i;
od_coeff x[4];
......
......@@ -35,19 +35,26 @@ void daala_idtx64(const tran_low_t *input, tran_low_t *output);
// Low-level 1-D transform kernels operating on od_coeff vectors.
//
// Forward kernels (od_bin_f*) write an N-element output vector y and read
// their input through the const pointer x with an element stride of xstride.
// Inverse kernels (od_bin_i*) have the mirrored signature: they read the
// const N-element vector y and write through x with a stride of xstride.
// The *fidtx* entries are the forward identity kernels.
// NOTE(review): fdct/fdst/idct/idst are presumably the forward/inverse DCT
// and DST kernels, going by their names; their definitions are not part of
// this header.

// 4-point kernels
void od_bin_fdct4(od_coeff y[4], const od_coeff *x, int xstride);
void od_bin_idct4(od_coeff *x, int xstride, const od_coeff y[4]);
void od_bin_fdst4(od_coeff y[4], const od_coeff *x, int xstride);
void od_bin_fidtx4(od_coeff y[4], const od_coeff *x, int xstride);
void od_bin_idst4(od_coeff *x, int xstride, const od_coeff y[4]);
// 8-point kernels
void od_bin_fdct8(od_coeff y[8], const od_coeff *x, int xstride);
void od_bin_idct8(od_coeff *x, int xstride, const od_coeff y[8]);
void od_bin_fdst8(od_coeff y[8], const od_coeff *x, int xstride);
void od_bin_fidtx8(od_coeff y[8], const od_coeff *x, int xstride);
void od_bin_idst8(od_coeff *x, int xstride, const od_coeff y[8]);
// 16-point kernels
void od_bin_fdct16(od_coeff y[16], const od_coeff *x, int xstride);
void od_bin_idct16(od_coeff *x, int xstride, const od_coeff y[16]);
void od_bin_fdst16(od_coeff y[16], const od_coeff *x, int xstride);
void od_bin_fidtx16(od_coeff y[16], const od_coeff *x, int xstride);
void od_bin_idst16(od_coeff *x, int xstride, const od_coeff y[16]);
// 32-point kernels (no inverse DST is declared at this size)
void od_bin_fdct32(od_coeff y[32], const od_coeff *x, int xstride);
void od_bin_idct32(od_coeff *x, int xstride, const od_coeff y[32]);
void od_bin_fdst32(od_coeff y[32], const od_coeff *x, int xstride);
void od_bin_fidtx32(od_coeff y[32], const od_coeff *x, int xstride);
#if CONFIG_TX64X64
// 64-point kernels (no DST is declared at this size)
void od_bin_fdct64(od_coeff y[64], const od_coeff *x, int xstride);
void od_bin_idct64(od_coeff *x, int xstride, const od_coeff y[64]);
void od_bin_fidtx64(od_coeff y[64], const od_coeff *x, int xstride);
#endif
#endif
/*
* Copyright (c) 2017, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "./av1_rtcd.h"
#include "./aom_config.h"
#include "./aom_dsp_rtcd.h"
#include "av1/common/daala_tx.h"
#include "av1/encoder/daala_fwd_txfm.h"
#if CONFIG_DAALA_TX
// Temporary while we still need av1_get_tx_scale() for testing
#include "av1/common/idct.h"
// Complete Daala TX map, sans lossless which is special cased.
//
// Indexed as tx_map[1-D size index][1-D transform type].  Columns 1 and 2
// both hold the forward DST kernel; column 2 is the type that tx_flip()
// reports as flipped, so the caller feeds it reversed input instead of
// using a separate kernel.  NULL marks a size/type combination with no
// kernel (there is no 64-point DST); daala_fwd_txfm() asserts that the
// entries it selects are non-NULL.
//
// The table is only ever read, so it is declared const.
typedef void (*daala_ftx)(od_coeff[], const od_coeff *, int);
static const daala_ftx tx_map[TX_SIZES][TX_TYPES_1D] = {
  // 4-point transforms
  { od_bin_fdct4, od_bin_fdst4, od_bin_fdst4, od_bin_fidtx4 },
  // 8-point transforms
  { od_bin_fdct8, od_bin_fdst8, od_bin_fdst8, od_bin_fidtx8 },
  // 16-point transforms
  { od_bin_fdct16, od_bin_fdst16, od_bin_fdst16, od_bin_fidtx16 },
  // 32-point transforms
  { od_bin_fdct32, od_bin_fdst32, od_bin_fdst32, od_bin_fidtx32 },
#if CONFIG_TX64X64
  // 64-point transforms
  { od_bin_fdct64, NULL, NULL, od_bin_fidtx64 },
#endif
};
// Returns nonzero when the 1-D transform type t must be applied to
// reversed input.  Only type index 2 is treated as flipped.
// NOTE(review): 2 is presumably the flipped-ADST entry of TX_TYPE_1D --
// confirm against the enum definition.
static int tx_flip(TX_TYPE_1D t) {
  const int is_flipped = (t == 2);
  return is_flipped;
}
// Daala TX toplevel entry point, same interface as av1 low-bitdepth
// and high-bitdepth TX (av1_fwd_txfm and av1_highbd_fwd_txfm).  This
// same function is intended for both low and high bitdepth cases with
// a tran_low_t of 32 bits (matching od_coeff).
//
//   input_pixels   block of input samples; rows are input_stride apart.
//   output_coeffs  receives the coefficients.  In the general (non-lossless)
//                  case they are stored row-major with a row stride equal
//                  to the transform width.
//   input_stride   distance, in elements, between rows of input_pixels.
//   txfm_param     supplies tx_size, tx_type and the lossless flag.
void daala_fwd_txfm(const int16_t *input_pixels, tran_low_t *output_coeffs,
                    int input_stride, TxfmParam *txfm_param) {
  const TX_SIZE tx_size = txfm_param->tx_size;
  const TX_TYPE tx_type = txfm_param->tx_type;
  // tx_size and tx_type are used as lookup-table indices below, so they
  // must be strictly less than the enumerator counts.
  assert(tx_size < TX_SIZES_ALL);
  assert(tx_type < TX_TYPES);

  if (txfm_param->lossless) {
    // Transform function special-cased for lossless
    assert(tx_type == DCT_DCT);
    assert(tx_size == TX_4X4);
    av1_fwht4x4(input_pixels, output_coeffs, input_stride);
  } else {
    // General TX case

    // up 4, down 1 compatibility mode with av1_get_tx_scale
    const int upshift = 4;
    assert(upshift >= 0);
    // output_coeffs is handed directly to the od_coeff kernels below, so
    // the two element types must have the same size.
    assert(sizeof(tran_low_t) == sizeof(od_coeff));
    assert(sizeof(tran_low_t) >= 4);

    // Hook into existing map translation infrastructure to select
    // appropriate TX functions
    const int cols = tx_size_wide[tx_size];
    const int rows = tx_size_high[tx_size];
    const TX_SIZE col_idx = txsize_vert_map[tx_size];
    const TX_SIZE row_idx = txsize_horz_map[tx_size];
    // tx_map is declared as [TX_SIZES][TX_TYPES_1D]; valid indices are
    // strictly below those extents.
    assert(col_idx < TX_SIZES);
    assert(row_idx < TX_SIZES);
    assert(vtx_tab[tx_type] < (int)TX_TYPES_1D);
    assert(htx_tab[tx_type] < (int)TX_TYPES_1D);
    daala_ftx col_tx = tx_map[col_idx][vtx_tab[tx_type]];
    daala_ftx row_tx = tx_map[row_idx][htx_tab[tx_type]];
    int col_flip = tx_flip(vtx_tab[tx_type]);
    int row_flip = tx_flip(htx_tab[tx_type]);
    od_coeff tmp[MAX_TX_SIZE];
    int r;
    int c;

    // tx_map holds NULL for size/type pairs that have no kernel.
    assert(col_tx);
    assert(row_tx);

    // Transform columns
    // NOTE(review): the kernels are called with tmp as both input and
    // output; this presumably relies on each kernel loading all of its
    // inputs before storing any output -- confirm against the kernels.
    for (c = 0; c < cols; ++c) {
      // Cast and shift
      for (r = 0; r < rows; ++r)
        tmp[r] =
            ((od_coeff)(input_pixels[r * input_stride + c])) * (1 << upshift);
      // A flipped transform reads its input back-to-front: start at the
      // last element and step by -1.
      if (col_flip)
        col_tx(tmp, tmp + (rows - 1), -1);
      else
        col_tx(tmp, tmp, 1);
      // No ystride in daala_tx lowlevel functions, store output vector
      // into column the long way
      for (r = 0; r < rows; ++r) output_coeffs[r * cols + c] = tmp[r];
    }

    // Transform rows (in place within output_coeffs)
    for (r = 0; r < rows; ++r) {
      if (row_flip)
        row_tx(output_coeffs + r * cols, output_coeffs + r * cols + cols - 1,
               -1);
      else
        row_tx(output_coeffs + r * cols, output_coeffs + r * cols, 1);
    }

    // This is temporary while we're testing against existing
    // behavior (preshift up 4, then downshift by one plus av1_get_tx_scale)
    const int downshift = 1 + av1_get_tx_scale(tx_size);
    for (r = 0; r < rows; ++r)
      for (c = 0; c < cols; ++c)
        output_coeffs[r * cols + c] =
            ROUND_POWER_OF_TWO_SIGNED(output_coeffs[r * cols + c], downshift);
  }
}
#endif
/*
* Copyright (c) 2017, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#ifndef AV1_ENCODER_DAALA_FWD_TXFM_H_
#define AV1_ENCODER_DAALA_FWD_TXFM_H_
#include "./aom_config.h"
#ifdef __cplusplus
extern "C" {
#endif
// Daala forward transform entry point.
//
//   src_diff     block of input samples; rows are diff_stride apart.
//   coeff        receives the transform coefficients.
//   diff_stride  distance, in elements, between rows of src_diff.
//   txfm_param   supplies the transform size, the transform type and the
//                lossless flag.
//
// NOTE(review): this header uses int16_t, tran_low_t and TxfmParam without
// including their definitions itself -- presumably the includer is expected
// to pull them in first; confirm.
void daala_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride,
TxfmParam *txfm_param);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // AV1_ENCODER_DAALA_FWD_TXFM_H_
......@@ -15,6 +15,9 @@
#include "av1/common/idct.h"
#include "av1/encoder/hybrid_fwd_txfm.h"
#if CONFIG_DAALA_TX
#include "av1/encoder/daala_fwd_txfm.h"
#else
static void fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TxfmParam *txfm_param) {
......@@ -457,10 +460,14 @@ static void highbd_fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff,
}
}
#endif // CONFIG_TX64X64
#endif // CONFIG_DAALA_TX
void av1_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride,
TxfmParam *txfm_param) {
assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
#if CONFIG_DAALA_TX
daala_fwd_txfm(src_diff, coeff, diff_stride, txfm_param);
#else
const TX_SIZE tx_size = txfm_param->tx_size;
switch (tx_size) {
#if CONFIG_TX64X64
......@@ -512,11 +519,15 @@ void av1_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride,
#endif
default: assert(0); break;
}
#endif
}
void av1_highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TxfmParam *txfm_param) {
assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
#if CONFIG_DAALA_TX
daala_fwd_txfm(src_diff, coeff, diff_stride, txfm_param);
#else
const TX_SIZE tx_size = txfm_param->tx_size;
switch (tx_size) {
#if CONFIG_TX64X64
......@@ -562,4 +573,5 @@ void av1_highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff,
break;
default: assert(0); break;
}
#endif
}
......@@ -39,6 +39,7 @@ macro (fix_experiment_configs)
endif ()
if (CONFIG_DAALA_TX)
set(CONFIG_HIGHBITDEPTH 1)
set(CONFIG_DAALA_TX4 1)
set(CONFIG_DAALA_TX8 1)
set(CONFIG_DAALA_TX16 1)
......
......@@ -560,6 +560,7 @@ post_process_cmdline() {
disable_feature rawbits
fi
if enabled daala_tx; then
enable_feature highbitdepth
enable_feature daala_tx4
enable_feature daala_tx8
enable_feature daala_tx16
......
......@@ -20,7 +20,7 @@
#include "test/transform_test_base.h"
#include "test/util.h"
#if CONFIG_TX64X64
#if CONFIG_TX64X64 && !CONFIG_DAALA_TX
using libaom_test::ACMRandom;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment