Commit cf7f0069 authored by Yi Luo

Change hybrid transform function argument from TXFM_2D_CFG* to int

  Unit tests show that manually developed SSE4.1 code would perform ~30%
  better if the TXFM_2D_CFG configuration is set at a lower level. This
  change only updates the function signatures. There is no performance
  impact.

Change-Id: I62692bd50a21ffc8a944bbd6c155c0a2020ad77b
parent 8f2fa041
......@@ -51,6 +51,7 @@ TEST(vp10_fwd_txfm2d_sse4_1, accuracy) {
int func_idx = get_max_bit(txfm_size) - 2;
Fwd_Txfm2d_Func txfm2d_func_c = txfm2d_func_c_list[func_idx];
Fwd_Txfm2d_Func txfm2d_func_sse4_1 = txfm2d_func_sse4_1_list[func_idx];
int tx_type = libvpx_test::get_tx_type(&cfg);
ACMRandom rnd(ACMRandom::DeterministicSeed());
......@@ -61,8 +62,8 @@ TEST(vp10_fwd_txfm2d_sse4_1, accuracy) {
}
}
txfm2d_func_c(input, output_c, cfg.txfm_size, &cfg, bd);
txfm2d_func_sse4_1(input, output_sse4_1, cfg.txfm_size, &cfg, bd);
txfm2d_func_c(input, output_c, cfg.txfm_size, tx_type, bd);
txfm2d_func_sse4_1(input, output_sse4_1, cfg.txfm_size, tx_type, bd);
for (int r = 0; r < txfm_size; r++) {
for (int c = 0; c < txfm_size; c++) {
EXPECT_EQ(output_c[r * txfm_size + c],
......
......@@ -71,6 +71,7 @@ TEST(vp10_fwd_txfm2d, accuracy) {
fwd_txfm_cfg->shift[2];
double amplify_factor =
amplify_bit >= 0 ? (1 << amplify_bit) : (1.0 / (1 << -amplify_bit));
int tx_type = libvpx_test::get_tx_type(fwd_txfm_cfg);
ACMRandom rnd(ACMRandom::DeterministicSeed());
int count = 500;
......@@ -83,7 +84,7 @@ TEST(vp10_fwd_txfm2d, accuracy) {
ref_output[ni] = 0;
}
fwd_txfm_func(input, output, txfm_size, fwd_txfm_cfg, bd);
fwd_txfm_func(input, output, txfm_size, tx_type, bd);
reference_hybrid_2d(ref_input, ref_output, txfm_size, type0, type1);
for (int ni = 0; ni < sqr_txfm_size; ++ni) {
......
......@@ -30,17 +30,7 @@ namespace {
#if CONFIG_VP9_HIGHBITDEPTH
const int txfm_size_num = 5;
const int txfm_size_ls[5] = {4, 8, 16, 32, 64};
const TXFM_2D_CFG* fwd_txfm_cfg_ls[5][4] = {
{&fwd_txfm_2d_cfg_dct_dct_4, &fwd_txfm_2d_cfg_dct_adst_4,
&fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_dct_4},
{&fwd_txfm_2d_cfg_dct_dct_8, &fwd_txfm_2d_cfg_dct_adst_8,
&fwd_txfm_2d_cfg_adst_adst_8, &fwd_txfm_2d_cfg_adst_dct_8},
{&fwd_txfm_2d_cfg_dct_dct_16, &fwd_txfm_2d_cfg_dct_adst_16,
&fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_dct_16},
{&fwd_txfm_2d_cfg_dct_dct_32, &fwd_txfm_2d_cfg_dct_adst_32,
&fwd_txfm_2d_cfg_adst_adst_32, &fwd_txfm_2d_cfg_adst_dct_32},
{&fwd_txfm_2d_cfg_dct_dct_64, NULL, NULL, NULL}};
const int txfm_type[4] = {DCT_DCT, DCT_ADST, ADST_ADST, ADST_DCT};
const TXFM_2D_CFG* inv_txfm_cfg_ls[5][4] = {
{&inv_txfm_2d_cfg_dct_dct_4, &inv_txfm_2d_cfg_dct_adst_4,
&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_dct_4},
......@@ -72,11 +62,10 @@ TEST(vp10_inv_txfm2d, round_trip) {
for (int txfm_type_idx = 0; txfm_type_idx < txfm_type_num;
++txfm_type_idx) {
const TXFM_2D_CFG* fwd_txfm_cfg =
fwd_txfm_cfg_ls[txfm_size_idx][txfm_type_idx];
const TXFM_2D_CFG* inv_txfm_cfg =
inv_txfm_cfg_ls[txfm_size_idx][txfm_type_idx];
if (fwd_txfm_cfg != NULL) {
if (inv_txfm_cfg != NULL) {
int tx_type = txfm_type[txfm_type_idx];
const Fwd_Txfm2d_Func fwd_txfm_func = fwd_txfm_func_ls[txfm_size_idx];
const Inv_Txfm2d_Func inv_txfm_func = inv_txfm_func_ls[txfm_size_idx];
const int count = 1000;
......@@ -94,7 +83,7 @@ TEST(vp10_inv_txfm2d, round_trip) {
}
}
fwd_txfm_func(input, output, txfm_size, fwd_txfm_cfg, bd);
fwd_txfm_func(input, output, txfm_size, tx_type, bd);
inv_txfm_func(output, ref_input, txfm_size, inv_txfm_cfg, bd);
for (int ni = 0; ni < sqr_txfm_size; ++ni) {
......
......@@ -21,6 +21,7 @@
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "test/acm_random.h"
#include "vp10/common/enums.h"
#include "vp10/common/vp10_txfm.h"
namespace libvpx_test {
......@@ -104,11 +105,29 @@ typedef void (*TxfmFunc)(const int32_t* in, int32_t* out, const int8_t* cos_bit,
const int8_t* range_bit);
typedef void (*Fwd_Txfm2d_Func)(const int16_t*, int32_t*, const int,
const TXFM_2D_CFG*, const int);
int tx_type, const int);
typedef void (*Inv_Txfm2d_Func)(const int32_t*, uint16_t*, const int,
const TXFM_2D_CFG*, const int);
static const int bd = 10;
static const int input_base = (1 << bd);
// Derives the 2-D tx_type from a transform configuration. A column or row
// 1-D type that compares <= TXFM_TYPE_DCT64 selects the DCT half of the
// result; any larger value selects the ADST half. The column type picks the
// first half of the name (DCT_* vs ADST_*), the row type the second.
static INLINE int get_tx_type(const TXFM_2D_CFG *cfg) {
  const bool col_is_dct = cfg->txfm_type_col <= TXFM_TYPE_DCT64;
  const bool row_is_dct = cfg->txfm_type_row <= TXFM_TYPE_DCT64;
  if (col_is_dct) {
    return row_is_dct ? DCT_DCT : DCT_ADST;
  }
  return row_is_dct ? ADST_DCT : ADST_ADST;
}
} // namespace libvpx_test
#endif // VP10_TXFM_TEST_H_
......@@ -8,8 +8,12 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vp10/common/vp10_txfm.h"
#include <assert.h>
#include "vp10/common/enums.h"
#include "vp10/common/vp10_fwd_txfm1d.h"
#include "vp10/common/vp10_fwd_txfm2d_cfg.h"
#include "vp10/common/vp10_txfm.h"
static inline TxfmFunc fwd_txfm_type_to_func(TXFM_TYPE txfm_type) {
switch (txfm_type) {
......@@ -83,41 +87,145 @@ static inline void fwd_txfm2d_c(const int16_t *input, int32_t *output,
}
// C implementation of the 4x4 forward 2-D transform entry point.
// NOTE: this diff hunk shows both the old parameter line
// (const TXFM_2D_CFG *cfg) and its replacement (int tx_type).
// The configuration is now looked up from tx_type inside the function
// instead of being supplied by the caller.
void vp10_fwd_txfm2d_4x4_c(const int16_t *input, int32_t *output,
const int stride, const TXFM_2D_CFG *cfg,
const int stride, int tx_type,
const int bd) {
// Scratch buffer handed to fwd_txfm2d_c (4 * 4 int32_t entries).
int32_t txfm_buf[4 * 4];
const TXFM_2D_CFG* cfg = vp10_get_txfm_4x4_cfg(tx_type);
// bd is not used by this implementation.
(void)bd;
fwd_txfm2d_c(input, output, stride, cfg, txfm_buf);
}
// C implementation of the 8x8 forward 2-D transform entry point.
// NOTE: this diff hunk shows both the old parameter line
// (const TXFM_2D_CFG *cfg) and its replacement (int tx_type).
// The configuration is now looked up from tx_type inside the function
// instead of being supplied by the caller.
void vp10_fwd_txfm2d_8x8_c(const int16_t *input, int32_t *output,
const int stride, const TXFM_2D_CFG *cfg,
const int stride, int tx_type,
const int bd) {
// Scratch buffer handed to fwd_txfm2d_c (8 * 8 int32_t entries).
int32_t txfm_buf[8 * 8];
const TXFM_2D_CFG* cfg = vp10_get_txfm_8x8_cfg(tx_type);
// bd is not used by this implementation.
(void)bd;
fwd_txfm2d_c(input, output, stride, cfg, txfm_buf);
}
// C implementation of the 16x16 forward 2-D transform entry point.
// NOTE: this diff hunk shows both the old parameter line
// (const TXFM_2D_CFG *cfg) and its replacement (int tx_type).
// The configuration is now looked up from tx_type inside the function
// instead of being supplied by the caller.
void vp10_fwd_txfm2d_16x16_c(const int16_t *input, int32_t *output,
const int stride, const TXFM_2D_CFG *cfg,
const int stride, int tx_type,
const int bd) {
// Scratch buffer handed to fwd_txfm2d_c (16 * 16 int32_t entries).
int32_t txfm_buf[16 * 16];
const TXFM_2D_CFG* cfg = vp10_get_txfm_16x16_cfg(tx_type);
// bd is not used by this implementation.
(void)bd;
fwd_txfm2d_c(input, output, stride, cfg, txfm_buf);
}
// C implementation of the 32x32 forward 2-D transform entry point.
// NOTE: this diff hunk shows both the old parameter line
// (const TXFM_2D_CFG *cfg) and its replacement (int tx_type).
// The configuration is now looked up from tx_type inside the function
// instead of being supplied by the caller.
void vp10_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output,
const int stride, const TXFM_2D_CFG *cfg,
const int stride, int tx_type,
const int bd) {
// Scratch buffer handed to fwd_txfm2d_c (32 * 32 int32_t entries).
int32_t txfm_buf[32 * 32];
const TXFM_2D_CFG* cfg = vp10_get_txfm_32x32_cfg(tx_type);
// bd is not used by this implementation.
(void)bd;
fwd_txfm2d_c(input, output, stride, cfg, txfm_buf);
}
// C implementation of the 64x64 forward 2-D transform entry point.
// NOTE: this diff hunk shows both the old parameter line
// (const TXFM_2D_CFG *cfg) and its replacement (int tx_type).
// The configuration is now looked up from tx_type inside the function
// instead of being supplied by the caller.
void vp10_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output,
const int stride, const TXFM_2D_CFG *cfg,
const int stride, int tx_type,
const int bd) {
// Scratch buffer handed to fwd_txfm2d_c (64 * 64 int32_t entries).
int32_t txfm_buf[64 * 64];
const TXFM_2D_CFG* cfg = vp10_get_txfm_64x64_cfg(tx_type);
// bd is not used by this implementation.
(void)bd;
fwd_txfm2d_c(input, output, stride, cfg, txfm_buf);
}
// Returns the 4x4 forward 2-D transform configuration for tx_type.
// Handles DCT_DCT, ADST_DCT, DCT_ADST and ADST_ADST. Any other value hits
// assert(0); if the assert is compiled out, NULL is returned.
const TXFM_2D_CFG* vp10_get_txfm_4x4_cfg(int tx_type) {
  switch (tx_type) {
    case DCT_DCT:
      return &fwd_txfm_2d_cfg_dct_dct_4;
    case ADST_DCT:
      return &fwd_txfm_2d_cfg_adst_dct_4;
    case DCT_ADST:
      return &fwd_txfm_2d_cfg_dct_adst_4;
    case ADST_ADST:
      return &fwd_txfm_2d_cfg_adst_adst_4;
    default:
      break;
  }
  // Unhandled tx_type.
  assert(0);
  return NULL;
}
// Returns the 8x8 forward 2-D transform configuration for tx_type.
// Handles DCT_DCT, ADST_DCT, DCT_ADST and ADST_ADST. Any other value hits
// assert(0); if the assert is compiled out, NULL is returned.
const TXFM_2D_CFG* vp10_get_txfm_8x8_cfg(int tx_type) {
  switch (tx_type) {
    case DCT_DCT:
      return &fwd_txfm_2d_cfg_dct_dct_8;
    case ADST_DCT:
      return &fwd_txfm_2d_cfg_adst_dct_8;
    case DCT_ADST:
      return &fwd_txfm_2d_cfg_dct_adst_8;
    case ADST_ADST:
      return &fwd_txfm_2d_cfg_adst_adst_8;
    default:
      break;
  }
  // Unhandled tx_type.
  assert(0);
  return NULL;
}
// Returns the 16x16 forward 2-D transform configuration for tx_type.
// Handles DCT_DCT, ADST_DCT, DCT_ADST and ADST_ADST. Any other value hits
// assert(0); if the assert is compiled out, NULL is returned.
const TXFM_2D_CFG* vp10_get_txfm_16x16_cfg(int tx_type) {
  switch (tx_type) {
    case DCT_DCT:
      return &fwd_txfm_2d_cfg_dct_dct_16;
    case ADST_DCT:
      return &fwd_txfm_2d_cfg_adst_dct_16;
    case DCT_ADST:
      return &fwd_txfm_2d_cfg_dct_adst_16;
    case ADST_ADST:
      return &fwd_txfm_2d_cfg_adst_adst_16;
    default:
      break;
  }
  // Unhandled tx_type.
  assert(0);
  return NULL;
}
// Returns the 32x32 forward 2-D transform configuration for tx_type.
// Handles DCT_DCT, ADST_DCT, DCT_ADST and ADST_ADST. Any other value hits
// assert(0); if the assert is compiled out, NULL is returned.
const TXFM_2D_CFG* vp10_get_txfm_32x32_cfg(int tx_type) {
  switch (tx_type) {
    case DCT_DCT:
      return &fwd_txfm_2d_cfg_dct_dct_32;
    case ADST_DCT:
      return &fwd_txfm_2d_cfg_adst_dct_32;
    case DCT_ADST:
      return &fwd_txfm_2d_cfg_dct_adst_32;
    case ADST_ADST:
      return &fwd_txfm_2d_cfg_adst_adst_32;
    default:
      break;
  }
  // Unhandled tx_type.
  assert(0);
  return NULL;
}
// Returns the 64x64 forward 2-D transform configuration for tx_type.
// Only DCT_DCT has a 64x64 configuration here. Every other value, including
// ADST_DCT, DCT_ADST and ADST_ADST, hits assert(0); if the assert is
// compiled out, NULL is returned.
const TXFM_2D_CFG* vp10_get_txfm_64x64_cfg(int tx_type) {
  if (tx_type == DCT_DCT) {
    return &fwd_txfm_2d_cfg_dct_dct_64;
  }
  // Unhandled tx_type.
  assert(0);
  return NULL;
}
......@@ -10,6 +10,7 @@
#ifndef VP10_FWD_TXFM2D_CFG_H_
#define VP10_FWD_TXFM2D_CFG_H_
#include "vp10/common/enums.h"
#include "vp10/common/vp10_fwd_txfm1d.h"
// ---------------- config fwd_dct_dct_4 ----------------
static const int8_t fwd_shift_dct_dct_4[3] = {2, 0, 0};
......@@ -399,4 +400,10 @@ static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_dct_32 = {
TXFM_TYPE_ADST32, // .txfm_type_col
TXFM_TYPE_DCT32}; // .txfm_type_row
const TXFM_2D_CFG* vp10_get_txfm_4x4_cfg(int tx_type);
const TXFM_2D_CFG* vp10_get_txfm_8x8_cfg(int tx_type);
const TXFM_2D_CFG* vp10_get_txfm_16x16_cfg(int tx_type);
const TXFM_2D_CFG* vp10_get_txfm_32x32_cfg(int tx_type);
const TXFM_2D_CFG* vp10_get_txfm_64x64_cfg(int tx_type);
#endif // VP10_FWD_TXFM2D_CFG_H_
......@@ -614,15 +614,15 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
#fwd txfm
add_proto qw/void vp10_fwd_txfm2d_4x4/, "const int16_t *input, int32_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
add_proto qw/void vp10_fwd_txfm2d_4x4/, "const int16_t *input, int32_t *output, const int stride, int tx_type, const int bd";
specialize qw/vp10_fwd_txfm2d_4x4 sse4_1/;
add_proto qw/void vp10_fwd_txfm2d_8x8/, "const int16_t *input, int32_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
add_proto qw/void vp10_fwd_txfm2d_8x8/, "const int16_t *input, int32_t *output, const int stride, int tx_type, const int bd";
specialize qw/vp10_fwd_txfm2d_8x8 sse4_1/;
add_proto qw/void vp10_fwd_txfm2d_16x16/, "const int16_t *input, int32_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
add_proto qw/void vp10_fwd_txfm2d_16x16/, "const int16_t *input, int32_t *output, const int stride, int tx_type, const int bd";
specialize qw/vp10_fwd_txfm2d_16x16 sse4_1/;
add_proto qw/void vp10_fwd_txfm2d_32x32/, "const int16_t *input, int32_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
add_proto qw/void vp10_fwd_txfm2d_32x32/, "const int16_t *input, int32_t *output, const int stride, int tx_type, const int bd";
specialize qw/vp10_fwd_txfm2d_32x32 sse4_1/;
add_proto qw/void vp10_fwd_txfm2d_64x64/, "const int16_t *input, int32_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
add_proto qw/void vp10_fwd_txfm2d_64x64/, "const int16_t *input, int32_t *output, const int stride, int tx_type, const int bd";
specialize qw/vp10_fwd_txfm2d_64x64 sse4_1/;
#inv txfm
......
/*
* Copyright (c) 2016 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vp10/common/vp10_fwd_txfm2d_cfg.h"
#include "vp10/common/x86/vp10_txfm1d_sse4.h"
static inline void int16_array_with_stride_to_int32_array_without_stride(
......@@ -49,8 +60,8 @@ static inline TxfmFuncSSE2 fwd_txfm_type_to_func(TXFM_TYPE txfm_type) {
}
static inline void fwd_txfm2d_sse4_1(const int16_t *input, int32_t *output,
const int stride, const TXFM_2D_CFG *cfg,
int32_t *txfm_buf) {
const int stride, const TXFM_2D_CFG *cfg,
int32_t *txfm_buf) {
const int txfm_size = cfg->txfm_size;
const int8_t *shift = cfg->shift;
const int8_t *stage_range_col = cfg->stage_range_col;
......@@ -77,41 +88,46 @@ static inline void fwd_txfm2d_sse4_1(const int16_t *input, int32_t *output,
}
// SSE4.1 entry point for the 4x4 forward 2-D transform.
// NOTE: this diff hunk shows both the old signature tail
// (const TXFM_2D_CFG *cfg, const int bd) and its replacement
// (int tx_type, const int bd). The configuration is now looked up from
// tx_type inside the function.
void vp10_fwd_txfm2d_4x4_sse4_1(const int16_t *input, int32_t *output,
const int stride, const TXFM_2D_CFG *cfg,
const int bd) {
const int stride, int tx_type,
const int bd) {
// Scratch buffer handed to fwd_txfm2d_sse4_1 (16 int32_t entries).
int32_t txfm_buf[16];
const TXFM_2D_CFG* cfg = vp10_get_txfm_4x4_cfg(tx_type);
// bd is not used by this implementation.
(void)bd;
fwd_txfm2d_sse4_1(input, output, stride, cfg, txfm_buf);
}
// SSE4.1 entry point for the 8x8 forward 2-D transform.
// NOTE: this diff hunk shows both the old signature tail
// (const TXFM_2D_CFG *cfg, const int bd) and its replacement
// (int tx_type, const int bd). The configuration is now looked up from
// tx_type inside the function.
void vp10_fwd_txfm2d_8x8_sse4_1(const int16_t *input, int32_t *output,
const int stride, const TXFM_2D_CFG *cfg,
const int bd) {
const int stride, int tx_type,
const int bd) {
// Scratch buffer handed to fwd_txfm2d_sse4_1 (64 int32_t entries).
int32_t txfm_buf[64];
const TXFM_2D_CFG* cfg = vp10_get_txfm_8x8_cfg(tx_type);
// bd is not used by this implementation.
(void)bd;
fwd_txfm2d_sse4_1(input, output, stride, cfg, txfm_buf);
}
// SSE4.1 entry point for the 16x16 forward 2-D transform.
// NOTE: this diff hunk shows both the old signature tail
// (const TXFM_2D_CFG *cfg, const int bd) and its replacement
// (int tx_type, const int bd). The configuration is now looked up from
// tx_type inside the function.
void vp10_fwd_txfm2d_16x16_sse4_1(const int16_t *input, int32_t *output,
const int stride, const TXFM_2D_CFG *cfg,
const int bd) {
const int stride, int tx_type,
const int bd) {
// Scratch buffer handed to fwd_txfm2d_sse4_1 (256 int32_t entries).
int32_t txfm_buf[256];
const TXFM_2D_CFG* cfg = vp10_get_txfm_16x16_cfg(tx_type);
// bd is not used by this implementation.
(void)bd;
fwd_txfm2d_sse4_1(input, output, stride, cfg, txfm_buf);
}
// SSE4.1 entry point for the 32x32 forward 2-D transform.
// NOTE: this diff hunk shows both the old signature tail
// (const TXFM_2D_CFG *cfg, const int bd) and its replacement
// (int tx_type, const int bd). The configuration is now looked up from
// tx_type inside the function.
void vp10_fwd_txfm2d_32x32_sse4_1(const int16_t *input, int32_t *output,
const int stride, const TXFM_2D_CFG *cfg,
const int bd) {
const int stride, int tx_type,
const int bd) {
// Scratch buffer handed to fwd_txfm2d_sse4_1 (1024 int32_t entries).
int32_t txfm_buf[1024];
const TXFM_2D_CFG* cfg = vp10_get_txfm_32x32_cfg(tx_type);
// bd is not used by this implementation.
(void)bd;
fwd_txfm2d_sse4_1(input, output, stride, cfg, txfm_buf);
}
// SSE4.1 entry point for the 64x64 forward 2-D transform.
// NOTE: this diff hunk shows both the old signature tail
// (const TXFM_2D_CFG *cfg, const int bd) and its replacement
// (int tx_type, const int bd). The configuration is now looked up from
// tx_type inside the function.
void vp10_fwd_txfm2d_64x64_sse4_1(const int16_t *input, int32_t *output,
const int stride, const TXFM_2D_CFG *cfg,
const int bd) {
const int stride, int tx_type,
const int bd) {
// Scratch buffer handed to fwd_txfm2d_sse4_1 (4096 int32_t entries).
int32_t txfm_buf[4096];
const TXFM_2D_CFG* cfg = vp10_get_txfm_64x64_cfg(tx_type);
// bd is not used by this implementation.
(void)bd;
fwd_txfm2d_sse4_1(input, output, stride, cfg, txfm_buf);
}
......@@ -195,20 +195,10 @@ void vp10_highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
switch (tx_type) {
case DCT_DCT:
vp10_fwd_txfm2d_4x4(src_diff, coeff, diff_stride,
&fwd_txfm_2d_cfg_dct_dct_4, bd);
break;
case ADST_DCT:
vp10_fwd_txfm2d_4x4(src_diff, coeff, diff_stride,
&fwd_txfm_2d_cfg_adst_dct_4, bd);
break;
case DCT_ADST:
vp10_fwd_txfm2d_4x4(src_diff, coeff, diff_stride,
&fwd_txfm_2d_cfg_dct_adst_4, bd);
break;
case ADST_ADST:
vp10_fwd_txfm2d_4x4(src_diff, coeff, diff_stride,
&fwd_txfm_2d_cfg_adst_adst_4, bd);
vp10_fwd_txfm2d_4x4(src_diff, coeff, diff_stride, tx_type, bd);
break;
#if CONFIG_EXT_TX
case FLIPADST_DCT:
......@@ -242,20 +232,10 @@ static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
(void)fwd_txfm_opt;
switch (tx_type) {
case DCT_DCT:
vp10_fwd_txfm2d_8x8(src_diff, coeff, diff_stride,
&fwd_txfm_2d_cfg_dct_dct_8, bd);
break;
case ADST_DCT:
vp10_fwd_txfm2d_8x8(src_diff, coeff, diff_stride,
&fwd_txfm_2d_cfg_adst_dct_8, bd);
break;
case DCT_ADST:
vp10_fwd_txfm2d_8x8(src_diff, coeff, diff_stride,
&fwd_txfm_2d_cfg_dct_adst_8, bd);
break;
case ADST_ADST:
vp10_fwd_txfm2d_8x8(src_diff, coeff, diff_stride,
&fwd_txfm_2d_cfg_adst_adst_8, bd);
vp10_fwd_txfm2d_8x8(src_diff, coeff, diff_stride, tx_type, bd);
break;
#if CONFIG_EXT_TX
case FLIPADST_DCT:
......@@ -290,20 +270,10 @@ static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff,
(void)fwd_txfm_opt;
switch (tx_type) {
case DCT_DCT:
vp10_fwd_txfm2d_16x16(src_diff, coeff, diff_stride,
&fwd_txfm_2d_cfg_dct_dct_16, bd);
break;
case ADST_DCT:
vp10_fwd_txfm2d_16x16(src_diff, coeff, diff_stride,
&fwd_txfm_2d_cfg_adst_dct_16, bd);
break;
case DCT_ADST:
vp10_fwd_txfm2d_16x16(src_diff, coeff, diff_stride,
&fwd_txfm_2d_cfg_dct_adst_16, bd);
break;
case ADST_ADST:
vp10_fwd_txfm2d_16x16(src_diff, coeff, diff_stride,
&fwd_txfm_2d_cfg_adst_adst_16, bd);
vp10_fwd_txfm2d_16x16(src_diff, coeff, diff_stride, tx_type, bd);
break;
#if CONFIG_EXT_TX
case FLIPADST_DCT:
......@@ -340,8 +310,7 @@ static void highbd_fwd_txfm_32x32(int rd_transform, const int16_t *src_diff,
(void)fwd_txfm_opt;
switch (tx_type) {
case DCT_DCT:
vp10_fwd_txfm2d_32x32(src_diff, coeff, diff_stride,
&fwd_txfm_2d_cfg_dct_dct_32, bd);
vp10_fwd_txfm2d_32x32(src_diff, coeff, diff_stride, tx_type, bd);
break;
#if CONFIG_EXT_TX
case ADST_DCT:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment