Commit d61e608d authored by Yi Luo's avatar Yi Luo

Add HBD data path for av1_block_error_avx2

- Add unit test for av1_block_error.
- Fix av1_dist_block logic for calling av1_block_error.

Change-Id: Id8a47ee113417360a29fc2334d9ca72b5793e2d7
parent b91155a9
......@@ -278,6 +278,7 @@ if (aom_config("CONFIG_AOM_QM") eq "yes") {
# the transform coefficients are held in 32-bit
# values, so the assembler code for av1_block_error can no longer be used.
add_proto qw/int64_t av1_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
specialize qw/av1_block_error avx2/;
add_proto qw/void av1_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
......@@ -310,6 +311,7 @@ if (aom_config("CONFIG_AOM_QM") eq "yes") {
# the transform coefficients are held in 32-bit
# values, so the assembler code for av1_block_error can no longer be used.
add_proto qw/int64_t av1_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
specialize qw/av1_block_error avx2/;
add_proto qw/void av1_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/av1_quantize_fp sse2/;
......
......@@ -1292,14 +1292,16 @@ void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
&this_sse) >>
shift;
#endif // CONFIG_HIGHBITDEPTH
#elif CONFIG_HIGHBITDEPTH
const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
*out_dist =
av1_highbd_block_error(coeff, dqcoeff, buffer_length, &this_sse, bd) >>
shift;
#else
*out_dist =
av1_block_error(coeff, dqcoeff, buffer_length, &this_sse) >> shift;
#else // !CONFIG_PVQ
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
*out_dist = av1_highbd_block_error(coeff, dqcoeff, buffer_length,
&this_sse, xd->bd) >>
shift;
else
#endif
*out_dist =
av1_block_error(coeff, dqcoeff, buffer_length, &this_sse) >> shift;
#endif // CONFIG_PVQ
*out_sse = this_sse >> shift;
} else {
......
......@@ -14,7 +14,20 @@
#include "./av1_rtcd.h"
#include "aom/aom_integer.h"
int64_t av1_block_error_avx2(const int16_t *coeff, const int16_t *dqcoeff,
static INLINE void read_coeff(const tran_low_t *coeff, intptr_t offset,
__m256i *c) {
const tran_low_t *addr = coeff + offset;
#if CONFIG_HIGHBITDEPTH
const __m256i x0 = _mm256_loadu_si256((const __m256i *)addr);
const __m256i x1 = _mm256_loadu_si256((const __m256i *)addr + 1);
const __m256i y = _mm256_packs_epi32(x0, x1);
*c = _mm256_permute4x64_epi64(y, 0xD8);
#else
*c = _mm256_loadu_si256((const __m256i *)addr);
#endif
}
int64_t av1_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff,
intptr_t block_size, int64_t *ssz) {
__m256i sse_reg, ssz_reg, coeff_reg, dqcoeff_reg;
__m256i exp_dqcoeff_lo, exp_dqcoeff_hi, exp_coeff_lo, exp_coeff_hi;
......@@ -22,16 +35,16 @@ int64_t av1_block_error_avx2(const int16_t *coeff, const int16_t *dqcoeff,
__m128i sse_reg128, ssz_reg128;
int64_t sse;
int i;
const __m256i zero_reg = _mm256_set1_epi16(0);
const __m256i zero_reg = _mm256_setzero_si256();
// init sse and ssz registerd to zero
sse_reg = _mm256_set1_epi16(0);
ssz_reg = _mm256_set1_epi16(0);
sse_reg = _mm256_setzero_si256();
ssz_reg = _mm256_setzero_si256();
for (i = 0; i < block_size; i += 16) {
// load 32 bytes from coeff and dqcoeff
coeff_reg = _mm256_loadu_si256((const __m256i *)(coeff + i));
dqcoeff_reg = _mm256_loadu_si256((const __m256i *)(dqcoeff + i));
read_coeff(coeff, i, &coeff_reg);
read_coeff(dqcoeff, i, &dqcoeff_reg);
// dqcoeff - coeff
dqcoeff_reg = _mm256_sub_epi16(dqcoeff_reg, coeff_reg);
// madd (dqcoeff - coeff)
......
/*
* Copyright (c) 2017, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
#include "./aom_config.h"
#include "./av1_rtcd.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
namespace {
using libaom_test::ACMRandom;
typedef int64_t (*BlockErrorFunc)(const tran_low_t *coeff,
const tran_low_t *dqcoeff, intptr_t size,
int64_t *ssz);
#if CONFIG_HIGHBITDEPTH
typedef int64_t (*HbdBlockErrorFunc)(const tran_low_t *coeff,
const tran_low_t *dqcoeff, intptr_t size,
int64_t *ssz, int bd);
#endif
typedef std::tr1::tuple<BlockErrorFunc, BlockErrorFunc, TX_SIZE,
aom_bit_depth_t>
BlockErrorParam;
const int kTestNum = 10000;
class BlockErrorTest : public ::testing::TestWithParam<BlockErrorParam> {
public:
BlockErrorTest()
: blk_err_ref_(GET_PARAM(0)), blk_err_(GET_PARAM(1)),
tx_size_(GET_PARAM(2)), bd_(GET_PARAM(3)) {}
virtual ~BlockErrorTest() {}
virtual void SetUp() {
const intptr_t block_size = getCoeffNum();
coeff_ = reinterpret_cast<tran_low_t *>(
aom_memalign(16, 2 * block_size * sizeof(tran_low_t)));
}
virtual void TearDown() {
aom_free(coeff_);
coeff_ = NULL;
libaom_test::ClearSystemState();
}
void BlockErrorRun(int testNum) {
int i;
int64_t error_ref, error;
int64_t sse_ref, sse;
const intptr_t block_size = getCoeffNum();
tran_low_t *dqcoeff = coeff_ + block_size;
for (i = 0; i < testNum; ++i) {
FillRandomData();
error_ref = blk_err_ref_(coeff_, dqcoeff, block_size, &sse_ref);
ASM_REGISTER_STATE_CHECK(error =
blk_err_(coeff_, dqcoeff, block_size, &sse));
EXPECT_EQ(error_ref, error) << "Error doesn't match on test: " << i;
EXPECT_EQ(sse_ref, sse) << "SSE doesn't match on test: " << i;
}
}
intptr_t getCoeffNum() { return tx_size_2d[tx_size_]; }
void FillRandomData() {
const intptr_t block_size = getCoeffNum();
tran_low_t *dqcoeff = coeff_ + block_size;
intptr_t i;
int16_t margin = 512;
for (i = 0; i < block_size; ++i) {
coeff_[i] = GetRandomNumWithRange(INT16_MIN + margin, INT16_MAX - margin);
dqcoeff[i] = coeff_[i] + GetRandomDeltaWithRange(margin);
}
}
void FillConstantData() {
const intptr_t block_size = getCoeffNum();
tran_low_t *dqcoeff = coeff_ + block_size;
intptr_t i;
for (i = 0; i < block_size; ++i) {
coeff_[i] = 5;
dqcoeff[i] = 7;
}
}
tran_low_t GetRandomNumWithRange(int16_t min, int16_t max) {
return clamp((int16_t)rnd_.Rand16(), min, max);
}
tran_low_t GetRandomDeltaWithRange(int16_t delta) {
tran_low_t value = (int16_t)rnd_.Rand16();
value %= delta;
return value;
}
BlockErrorFunc blk_err_ref_;
BlockErrorFunc blk_err_;
TX_SIZE tx_size_;
aom_bit_depth_t bd_;
ACMRandom rnd_;
tran_low_t *coeff_;
};
TEST_P(BlockErrorTest, BitExact) { BlockErrorRun(kTestNum); }
using std::tr1::make_tuple;
#if !CONFIG_HIGHBITDEPTH && HAVE_SSE2
const BlockErrorParam kBlkErrParamArraySse2[] = { make_tuple(
&av1_block_error_c, &av1_block_error_sse2, TX_32X32, AOM_BITS_8) };
INSTANTIATE_TEST_CASE_P(SSE2, BlockErrorTest,
::testing::ValuesIn(kBlkErrParamArraySse2));
#endif
#if HAVE_AVX2
const BlockErrorParam kBlkErrParamArrayAvx2[] = { make_tuple(
&av1_block_error_c, &av1_block_error_avx2, TX_32X32, AOM_BITS_8) };
INSTANTIATE_TEST_CASE_P(AVX2, BlockErrorTest,
::testing::ValuesIn(kBlkErrParamArrayAvx2));
#endif
} // namespace
......@@ -167,6 +167,7 @@ ifneq ($(CONFIG_NEW_QUANT), yes)
LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += quantize_func_test.cc
endif
endif
LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += block_error_test.cc
LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_inv_txfm_test.cc
LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_dct_test.cc
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment