Commit 02355a4a authored by Debargha Mukherjee's avatar Debargha Mukherjee Committed by Gerrit Code Review
Browse files

Merge "Added highbitdepth sse2 acceleration for quantize"

parents d28e9ed4 a7b2d09f
......@@ -137,6 +137,8 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_subtract_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += lpf_8_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_avg_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_error_block_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_quantize_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9) += vp9_intrapred_test.cc
ifeq ($(CONFIG_VP9_ENCODER),yes)
......
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <cmath>
#include <cstdlib>
#include <string>
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "./vpx_config.h"
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_entropy.h"
#include "vpx/vpx_integer.h"
using libvpx_test::ACMRandom;
namespace {
#if CONFIG_VP9_HIGHBITDEPTH
const int kNumIterations = 1000;
typedef int64_t (*ErrorBlockFunc)(const tran_low_t *coeff,
const tran_low_t *dqcoeff,
intptr_t block_size,
int64_t *ssz, int bps);
typedef std::tr1::tuple<ErrorBlockFunc, ErrorBlockFunc, vpx_bit_depth_t>
ErrorBlockParam;
class ErrorBlockTest
: public ::testing::TestWithParam<ErrorBlockParam> {
public:
virtual ~ErrorBlockTest() {}
virtual void SetUp() {
error_block_op_ = GET_PARAM(0);
ref_error_block_op_ = GET_PARAM(1);
bit_depth_ = GET_PARAM(2);
}
virtual void TearDown() { libvpx_test::ClearSystemState(); }
protected:
vpx_bit_depth_t bit_depth_;
ErrorBlockFunc error_block_op_;
ErrorBlockFunc ref_error_block_op_;
};
TEST_P(ErrorBlockTest, OperationCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, 4096);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff, 4096);
int err_count_total = 0;
int first_failure = -1;
intptr_t block_size;
int64_t ssz;
int64_t ret;
int64_t ref_ssz;
int64_t ref_ret;
for (int i = 0; i < kNumIterations; ++i) {
int err_count = 0;
block_size = 16 << (i % 9); // All block sizes from 4x4, 8x4 ..64x64
for (int j = 0; j < block_size; j++) {
coeff[j] = rnd(2 << 20) - (1 << 20);
dqcoeff[j] = rnd(2 << 20) - (1 << 20);
}
ref_ret = ref_error_block_op_(coeff, dqcoeff, block_size, &ref_ssz,
bit_depth_);
ASM_REGISTER_STATE_CHECK(ret = error_block_op_(coeff, dqcoeff, block_size,
&ssz, bit_depth_));
err_count += (ref_ret != ret) | (ref_ssz != ssz);
if (err_count && !err_count_total) {
first_failure = i;
}
err_count_total += err_count;
}
EXPECT_EQ(0, err_count_total)
<< "Error: Error Block Test, C output doesn't match SSE2 output. "
<< "First failed at test case " << first_failure;
}
TEST_P(ErrorBlockTest, ExtremeValues) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, 4096);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff, 4096);
int err_count_total = 0;
int first_failure = -1;
intptr_t block_size;
int64_t ssz;
int64_t ret;
int64_t ref_ssz;
int64_t ref_ret;
int max_val = ((1 << 20) - 1);
for (int i = 0; i < kNumIterations; ++i) {
int err_count = 0;
int k = (i / 9) % 5;
// Change the maximum coeff value, to test different bit boundaries
if ( k == 4 && (i % 9) == 0 ) {
max_val >>= 1;
}
block_size = 16 << (i % 9); // All block sizes from 4x4, 8x4 ..64x64
for (int j = 0; j < block_size; j++) {
if (k < 4) { // Test at maximum values
coeff[j] = k % 2 ? max_val : -max_val;
dqcoeff[j] = (k >> 1) % 2 ? max_val : -max_val;
} else {
coeff[j] = rnd(2 << 14) - (1 << 14);
dqcoeff[j] = rnd(2 << 14) - (1 << 14);
}
}
ref_ret = ref_error_block_op_(coeff, dqcoeff, block_size, &ref_ssz,
bit_depth_);
ASM_REGISTER_STATE_CHECK(ret = error_block_op_(coeff, dqcoeff, block_size,
&ssz, bit_depth_));
err_count += (ref_ret != ret) | (ref_ssz != ssz);
if (err_count && !err_count_total) {
first_failure = i;
}
err_count_total += err_count;
}
EXPECT_EQ(0, err_count_total)
<< "Error: Error Block Test, C output doesn't match SSE2 output. "
<< "First failed at test case " << first_failure;
}
using std::tr1::make_tuple;
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(
SSE2, ErrorBlockTest,
::testing::Values(
make_tuple(&vp9_highbd_block_error_sse2,
&vp9_highbd_block_error_c, VPX_BITS_10),
make_tuple(&vp9_highbd_block_error_sse2,
&vp9_highbd_block_error_c, VPX_BITS_12),
make_tuple(&vp9_highbd_block_error_sse2,
&vp9_highbd_block_error_c, VPX_BITS_8)));
#endif // HAVE_SSE2
#endif // CONFIG_VP9_HIGHBITDEPTH
} // namespace
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "./vpx_config.h"
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_entropy.h"
#include "vpx/vpx_integer.h"
using libvpx_test::ACMRandom;
namespace {
#if CONFIG_VP9_HIGHBITDEPTH
const int number_of_iterations = 100;
typedef void (*QuantizeFunc)(const tran_low_t *coeff, intptr_t count,
int skip_block, const int16_t *zbin,
const int16_t *round, const int16_t *quant,
const int16_t *quant_shift,
tran_low_t *qcoeff, tran_low_t *dqcoeff,
const int16_t *dequant, int zbin_oq_value,
uint16_t *eob, const int16_t *scan,
const int16_t *iscan);
typedef std::tr1::tuple<QuantizeFunc, QuantizeFunc, vpx_bit_depth_t>
QuantizeParam;
class VP9QuantizeTest : public ::testing::TestWithParam<QuantizeParam> {
public:
virtual ~VP9QuantizeTest() {}
virtual void SetUp() {
quantize_op_ = GET_PARAM(0);
ref_quantize_op_ = GET_PARAM(1);
bit_depth_ = GET_PARAM(2);
mask_ = (1 << bit_depth_) - 1;
}
virtual void TearDown() { libvpx_test::ClearSystemState(); }
protected:
vpx_bit_depth_t bit_depth_;
int mask_;
QuantizeFunc quantize_op_;
QuantizeFunc ref_quantize_op_;
};
class VP9Quantize32Test : public ::testing::TestWithParam<QuantizeParam> {
public:
virtual ~VP9Quantize32Test() {}
virtual void SetUp() {
quantize_op_ = GET_PARAM(0);
ref_quantize_op_ = GET_PARAM(1);
bit_depth_ = GET_PARAM(2);
mask_ = (1 << bit_depth_) - 1;
}
virtual void TearDown() { libvpx_test::ClearSystemState(); }
protected:
vpx_bit_depth_t bit_depth_;
int mask_;
QuantizeFunc quantize_op_;
QuantizeFunc ref_quantize_op_;
};
TEST_P(VP9QuantizeTest, OperationCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
int zbin_oq_value = 0;
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr, 256);
DECLARE_ALIGNED_ARRAY(16, int16_t, zbin_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, int16_t, round_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, int16_t, quant_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, int16_t, quant_shift_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, qcoeff_ptr, 256);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff_ptr, 256);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_qcoeff_ptr, 256);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_dqcoeff_ptr, 256);
DECLARE_ALIGNED_ARRAY(16, int16_t, dequant_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, uint16_t, eob_ptr, 1);
DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_eob_ptr, 1);
int err_count_total = 0;
int first_failure = -1;
for (int i = 0; i < number_of_iterations; ++i) {
const int skip_block = i == 0;
const TX_SIZE sz = (TX_SIZE)(i % 3); // TX_4X4, TX_8X8 TX_16X16
const TX_TYPE tx_type = (TX_TYPE)((i >> 2) % 3);
const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
const int count = (4 << sz) * (4 << sz); // 16, 64, 256
int err_count = 0;
*eob_ptr = rnd.Rand16();
*ref_eob_ptr = *eob_ptr;
for (int j = 0; j < count; j++) {
coeff_ptr[j] = rnd.Rand16()&mask_;
}
for (int j = 0; j < 2; j++) {
zbin_ptr[j] = rnd.Rand16()&mask_;
round_ptr[j] = rnd.Rand16();
quant_ptr[j] = rnd.Rand16();
quant_shift_ptr[j] = rnd.Rand16();
dequant_ptr[j] = rnd.Rand16();
}
ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
ref_dqcoeff_ptr, dequant_ptr, zbin_oq_value,
ref_eob_ptr, scan_order->scan, scan_order->iscan);
ASM_REGISTER_STATE_CHECK(quantize_op_(coeff_ptr, count, skip_block,
zbin_ptr, round_ptr, quant_ptr,
quant_shift_ptr, qcoeff_ptr,
dqcoeff_ptr, dequant_ptr,
zbin_oq_value, eob_ptr,
scan_order->scan, scan_order->iscan));
for (int j = 0; j < sz; ++j) {
err_count += (ref_qcoeff_ptr[j] != qcoeff_ptr[j]) |
(ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
}
err_count += (*ref_eob_ptr != *eob_ptr);
if (err_count && !err_count_total) {
first_failure = i;
}
err_count_total += err_count;
}
EXPECT_EQ(0, err_count_total)
<< "Error: Quantization Test, C output doesn't match SSE2 output. "
<< "First failed at test case " << first_failure;
}
TEST_P(VP9Quantize32Test, OperationCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
int zbin_oq_value = 0;
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr, 1024);
DECLARE_ALIGNED_ARRAY(16, int16_t, zbin_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, int16_t, round_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, int16_t, quant_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, int16_t, quant_shift_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, qcoeff_ptr, 1024);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff_ptr, 1024);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_qcoeff_ptr, 1024);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_dqcoeff_ptr, 1024);
DECLARE_ALIGNED_ARRAY(16, int16_t, dequant_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, uint16_t, eob_ptr, 1);
DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_eob_ptr, 1);
int err_count_total = 0;
int first_failure = -1;
for (int i = 0; i < number_of_iterations; ++i) {
const int skip_block = i == 0;
const TX_SIZE sz = TX_32X32;
const TX_TYPE tx_type = (TX_TYPE)(i % 4);
const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
const int count = (4 << sz) * (4 << sz); // 1024
int err_count = 0;
*eob_ptr = rnd.Rand16();
*ref_eob_ptr = *eob_ptr;
for (int j = 0; j < count; j++) {
coeff_ptr[j] = rnd.Rand16()&mask_;
}
for (int j = 0; j < 2; j++) {
zbin_ptr[j] = rnd.Rand16()&mask_;
round_ptr[j] = rnd.Rand16();
quant_ptr[j] = rnd.Rand16();
quant_shift_ptr[j] = rnd.Rand16();
dequant_ptr[j] = rnd.Rand16();
}
ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
ref_dqcoeff_ptr, dequant_ptr, zbin_oq_value,
ref_eob_ptr, scan_order->scan, scan_order->iscan);
ASM_REGISTER_STATE_CHECK(quantize_op_(coeff_ptr, count, skip_block,
zbin_ptr, round_ptr, quant_ptr,
quant_shift_ptr, qcoeff_ptr,
dqcoeff_ptr, dequant_ptr,
zbin_oq_value, eob_ptr,
scan_order->scan, scan_order->iscan));
for (int j = 0; j < sz; ++j) {
err_count += (ref_qcoeff_ptr[j] != qcoeff_ptr[j]) |
(ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
}
err_count += (*ref_eob_ptr != *eob_ptr);
if (err_count && !err_count_total) {
first_failure = i;
}
err_count_total += err_count;
}
EXPECT_EQ(0, err_count_total)
<< "Error: Quantization Test, C output doesn't match SSE2 output. "
<< "First failed at test case " << first_failure;
}
TEST_P(VP9QuantizeTest, EOBCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
int zbin_oq_value = 0;
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr, 256);
DECLARE_ALIGNED_ARRAY(16, int16_t, zbin_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, int16_t, round_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, int16_t, quant_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, int16_t, quant_shift_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, qcoeff_ptr, 256);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff_ptr, 256);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_qcoeff_ptr, 256);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_dqcoeff_ptr, 256);
DECLARE_ALIGNED_ARRAY(16, int16_t, dequant_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, uint16_t, eob_ptr, 1);
DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_eob_ptr, 1);
int err_count_total = 0;
int first_failure = -1;
for (int i = 0; i < number_of_iterations; ++i) {
int skip_block = i == 0;
TX_SIZE sz = (TX_SIZE)(i % 3); // TX_4X4, TX_8X8 TX_16X16
TX_TYPE tx_type = (TX_TYPE)((i >> 2) % 3);
const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
int count = (4 << sz) * (4 << sz); // 16, 64, 256
int err_count = 0;
*eob_ptr = rnd.Rand16();
*ref_eob_ptr = *eob_ptr;
// Two random entries
for (int j = 0; j < count; j++) {
coeff_ptr[j] = 0;
}
coeff_ptr[rnd(count)] = rnd.Rand16()&mask_;
coeff_ptr[rnd(count)] = rnd.Rand16()&mask_;
for (int j = 0; j < 2; j++) {
zbin_ptr[j] = rnd.Rand16()&mask_;
round_ptr[j] = rnd.Rand16();
quant_ptr[j] = rnd.Rand16();
quant_shift_ptr[j] = rnd.Rand16();
dequant_ptr[j] = rnd.Rand16();
}
ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
ref_dqcoeff_ptr, dequant_ptr, zbin_oq_value,
ref_eob_ptr, scan_order->scan, scan_order->iscan);
ASM_REGISTER_STATE_CHECK(quantize_op_(coeff_ptr, count, skip_block,
zbin_ptr, round_ptr, quant_ptr,
quant_shift_ptr, qcoeff_ptr,
dqcoeff_ptr, dequant_ptr,
zbin_oq_value, eob_ptr,
scan_order->scan, scan_order->iscan));
for (int j = 0; j < sz; ++j) {
err_count += (ref_qcoeff_ptr[j] != qcoeff_ptr[j]) |
(ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
}
err_count += (*ref_eob_ptr != *eob_ptr);
if (err_count && !err_count_total) {
first_failure = i;
}
err_count_total += err_count;
}
EXPECT_EQ(0, err_count_total)
<< "Error: Quantization Test, C output doesn't match SSE2 output. "
<< "First failed at test case " << first_failure;
}
TEST_P(VP9Quantize32Test, EOBCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
int zbin_oq_value = 0;
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr, 1024);
DECLARE_ALIGNED_ARRAY(16, int16_t, zbin_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, int16_t, round_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, int16_t, quant_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, int16_t, quant_shift_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, qcoeff_ptr, 1024);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff_ptr, 1024);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_qcoeff_ptr, 1024);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_dqcoeff_ptr, 1024);
DECLARE_ALIGNED_ARRAY(16, int16_t, dequant_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, uint16_t, eob_ptr, 1);
DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_eob_ptr, 1);
int err_count_total = 0;
int first_failure = -1;
for (int i = 0; i < number_of_iterations; ++i) {
int skip_block = i == 0;
TX_SIZE sz = TX_32X32;
TX_TYPE tx_type = (TX_TYPE)(i % 4);
const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
int count = (4 << sz) * (4 << sz); // 1024
int err_count = 0;
*eob_ptr = rnd.Rand16();
*ref_eob_ptr = *eob_ptr;
for (int j = 0; j < count; j++) {
coeff_ptr[j] = 0;
}
// Two random entries
coeff_ptr[rnd(count)] = rnd.Rand16()&mask_;
coeff_ptr[rnd(count)] = rnd.Rand16()&mask_;
for (int j = 0; j < 2; j++) {
zbin_ptr[j] = rnd.Rand16()&mask_;
round_ptr[j] = rnd.Rand16();
quant_ptr[j] = rnd.Rand16();
quant_shift_ptr[j] = rnd.Rand16();
dequant_ptr[j] = rnd.Rand16();
}
ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
ref_dqcoeff_ptr, dequant_ptr, zbin_oq_value,
ref_eob_ptr, scan_order->scan, scan_order->iscan);
ASM_REGISTER_STATE_CHECK(quantize_op_(coeff_ptr, count, skip_block,
zbin_ptr, round_ptr, quant_ptr,
quant_shift_ptr, qcoeff_ptr,
dqcoeff_ptr, dequant_ptr,
zbin_oq_value, eob_ptr,
scan_order->scan, scan_order->iscan));
for (int j = 0; j < sz; ++j) {
err_count += (ref_qcoeff_ptr[j] != qcoeff_ptr[j]) |
(ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
}
err_count += (*ref_eob_ptr != *eob_ptr);
if (err_count && !err_count_total) {
first_failure = i;
}
err_count_total += err_count;
}
EXPECT_EQ(0, err_count_total)
<< "Error: Quantization Test, C output doesn't match SSE2 output. "
<< "First failed at test case " << first_failure;
}
using std::tr1::make_tuple;
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(
SSE2, VP9QuantizeTest,
::testing::Values(
make_tuple(&vp9_highbd_quantize_b_sse2,
&vp9_highbd_quantize_b_c, VPX_BITS_8),
make_tuple(&vp9_highbd_quantize_b_sse2,
&vp9_highbd_quantize_b_c, VPX_BITS_10),
make_tuple(&vp9_highbd_quantize_b_sse2,
&vp9_highbd_quantize_b_c, VPX_BITS_12)));
INSTANTIATE_TEST_CASE_P(
SSE2, VP9Quantize32Test,
::testing::Values(
make_tuple(&vp9_highbd_quantize_b_32x32_sse2,
&vp9_highbd_quantize_b_32x32_c, VPX_BITS_8),
make_tuple(&vp9_highbd_quantize_b_32x32_sse2,
&vp9_highbd_quantize_b_32x32_c, VPX_BITS_10),
make_tuple(&vp9_highbd_quantize_b_32x32_sse2,
&vp9_highbd_quantize_b_32x32_c, VPX_BITS_12)));
#endif // HAVE_SSE2
#endif // CONFIG_VP9_HIGHBITDEPTH
} // namespace
......@@ -1855,7 +1855,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
# ENCODEMB INVOKE
add_proto qw/int64_t vp9_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";
specialize qw/vp9_highbd_block_error/;
specialize qw/vp9_highbd_block_error sse2/;
add_proto qw/void vp9_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride, int bd";
specialize qw/vp9_highbd_subtract_block/;
......@@ -1867,10 +1867,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vp9_highbd_quantize_fp_32x32/;
add_proto qw/void vp9_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_highbd_quantize_b/;
specialize qw/vp9_highbd_quantize_b sse2/;
add_proto qw/void vp9_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_highbd_quantize_b_32x32/;
specialize qw/vp9_highbd_quantize_b_32x32 sse2/;
#
# Structured Similarity (SSIM)
......
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <emmintrin.h>
#include <stdio.h>
#include "vp9/common/vp9_common.h"
int64_t vp9_highbd_block_error_sse2(tran_low_t *coeff, tran_low_t *dqcoeff,
intptr_t block_size, int64_t *ssz,
int bps) {
int i, j, test;
uint32_t temp[4];
__m128i max, min, cmp0, cmp1, cmp2, cmp3;
int64_t error = 0, sqcoeff = 0;
const int shift = 2 * (bps - 8);
const int rounding = shift > 0 ? 1 << (shift - 1) : 0;
for (i = 0; i < block_size; i+=8) {
// Load the data into xmm registers
__m128i mm_coeff = _mm_load_si128((__m128i*) (coeff + i));
__m128i mm_coeff2 = _mm_load_si128((__m128i*) (coeff + i + 4));
__m128i mm_dqcoeff = _mm_load_si128((__m128i*) (dqcoeff + i));
__m128i mm_dqcoeff2 = _mm_load_si128((__m128i*) (dqcoeff + i + 4));
// Check if any values require more than 15 bit
max = _mm_set1_epi32(0x3fff);
min = _mm_set1_epi32(0xffffc000);
cmp0 = _mm_xor_si128(_mm_cmpgt_epi32(mm_coeff, max),
_mm_cmplt_epi32(mm_coeff, min));
cmp1 = _mm_xor_si128(_mm_cmpgt_epi32(mm_coeff2, max),
_mm_cmplt_epi32(mm_coeff2, min));
cmp2 = _mm_xor_si128(_mm_cmpgt_epi32(mm_dqcoeff, max),
_mm_cmplt_epi32(mm_dqcoeff, min));
cmp3 = _mm_xor_si128(_mm_cmpgt_epi32(mm_dqcoeff2, max),
_mm_cmplt_epi32(mm_dqcoeff2, min));
test = _mm_movemask_epi8(_mm_or_si128(_mm_or_si128(cmp0, cmp1),
_mm_or_si128(cmp2, cmp3)));
if (!test) {
__m128i mm_diff, error_sse2, sqcoeff_sse2;;
mm_coeff = _mm_packs_epi32(mm_coeff, mm_coeff2);
mm_dqcoeff = _mm_packs_epi32(mm_dqcoeff, mm_dqcoeff2);
mm_diff = _mm_sub_epi16(mm_coeff, mm_dqcoeff);
error_sse2 = _mm_madd_epi16(mm_diff, mm_diff);
sqcoeff_sse2 = _mm_madd_epi16(mm_coeff, mm_coeff);
_mm_storeu_si128((__m128i*)temp, error_sse2);
error = error + temp[0] + temp[1] + temp[2] + temp[3];