Commit 674bffdc authored by Angie Chiang

Add rounding option into av1_convolve

Use a round flag in ConvolveParams to indicate if the destination buffer
has the result rounded by FILTER_BITS or not.
This CL is part of the goal of reducing interpolation rounding error in
compound prediction mode.

Change-Id: I49e522a89a67a771f5a6e7fbbc609e97923aecb6
parent 203b1d30
......@@ -9,6 +9,7 @@ print <<EOF
#include "av1/common/enums.h"
#include "av1/common/quant_common.h"
#include "av1/common/filter.h"
#include "av1/common/convolve.h"
#include "av1/common/av1_txfm.h"
struct macroblockd;
......@@ -41,10 +42,10 @@ if ($opts{arch} eq "x86_64") {
add_proto qw/void av1_convolve_init/, "void";
specialize qw/av1_convolve_init ssse3/;
add_proto qw/void av1_convolve_horiz/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg";
add_proto qw/void av1_convolve_horiz/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params";
specialize qw/av1_convolve_horiz ssse3/;
add_proto qw/void av1_convolve_vert/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg";
add_proto qw/void av1_convolve_vert/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params";
specialize qw/av1_convolve_vert ssse3/;
if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
......
This diff is collapsed.
......@@ -17,6 +17,11 @@
extern "C" {
#endif
// Options passed by pointer to av1_convolve and to the
// av1_convolve_horiz / av1_convolve_vert kernels; it takes the place of the
// former trailing "int avg" argument.
typedef struct ConvolveParams {
// Reference index of the prediction being produced. Callers store ref_idx
// here; the SSSE3 kernels use it to index their store/transpose function
// tables. A value > 0 means this is the second reference frame, whose result
// is averaged with the first prediction already in dst.
int ref;
// Flag indicating whether the destination buffer holds the result rounded by
// FILTER_BITS. Every caller visible here sets it to 1.
// NOTE(review): presumably 1 selects the rounded output - confirm against
// the convolve implementation.
int round;
} ConvolveParams;
void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
#if CONFIG_DUAL_FILTER
......@@ -25,7 +30,7 @@ void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
const InterpFilter interp_filter,
#endif
const int subpel_x, int xstep, const int subpel_y, int ystep,
int avg);
ConvolveParams *conv_params);
#if CONFIG_AOM_HIGHBITDEPTH
void av1_highbd_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
......
......@@ -62,8 +62,11 @@ static INLINE void inter_predictor(const uint8_t *src, int src_stride,
// ref_idx > 0 means this is the second reference frame
// first reference frame's prediction result is already in dst
// therefore we need to average the first and second results
ConvolveParams conv_params;
conv_params.round = 1;
conv_params.ref = ref_idx;
av1_convolve(src, src_stride, dst, dst_stride, w, h, interp_filter,
subpel_x, xs, subpel_y, ys, ref_idx);
subpel_x, xs, subpel_y, ys, &conv_params);
}
}
......
......@@ -667,16 +667,17 @@ static void filter_horiz_v4p_ssse3(const uint8_t *src_ptr, ptrdiff_t src_pitch,
void av1_convolve_horiz_ssse3(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
const InterpFilterParams filter_params,
const int subpel_x_q4, int x_step_q4, int avg) {
const int subpel_x_q4, int x_step_q4,
ConvolveParams *conv_params) {
DECLARE_ALIGNED(16, uint16_t, temp[8 * 8]);
__m128i verf[6];
__m128i horf[2];
SubpelFilterCoeffs hCoeffs, vCoeffs;
const uint8_t *src_ptr;
store_pixel_t store2p = store2pixelTab[avg];
store_pixel_t store4p = store4pixelTab[avg];
transpose_to_dst_t transpose_4x4 = trans4x4Tab[avg];
transpose_to_dst_t transpose_8x8 = trans8x8Tab[avg];
store_pixel_t store2p = store2pixelTab[conv_params->ref];
store_pixel_t store4p = store4pixelTab[conv_params->ref];
transpose_to_dst_t transpose_4x4 = trans4x4Tab[conv_params->ref];
transpose_to_dst_t transpose_8x8 = trans8x8Tab[conv_params->ref];
const int tapsNum = filter_params.taps;
int block_height, block_residu;
......@@ -685,7 +686,7 @@ void av1_convolve_horiz_ssse3(const uint8_t *src, int src_stride, uint8_t *dst,
if (0 == subpel_x_q4 || 16 != x_step_q4) {
av1_convolve_horiz_c(src, src_stride, dst, dst_stride, w, h, filter_params,
subpel_x_q4, x_step_q4, avg);
subpel_x_q4, x_step_q4, conv_params);
return;
}
......@@ -694,7 +695,7 @@ void av1_convolve_horiz_ssse3(const uint8_t *src, int src_stride, uint8_t *dst,
if (!hCoeffs || !vCoeffs) {
av1_convolve_horiz_c(src, src_stride, dst, dst_stride, w, h, filter_params,
subpel_x_q4, x_step_q4, avg);
subpel_x_q4, x_step_q4, conv_params);
return;
}
......@@ -881,19 +882,20 @@ static void filter_vert_compute_large(const uint8_t *src, int src_stride,
void av1_convolve_vert_ssse3(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
const InterpFilterParams filter_params,
const int subpel_y_q4, int y_step_q4, int avg) {
const int subpel_y_q4, int y_step_q4,
ConvolveParams *conv_params) {
__m128i verf[6];
SubpelFilterCoeffs vCoeffs;
const uint8_t *src_ptr;
uint8_t *dst_ptr = dst;
store_pixel_t store2p = store2pixelTab[avg];
store_pixel_t store4p = store4pixelTab[avg];
store_pixel_t store8p = store8pixelTab[avg];
store_pixel_t store2p = store2pixelTab[conv_params->ref];
store_pixel_t store4p = store4pixelTab[conv_params->ref];
store_pixel_t store8p = store8pixelTab[conv_params->ref];
const int tapsNum = filter_params.taps;
if (0 == subpel_y_q4 || 16 != y_step_q4) {
av1_convolve_vert_c(src, src_stride, dst, dst_stride, w, h, filter_params,
subpel_y_q4, y_step_q4, avg);
subpel_y_q4, y_step_q4, conv_params);
return;
}
......@@ -901,7 +903,7 @@ void av1_convolve_vert_ssse3(const uint8_t *src, int src_stride, uint8_t *dst,
if (!vCoeffs) {
av1_convolve_vert_c(src, src_stride, dst, dst_stride, w, h, filter_params,
subpel_y_q4, y_step_q4, avg);
subpel_y_q4, y_step_q4, conv_params);
return;
}
......
......@@ -23,7 +23,8 @@ using std::tr1::tuple;
using libaom_test::ACMRandom;
typedef void (*conv_filter_t)(const uint8_t *, int, uint8_t *, int, int, int,
const InterpFilterParams, const int, int, int);
const InterpFilterParams, const int, int,
ConvolveParams *);
#if CONFIG_AOM_HIGHBITDEPTH
typedef void (*hbd_conv_filter_t)(const uint16_t *, int, uint16_t *, int, int,
int, const InterpFilterParams, const int, int,
......@@ -68,7 +69,8 @@ class AV1ConvolveOptimzTest : public ::testing::TestWithParam<ConvParams> {
height_ = std::tr1::get<1>(block);
filter_ = GET_PARAM(3);
subpel_ = GET_PARAM(4);
avg_ = GET_PARAM(5);
conv_params_.round = 1;
conv_params_.ref = GET_PARAM(5);
alloc_ = new uint8_t[maxBlockSize * 4];
src_ = alloc_ + (vertiOffset * maxWidth);
......@@ -102,7 +104,7 @@ class AV1ConvolveOptimzTest : public ::testing::TestWithParam<ConvParams> {
int height_;
int filter_;
int subpel_;
int avg_;
ConvolveParams conv_params_;
};
void AV1ConvolveOptimzTest::PrepFilterBuffer() {
......@@ -154,10 +156,10 @@ void AV1ConvolveOptimzTest::RunHorizFilterBitExactCheck() {
InterpFilterParams filter_params = av1_get_interp_filter_params(filter_);
av1_convolve_horiz_c(src_ref_, stride, dst_ref_, stride, width_, height_,
filter_params, subpel_, x_step_q4, avg_);
filter_params, subpel_, x_step_q4, &conv_params_);
conv_horiz_(src_, stride, dst_, stride, width_, height_, filter_params,
subpel_, x_step_q4, avg_);
subpel_, x_step_q4, &conv_params_);
DiffFilterBuffer();
......@@ -170,10 +172,10 @@ void AV1ConvolveOptimzTest::RunHorizFilterBitExactCheck() {
av1_convolve_horiz_c(src_ref_, stride, dst_ref_, stride, width_,
intermediate_height, filter_params, subpel_, x_step_q4,
avg_);
&conv_params_);
conv_horiz_(src_, stride, dst_, stride, width_, intermediate_height,
filter_params, subpel_, x_step_q4, avg_);
filter_params, subpel_, x_step_q4, &conv_params_);
DiffFilterBuffer();
}
......@@ -184,10 +186,10 @@ void AV1ConvolveOptimzTest::RunVertFilterBitExactCheck() {
InterpFilterParams filter_params = av1_get_interp_filter_params(filter_);
av1_convolve_vert_c(src_ref_, stride, dst_ref_, stride, width_, height_,
filter_params, subpel_, x_step_q4, avg_);
filter_params, subpel_, x_step_q4, &conv_params_);
conv_vert_(src_, stride, dst_, stride, width_, height_, filter_params,
subpel_, x_step_q4, avg_);
subpel_, x_step_q4, &conv_params_);
DiffFilterBuffer();
}
......
......@@ -52,11 +52,14 @@ TEST(AV1ConvolveTest, av1_convolve8) {
int y_step_q4 = 16;
int subpel_x_q4 = 3;
int subpel_y_q4 = 2;
int avg = 0;
int w = 1;
int h = 1;
ConvolveParams conv_params;
conv_params.ref = 0;
conv_params.round = 1;
setup_convolve();
for (int i = 0; i < filter_size * filter_size; i++) {
......@@ -65,7 +68,7 @@ TEST(AV1ConvolveTest, av1_convolve8) {
av1_convolve(src + src_stride * filter_center + filter_center, src_stride,
dst, dst_stride, w, h, interp_filter, subpel_x_q4, x_step_q4,
subpel_y_q4, y_step_q4, avg);
subpel_y_q4, y_step_q4, &conv_params);
const int16_t *x_filter =
av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);
......@@ -96,13 +99,16 @@ TEST(AV1ConvolveTest, av1_convolve) {
int dst_stride = 1;
int x_step_q4 = 16;
int y_step_q4 = 16;
int avg = 0;
int w = 1;
int h = 1;
int subpel_x_q4;
int subpel_y_q4;
ConvolveParams conv_params;
conv_params.ref = 0;
conv_params.round = 1;
ASSERT_LE(filter_size, 12);
setup_convolve();
......@@ -114,7 +120,7 @@ TEST(AV1ConvolveTest, av1_convolve) {
for (subpel_y_q4 = 0; subpel_y_q4 < SUBPEL_SHIFTS; subpel_y_q4++) {
av1_convolve(src + src_stride * filter_center + filter_center, src_stride,
dst, dst_stride, w, h, interp_filter, subpel_x_q4, x_step_q4,
subpel_y_q4, y_step_q4, avg);
subpel_y_q4, y_step_q4, &conv_params);
const int16_t *x_filter =
av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);
......@@ -156,13 +162,16 @@ TEST(AV1ConvolveTest, av1_convolve_vert_first) {
int dst_stride = 1;
int x_step_q4 = 16;
int y_step_q4 = 16;
int avg = 0;
int w = 1;
int h = 1;
int subpel_x_q4;
int subpel_y_q4;
ConvolveParams conv_params;
conv_params.ref = 0;
conv_params.round = 1;
ASSERT_LE(filter_size_x, 12);
ASSERT_LE(filter_size_y, 12);
setup_convolve();
......@@ -175,7 +184,8 @@ TEST(AV1ConvolveTest, av1_convolve_vert_first) {
for (subpel_y_q4 = 1; subpel_y_q4 < SUBPEL_SHIFTS; subpel_y_q4++) {
av1_convolve(src + src_stride * filter_center_y + filter_center_x,
src_stride, dst, dst_stride, w, h, interp_filter,
subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg);
subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4,
&conv_params);
const int16_t *x_filter =
av1_get_interp_filter_subpel_kernel(filter_params_x, subpel_x_q4);
......@@ -222,7 +232,6 @@ TEST(AV1ConvolveTest, av1_convolve_avg) {
int dst_stride = 1;
int x_step_q4 = 16;
int y_step_q4 = 16;
int avg = 0;
int w = 1;
int h = 1;
......@@ -230,6 +239,10 @@ TEST(AV1ConvolveTest, av1_convolve_avg) {
int subpel_x_q4;
int subpel_y_q4;
ConvolveParams conv_params;
conv_params.ref = 0;
conv_params.round = 1;
setup_convolve();
for (int i = 0; i < filter_size * filter_size; i++) {
......@@ -241,23 +254,23 @@ TEST(AV1ConvolveTest, av1_convolve_avg) {
for (subpel_x_q4 = 0; subpel_x_q4 < SUBPEL_SHIFTS; subpel_x_q4++) {
for (subpel_y_q4 = 0; subpel_y_q4 < SUBPEL_SHIFTS; subpel_y_q4++) {
avg = 0;
conv_params.ref = 0;
av1_convolve(src0 + offset, src_stride, dst0, dst_stride, w, h,
interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
y_step_q4, avg);
avg = 0;
y_step_q4, &conv_params);
conv_params.ref = 0;
av1_convolve(src1 + offset, src_stride, dst1, dst_stride, w, h,
interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
y_step_q4, avg);
y_step_q4, &conv_params);
avg = 0;
conv_params.ref = 0;
av1_convolve(src0 + offset, src_stride, dst, dst_stride, w, h,
interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
y_step_q4, avg);
avg = 1;
y_step_q4, &conv_params);
conv_params.ref = 1;
av1_convolve(src1 + offset, src_stride, dst, dst_stride, w, h,
interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
y_step_q4, avg);
y_step_q4, &conv_params);
EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1));
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment