Commit fa005074 authored by Yunqing Wang

Remove convolve_round/compound_round config flags

Merged convolve_round experiment and removed its config flag in the code.
Removed compound_round code.

Change-Id: Ic01856732d75cca65d3866383d3cc1dd572f8863
parent ca14b47f
......@@ -735,9 +735,7 @@ if (aom_config("CONFIG_AV1") eq "yes") {
#
# Alpha blending with mask
#
if (aom_config("CONFIG_CONVOLVE_ROUND") eq "yes") {
add_proto qw/void aom_blend_a64_d32_mask/, "int32_t *dst, uint32_t dst_stride, const int32_t *src0, uint32_t src0_stride, const int32_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx";
}
add_proto qw/void aom_blend_a64_d32_mask/, "int32_t *dst, uint32_t dst_stride, const int32_t *src0, uint32_t src0_stride, const int32_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx";
add_proto qw/void aom_blend_a64_mask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx";
add_proto qw/void aom_blend_a64_hmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w";
add_proto qw/void aom_blend_a64_vmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w";
......
......@@ -18,7 +18,6 @@
#include "./aom_dsp_rtcd.h"
#if CONFIG_CONVOLVE_ROUND
// Blending with alpha mask. Mask values come from the range [0, 64],
// as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can
// be the same as dst, or dst can be different from both sources.
......@@ -79,7 +78,6 @@ void aom_blend_a64_d32_mask_c(int32_t *dst, uint32_t dst_stride,
}
}
}
#endif // CONFIG_CONVOLVE_ROUND
// Blending with alpha mask. Mask values come from the range [0, 64],
// as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can
......
......@@ -300,53 +300,49 @@ if (NOT CONFIG_CDEF_SINGLEPASS)
"${AOM_ROOT}/av1/common/clpf_neon.c")
endif ()
if (CONFIG_CONVOLVE_ROUND)
set(AOM_AV1_COMMON_INTRIN_SSE2
${AOM_AV1_COMMON_INTRIN_SSE2}
"${AOM_ROOT}/av1/common/x86/convolve_2d_sse2.c")
set(AOM_AV1_COMMON_INTRIN_SSE2
${AOM_AV1_COMMON_INTRIN_SSE2}
"${AOM_ROOT}/av1/common/x86/convolve_2d_sse2.c")
set(AOM_AV1_COMMON_INTRIN_AVX2
${AOM_AV1_COMMON_INTRIN_AVX2}
"${AOM_ROOT}/av1/common/x86/convolve_2d_avx2.c")
if (CONFIG_HIGHBITDEPTH)
set(AOM_AV1_COMMON_INTRIN_AVX2
${AOM_AV1_COMMON_INTRIN_AVX2}
"${AOM_ROOT}/av1/common/x86/convolve_2d_avx2.c")
if (CONFIG_HIGHBITDEPTH)
set(AOM_AV1_COMMON_INTRIN_AVX2
${AOM_AV1_COMMON_INTRIN_AVX2}
"${AOM_ROOT}/av1/common/x86/highbd_convolve_2d_avx2.c")
"${AOM_ROOT}/av1/common/x86/highbd_convolve_2d_avx2.c")
set(AOM_AV1_COMMON_INTRIN_SSSE3
${AOM_AV1_COMMON_INTRIN_SSSE3}
"${AOM_ROOT}/av1/common/x86/highbd_convolve_2d_ssse3.c")
endif ()
set(AOM_AV1_COMMON_INTRIN_SSSE3
${AOM_AV1_COMMON_INTRIN_SSSE3}
"${AOM_ROOT}/av1/common/x86/highbd_convolve_2d_ssse3.c")
endif ()
if (CONFIG_JNT_COMP)
set(AOM_AV1_COMMON_INTRIN_SSE4_1
${AOM_AV1_COMMON_INTRIN_SSE4_1}
"${AOM_ROOT}/av1/common/x86/convolve_2d_sse4.c")
endif ()
if (CONFIG_JNT_COMP)
set(AOM_AV1_COMMON_INTRIN_SSE4_1
${AOM_AV1_COMMON_INTRIN_SSE4_1}
"${AOM_ROOT}/av1/common/x86/convolve_2d_sse4.c")
endif ()
if(NOT CONFIG_COMPOUND_ROUND)
set(AOM_AV1_COMMON_INTRIN_SSE4_1
${AOM_AV1_COMMON_INTRIN_SSE4_1}
"${AOM_ROOT}/av1/common/x86/av1_convolve_scale_sse4.c")
endif()
set(AOM_AV1_COMMON_INTRIN_SSE4_1
${AOM_AV1_COMMON_INTRIN_SSE4_1}
"${AOM_ROOT}/av1/common/x86/av1_convolve_scale_sse4.c")
set(AOM_AV1_COMMON_INTRIN_SSE2
${AOM_AV1_COMMON_INTRIN_SSE2}
"${AOM_ROOT}/av1/common/x86/convolve_sse2.c")
set(AOM_AV1_COMMON_INTRIN_AVX2
${AOM_AV1_COMMON_INTRIN_AVX2}
"${AOM_ROOT}/av1/common/x86/convolve_avx2.c")
endif ()
set(AOM_AV1_COMMON_INTRIN_AVX2
${AOM_AV1_COMMON_INTRIN_AVX2}
"${AOM_ROOT}/av1/common/x86/convolve_avx2.c")
set(AOM_AV1_ENCODER_SOURCES
${AOM_AV1_ENCODER_SOURCES}
"${AOM_ROOT}/av1/encoder/wedge_utils.c")
set(AOM_AV1_ENCODER_SOURCES
${AOM_AV1_ENCODER_SOURCES}
"${AOM_ROOT}/av1/encoder/wedge_utils.c")
set(AOM_AV1_ENCODER_INTRIN_SSE2
${AOM_AV1_ENCODER_INTRIN_SSE2}
"${AOM_ROOT}/av1/encoder/x86/wedge_utils_sse2.c")
set(AOM_AV1_ENCODER_INTRIN_SSE2
${AOM_AV1_ENCODER_INTRIN_SSE2}
"${AOM_ROOT}/av1/encoder/x86/wedge_utils_sse2.c")
if (CONFIG_ACCOUNTING)
set(AOM_AV1_DECODER_SOURCES
......
......@@ -79,9 +79,7 @@ AV1_COMMON_SRCS-yes += common/av1_inv_txfm2d.c
AV1_COMMON_SRCS-yes += common/av1_inv_txfm1d_cfg.h
AV1_COMMON_SRCS-$(HAVE_AVX2) += common/x86/convolve_avx2.c
AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/av1_convolve_ssse3.c
ifeq ($(CONFIG_CONVOLVE_ROUND)x$(CONFIG_COMPOUND_ROUND),yesx)
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/av1_convolve_scale_sse4.c
endif
ifeq ($(CONFIG_HIGHBITDEPTH),yes)
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/av1_highbd_convolve_sse4.c
endif
......@@ -161,7 +159,6 @@ AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/highbd_warp_plane_sse4.c
endif
endif
ifeq ($(CONFIG_CONVOLVE_ROUND),yes)
AV1_COMMON_SRCS-$(HAVE_SSE2) += common/x86/convolve_sse2.c
AV1_COMMON_SRCS-$(HAVE_SSE2) += common/x86/convolve_2d_sse2.c
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/convolve_2d_sse4.c
......@@ -170,7 +167,6 @@ ifeq ($(CONFIG_HIGHBITDEPTH),yes)
AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/highbd_convolve_2d_ssse3.c
AV1_COMMON_SRCS-$(HAVE_AVX2) += common/x86/highbd_convolve_2d_avx2.c
endif
endif
ifeq ($(CONFIG_LV_MAP),yes)
AV1_COMMON_SRCS-$(HAVE_SSE2) += common/x86/txb_sse2.c
......
......@@ -575,48 +575,37 @@ if (aom_config("CONFIG_LOOP_RESTORATION") eq "yes") {
}
# CONVOLVE_ROUND/COMPOUND_ROUND functions
add_proto qw/void av1_convolve_2d/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
specialize qw/av1_convolve_2d sse2 avx2/;
add_proto qw/void av1_convolve_rounding/, "const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits";
specialize qw/av1_convolve_rounding avx2/;
if (aom_config("CONFIG_CONVOLVE_ROUND") eq "yes") {
add_proto qw/void av1_convolve_2d/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
specialize qw/av1_convolve_2d sse2 avx2/;
add_proto qw/void av1_convolve_rounding/, "const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits";
specialize qw/av1_convolve_rounding avx2/;
if (aom_config("CONFIG_COMPOUND_ROUND") ne "yes") {
add_proto qw/void av1_convolve_2d_copy/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
specialize qw/av1_convolve_2d_copy sse2/;
add_proto qw/void av1_convolve_x/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
specialize qw/av1_convolve_x sse2/;
add_proto qw/void av1_convolve_y/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
specialize qw/av1_convolve_y sse2/;
}
add_proto qw/void av1_convolve_2d_copy/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
specialize qw/av1_convolve_2d_copy sse2/;
add_proto qw/void av1_convolve_x/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
specialize qw/av1_convolve_x sse2/;
add_proto qw/void av1_convolve_y/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
specialize qw/av1_convolve_y sse2/;
add_proto qw/void av1_convolve_2d_scale/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params";
if (aom_config("CONFIG_COMPOUND_ROUND") ne "yes") {
specialize qw/av1_convolve_2d_scale sse4_1/;
}
add_proto qw/void av1_convolve_2d_scale/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params";
specialize qw/av1_convolve_2d_scale sse4_1/;
if (aom_config("CONFIG_JNT_COMP") eq "yes") {
add_proto qw/void av1_jnt_convolve_2d/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
specialize qw/av1_jnt_convolve_2d sse4_1/;
if (aom_config("CONFIG_JNT_COMP") eq "yes") {
add_proto qw/void av1_jnt_convolve_2d/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
specialize qw/av1_jnt_convolve_2d sse4_1/;
if (aom_config("CONFIG_COMPOUND_ROUND") ne "yes") {
add_proto qw/void av1_jnt_convolve_2d_copy/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
specialize qw/av1_jnt_convolve_2d_copy sse2/;
}
}
add_proto qw/void av1_jnt_convolve_2d_copy/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
specialize qw/av1_jnt_convolve_2d_copy sse2/;
}
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
add_proto qw/void av1_highbd_convolve_2d/, "const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
specialize qw/av1_highbd_convolve_2d ssse3 avx2/;
add_proto qw/void av1_highbd_convolve_rounding/, "const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits, int bd";
specialize qw/av1_highbd_convolve_rounding avx2/;
add_proto qw/void av1_highbd_convolve_2d_scale/, "const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd";
if (aom_config("CONFIG_COMPOUND_ROUND") ne "yes") {
specialize qw/av1_highbd_convolve_2d_scale sse4_1/;
}
}
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
add_proto qw/void av1_highbd_convolve_2d/, "const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
specialize qw/av1_highbd_convolve_2d ssse3 avx2/;
add_proto qw/void av1_highbd_convolve_rounding/, "const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits, int bd";
specialize qw/av1_highbd_convolve_rounding avx2/;
add_proto qw/void av1_highbd_convolve_2d_scale/, "const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd";
specialize qw/av1_highbd_convolve_2d_scale sse4_1/;
}
# INTRA_EDGE functions
......
......@@ -357,7 +357,6 @@ void av1_convolve_vert_facade_scale(const uint8_t *src, int src_stride,
}
}
#if CONFIG_CONVOLVE_ROUND
void av1_convolve_rounding_c(const int32_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h, int bits) {
for (int r = 0; r < h; ++r) {
......@@ -368,190 +367,6 @@ void av1_convolve_rounding_c(const int32_t *src, int src_stride, uint8_t *dst,
}
}
#if CONFIG_COMPOUND_ROUND
// Separable 2D convolution, run as two passes over an intermediate block.
//
// Pass 1 filters each needed source row horizontally with the x kernel picked
// by subpel_x_q4, rounds by conv_params->round_0 and stores the result,
// clamped with clip_pixel, as 8-bit samples. Pass 2 filters that block
// vertically with the y kernel picked by subpel_y_q4, rounds by
// conv_params->round_1 and either overwrites dst or, when
// conv_params->do_average is non-zero, adds to the value already in dst.
void av1_convolve_2d_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
                       int dst_stride, int w, int h,
                       InterpFilterParams *filter_params_x,
                       InterpFilterParams *filter_params_y,
                       const int subpel_x_q4, const int subpel_y_q4,
                       ConvolveParams *conv_params) {
  uint8_t tmp[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
  const int tmp_stride = w;
  // The vertical filter needs (taps - 1) extra rows of horizontal output.
  const int tmp_rows = h + filter_params_y->taps - 1;
  const int rows_above = filter_params_y->taps / 2 - 1;
  const int cols_left = filter_params_x->taps / 2 - 1;

  // Pass 1: horizontal filter, starting rows_above rows above the block.
  const uint8_t *const in = src - rows_above * src_stride;
  const int16_t *const kernel_x = av1_get_interp_filter_subpel_kernel(
      *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
  for (int row = 0; row < tmp_rows; ++row) {
    const int in_base = row * src_stride - cols_left;
    for (int col = 0; col < w; ++col) {
      int32_t acc = 0;
      for (int tap = 0; tap < filter_params_x->taps; ++tap) {
        acc += kernel_x[tap] * in[in_base + col + tap];
      }
      tmp[row * tmp_stride + col] =
          clip_pixel(ROUND_POWER_OF_TWO(acc, conv_params->round_0));
    }
  }

  // Pass 2: vertical filter over the intermediate block.
  const uint8_t *const mid = tmp + rows_above * tmp_stride;
  const int16_t *const kernel_y = av1_get_interp_filter_subpel_kernel(
      *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
  for (int row = 0; row < h; ++row) {
    const int out_base = row * dst_stride;
    for (int col = 0; col < w; ++col) {
      CONV_BUF_TYPE acc = 0;
      for (int tap = 0; tap < filter_params_y->taps; ++tap) {
        acc += kernel_y[tap] * mid[(row - rows_above + tap) * tmp_stride + col];
      }
      const CONV_BUF_TYPE rounded =
          ROUND_POWER_OF_TWO(acc, conv_params->round_1);
      if (!conv_params->do_average) {
        dst[out_base + col] = rounded;
      } else {
        dst[out_base + col] += rounded;
      }
    }
  }
}
#if CONFIG_JNT_COMP
// Separable 2D convolution with optional weighted (jnt_comp) compound output.
//
// The filtering is two passes: a horizontal pass into an 8-bit intermediate
// block (rounded by conv_params->round_0 and clamped with clip_pixel), then a
// vertical pass rounded by conv_params->round_1.
//
// How the result reaches dst depends on conv_params:
//  - use_jnt_comp_avg set, do_average == 0: dst = res * fwd_offset.
//  - use_jnt_comp_avg set, do_average != 0: dst += res * bck_offset, then dst
//    is rounded down by (DIST_PRECISION_BITS - 1) bits.
//  - use_jnt_comp_avg clear: dst is overwritten, or accumulated into when
//    do_average is non-zero.
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride,
                           CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
                           InterpFilterParams *filter_params_x,
                           InterpFilterParams *filter_params_y,
                           const int subpel_x_q4, const int subpel_y_q4,
                           ConvolveParams *conv_params) {
  uint8_t tmp[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
  const int tmp_stride = w;
  // The vertical filter needs (taps - 1) extra rows of horizontal output.
  const int tmp_rows = h + filter_params_y->taps - 1;
  const int rows_above = filter_params_y->taps / 2 - 1;
  const int cols_left = filter_params_x->taps / 2 - 1;

  // Pass 1: horizontal filter, starting rows_above rows above the block.
  const uint8_t *const in = src - rows_above * src_stride;
  const int16_t *const kernel_x = av1_get_interp_filter_subpel_kernel(
      *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
  for (int row = 0; row < tmp_rows; ++row) {
    const int in_base = row * src_stride - cols_left;
    for (int col = 0; col < w; ++col) {
      int32_t acc = 0;
      for (int tap = 0; tap < filter_params_x->taps; ++tap) {
        acc += kernel_x[tap] * in[in_base + col + tap];
      }
      tmp[row * tmp_stride + col] =
          clip_pixel(ROUND_POWER_OF_TWO(acc, conv_params->round_0));
    }
  }

  // Pass 2: vertical filter, then store according to the compound mode.
  const uint8_t *const mid = tmp + rows_above * tmp_stride;
  const int16_t *const kernel_y = av1_get_interp_filter_subpel_kernel(
      *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
  for (int row = 0; row < h; ++row) {
    for (int col = 0; col < w; ++col) {
      CONV_BUF_TYPE acc = 0;
      for (int tap = 0; tap < filter_params_y->taps; ++tap) {
        acc += kernel_y[tap] * mid[(row - rows_above + tap) * tmp_stride + col];
      }
      const CONV_BUF_TYPE rounded =
          ROUND_POWER_OF_TWO(acc, conv_params->round_1);
      const int out_idx = row * dst_stride + col;
      if (!conv_params->use_jnt_comp_avg) {
        if (conv_params->do_average) {
          dst[out_idx] += rounded;
        } else {
          dst[out_idx] = rounded;
        }
      } else if (conv_params->do_average == 0) {
        dst[out_idx] = rounded * conv_params->fwd_offset;
      } else {
        dst[out_idx] += rounded * conv_params->bck_offset;
        dst[out_idx] =
            ROUND_POWER_OF_TWO(dst[out_idx], DIST_PRECISION_BITS - 1);
      }
    }
  }
}
#endif // CONFIG_JNT_COMP
// Scaled separable 2D convolution.
//
// Source positions advance by x_step_qn / y_step_qn per output sample,
// starting from subpel_x_qn / subpel_y_qn. The integer part of a position
// (position >> SCALE_SUBPEL_BITS) selects the source sample and the
// fractional part selects the filter kernel, so the kernel is looked up per
// output sample rather than once per block.
//
// Pass 1 writes horizontally filtered samples, rounded by
// conv_params->round_0 and clamped with clip_pixel, into an 8-bit
// intermediate block. Pass 2 filters that block vertically, rounds by
// conv_params->round_1 and stores to dst: overwrite or accumulate depending
// on do_average, with the fwd_offset / bck_offset weighting when
// CONFIG_JNT_COMP is enabled and use_jnt_comp_avg is set.
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride,
                             CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
                             InterpFilterParams *filter_params_x,
                             InterpFilterParams *filter_params_y,
                             const int subpel_x_qn, const int x_step_qn,
                             const int subpel_y_qn, const int y_step_qn,
                             ConvolveParams *conv_params) {
  uint8_t tmp[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE];
  const int tmp_stride = w;
  // Rows reached by the last output row's position, plus the filter support.
  const int tmp_rows =
      (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
      filter_params_y->taps;
  const int rows_above = filter_params_y->taps / 2 - 1;
  const int cols_left = filter_params_x->taps / 2 - 1;

  // Pass 1: horizontal filter, starting rows_above rows above the block.
  const uint8_t *const in = src - rows_above * src_stride;
  for (int row = 0; row < tmp_rows; ++row) {
    int pos_x = subpel_x_qn;
    for (int col = 0; col < w; ++col) {
      const int in_base =
          row * src_stride + (pos_x >> SCALE_SUBPEL_BITS) - cols_left;
      const int x_filter_idx = (pos_x & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
      assert(x_filter_idx < SUBPEL_SHIFTS);
      const int16_t *const kernel_x =
          av1_get_interp_filter_subpel_kernel(*filter_params_x, x_filter_idx);
      int acc = 0;
      for (int tap = 0; tap < filter_params_x->taps; ++tap) {
        acc += kernel_x[tap] * in[in_base + tap];
      }
      tmp[row * tmp_stride + col] =
          clip_pixel(ROUND_POWER_OF_TWO(acc, conv_params->round_0));
      pos_x += x_step_qn;
    }
  }

  // Pass 2: vertical filter, column by column. Tap k of the output at integer
  // row r reads intermediate row (r + k); the rows_above offset applied when
  // entering the block cancels against the filter's own centering.
  for (int col = 0; col < w; ++col) {
    int pos_y = subpel_y_qn;
    for (int row = 0; row < h; ++row) {
      const int top_row = pos_y >> SCALE_SUBPEL_BITS;
      const int y_filter_idx = (pos_y & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
      assert(y_filter_idx < SUBPEL_SHIFTS);
      const int16_t *const kernel_y =
          av1_get_interp_filter_subpel_kernel(*filter_params_y, y_filter_idx);
      CONV_BUF_TYPE acc = 0;
      for (int tap = 0; tap < filter_params_y->taps; ++tap) {
        acc += kernel_y[tap] * tmp[(top_row + tap) * tmp_stride + col];
      }
      const CONV_BUF_TYPE rounded =
          ROUND_POWER_OF_TWO(acc, conv_params->round_1);
      const int out_idx = row * dst_stride + col;
#if CONFIG_JNT_COMP
      if (conv_params->use_jnt_comp_avg) {
        if (conv_params->do_average == 0) {
          dst[out_idx] = rounded * conv_params->fwd_offset;
        } else {
          dst[out_idx] += rounded * conv_params->bck_offset;
          dst[out_idx] =
              ROUND_POWER_OF_TWO(dst[out_idx], DIST_PRECISION_BITS - 1);
        }
      } else
#endif  // CONFIG_JNT_COMP
      {
        if (conv_params->do_average) {
          dst[out_idx] += rounded;
        } else {
          dst[out_idx] = rounded;
        }
      }
      pos_y += y_step_qn;
    }
  }
}
#else
/* When convolve-round is enabled and compound-round is disabled, we use a
high-precision convolve filter.
Note: For notes on hardware implementations, including the required
......@@ -877,7 +692,6 @@ void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride,
src_vert++;
}
}
#endif // CONFIG_COMPOUND_ROUND
void av1_convolve_2d_facade(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
......@@ -947,12 +761,6 @@ void av1_convolve_2d_facade(const uint8_t *src, int src_stride, uint8_t *dst,
&filter_params_y, subpel_x_q4, x_step_q4,
subpel_y_q4, y_step_q4, conv_params);
} else {
#if CONFIG_COMPOUND_ROUND
av1_jnt_convolve_2d(src, src_stride, conv_params->dst,
conv_params->dst_stride, w, h, &filter_params_x,
&filter_params_y, subpel_x_q4, subpel_y_q4,
conv_params);
#else
if (subpel_x_q4 == 0 && subpel_y_q4 == 0) {
av1_jnt_convolve_2d_copy(src, src_stride, conv_params->dst,
conv_params->dst_stride, w, h,
......@@ -976,7 +784,6 @@ void av1_convolve_2d_facade(const uint8_t *src, int src_stride, uint8_t *dst,
&filter_params_y, subpel_x_q4, subpel_y_q4,
conv_params);
}
#endif // CONFIG_COMPOUND_ROUND
}
#else
if (scaled) {
......@@ -985,11 +792,6 @@ void av1_convolve_2d_facade(const uint8_t *src, int src_stride, uint8_t *dst,
&filter_params_y, subpel_x_q4, x_step_q4,
subpel_y_q4, y_step_q4, conv_params);
} else {
#if CONFIG_COMPOUND_ROUND
av1_convolve_2d(src, src_stride, conv_params->dst,
conv_params->dst_stride, w, h, &filter_params_x,
&filter_params_y, subpel_x_q4, subpel_y_q4, conv_params);
#else
// Special case convolve functions should produce the same result as
// av1_convolve_2d.
if (subpel_x_q4 == 0 && subpel_y_q4 == 0) {
......@@ -1012,7 +814,6 @@ void av1_convolve_2d_facade(const uint8_t *src, int src_stride, uint8_t *dst,
&filter_params_y, subpel_x_q4, subpel_y_q4,
conv_params);
}
#endif // CONFIG_COMPOUND_ROUND
}
#endif // CONFIG_JNT_COMP
}
......@@ -1031,114 +832,6 @@ void av1_highbd_convolve_rounding_c(const int32_t *src, int src_stride,
}
}
#if CONFIG_COMPOUND_ROUND
// High-bitdepth separable 2D convolution, run as two passes.
//
// Pass 1 filters 16-bit source samples horizontally with the x kernel picked
// by subpel_x_q4, rounds by conv_params->round_0 and stores the result,
// clamped with clip_pixel_highbd(..., bd), in a 16-bit intermediate block.
// Pass 2 filters that block vertically with the y kernel picked by
// subpel_y_q4, rounds by conv_params->round_1 and either overwrites dst or,
// when conv_params->do_average is non-zero, adds to the value already in dst.
void av1_highbd_convolve_2d_c(const uint16_t *src, int src_stride,
                              CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
                              InterpFilterParams *filter_params_x,
                              InterpFilterParams *filter_params_y,
                              const int subpel_x_q4, const int subpel_y_q4,
                              ConvolveParams *conv_params, int bd) {
  uint16_t tmp[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
  const int tmp_stride = w;
  // The vertical filter needs (taps - 1) extra rows of horizontal output.
  const int tmp_rows = h + filter_params_y->taps - 1;
  const int rows_above = filter_params_y->taps / 2 - 1;
  const int cols_left = filter_params_x->taps / 2 - 1;

  // Pass 1: horizontal filter, starting rows_above rows above the block.
  const uint16_t *const in = src - rows_above * src_stride;
  const int16_t *const kernel_x = av1_get_interp_filter_subpel_kernel(
      *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
  for (int row = 0; row < tmp_rows; ++row) {
    const int in_base = row * src_stride - cols_left;
    for (int col = 0; col < w; ++col) {
      int32_t acc = 0;
      for (int tap = 0; tap < filter_params_x->taps; ++tap) {
        acc += kernel_x[tap] * in[in_base + col + tap];
      }
      tmp[row * tmp_stride + col] = clip_pixel_highbd(
          ROUND_POWER_OF_TWO(acc, conv_params->round_0), bd);
    }
  }

  // Pass 2: vertical filter over the intermediate block.
  const uint16_t *const mid = tmp + rows_above * tmp_stride;
  const int16_t *const kernel_y = av1_get_interp_filter_subpel_kernel(
      *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
  for (int row = 0; row < h; ++row) {
    const int out_base = row * dst_stride;
    for (int col = 0; col < w; ++col) {
      CONV_BUF_TYPE acc = 0;
      for (int tap = 0; tap < filter_params_y->taps; ++tap) {
        acc += kernel_y[tap] * mid[(row - rows_above + tap) * tmp_stride + col];
      }
      const CONV_BUF_TYPE rounded =
          ROUND_POWER_OF_TWO(acc, conv_params->round_1);
      if (!conv_params->do_average) {
        dst[out_base + col] = rounded;
      } else {
        dst[out_base + col] += rounded;
      }
    }
  }
}
// High-bitdepth scaled separable 2D convolution.
//
// Source positions advance by x_step_qn / y_step_qn per output sample,
// starting from subpel_x_qn / subpel_y_qn. The integer part of a position
// (position >> SCALE_SUBPEL_BITS) selects the source sample and the
// fractional part selects the filter kernel, so the kernel is looked up per
// output sample.
//
// Pass 1 writes horizontally filtered samples, rounded by
// conv_params->round_0, into a 16-bit intermediate block. Pass 2 filters that
// block vertically, rounds by conv_params->round_1 and either overwrites dst
// or, when conv_params->do_average is non-zero, accumulates into it.
//
// The intermediate samples are clamped with clip_pixel_highbd(..., bd), the
// same way av1_highbd_convolve_2d_c clamps its intermediate block. The
// previous code used clip_pixel and ignored bd.
// NOTE(review): clip_pixel is the helper the 8-bit paths use, so it presumably
// clamped these 16-bit samples to the 8-bit range -- confirm against its
// definition.
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride,
                                    CONV_BUF_TYPE *dst, int dst_stride, int w,
                                    int h, InterpFilterParams *filter_params_x,
                                    InterpFilterParams *filter_params_y,
                                    const int subpel_x_qn, const int x_step_qn,
                                    const int subpel_y_qn, const int y_step_qn,
                                    ConvolveParams *conv_params, int bd) {
  uint16_t im_block[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE];
  // Rows reached by the last output row's position, plus the filter support.
  int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
             filter_params_y->taps;
  int im_stride = w;
  const int fo_vert = filter_params_y->taps / 2 - 1;
  const int fo_horiz = filter_params_x->taps / 2 - 1;

  // horizontal filter
  const uint16_t *src_horiz = src - fo_vert * src_stride;
  for (int y = 0; y < im_h; ++y) {
    int x_qn = subpel_x_qn;
    for (int x = 0; x < w; ++x, x_qn += x_step_qn) {
      const uint16_t *const src_x = &src_horiz[(x_qn >> SCALE_SUBPEL_BITS)];
      const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
      assert(x_filter_idx < SUBPEL_SHIFTS);
      const int16_t *x_filter =
          av1_get_interp_filter_subpel_kernel(*filter_params_x, x_filter_idx);
      int sum = 0;
      for (int k = 0; k < filter_params_x->taps; ++k)
        sum += x_filter[k] * src_x[k - fo_horiz];
      // Clamp to the bd-bit range; the intermediate block is 16 bits wide.
      im_block[y * im_stride + x] =
          clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, conv_params->round_0), bd);
    }
    src_horiz += src_stride;
  }

  // vertical filter
  uint16_t *src_vert = im_block + fo_vert * im_stride;
  for (int x = 0; x < w; ++x) {
    int y_qn = subpel_y_qn;
    for (int y = 0; y < h; ++y, y_qn += y_step_qn) {
      const uint16_t *const src_y =
          &src_vert[(y_qn >> SCALE_SUBPEL_BITS) * im_stride];
      const int y_filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
      assert(y_filter_idx < SUBPEL_SHIFTS);
      const int16_t *y_filter =
          av1_get_interp_filter_subpel_kernel(*filter_params_y, y_filter_idx);
      CONV_BUF_TYPE sum = 0;
      for (int k = 0; k < filter_params_y->taps; ++k) {
        sum += y_filter[k] * src_y[(k - fo_vert) * im_stride];
      }
      CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1);
      if (conv_params->do_average)
        dst[y * dst_stride + x] += res;
      else
        dst[y * dst_stride + x] = res;
    }
    // Advance to the next column of the intermediate block.
    src_vert++;
  }
}
#else
void av1_highbd_convolve_2d_c(const uint16_t *src, int src_stride,
CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
InterpFilterParams *filter_params_x,
......@@ -1253,7 +946,6 @@ void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride,
src_vert++;
}
}
#endif // CONFIG_COMPOUND_ROUND
void av1_highbd_convolve_2d_facade(const uint8_t *src8, int src_stride,
uint8_t *dst, int dst_stride, int w, int h,
......@@ -1318,8 +1010,6 @@ void av1_highbd_convolve_2d_facade(const uint8_t *src8, int src_stride,
}
#endif // CONFIG_HIGHBITDEPTH
#endif // CONFIG_CONVOLVE_ROUND
typedef void (*ConvolveFunc)(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
const InterpFilterParams filter_params,
......
......@@ -99,7 +99,6 @@ static INLINE void av1_get_convolve_filter_params(InterpFilters interp_filters,
struct AV1Common;
void av1_convolve_init(struct AV1Common *cm);
#if CONFIG_CONVOLVE_ROUND
void av1_convolve_2d_facade(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
InterpFilters interp_filters, const int subpel_x_q4,
......@@ -113,11 +112,7 @@ static INLINE ConvolveParams get_conv_params_no_round(int ref, int do_average,
conv_params.ref = ref;
conv_params.do_average = do_average;
conv_params.round = CONVOLVE_OPT_NO_ROUND;
#if CONFIG_COMPOUND_ROUND
conv_params.round_0 = FILTER_BITS;
#else
conv_params.round_0 = 5;
#endif
conv_params.round_1 = 0;
conv_params.dst = dst;
conv_params.dst_stride = dst_stride;
......@@ -135,7 +130,6 @@ void av1_highbd_convolve_2d_facade(const uint8_t *src8, int src_stride,
int scaled, ConvolveParams *conv_params,
int bd);
#endif
#endif // CONFIG_CONVOLVE_ROUND
void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h, InterpFilters interp_filters,
......
......@@ -403,7 +403,6 @@ void build_compound_seg_mask_highbd(uint8_t *mask, SEG_MASK_TYPE mask_type,