diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl index 75722321b50a41bac137a50e45e716050c1cb335..778adceebceb5f64933efd1e6879add69948b47d 100755 --- a/av1/common/av1_rtcd_defs.pl +++ b/av1/common/av1_rtcd_defs.pl @@ -510,6 +510,10 @@ if (aom_config("CONFIG_JNT_COMP") eq "yes") { add_proto qw/void av1_jnt_convolve_2d_copy/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params"; specialize qw/av1_jnt_convolve_2d_copy sse2/; + + add_proto qw/void av1_jnt_convolve_x/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params"; + + add_proto qw/void av1_jnt_convolve_y/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params"; } add_proto qw/void av1_highbd_convolve_2d/, "const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd"; diff --git a/av1/common/convolve.c b/av1/common/convolve.c index 8fa92750e6ffac0e3c69c998f511852edae0ac84..9f4ec266c177bfb6dfea891921d417194eaed057 100644 --- a/av1/common/convolve.c +++ b/av1/common/convolve.c @@ -724,6 +724,90 @@ void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst0, } } +/* Vertical-only convolve: filters each column of src with the filter_params_y kernel selected by subpel_y_q4 and stores or accumulates the w x h result in conv_params->dst (row stride conv_params->dst_stride). filter_params_x, subpel_x_q4, dst0 and dst_stride0 are unused and cast to void. The scale.c hunk of this patch installs the function in the convolve slot labelled subpel_x_q4 == 0, replacing the av1_jnt_convolve_2d placeholder. */ void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst0, + int dst_stride0, int w, int h, + InterpFilterParams *filter_params_x, + InterpFilterParams *filter_params_y, + const int subpel_x_q4, const 
int subpel_y_q4, + ConvolveParams *conv_params) { + CONV_BUF_TYPE *dst = conv_params->dst; + int dst_stride = conv_params->dst_stride; + const int fo_vert = filter_params_y->taps / 2 - 1; /* the kernel starts fo_vert rows above the output row, so src is read from row -fo_vert through row h - 2 - fo_vert + taps */ + const int bits = FILTER_BITS - conv_params->round_0 - conv_params->round_1; /* left-shift count applied to every filtered sum; NOTE(review): presumably brings the result to the same precision as the 2D path, and assumes round_0 + round_1 <= FILTER_BITS so the count is non-negative - confirm against callers */ + (void)filter_params_x; + (void)subpel_x_q4; + (void)dst0; + (void)dst_stride0; + + // vertical filter + const int16_t *y_filter = av1_get_interp_filter_subpel_kernel( + *filter_params_y, subpel_y_q4 & SUBPEL_MASK); + for (int y = 0; y < h; ++y) { + for (int x = 0; x < w; ++x) { + CONV_BUF_TYPE res = 0; + for (int k = 0; k < filter_params_y->taps; ++k) { + res += y_filter[k] * src[(y - fo_vert + k) * src_stride + x]; + } + res *= (1 << bits); /* no rounding shift in this path; the sum is only scaled up */ + if (conv_params->use_jnt_comp_avg) { /* weighted mode: with do_average clear, store res * fwd_offset; with do_average set, add res * bck_offset to dst and shift the total right by DIST_PRECISION_BITS - 1 */ + if (conv_params->do_average) { + dst[y * dst_stride + x] += res * conv_params->bck_offset; + dst[y * dst_stride + x] >>= (DIST_PRECISION_BITS - 1); + } else { + dst[y * dst_stride + x] = res * conv_params->fwd_offset; + } + } else { /* unweighted mode: add to the value already in dst when do_average is set, otherwise overwrite it */ + if (conv_params->do_average) + dst[y * dst_stride + x] += res; + else + dst[y * dst_stride + x] = res; + } + } + } +} + +/* Horizontal-only convolve: filters each row of src with the filter_params_x kernel selected by subpel_x_q4 and stores or accumulates the w x h result in conv_params->dst (row stride conv_params->dst_stride). filter_params_y, subpel_y_q4, dst0 and dst_stride0 are unused and cast to void. The scale.c hunk of this patch installs the function in the convolve slot labelled subpel_y_q4 == 0, replacing the av1_jnt_convolve_2d placeholder. */ void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst0, + int dst_stride0, int w, int h, + InterpFilterParams *filter_params_x, + InterpFilterParams *filter_params_y, + const int subpel_x_q4, const int subpel_y_q4, + ConvolveParams *conv_params) { + CONV_BUF_TYPE *dst = conv_params->dst; + int dst_stride = conv_params->dst_stride; + const int fo_horiz = filter_params_x->taps / 2 - 1; /* the kernel starts fo_horiz samples left of the output column, so each row of src is read from column -fo_horiz through column w - 2 - fo_horiz + taps */ + const int bits = FILTER_BITS - conv_params->round_1; /* round_0 is not folded into this count; it is applied separately as a rounding right shift on the raw sum in the loop; NOTE(review): assumes round_1 <= FILTER_BITS so the left-shift count is non-negative - confirm against callers */ + (void)filter_params_y; + (void)subpel_y_q4; + (void)dst0; + (void)dst_stride0; + + // horizontal filter + const int16_t *x_filter = av1_get_interp_filter_subpel_kernel( + *filter_params_x, subpel_x_q4 & SUBPEL_MASK); + for (int y = 0; y < h; ++y) { + for (int x = 0; x < w; ++x) { + CONV_BUF_TYPE res = 0; + for (int k = 0; k < filter_params_x->taps; ++k) { + res += x_filter[k] * src[y * src_stride + x - fo_horiz + k]; + 
} + res = (1 << bits) * ROUND_POWER_OF_TWO(res, conv_params->round_0); /* round the horizontal sum by round_0 first, then scale it up by 1 << bits */ + if (conv_params->use_jnt_comp_avg) { /* weighted mode: with do_average clear, store res * fwd_offset; with do_average set, add res * bck_offset to dst and shift the total right by DIST_PRECISION_BITS - 1 */ + if (conv_params->do_average) { + dst[y * dst_stride + x] += res * conv_params->bck_offset; + dst[y * dst_stride + x] >>= (DIST_PRECISION_BITS - 1); + } else { + dst[y * dst_stride + x] = res * conv_params->fwd_offset; + } + } else { /* unweighted mode: add to the value already in dst when do_average is set, otherwise overwrite it */ + if (conv_params->do_average) + dst[y * dst_stride + x] += res; + else + dst[y * dst_stride + x] = res; + } + } + } +} + void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst0, int dst_stride0, int w, int h, InterpFilterParams *filter_params_x, diff --git a/av1/common/scale.c b/av1/common/scale.c index b19513e3b2c2c426123e73839d1a277efb08865b..a334bae6f7adf6ffbd1192074250afee6899e7c4 100644 --- a/av1/common/scale.c +++ b/av1/common/scale.c @@ -190,11 +190,9 @@ void av1_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w, // subpel_x_q4 == 0 && subpel_y_q4 == 0 sf->convolve[0][0][1] = av1_jnt_convolve_2d_copy; // subpel_x_q4 == 0 - // place holder - sf->convolve[0][1][1] = av1_jnt_convolve_2d; + sf->convolve[0][1][1] = av1_jnt_convolve_y; /* vertical-only filter takes over from the 2D placeholder */ // subpel_y_q4 == 0 - // place holder - sf->convolve[1][0][1] = av1_jnt_convolve_2d; + sf->convolve[1][0][1] = av1_jnt_convolve_x; /* horizontal-only filter takes over from the 2D placeholder */ // subpel_x_q4 != 0 && subpel_y_q4 != 0 sf->convolve[1][1][1] = av1_jnt_convolve_2d; #else