Commit c40cb70c authored by Debargha Mukherjee

Lower precision compound Blend

Adds an experiment for lower-precision transform blend
in order to make 8- and 10-bit blends more
efficiently vectorizable.

Also disables the av1_convolve_y functions temporarily, since
they are incompatible with 7 - round_0 - round_1 being
negative.

Also disables the SIMD version of warp when round_1 > 0, because
of incompatibility.

Noise level difference in coding efficiency:
+0.014% (lowres, 15 frames cpu-used = 1)

Change-Id: Ic6b9233901ca35ab735b0a5cdb850f96d3cd70ab
parent 98c8fcc1
......@@ -456,7 +456,14 @@ void av1_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst0,
for (int k = 0; k < filter_params_y->taps; ++k) {
res += y_filter[k] * src[(y - fo_vert + k) * src_stride + x];
}
#if CONFIG_LOWPRECISION_BLEND
if (bits >= 0)
res *= (1 << bits);
else
res = ROUND_POWER_OF_TWO(res, bits);
#else
res *= (1 << bits);
#endif // CONFIG_LOWPRECISION_BLEND
if (conv_params->do_average)
dst[y * dst_stride + x] += res;
else
......@@ -740,7 +747,14 @@ void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst0,
for (int k = 0; k < filter_params_y->taps; ++k) {
res += y_filter[k] * src[(y - fo_vert + k) * src_stride + x];
}
#if CONFIG_LOWPRECISION_BLEND
if (bits >= 0)
res *= (1 << bits);
else
res = ROUND_POWER_OF_TWO(res, bits);
#else
res *= (1 << bits);
#endif // CONFIG_LOWPRECISION_BLEND
if (conv_params->use_jnt_comp_avg) {
if (conv_params->do_average) {
dst[y * dst_stride + x] += res * conv_params->bck_offset;
......
......@@ -58,6 +58,11 @@ static INLINE ConvolveParams get_conv_params(int ref, int do_average,
conv_params.round = CONVOLVE_OPT_ROUND;
conv_params.plane = plane;
conv_params.do_post_rounding = 0;
conv_params.round_0 = 5;
conv_params.round_1 = 0;
conv_params.is_compound = 0;
conv_params.dst = NULL;
conv_params.dst_stride = 0;
return conv_params;
}
......
......@@ -26,6 +26,10 @@
#include "av1/common/onyxc_int.h"
#include "av1/common/obmc.h"
#if CONFIG_LOWPRECISION_BLEND
#define LOWPRECISION_BLEND_BITS 4 // reduction in precision bits
#endif // CONFIG_LOWPRECISION_BLEND
// This function will determine whether or not to create a warped
// prediction.
static INLINE int allow_warp(const MODE_INFO *const mi,
......@@ -86,16 +90,14 @@ static INLINE void av1_make_inter_predictor(
pre_buf->buf0, pre_buf->width, pre_buf->height,
pre_buf->stride, dst, p_col, p_row, w, h, dst_stride,
pd->subsampling_x, pd->subsampling_y, conv_params);
return;
}
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
} else if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
sf, w, h, conv_params, interp_filters, xs, ys,
xd->bd);
return;
} else {
inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, sf, w,
h, conv_params, interp_filters, xs, ys);
}
inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, sf, w,
h, conv_params, interp_filters, xs, ys);
}
#define NSMOOTHERS 1
......@@ -993,6 +995,10 @@ static INLINE void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd,
assert(w <= 8 && h <= 8);
ConvolveParams conv_params = get_conv_params_no_round(
0, 0, plane, tmp_dst, tmp_dst_stride, is_compound);
#if CONFIG_LOWPRECISION_BLEND
if (is_masked_compound_type(mi->mbmi.interinter_compound_type))
conv_params.round_1 = LOWPRECISION_BLEND_BITS;
#endif // CONFIG_LOWPRECISION_BLEND
#if CONFIG_JNT_COMP
conv_params.use_jnt_comp_avg = 0;
#endif // CONFIG_JNT_COMP
......@@ -1191,6 +1197,10 @@ static INLINE void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd,
ConvolveParams conv_params = get_conv_params_no_round(
ref, ref, plane, tmp_dst, MAX_SB_SIZE, is_compound);
#if CONFIG_LOWPRECISION_BLEND
if (is_masked_compound_type(mi->mbmi.interinter_compound_type))
conv_params.round_1 = LOWPRECISION_BLEND_BITS;
#endif // CONFIG_LOWPRECISION_BLEND
#if CONFIG_JNT_COMP
av1_jnt_comp_weight_assign(cm, &mi->mbmi, 0, &conv_params.fwd_offset,
&conv_params.bck_offset,
......
......@@ -189,8 +189,14 @@ void av1_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w,
#if CONFIG_JNT_COMP
// subpel_x_q4 == 0 && subpel_y_q4 == 0
sf->convolve[0][0][1] = av1_jnt_convolve_2d_copy;
// subpel_x_q4 == 0
// subpel_x_q4 == 0
#if CONFIG_LOWPRECISION_BLEND
// NOTE: The av1_jnt_convolve_y() function is incorrect currently.
// So default to the 2d version.
sf->convolve[0][1][1] = av1_jnt_convolve_2d;
#else
sf->convolve[0][1][1] = av1_jnt_convolve_y;
#endif // CONFIG_LOWPRECISION_BLEND
// subpel_y_q4 == 0
sf->convolve[1][0][1] = av1_jnt_convolve_x;
// subpel_x_q4 != 0 && subpel_y_q4 != 0
......@@ -198,8 +204,14 @@ void av1_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w,
#else
// subpel_x_q4 == 0 && subpel_y_q4 == 0
sf->convolve[0][0][1] = av1_convolve_2d_copy;
// subpel_x_q4 == 0
// subpel_x_q4 == 0
#if CONFIG_LOWPRECISION_BLEND
// NOTE: The av1_convolve_y() function is incorrect currently.
// So default to the 2d version.
sf->convolve[0][1][1] = av1_convolve_2d;
#else
sf->convolve[0][1][1] = av1_convolve_y;
#endif // CONFIG_LOWPRECISION_BLEND
// subpel_y_q4 == 0
sf->convolve[1][0][1] = av1_convolve_x;
// subpel_x_q4 != 0 && subpel_y_q4 != 0
......
......@@ -570,10 +570,16 @@ static void highbd_warp_plane(WarpedMotionParams *wm, const uint8_t *const ref8,
const uint16_t *const ref = CONVERT_TO_SHORTPTR(ref8);
uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
av1_highbd_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row,
p_width, p_height, p_stride, subsampling_x,
subsampling_y, bd, conv_params, alpha, beta, gamma,
delta);
if (conv_params->round_1)
av1_highbd_warp_affine_c(mat, ref, width, height, stride, pred, p_col,
p_row, p_width, p_height, p_stride, subsampling_x,
subsampling_y, bd, conv_params, alpha, beta, gamma,
delta);
else
av1_highbd_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row,
p_width, p_height, p_stride, subsampling_x,
subsampling_y, bd, conv_params, alpha, beta, gamma,
delta);
}
static int64_t highbd_frame_error(const uint16_t *const ref, int stride,
......@@ -870,9 +876,14 @@ static void warp_plane(WarpedMotionParams *wm, const uint8_t *const ref,
const int16_t gamma = wm->gamma;
const int16_t delta = wm->delta;
av1_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row, p_width,
p_height, p_stride, subsampling_x, subsampling_y, conv_params,
alpha, beta, gamma, delta);
if (conv_params->round_1)
av1_warp_affine_c(mat, ref, width, height, stride, pred, p_col, p_row,
p_width, p_height, p_stride, subsampling_x, subsampling_y,
conv_params, alpha, beta, gamma, delta);
else
av1_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row,
p_width, p_height, p_stride, subsampling_x, subsampling_y,
conv_params, alpha, beta, gamma, delta);
}
static int64_t frame_error(const uint8_t *const ref, int stride,
......
......@@ -129,6 +129,7 @@ set(CONFIG_LOOPFILTERING_ACROSS_TILES 1 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_LOOPFILTERING_ACROSS_TILES_EXT 1 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_LOOPFILTER_LEVEL 1 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_LOOP_RESTORATION 1 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_LOWPRECISION_BLEND 1 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_LV_MAP 1 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_MAX_TILE 1 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_MFMV 1 CACHE NUMBER "AV1 experiment flag.")
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment