Commit 6a4f708c authored by John Koleszar

Refactor inter recon functions to support scaling

Ensure that all inter prediction goes through a common code path
that takes scaling into account. Removes a bunch of duplicate
1st/2nd predictor code. Also introduces a 16x8 mode for 8x8
MVs, similar to the 8x4 trick we were doing before. This has an
unexpected effect with EIGHTTAP_SMOOTH, so it's disabled in that
case for now.

Change-Id: Ia053e823a8bc616a988a0af30452e1e75a739cba
parent 9770d564
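
For orientation before the diff: all inter prediction now funnels through vp9_build_inter_predictor(), which takes the struct scale_factors introduced below, so a reference frame of a different resolution can eventually be handled in the same path as a same-size one. The following is a minimal sketch of the coordinate mapping those fields suggest, assuming positions are tracked in 1/16-pel (q4) units; the struct copy and helper name are illustrative only, not code from this change:

/* Illustrative sketch, assuming q4 (1/16-pel) positions. With
 * x_num == x_den and x_offset_q4 == 0 (the 1:1 factors this commit
 * installs in setup_macroblock() below), the mapping is the identity. */
struct scale_factors_sketch {
  int x_num, x_den, x_offset_q4;
};

static int scaled_x_q4(int x_q4, const struct scale_factors_sketch *sf) {
  return x_q4 * sf->x_num / sf->x_den + sf->x_offset_q4;
}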
@@ -506,10 +506,12 @@ INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values(
make_tuple(4, 4, &convolve8_2d_only_c),
make_tuple(8, 4, &convolve8_2d_only_c),
make_tuple(8, 8, &convolve8_2d_only_c),
+make_tuple(16, 8, &convolve8_2d_only_c),
make_tuple(16, 16, &convolve8_2d_only_c),
make_tuple(4, 4, &convolve8_c),
make_tuple(8, 4, &convolve8_c),
make_tuple(8, 8, &convolve8_c),
+make_tuple(16, 8, &convolve8_c),
make_tuple(16, 16, &convolve8_c)));
}
@@ -523,5 +525,6 @@ INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
make_tuple(4, 4, &convolve8_ssse3),
make_tuple(8, 4, &convolve8_ssse3),
make_tuple(8, 8, &convolve8_ssse3),
+make_tuple(16, 8, &convolve8_ssse3),
make_tuple(16, 16, &convolve8_ssse3)));
#endif
@@ -288,6 +288,15 @@ typedef struct superblockd {
DECLARE_ALIGNED(16, int16_t, dqcoeff[32*32+16*16*2]);
} SUPERBLOCKD;
+struct scale_factors {
+  int x_num;
+  int x_den;
+  int x_offset_q4;
+  int y_num;
+  int y_den;
+  int y_offset_q4;
+};
typedef struct macroblockd {
DECLARE_ALIGNED(16, int16_t, diff[384]); /* from idct diff */
DECLARE_ALIGNED(16, uint8_t, predictor[384]);
@@ -303,6 +312,8 @@ typedef struct macroblockd {
YV12_BUFFER_CONFIG pre; /* Filtered copy of previous frame reconstruction */
YV12_BUFFER_CONFIG second_pre;
YV12_BUFFER_CONFIG dst;
+struct scale_factors scale_factor[2];
+struct scale_factors scale_factor_uv[2];
MODE_INFO *prev_mode_info_context;
MODE_INFO *mode_info_context;
......
@@ -318,25 +318,17 @@ void vp9_convolve_copy(const uint8_t *src, int src_stride,
const int16_t *filter_x, int filter_x_stride,
const int16_t *filter_y, int filter_y_stride,
int w, int h) {
-if (h == 16) {
+if (w == 16 && h == 16) {
vp9_copy_mem16x16(src, src_stride, dst, dst_stride);
-} else if (h == 8) {
+} else if (w == 8 && h == 8) {
vp9_copy_mem8x8(src, src_stride, dst, dst_stride);
-} else if (w == 8) {
+} else if (w == 8 && h == 4) {
vp9_copy_mem8x4(src, src_stride, dst, dst_stride);
} else {
-// 4x4
int r;
-for (r = 0; r < 4; ++r) {
-#if !(CONFIG_FAST_UNALIGNED)
-dst[0] = src[0];
-dst[1] = src[1];
-dst[2] = src[2];
-dst[3] = src[3];
-#else
-*(uint32_t *)dst = *(const uint32_t *)src;
-#endif
+for (r = h; r > 0; --r) {
+memcpy(dst, src, w);
src += src_stride;
dst += dst_stride;
}
......
@@ -71,6 +71,17 @@ static void setup_macroblock(MACROBLOCKD *xd, BLOCKSET bs) {
setup_block(&blockd[block + 4], stride, v, v2, stride,
((block - 16) >> 1) * 4 * stride + (block & 1) * 4, bs);
}
+// TODO(jkoleszar): this will move once we're actually scaling.
+xd->scale_factor[0].x_num = 1;
+xd->scale_factor[0].x_den = 1;
+xd->scale_factor[0].y_num = 1;
+xd->scale_factor[0].y_den = 1;
+xd->scale_factor[0].x_offset_q4 = 0;
+xd->scale_factor[0].y_offset_q4 = 0;
+xd->scale_factor[1] = xd->scale_factor[0];
+xd->scale_factor_uv[0] = xd->scale_factor[0];
+xd->scale_factor_uv[1] = xd->scale_factor[1];
}
void vp9_setup_block_dptrs(MACROBLOCKD *xd) {
......
@@ -23,4 +23,14 @@ typedef union int_mv {
MV as_mv;
} int_mv; /* facilitates faster equality tests and copies */
+struct mv32 {
+  int32_t row;
+  int32_t col;
+};
+typedef union int_mv32 {
+  uint64_t as_int;
+  struct mv32 as_mv;
+} int_mv32; /* facilitates faster equality tests and copies */
#endif // VP9_COMMON_VP9_MV_H_
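
As with int_mv above it, the new union lets a vector with 32-bit components be copied and compared as a single 64-bit word. A minimal usage sketch (variable names are illustrative, not from this commit):

int_mv32 a, b;
a.as_mv.row = 17;
a.as_mv.col = -4;
b.as_int = a.as_int;          /* one 64-bit store instead of two */
if (a.as_int == b.as_int) {   /* one comparison instead of two */
  /* motion vectors are identical */
}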
@@ -16,33 +16,16 @@
struct subpix_fn_table;
-extern void vp9_build_1st_inter16x16_predictors_mby(MACROBLOCKD *xd,
-uint8_t *dst_y,
-int dst_ystride,
-int clamp_mvs);
-extern void vp9_build_1st_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
-uint8_t *dst_u,
-uint8_t *dst_v,
-int dst_uvstride);
-extern void vp9_build_1st_inter16x16_predictors_mb(MACROBLOCKD *xd,
-uint8_t *dst_y,
-uint8_t *dst_u,
-uint8_t *dst_v,
-int dst_ystride,
-int dst_uvstride);
-extern void vp9_build_2nd_inter16x16_predictors_mby(MACROBLOCKD *xd,
+extern void vp9_build_inter16x16_predictors_mby(MACROBLOCKD *xd,
uint8_t *dst_y,
int dst_ystride);
-extern void vp9_build_2nd_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
+extern void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
uint8_t *dst_u,
uint8_t *dst_v,
int dst_uvstride);
-extern void vp9_build_2nd_inter16x16_predictors_mb(MACROBLOCKD *xd,
+extern void vp9_build_inter16x16_predictors_mb(MACROBLOCKD *xd,
uint8_t *dst_y,
uint8_t *dst_u,
uint8_t *dst_v,
@@ -65,22 +48,24 @@ extern void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x,
extern void vp9_build_inter_predictors_mb(MACROBLOCKD *xd);
extern void vp9_build_inter_predictors_b(BLOCKD *d, int pitch,
struct subpix_fn_table *sppf);
-extern void vp9_build_2nd_inter_predictors_b(BLOCKD *d, int pitch,
-struct subpix_fn_table *sppf);
extern void vp9_build_inter_predictors4b(MACROBLOCKD *xd, BLOCKD *d,
int pitch);
-extern void vp9_build_2nd_inter_predictors4b(MACROBLOCKD *xd,
-BLOCKD *d, int pitch);
extern void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd);
extern void vp9_setup_interp_filters(MACROBLOCKD *xd,
INTERPOLATIONFILTERTYPE filter,
VP9_COMMON *cm);
+void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
+uint8_t *dst, int dst_stride,
+const int_mv *mv_q3,
+const struct scale_factors *scale,
+int w, int h, int do_avg,
+const struct subpix_fn_table *subpix);
+void vp9_build_inter_predictor_q4(const uint8_t *src, int src_stride,
+uint8_t *dst, int dst_stride,
+const int_mv *fullpel_mv_q3,
+const int_mv *frac_mv_q4,
+const struct scale_factors *scale,
+int w, int h, int do_avg,
+const struct subpix_fn_table *subpix);
#endif // VP9_COMMON_VP9_RECONINTER_H_
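
The two new entry points differ in motion-vector precision: vp9_build_inter_predictor() takes a single eighth-pel (q3) vector, while the _q4 variant takes a full-pel part (still in q3 units, sub-pel bits cleared) plus a sixteenth-pel (q4) fractional part, and is used for chroma in the temporal-filter hunk at the end of this diff. A sketch of deriving that pair from an eighth-pel luma MV, mirroring the rounding visible there; the helper name is illustrative, not part of this commit:

/* Illustrative: halve the luma MV for chroma, then mask off the sub-pel
 * bits for the full-pel component, as temporal_filter_predictors_mb_c
 * does below. The fractional argument reuses the full-precision vector. */
static void split_chroma_mv(int mv_row, int mv_col,
                            int_mv *fullpel, int_mv *subpel) {
  subpel->as_mv.row = mv_row;
  subpel->as_mv.col = mv_col;
  fullpel->as_mv.row = (mv_row >> 1) & ~7;
  fullpel->as_mv.col = (mv_col >> 1) & ~7;
}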
@@ -177,23 +177,14 @@ static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd) {
xd->dst.y_stride,
xd->dst.uv_stride);
} else {
-vp9_build_1st_inter16x16_predictors_mb(xd,
+vp9_build_inter16x16_predictors_mb(xd,
xd->dst.y_buffer,
xd->dst.u_buffer,
xd->dst.v_buffer,
xd->dst.y_stride,
xd->dst.uv_stride);
-if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
-vp9_build_2nd_inter16x16_predictors_mb(xd,
-xd->dst.y_buffer,
-xd->dst.u_buffer,
-xd->dst.v_buffer,
-xd->dst.y_stride,
-xd->dst.uv_stride);
-}
#if CONFIG_COMP_INTERINTRA_PRED
-else if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {
+if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {
vp9_build_interintra_16x16_predictors_mb(xd,
xd->dst.y_buffer,
xd->dst.u_buffer,
......
@@ -2125,22 +2125,14 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t,
mbmi->mb_skip_coeff = 0;
} else {
-vp9_build_1st_inter16x16_predictors_mb(xd,
+vp9_build_inter16x16_predictors_mb(xd,
xd->dst.y_buffer,
xd->dst.u_buffer,
xd->dst.v_buffer,
xd->dst.y_stride,
xd->dst.uv_stride);
-if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
-vp9_build_2nd_inter16x16_predictors_mb(xd,
-xd->dst.y_buffer,
-xd->dst.u_buffer,
-xd->dst.v_buffer,
-xd->dst.y_stride,
-xd->dst.uv_stride);
-}
#if CONFIG_COMP_INTERINTRA_PRED
-else if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {
+if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {
vp9_build_interintra_16x16_predictors_mb(xd,
xd->dst.y_buffer,
xd->dst.u_buffer,
......
@@ -713,7 +713,7 @@ void vp9_encode_inter16x16y(MACROBLOCK *x) {
MACROBLOCKD *xd = &x->e_mbd;
BLOCK *b = &x->block[0];
-vp9_build_1st_inter16x16_predictors_mby(xd, xd->predictor, 16, 0);
+vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16);
vp9_subtract_mby(x->src_diff, *(b->base_src), xd->predictor, b->src_stride);
......
@@ -72,7 +72,7 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
}
vp9_set_mbmode_and_mvs(x, NEWMV, dst_mv);
-vp9_build_1st_inter16x16_predictors_mby(xd, xd->predictor, 16, 0);
+vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16);
best_err = vp9_sad16x16(xd->dst.y_buffer, xd->dst.y_stride,
xd->predictor, 16, INT_MAX);
@@ -292,6 +292,9 @@ static void update_mbgraph_frame_stats
int_mv arf_top_mv, gld_top_mv;
MODE_INFO mi_local;
+// Make sure the mi context starts in a consistent state.
+memset(&mi_local, 0, sizeof(mi_local));
// Set up limit values for motion vectors to prevent them extending outside the UMV borders
arf_top_mv.as_int = 0;
gld_top_mv.as_int = 0;
......
@@ -2115,9 +2115,22 @@ static int64_t encode_inter_mb_segment(MACROBLOCK *x,
BLOCK *be = &x->block[i];
int thisdistortion;
-vp9_build_inter_predictors_b(bd, 16, &xd->subpix);
-if (xd->mode_info_context->mbmi.second_ref_frame > 0)
-vp9_build_2nd_inter_predictors_b(bd, 16, &xd->subpix);
+vp9_build_inter_predictor(*(bd->base_pre) + bd->pre,
+bd->pre_stride,
+bd->predictor, 16,
+&bd->bmi.as_mv[0],
+&xd->scale_factor[0],
+4, 4, 0 /* no avg */, &xd->subpix);
+if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
+vp9_build_inter_predictor(*(bd->base_second_pre) + bd->pre,
+bd->pre_stride,
+bd->predictor, 16,
+&bd->bmi.as_mv[1],
+&xd->scale_factor[1],
+4, 4, 1 /* avg */, &xd->subpix);
+}
vp9_subtract_b(be, bd, 16);
x->fwd_txm4x4(be->src_diff, be->coeff, 32);
x->quantize_b_4x4(be, bd);
@@ -2159,14 +2172,25 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x,
int ib = vp9_i8x8_block[i];
if (labels[ib] == which_label) {
+const int use_second_ref =
+xd->mode_info_context->mbmi.second_ref_frame > 0;
+int which_mv;
int idx = (ib & 8) + ((ib & 2) << 1);
BLOCKD *bd = &xd->block[ib], *bd2 = &xd->block[idx];
BLOCK *be = &x->block[ib], *be2 = &x->block[idx];
int thisdistortion;
-vp9_build_inter_predictors4b(xd, bd, 16);
-if (xd->mode_info_context->mbmi.second_ref_frame > 0)
-vp9_build_2nd_inter_predictors4b(xd, bd, 16);
+for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
+uint8_t **base_pre = which_mv ? bd->base_second_pre : bd->base_pre;
+vp9_build_inter_predictor(*base_pre + bd->pre,
+bd->pre_stride,
+bd->predictor, 16,
+&bd->bmi.as_mv[which_mv],
+&xd->scale_factor[which_mv],
+8, 8, which_mv, &xd->subpix);
+}
vp9_subtract_4b_c(be, bd, 16);
if (xd->mode_info_context->mbmi.txfm_size == TX_4X4) {
@@ -3482,19 +3506,19 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
unsigned int sse, var;
int tmp_rate_y, tmp_rate_u, tmp_rate_v;
int tmp_dist_y, tmp_dist_u, tmp_dist_v;
-vp9_build_1st_inter16x16_predictors_mby(xd, xd->predictor, 16, 0);
-if (is_comp_pred)
-vp9_build_2nd_inter16x16_predictors_mby(xd, xd->predictor, 16);
+// TODO(jkoleszar): these 2 y/uv should be replaced with one call to
+// vp9_build_interintra_16x16_predictors_mb().
+vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16);
#if CONFIG_COMP_INTERINTRA_PRED
if (is_comp_interintra_pred) {
vp9_build_interintra_16x16_predictors_mby(xd, xd->predictor, 16);
}
#endif
-vp9_build_1st_inter16x16_predictors_mbuv(xd, xd->predictor + 256,
-xd->predictor + 320, 8);
-if (is_comp_pred)
-vp9_build_2nd_inter16x16_predictors_mbuv(xd, xd->predictor + 256,
+vp9_build_inter16x16_predictors_mbuv(xd, xd->predictor + 256,
+xd->predictor + 320, 8);
#if CONFIG_COMP_INTERINTRA_PRED
if (is_comp_interintra_pred) {
vp9_build_interintra_16x16_predictors_mbuv(xd, xd->predictor + 256,
@@ -3598,18 +3622,15 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
xd->dst.y_stride,
xd->dst.uv_stride);
} else {
-vp9_build_1st_inter16x16_predictors_mby(xd, xd->predictor, 16, 0);
-if (is_comp_pred)
-vp9_build_2nd_inter16x16_predictors_mby(xd, xd->predictor, 16);
+// TODO(jkoleszar): These y/uv fns can be replaced with their mb
+// equivalent
+vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16);
#if CONFIG_COMP_INTERINTRA_PRED
if (is_comp_interintra_pred) {
vp9_build_interintra_16x16_predictors_mby(xd, xd->predictor, 16);
}
#endif
-vp9_build_1st_inter16x16_predictors_mbuv(xd, &xd->predictor[256],
-&xd->predictor[320], 8);
-if (is_comp_pred)
-vp9_build_2nd_inter16x16_predictors_mbuv(xd, &xd->predictor[256],
+vp9_build_inter16x16_predictors_mbuv(xd, &xd->predictor[256],
+&xd->predictor[320], 8);
#if CONFIG_COMP_INTERINTRA_PRED
if (is_comp_interintra_pred) {
......
@@ -12,6 +12,7 @@
#include <limits.h>
#include "vp9/common/vp9_onyxc_int.h"
+#include "vp9/common/vp9_reconinter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_quantize.h"
@@ -42,40 +43,35 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd,
int mv_row,
int mv_col,
uint8_t *pred) {
-int offset;
-uint8_t *yptr, *uptr, *vptr;
-int omv_row, omv_col;
-// Y
-yptr = y_mb_ptr + (mv_row >> 3) * stride + (mv_col >> 3);
-xd->subpix.predict[!!(mv_col & 7)][!!(mv_row & 7)][0](
-yptr, stride, &pred[0], 16,
-xd->subpix.filter_x[(mv_col & 7) << 1], xd->subpix.x_step_q4,
-xd->subpix.filter_y[(mv_row & 7) << 1], xd->subpix.y_step_q4,
-16, 16);
-// U & V
-omv_row = mv_row;
-omv_col = mv_col;
-mv_row >>= 1;
-mv_col >>= 1;
+const int which_mv = 0;
+int_mv subpel_mv;
+int_mv fullpel_mv;
+subpel_mv.as_mv.row = mv_row;
+subpel_mv.as_mv.col = mv_col;
+// TODO(jkoleszar): Make this rounding consistent with the rest of the code
+fullpel_mv.as_mv.row = (mv_row >> 1) & ~7;
+fullpel_mv.as_mv.col = (mv_col >> 1) & ~7;
+vp9_build_inter_predictor(y_mb_ptr, stride,
+&pred[0], 16,
+&subpel_mv,
+&xd->scale_factor[which_mv],
+16, 16, which_mv, &xd->subpix);
stride = (stride + 1) >> 1;
-offset = (mv_row >> 3) * stride + (mv_col >> 3);
-uptr = u_mb_ptr + offset;
-vptr = v_mb_ptr + offset;
-xd->subpix.predict[!!(omv_col & 15)][!!(omv_row & 15)][0](
-uptr, stride, &pred[256], 8,
-xd->subpix.filter_x[(omv_col & 15)], xd->subpix.x_step_q4,
-xd->subpix.filter_y[(omv_row & 15)], xd->subpix.y_step_q4,
-8, 8);
-xd->subpix.predict[!!(omv_col & 15)][!!(omv_row & 15)][0](
-vptr, stride, &pred[320], 8,
-xd->subpix.filter_x[(omv_col & 15)], xd->subpix.x_step_q4,
-xd->subpix.filter_y[(omv_row & 15)], xd->subpix.y_step_q4,
-8, 8);
+vp9_build_inter_predictor_q4(u_mb_ptr, stride,
+&pred[256], 8,
+&fullpel_mv, &subpel_mv,
+&xd->scale_factor_uv[which_mv],
+8, 8, which_mv, &xd->subpix);
+vp9_build_inter_predictor_q4(v_mb_ptr, stride,
+&pred[320], 8,
+&fullpel_mv, &subpel_mv,
+&xd->scale_factor_uv[which_mv],
+8, 8, which_mv, &xd->subpix);
}
void vp9_temporal_filter_apply_c(uint8_t *frame1,
......