Commit 0d3c3d3c authored by Deb Mukherjee

Adds high bitdepth convolve, interpred & scaling

Change-Id: Ie51c352a6b250547207cbc1ebba833a01ed053e3
parent d3a7e677
This diff is collapsed.
This diff is collapsed.
......@@ -23,6 +23,14 @@ typedef void (*convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride,
const int16_t *filter_y, int y_step_q4,
int w, int h);
#if CONFIG_VP9_HIGHBITDEPTH
// Function pointer type for the high-bitdepth convolve routines. It takes the
// convolve_fn_t-style arguments (source and destination with strides, x/y
// filter taps and step sizes, block width and height) plus a trailing bd.
// NOTE(review): src/dst are typed uint8_t* but presumably address 16-bit
// samples, and bd is presumably the sample bit depth -- confirm.
typedef void (*high_convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h, int bd);
#endif
#ifdef __cplusplus
} // extern "C"
#endif
......
......@@ -63,6 +63,53 @@ static void build_mc_border(const uint8_t *src, int src_stride,
} while (--b_h);
}
#if CONFIG_VP9_HIGHBITDEPTH
// Writes a b_w x b_h block of 16-bit samples into dst, taken from a w x h
// reference frame and extended past the frame edges by replication.
//
// src8 is converted with CONVERT_TO_SHORTPTR and treated as addressing sample
// (x, y) of the frame, so stepping back by x samples and y rows reaches the
// frame origin. Rows above the frame reuse row 0, rows below it reuse row
// h - 1; columns left of the frame repeat the first sample of the row and
// columns right of it repeat sample w - 1.
//
// At least one row is always written (the loop tests b_h after the body).
static void high_build_mc_border(const uint8_t *src8, int src_stride,
                                 uint16_t *dst, int dst_stride,
                                 int x, int y, int b_w, int b_h,
                                 int w, int h) {
  const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  // First sample of the frame row used for the current output row.
  const uint16_t *row = src - x - y * src_stride;
  int pad_left, pad_right, body;

  if (y >= h) {
    row += (h - 1) * src_stride;
  } else if (y > 0) {
    row += y * src_stride;
  }

  // The horizontal split depends only on x, b_w and w, so it is the same for
  // every row of the block.
  pad_left = x < 0 ? -x : 0;
  if (pad_left > b_w) {
    pad_left = b_w;
  }

  pad_right = x + b_w > w ? x + b_w - w : 0;
  if (pad_right > b_w) {
    pad_right = b_w;
  }

  body = b_w - pad_left - pad_right;

  do {
    if (pad_left) {
      vpx_memset16(dst, row[0], pad_left);
    }

    if (body) {
      memcpy(dst + pad_left, row + x + pad_left, body * sizeof(uint16_t));
    }

    if (pad_right) {
      vpx_memset16(dst + pad_left + body, row[w - 1], pad_right);
    }

    dst += dst_stride;
    ++y;

    // Only move to the next frame row while the new y is inside (0, h);
    // otherwise keep replicating the clamped top or bottom row.
    if (y > 0 && y < h) {
      row += src_stride;
    }
  } while (--b_h);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
static void inter_predictor(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const int subpel_x,
......@@ -97,6 +144,42 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
sf, w, h, ref, kernel, sf->x_step_q4, sf->y_step_q4);
}
#if CONFIG_VP9_HIGHBITDEPTH
static void high_inter_predictor(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const int subpel_x,
const int subpel_y,
const struct scale_factors *sf,
int w, int h, int ref,
const InterpKernel *kernel,
int xs, int ys, int bd) {
sf->high_predict[subpel_x != 0][subpel_y != 0][ref](
src, src_stride, dst, dst_stride,
kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd);
}
// Builds a w x h high-bitdepth inter prediction from src into dst.
//
// src_mv is used as-is when precision is MV_PRECISION_Q4 and doubled
// otherwise. It is then scaled with vp9_scale_mv() for position (x, y). The
// bits above SUBPEL_BITS of the scaled vector move the source pointer; the
// bits under SUBPEL_MASK select the filter kernels. The prediction itself is
// done by high_inter_predictor() with sf's x/y step sizes and bd.
void vp9_high_build_inter_predictor(const uint8_t *src, int src_stride,
                                    uint8_t *dst, int dst_stride,
                                    const MV *src_mv,
                                    const struct scale_factors *sf,
                                    int w, int h, int ref,
                                    const InterpKernel *kernel,
                                    enum mv_precision precision,
                                    int x, int y, int bd) {
  const int mult = (precision == MV_PRECISION_Q4) ? 1 : 2;
  const MV mv_q4 = { src_mv->row * mult, src_mv->col * mult };
  const MV32 scaled = vp9_scale_mv(&mv_q4, x, y, sf);
  const int frac_x = scaled.col & SUBPEL_MASK;
  const int frac_y = scaled.row & SUBPEL_MASK;
  // Whole-sample displacement of the reference block.
  const int offset = (scaled.row >> SUBPEL_BITS) * src_stride +
                     (scaled.col >> SUBPEL_BITS);

  high_inter_predictor(src + offset, src_stride, dst, dst_stride,
                       frac_x, frac_y, sf, w, h, ref, kernel,
                       sf->x_step_q4, sf->y_step_q4, bd);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
// Divides value by 4, rounding to the nearest integer with halves rounded
// away from zero (e.g. 2 -> 1, -2 -> -1, 1 -> 0).
static INLINE int round_mv_comp_q4(int value) {
  // Bias by half the divisor in the direction of the sign so that the
  // truncating division rounds to nearest.
  const int bias = value < 0 ? -2 : 2;
  return (value + bias) / 4;
}
......@@ -222,8 +305,19 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride
+ (scaled_mv.col >> SUBPEL_BITS);
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
high_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys,
xd->bd);
} else {
inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys);
}
#else
inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys);
#endif // CONFIG_VP9_HIGHBITDEPTH
}
}
......@@ -393,16 +487,64 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
y0 < 0 || y0 > frame_height - 1 || y1 < 0 || y1 > frame_height - 1) {
uint8_t *buf_ptr1 = ref_frame + y0 * pre_buf->stride + x0;
// Extend the border.
build_mc_border(buf_ptr1, pre_buf->stride, xd->mc_buf, x1 - x0 + 1,
x0, y0, x1 - x0 + 1, y1 - y0 + 1, frame_width,
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
high_build_mc_border(buf_ptr1,
pre_buf->stride,
xd->mc_buf_high,
x1 - x0 + 1,
x0,
y0,
x1 - x0 + 1,
y1 - y0 + 1,
frame_width,
frame_height);
buf_stride = x1 - x0 + 1;
buf_ptr = CONVERT_TO_BYTEPTR(xd->mc_buf_high) +
y_pad * 3 * buf_stride + x_pad * 3;
} else {
build_mc_border(buf_ptr1,
pre_buf->stride,
xd->mc_buf,
x1 - x0 + 1,
x0,
y0,
x1 - x0 + 1,
y1 - y0 + 1,
frame_width,
frame_height);
buf_stride = x1 - x0 + 1;
buf_ptr = xd->mc_buf + y_pad * 3 * buf_stride + x_pad * 3;
}
#else
build_mc_border(buf_ptr1,
pre_buf->stride,
xd->mc_buf,
x1 - x0 + 1,
x0,
y0,
x1 - x0 + 1,
y1 - y0 + 1,
frame_width,
frame_height);
buf_stride = x1 - x0 + 1;
buf_ptr = xd->mc_buf + y_pad * 3 * buf_stride + x_pad * 3;
#endif // CONFIG_VP9_HIGHBITDEPTH
}
}
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
high_inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x,
subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd);
} else {
inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x,
subpel_y, sf, w, h, ref, kernel, xs, ys);
}
#else
inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x,
subpel_y, sf, w, h, ref, kernel, xs, ys);
#endif // CONFIG_VP9_HIGHBITDEPTH
}
}
......
......@@ -39,6 +39,17 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
enum mv_precision precision,
int x, int y);
#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth variant of vp9_build_inter_predictor. It takes one additional
// trailing argument, bd.
// NOTE(review): bd is presumably the sample bit depth, and src/dst presumably
// address 16-bit samples despite the uint8_t* type -- confirm.
void vp9_high_build_inter_predictor(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const MV *mv_q3,
const struct scale_factors *sf,
int w, int h, int do_avg,
const InterpKernel *kernel,
enum mv_precision precision,
int x, int y, int bd);
#endif
static INLINE int scaled_buffer_offset(int x_offset, int y_offset, int stride,
const struct scale_factors *sf) {
const int x = sf ? sf->scale_value_x(x_offset, sf) : x_offset;
......
......@@ -606,6 +606,33 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp9_high_dc_128_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
specialize qw/vp9_high_dc_128_predictor_32x32/;
#
# Sub Pixel Filters
#
# Prototypes for the high-bitdepth convolve functions. Every entry below uses
# the same argument list, ending in "int bps". The vp9_high_convolve8* entries
# are specialized with $sse2_x86_64; the copy and avg entries list no
# specializations.
add_proto qw/void vp9_high_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
specialize qw/vp9_high_convolve_copy/;
add_proto qw/void vp9_high_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
specialize qw/vp9_high_convolve_avg/;
add_proto qw/void vp9_high_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
specialize qw/vp9_high_convolve8/, "$sse2_x86_64";
add_proto qw/void vp9_high_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
specialize qw/vp9_high_convolve8_horiz/, "$sse2_x86_64";
add_proto qw/void vp9_high_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
specialize qw/vp9_high_convolve8_vert/, "$sse2_x86_64";
add_proto qw/void vp9_high_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
specialize qw/vp9_high_convolve8_avg/, "$sse2_x86_64";
add_proto qw/void vp9_high_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
specialize qw/vp9_high_convolve8_avg_horiz/, "$sse2_x86_64";
add_proto qw/void vp9_high_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
specialize qw/vp9_high_convolve8_avg_vert/, "$sse2_x86_64";
#
# dct
#
......
......@@ -43,9 +43,16 @@ MV32 vp9_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf) {
return res;
}
#if CONFIG_VP9_HIGHBITDEPTH
void vp9_setup_scale_factors_for_frame(struct scale_factors *sf,
int other_w, int other_h,
int this_w, int this_h,
int use_high) {
#else
void vp9_setup_scale_factors_for_frame(struct scale_factors *sf,
int other_w, int other_h,
int this_w, int this_h) {
#endif
if (!valid_ref_frame_size(other_w, other_h, this_w, this_h)) {
sf->x_scale_fp = REF_INVALID_SCALE;
sf->y_scale_fp = REF_INVALID_SCALE;
......@@ -111,4 +118,48 @@ void vp9_setup_scale_factors_for_frame(struct scale_factors *sf,
// 2D subpel motion always gets filtered in both directions
sf->predict[1][1][0] = vp9_convolve8;
sf->predict[1][1][1] = vp9_convolve8_avg;
#if CONFIG_VP9_HIGHBITDEPTH
if (use_high) {
if (sf->x_step_q4 == 16) {
if (sf->y_step_q4 == 16) {
// No scaling in either direction.
sf->high_predict[0][0][0] = vp9_high_convolve_copy;
sf->high_predict[0][0][1] = vp9_high_convolve_avg;
sf->high_predict[0][1][0] = vp9_high_convolve8_vert;
sf->high_predict[0][1][1] = vp9_high_convolve8_avg_vert;
sf->high_predict[1][0][0] = vp9_high_convolve8_horiz;
sf->high_predict[1][0][1] = vp9_high_convolve8_avg_horiz;
} else {
// No scaling in x direction. Must always scale in the y direction.
sf->high_predict[0][0][0] = vp9_high_convolve8_vert;
sf->high_predict[0][0][1] = vp9_high_convolve8_avg_vert;
sf->high_predict[0][1][0] = vp9_high_convolve8_vert;
sf->high_predict[0][1][1] = vp9_high_convolve8_avg_vert;
sf->high_predict[1][0][0] = vp9_high_convolve8;
sf->high_predict[1][0][1] = vp9_high_convolve8_avg;
}
} else {
if (sf->y_step_q4 == 16) {
// No scaling in the y direction. Must always scale in the x direction.
sf->high_predict[0][0][0] = vp9_high_convolve8_horiz;
sf->high_predict[0][0][1] = vp9_high_convolve8_avg_horiz;
sf->high_predict[0][1][0] = vp9_high_convolve8;
sf->high_predict[0][1][1] = vp9_high_convolve8_avg;
sf->high_predict[1][0][0] = vp9_high_convolve8_horiz;
sf->high_predict[1][0][1] = vp9_high_convolve8_avg_horiz;
} else {
// Must always scale in both directions.
sf->high_predict[0][0][0] = vp9_high_convolve8;
sf->high_predict[0][0][1] = vp9_high_convolve8_avg;
sf->high_predict[0][1][0] = vp9_high_convolve8;
sf->high_predict[0][1][1] = vp9_high_convolve8_avg;
sf->high_predict[1][0][0] = vp9_high_convolve8;
sf->high_predict[1][0][1] = vp9_high_convolve8_avg;
}
}
// 2D subpel motion always gets filtered in both directions.
sf->high_predict[1][1][0] = vp9_high_convolve8;
sf->high_predict[1][1][1] = vp9_high_convolve8_avg;
}
#endif
}
......@@ -32,13 +32,23 @@ struct scale_factors {
int (*scale_value_y)(int val, const struct scale_factors *sf);
convolve_fn_t predict[2][2][2]; // horiz, vert, avg
#if CONFIG_VP9_HIGHBITDEPTH
high_convolve_fn_t high_predict[2][2][2]; // horiz, vert, avg
#endif
};
MV32 vp9_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf);
// vp9_setup_scale_factors_for_frame has two signatures: when
// CONFIG_VP9_HIGHBITDEPTH is enabled it takes an extra trailing use_high
// argument; the remaining parameters are the same in both configurations.
// Callers must therefore be guarded by the same #if.
#if CONFIG_VP9_HIGHBITDEPTH
void vp9_setup_scale_factors_for_frame(struct scale_factors *sf,
int other_w, int other_h,
int this_w, int this_h,
int use_high);
#else
void vp9_setup_scale_factors_for_frame(struct scale_factors *sf,
int other_w, int other_h,
int this_w, int this_h);
#endif
static INLINE int vp9_is_valid_scale(const struct scale_factors *sf) {
return sf->x_scale_fp != REF_INVALID_SCALE &&
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -1265,10 +1265,18 @@ static size_t read_uncompressed_header(VP9Decoder *pbi,
for (i = 0; i < REFS_PER_FRAME; ++i) {
RefBuffer *const ref_buf = &cm->frame_refs[i];
#if CONFIG_VP9_HIGHBITDEPTH
vp9_setup_scale_factors_for_frame(&ref_buf->sf,
ref_buf->buf->y_crop_width,
ref_buf->buf->y_crop_height,
cm->width, cm->height,
cm->use_highbitdepth);
#else
vp9_setup_scale_factors_for_frame(&ref_buf->sf,
ref_buf->buf->y_crop_width,
ref_buf->buf->y_crop_height,
cm->width, cm->height);
#endif
if (vp9_is_scaled(&ref_buf->sf))
vp9_extend_frame_borders(ref_buf->buf);
}
......
......@@ -2767,10 +2767,17 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
RefBuffer *const ref_buf = &cm->frame_refs[ref_frame - 1];
ref_buf->buf = buf;
ref_buf->idx = idx;
#if CONFIG_VP9_HIGHBITDEPTH
vp9_setup_scale_factors_for_frame(&ref_buf->sf,
buf->y_crop_width, buf->y_crop_height,
cm->width, cm->height,
(buf->flags & YV12_FLAG_HIGHBITDEPTH) ?
1 : 0);
#else
vp9_setup_scale_factors_for_frame(&ref_buf->sf,
buf->y_crop_width, buf->y_crop_height,
cm->width, cm->height);
#endif
if (vp9_is_scaled(&ref_buf->sf))
vp9_extend_frame_borders(buf);
}
......
......@@ -454,12 +454,20 @@ void vp9_temporal_filter(VP9_COMP *cpi, int distance) {
// In spatial svc the scaling factors might be less than 1/2. So we will use
// non-normative scaling.
int frame_used = 0;
#if CONFIG_VP9_HIGHBITDEPTH
vp9_setup_scale_factors_for_frame(&sf,
get_frame_new_buffer(cm)->y_crop_width,
get_frame_new_buffer(cm)->y_crop_height,
get_frame_new_buffer(cm)->y_crop_width,
get_frame_new_buffer(cm)->y_crop_height,
cm->use_highbitdepth);
#else
vp9_setup_scale_factors_for_frame(&sf,
get_frame_new_buffer(cm)->y_crop_width,
get_frame_new_buffer(cm)->y_crop_height,
get_frame_new_buffer(cm)->y_crop_width,
get_frame_new_buffer(cm)->y_crop_height);
#endif
for (frame = 0; frame < frames_to_blur; ++frame) {
if (cm->mi_cols * MI_SIZE != frames[frame]->y_width ||
cm->mi_rows * MI_SIZE != frames[frame]->y_height) {
......@@ -481,11 +489,20 @@ void vp9_temporal_filter(VP9_COMP *cpi, int distance) {
}
} else {
// ARF is produced at the native frame size and resized when coded.
#if CONFIG_VP9_HIGHBITDEPTH
vp9_setup_scale_factors_for_frame(&sf,
frames[0]->y_crop_width,
frames[0]->y_crop_height,
frames[0]->y_crop_width,
frames[0]->y_crop_height,
cm->use_highbitdepth);
#else
vp9_setup_scale_factors_for_frame(&sf,
frames[0]->y_crop_width,
frames[0]->y_crop_height,
frames[0]->y_crop_width,
frames[0]->y_crop_height);
#endif
}
temporal_filter_iterate_c(cpi, frames, frames_to_blur,
......
......@@ -91,6 +91,8 @@ endif
# Assembly sources added to the SSE2 source list only when
# CONFIG_VP9_HIGHBITDEPTH is "yes".
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_high_intrapred_sse2.asm
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_high_subpixel_8t_sse2.asm
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_high_subpixel_bilinear_sse2.asm
endif
# common (c)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment