Commit 505f0068 authored by Fergus Simpson's avatar Fergus Simpson Committed by Debargha Mukherjee
Browse files

Fix frame scaling prediction

Use higher precision offsets for more accurate predictor
generation when references are at a different scale from
the coded frame.

Change-Id: I4c2c0ec67fa4824273cb3bd072211f41ac7802e8
parent 15836145
......@@ -41,6 +41,29 @@ static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride,
}
}
// Horizontal pass of the scaled convolution for a w x h block.
//
// Horizontal positions carry SCALE_SUBPEL_BITS fractional bits. For every
// output sample the integer part of the position selects the first source
// sample and the fractional part, reduced by SCALE_EXTRA_BITS, selects the
// kernel in x_filters. The SUBPEL_TAPS-tap sum is rounded by FILTER_BITS and
// clipped before being stored. The position starts at x0_qn on every row and
// advances by x_step_qn per output sample.
static void convolve_horiz_scale_c(const uint8_t *src, ptrdiff_t src_stride,
                                   uint8_t *dst, ptrdiff_t dst_stride,
                                   const InterpKernel *x_filters, int x0_qn,
                                   int x_step_qn, int w, int h) {
  // Begin SUBPEL_TAPS / 2 - 1 samples before the nominal position so that the
  // tap window extends to both sides of it.
  const uint8_t *src_row = src - (SUBPEL_TAPS / 2 - 1);
  uint8_t *dst_row = dst;
  int row;

  for (row = 0; row < h; ++row) {
    int pos_qn = x0_qn;
    int col;

    for (col = 0; col < w; ++col) {
      const int kernel_idx = (pos_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
      const uint8_t *const window = src_row + (pos_qn >> SCALE_SUBPEL_BITS);
      const int16_t *kernel;
      int acc = 0;
      int tap;

      assert(kernel_idx < SUBPEL_SHIFTS);
      kernel = x_filters[kernel_idx];
      for (tap = 0; tap < SUBPEL_TAPS; ++tap) acc += window[tap] * kernel[tap];

      dst_row[col] = clip_pixel(ROUND_POWER_OF_TWO(acc, FILTER_BITS));
      pos_qn += x_step_qn;
    }

    src_row += src_stride;
    dst_row += dst_stride;
  }
}
static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
const InterpKernel *x_filters, int x0_q4,
......@@ -63,6 +86,30 @@ static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride,
}
}
// Horizontal pass of the scaled convolution that averages into dst.
//
// Works like the non-averaging horizontal pass (positions with
// SCALE_SUBPEL_BITS fractional bits, kernel chosen from the fractional part
// reduced by SCALE_EXTRA_BITS), but each filtered, clipped sample is combined
// with the value already in dst using a rounded average.
static void convolve_avg_horiz_scale_c(const uint8_t *src, ptrdiff_t src_stride,
                                       uint8_t *dst, ptrdiff_t dst_stride,
                                       const InterpKernel *x_filters, int x0_qn,
                                       int x_step_qn, int w, int h) {
  // Begin SUBPEL_TAPS / 2 - 1 samples before the nominal position so that the
  // tap window extends to both sides of it.
  const uint8_t *src_row = src - (SUBPEL_TAPS / 2 - 1);
  uint8_t *dst_row = dst;
  int row;

  for (row = 0; row < h; ++row) {
    int pos_qn = x0_qn;
    int col;

    for (col = 0; col < w; ++col) {
      const int kernel_idx = (pos_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
      const uint8_t *const window = src_row + (pos_qn >> SCALE_SUBPEL_BITS);
      const int16_t *kernel;
      int acc = 0;
      int tap;

      assert(kernel_idx < SUBPEL_SHIFTS);
      kernel = x_filters[kernel_idx];
      for (tap = 0; tap < SUBPEL_TAPS; ++tap) acc += window[tap] * kernel[tap];

      // Rounded mean of the existing destination sample and the new one.
      dst_row[col] = ROUND_POWER_OF_TWO(
          dst_row[col] + clip_pixel(ROUND_POWER_OF_TWO(acc, FILTER_BITS)), 1);
      pos_qn += x_step_qn;
    }

    src_row += src_stride;
    dst_row += dst_stride;
  }
}
static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
const InterpKernel *y_filters, int y0_q4,
......@@ -86,6 +133,31 @@ static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride,
}
}
// Vertical pass of the scaled convolution for a w x h block.
//
// The block is processed one column at a time. Vertical positions carry
// SCALE_SUBPEL_BITS fractional bits: the integer part selects the first
// source row and the fractional part, reduced by SCALE_EXTRA_BITS, selects
// the kernel in y_filters. The SUBPEL_TAPS-tap sum is rounded by FILTER_BITS
// and clipped before being stored. The position starts at y0_qn for every
// column and advances by y_step_qn per output row.
static void convolve_vert_scale_c(const uint8_t *src, ptrdiff_t src_stride,
                                  uint8_t *dst, ptrdiff_t dst_stride,
                                  const InterpKernel *y_filters, int y0_qn,
                                  int y_step_qn, int w, int h) {
  // Begin SUBPEL_TAPS / 2 - 1 rows above the nominal position so that the tap
  // window extends to both sides of it.
  const uint8_t *src_col = src - src_stride * (SUBPEL_TAPS / 2 - 1);
  uint8_t *dst_col = dst;
  int col;

  for (col = 0; col < w; ++col) {
    int pos_qn = y0_qn;
    int row;

    for (row = 0; row < h; ++row) {
      const uint8_t *const window =
          src_col + (pos_qn >> SCALE_SUBPEL_BITS) * src_stride;
      const int16_t *const kernel =
          y_filters[(pos_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS];
      int acc = 0;
      int tap;

      for (tap = 0; tap < SUBPEL_TAPS; ++tap)
        acc += window[tap * src_stride] * kernel[tap];

      dst_col[row * dst_stride] =
          clip_pixel(ROUND_POWER_OF_TWO(acc, FILTER_BITS));
      pos_qn += y_step_qn;
    }

    ++src_col;
    ++dst_col;
  }
}
static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
const InterpKernel *y_filters, int y0_q4,
......@@ -112,6 +184,34 @@ static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride,
}
}
// Vertical pass of the scaled convolution that averages into dst.
//
// Works like the non-averaging vertical pass (column by column, positions
// with SCALE_SUBPEL_BITS fractional bits, kernel chosen from the fractional
// part reduced by SCALE_EXTRA_BITS), but each filtered, clipped sample is
// combined with the value already in dst using a rounded average.
static void convolve_avg_vert_scale_c(const uint8_t *src, ptrdiff_t src_stride,
                                      uint8_t *dst, ptrdiff_t dst_stride,
                                      const InterpKernel *y_filters, int y0_qn,
                                      int y_step_qn, int w, int h) {
  // Begin SUBPEL_TAPS / 2 - 1 rows above the nominal position so that the tap
  // window extends to both sides of it.
  const uint8_t *src_col = src - src_stride * (SUBPEL_TAPS / 2 - 1);
  uint8_t *dst_col = dst;
  int col;

  for (col = 0; col < w; ++col) {
    int pos_qn = y0_qn;
    int row;

    for (row = 0; row < h; ++row) {
      const uint8_t *const window =
          src_col + (pos_qn >> SCALE_SUBPEL_BITS) * src_stride;
      const int16_t *const kernel =
          y_filters[(pos_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS];
      int acc = 0;
      int tap;

      for (tap = 0; tap < SUBPEL_TAPS; ++tap)
        acc += window[tap * src_stride] * kernel[tap];

      // Rounded mean of the existing destination sample and the new one.
      dst_col[row * dst_stride] = ROUND_POWER_OF_TWO(
          dst_col[row * dst_stride] +
              clip_pixel(ROUND_POWER_OF_TWO(acc, FILTER_BITS)),
          1);
      pos_qn += y_step_qn;
    }

    ++src_col;
    ++dst_col;
  }
}
static void convolve(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
ptrdiff_t dst_stride, const InterpKernel *const x_filters,
int x0_q4, int x_step_q4,
......@@ -146,6 +246,41 @@ static void convolve(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
dst_stride, y_filters, y0_q4, y_step_q4, w, h);
}
static void convolve_scale_c(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
const InterpKernel *const x_filters, int x0_qn,
int x_step_qn, const InterpKernel *const y_filters,
int y0_qn, int y_step_qn, int w, int h) {
// TODO(afergs): Update comment here
// Note: Fixed size intermediate buffer, temp, places limits on parameters.
// 2d filtering proceeds in 2 steps:
// (1) Interpolate horizontally into an intermediate buffer, temp.
// (2) Interpolate temp vertically to derive the sub-pixel result.
// Deriving the maximum number of rows in the temp buffer (135):
// --Smallest scaling factor is x1/2 ==> y_step_qn = 32 (Normative).
// --Largest block size is 64x64 pixels.
// --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
// original frame (in 1/16th pixel units).
// --Must round-up because block may be located at sub-pixel position.
// --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
// --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
uint8_t temp[MAX_EXT_SIZE * MAX_SB_SIZE];
int intermediate_height =
(((h - 1) * y_step_qn + y0_qn) >> SCALE_SUBPEL_BITS) + SUBPEL_TAPS;
assert(w <= MAX_SB_SIZE);
assert(h <= MAX_SB_SIZE);
assert(y_step_qn <= SCALE_SUBPEL_BITS * 2);
assert(x_step_qn <= SCALE_SUBPEL_BITS * 2);
convolve_horiz_scale_c(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride,
temp, MAX_SB_SIZE, x_filters, x0_qn, x_step_qn, w,
intermediate_height);
convolve_vert_scale_c(temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1), MAX_SB_SIZE,
dst, dst_stride, y_filters, y0_qn, y_step_qn, w, h);
}
static const InterpKernel *get_filter_base(const int16_t *filter) {
// NOTE: This assumes that the filter table is 256-byte aligned.
// TODO(agrange) Modify to make independent of table alignment.
......@@ -171,6 +306,21 @@ void aom_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
w, h);
}
// C entry point for the horizontal-only scaled 8-tap convolution.
//
// filter_x is converted to a kernel table with get_filter_base(); subpel_x is
// the starting horizontal offset and x_step_qn the per-sample step. The
// vertical arguments (filter_y, subpel_y, y_step_qn) are not used.
void aom_convolve8_horiz_scale_c(const uint8_t *src, ptrdiff_t src_stride,
                                 uint8_t *dst, ptrdiff_t dst_stride,
                                 const int16_t *filter_x, int subpel_x,
                                 int x_step_qn, const int16_t *filter_y,
                                 int subpel_y, int y_step_qn, int w, int h) {
  // Silence unused-parameter warnings for the vertical arguments.
  (void)filter_y;
  (void)subpel_y;
  (void)y_step_qn;

  convolve_horiz_scale_c(src, src_stride, dst, dst_stride,
                         get_filter_base(filter_x), subpel_x, x_step_qn, w, h);
}
void aom_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
const int16_t *filter_x, int x_step_q4,
......@@ -186,6 +336,22 @@ void aom_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
x_step_q4, w, h);
}
// C entry point for the horizontal-only scaled 8-tap convolution that
// averages the result into dst.
//
// filter_x is converted to a kernel table with get_filter_base(); subpel_x is
// the starting horizontal offset and x_step_qn the per-sample step. The
// vertical arguments (filter_y, subpel_y, y_step_qn) are not used.
void aom_convolve8_avg_horiz_scale_c(const uint8_t *src, ptrdiff_t src_stride,
                                     uint8_t *dst, ptrdiff_t dst_stride,
                                     const int16_t *filter_x, int subpel_x,
                                     int x_step_qn, const int16_t *filter_y,
                                     int subpel_y, int y_step_qn, int w,
                                     int h) {
  // Silence unused-parameter warnings for the vertical arguments.
  (void)filter_y;
  (void)subpel_y;
  (void)y_step_qn;

  convolve_avg_horiz_scale_c(src, src_stride, dst, dst_stride,
                             get_filter_base(filter_x), subpel_x, x_step_qn, w,
                             h);
}
void aom_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
const int16_t *filter_x, int x_step_q4,
......@@ -201,6 +367,21 @@ void aom_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
w, h);
}
// C entry point for the vertical-only scaled 8-tap convolution.
//
// filter_y is converted to a kernel table with get_filter_base(); subpel_y is
// the starting vertical offset and y_step_qn the per-row step. The horizontal
// arguments (filter_x, subpel_x, x_step_qn) are not used.
void aom_convolve8_vert_scale_c(const uint8_t *src, ptrdiff_t src_stride,
                                uint8_t *dst, ptrdiff_t dst_stride,
                                const int16_t *filter_x, int subpel_x,
                                int x_step_qn, const int16_t *filter_y,
                                int subpel_y, int y_step_qn, int w, int h) {
  // Silence unused-parameter warnings for the horizontal arguments.
  (void)filter_x;
  (void)subpel_x;
  (void)x_step_qn;

  convolve_vert_scale_c(src, src_stride, dst, dst_stride,
                        get_filter_base(filter_y), subpel_y, y_step_qn, w, h);
}
void aom_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
const int16_t *filter_x, int x_step_q4,
......@@ -216,6 +397,21 @@ void aom_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
y_step_q4, w, h);
}
// C entry point for the vertical-only scaled 8-tap convolution that averages
// the result into dst.
//
// filter_y is converted to a kernel table with get_filter_base(); subpel_y is
// the starting vertical offset and y_step_qn the per-row step. The horizontal
// arguments (filter_x, subpel_x, x_step_qn) are not used.
void aom_convolve8_avg_vert_scale_c(const uint8_t *src, ptrdiff_t src_stride,
                                    uint8_t *dst, ptrdiff_t dst_stride,
                                    const int16_t *filter_x, int subpel_x,
                                    int x_step_qn, const int16_t *filter_y,
                                    int subpel_y, int y_step_qn, int w, int h) {
  // Silence unused-parameter warnings for the horizontal arguments.
  (void)filter_x;
  (void)subpel_x;
  (void)x_step_qn;

  convolve_avg_vert_scale_c(src, src_stride, dst, dst_stride,
                            get_filter_base(filter_y), subpel_y, y_step_qn, w,
                            h);
}
void aom_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
ptrdiff_t dst_stride, const int16_t *filter_x,
int x_step_q4, const int16_t *filter_y, int y_step_q4,
......@@ -230,6 +426,19 @@ void aom_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
filters_y, y0_q4, y_step_q4, w, h);
}
// C entry point for the 2-D scaled 8-tap convolution.
//
// Both filter pointers are converted to kernel tables with get_filter_base()
// and the work is delegated to convolve_scale_c(); subpel_x / subpel_y are
// the starting offsets and x_step_qn / y_step_qn the per-sample steps.
void aom_convolve8_scale_c(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
                           const int16_t *filter_x, int subpel_x, int x_step_qn,
                           const int16_t *filter_y, int subpel_y, int y_step_qn,
                           int w, int h) {
  const InterpKernel *const kernels_x = get_filter_base(filter_x);
  const InterpKernel *const kernels_y = get_filter_base(filter_y);

  convolve_scale_c(src, src_stride, dst, dst_stride,
                   kernels_x, subpel_x, x_step_qn,
                   kernels_y, subpel_y, y_step_qn,
                   w, h);
}
void aom_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
ptrdiff_t dst_stride, const int16_t *filter_x,
int x_step_q4, const int16_t *filter_y, int y_step_q4,
......@@ -245,6 +454,22 @@ void aom_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
h);
}
// C entry point for the 2-D scaled 8-tap convolution that averages the
// result into dst.
//
// The scaled prediction is first written to a local scratch block and then
// merged into dst by aom_convolve_avg_c().
void aom_convolve8_avg_scale_c(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
                               const int16_t *filter_x, int subpel_x,
                               int x_step_qn, const int16_t *filter_y,
                               int subpel_y, int y_step_qn, int w, int h) {
  // The scratch block has a fixed size, which is why w and h are bounded by
  // MAX_SB_SIZE below.
  DECLARE_ALIGNED(16, uint8_t, temp[MAX_SB_SIZE * MAX_SB_SIZE]);
  const ptrdiff_t temp_stride = MAX_SB_SIZE;

  assert(w <= MAX_SB_SIZE);
  assert(h <= MAX_SB_SIZE);

  // Step 1: un-averaged scaled prediction into the scratch block.
  aom_convolve8_scale_c(src, src_stride, temp, temp_stride, filter_x, subpel_x,
                        x_step_qn, filter_y, subpel_y, y_step_qn, w, h);

  // Step 2: combine the scratch block with dst; no filters are passed here.
  aom_convolve_avg_c(temp, temp_stride, dst, dst_stride, NULL, 0, NULL, 0, w,
                     h);
}
void aom_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
ptrdiff_t dst_stride, const int16_t *filter_x,
int filter_x_stride, const int16_t *filter_y,
......@@ -332,6 +557,63 @@ void aom_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
filter_y, y_step_q4, w, h);
}
/*
void aom_scaled_horiz_scale_c(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
const int16_t *filter_x, int x_step_qn,
const int16_t *filter_y, int y_step_qn, int w,
int h) {
aom_convolve8_horiz_scale_c(src, src_stride, dst, dst_stride, filter_x,
x_step_qn, filter_y, y_step_qn, w, h);
}
void aom_scaled_vert_scale_c(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
const int16_t *filter_x, int x_step_qn,
const int16_t *filter_y, int y_step_qn, int w,
int h) {
aom_convolve8_vert_scale_c(src, src_stride, dst, dst_stride, filter_x,
x_step_qn, filter_y, y_step_qn, w, h);
}
void aom_scaled_2d_scale_c(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
const int16_t *filter_x, int x_step_qn,
const int16_t *filter_y, int y_step_qn, int w,
int h) {
aom_convolve8_scale_c(src, src_stride, dst, dst_stride, filter_x, x_step_qn,
filter_y, y_step_qn, w, h);
}
void aom_scaled_avg_horiz_scale_c(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
const int16_t *filter_x, int x_step_qn,
const int16_t *filter_y, int y_step_qn, int w,
int h) {
aom_convolve8_avg_horiz_scale_c(src, src_stride, dst, dst_stride, filter_x,
x_step_qn, filter_y, y_step_qn, w, h);
}
void aom_scaled_avg_vert_scale_c(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
const int16_t *filter_x, int x_step_qn,
const int16_t *filter_y, int y_step_qn, int w,
int h) {
aom_convolve8_avg_vert_scale_c(src, src_stride, dst, dst_stride, filter_x,
x_step_qn, filter_y, y_step_qn, w, h);
}
void aom_scaled_avg_2d_scale_c(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
const int16_t *filter_x, int x_step_qn,
const int16_t *filter_y, int y_step_qn, int w,
int h) {
aom_convolve8_avg_scale_c(src, src_stride, dst, dst_stride, filter_x,
x_step_qn, filter_y, y_step_qn, w, h);
}
*/
// TODO(afergs): Make sure this works too
#if CONFIG_LOOP_RESTORATION
static void convolve_add_src_horiz(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
......@@ -569,6 +851,7 @@ void aom_convolve8_add_src_hip_c(const uint8_t *src, ptrdiff_t src_stride,
}
#endif // CONFIG_LOOP_RESTORATION
// TODO(afergs): Make sure this works too
#if CONFIG_HIGHBITDEPTH
static void highbd_convolve_horiz(const uint8_t *src8, ptrdiff_t src_stride,
uint8_t *dst8, ptrdiff_t dst_stride,
......
......@@ -149,20 +149,27 @@ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
#
# Sub Pixel Filters
#
add_proto qw/void aom_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_scaled_2d/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_scaled_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_scaled_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_scaled_avg_2d/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_scaled_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_scaled_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_scaled_2d/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_scaled_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_scaled_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_scaled_avg_2d/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_scaled_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_scaled_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
# Scaled-reference convolve variants. In addition to the filter and step, each
# takes an explicit starting sub-pixel offset (subpel_x / subpel_y).
# NOTE(review): the step parameters are spelled x_step_q4 / y_step_q4 in these
# prototypes, but the C implementations name them x_step_qn / y_step_qn and
# shift positions by SCALE_SUBPEL_BITS -- consider renaming for consistency.
add_proto qw/void aom_convolve8_horiz_scale/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int subpel_x, int x_step_q4, const int16_t *filter_y, int subpel_y, int y_step_q4, int w, int h";
add_proto qw/void aom_convolve8_vert_scale/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int subpel_x, int x_step_q4, const int16_t *filter_y, int subpel_y, int y_step_q4, int w, int h";
add_proto qw/void aom_convolve8_avg_horiz_scale/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int subpel_x, int x_step_q4, const int16_t *filter_y, int subpel_y, int y_step_q4, int w, int h";
add_proto qw/void aom_convolve8_avg_vert_scale/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int subpel_x, int x_step_q4, const int16_t *filter_y, int subpel_y, int y_step_q4, int w, int h";
add_proto qw/void aom_convolve8_scale/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int subpel_x, int x_step_q4, const int16_t *filter_y, int subpel_y, int y_step_q4, int w, int h";
add_proto qw/void aom_convolve8_avg_scale/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int subpel_x, int x_step_q4, const int16_t *filter_y, int subpel_y, int y_step_q4, int w, int h";
specialize qw/aom_convolve_copy sse2 /;
specialize qw/aom_convolve_avg sse2 /;
......
......@@ -25,6 +25,12 @@ extern "C" {
#define SUBPEL_SHIFTS (1 << SUBPEL_BITS)
#define SUBPEL_TAPS 8
// Number of fractional bits carried by positions and steps in the scaled
// convolve paths.
#define SCALE_SUBPEL_BITS 10
// Number of fractional positions per whole sample at that precision.
#define SCALE_SUBPEL_SHIFTS (1 << SCALE_SUBPEL_BITS)
// Mask that isolates the fractional part of a SCALE_SUBPEL_BITS position.
#define SCALE_SUBPEL_MASK (SCALE_SUBPEL_SHIFTS - 1)
// Fractional bits beyond SUBPEL_BITS; shifting a masked fraction right by
// this amount yields an index at SUBPEL_BITS precision.
#define SCALE_EXTRA_BITS (SCALE_SUBPEL_BITS - SUBPEL_BITS)
// Half of (1 << SCALE_EXTRA_BITS). NOTE(review): presumably a rounding offset
// for conversions between the two precisions -- confirm at the use sites.
#define SCALE_EXTRA_OFF ((1 << SCALE_EXTRA_BITS) / 2)
typedef int16_t InterpKernel[SUBPEL_TAPS];
#define BIL_SUBPEL_BITS 3
......
......@@ -55,6 +55,39 @@ void av1_convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst,
}
}
// Horizontal scaled convolution with a filter of filter_params.taps taps.
//
// Positions carry SCALE_SUBPEL_BITS fractional bits; subpel_x_qn is the
// starting offset on every row and x_step_qn the step per output sample. The
// kernel for each sample is looked up from filter_params using the
// fractional part reduced by SCALE_EXTRA_BITS. When conv_params->do_average
// is set the clipped result is averaged (with rounding) into dst, otherwise
// it overwrites dst.
void av1_convolve_horiz_scale(const uint8_t *src, int src_stride, uint8_t *dst,
                              int dst_stride, int w, int h,
                              const InterpFilterParams filter_params,
                              const int subpel_x_qn, int x_step_qn,
                              ConvolveParams *conv_params) {
  const int taps = filter_params.taps;
  const uint8_t *src_row;
  uint8_t *dst_row = dst;
  int row;

  assert(conv_params->round == CONVOLVE_OPT_ROUND);

  // Begin taps / 2 - 1 samples before the nominal position so that the tap
  // window extends to both sides of it.
  src_row = src - (taps / 2 - 1);

  for (row = 0; row < h; ++row) {
    int pos_qn = subpel_x_qn;
    int col;

    for (col = 0; col < w; ++col) {
      const int kernel_idx = (pos_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
      const uint8_t *const window = src_row + (pos_qn >> SCALE_SUBPEL_BITS);
      const int16_t *kernel;
      int acc = 0;
      int tap;

      assert(kernel_idx < SUBPEL_SHIFTS);
      kernel = av1_get_interp_filter_subpel_kernel(filter_params, kernel_idx);
      for (tap = 0; tap < taps; ++tap) acc += window[tap] * kernel[tap];

      acc = clip_pixel(ROUND_POWER_OF_TWO(acc, FILTER_BITS));
      dst_row[col] = conv_params->do_average
                         ? ROUND_POWER_OF_TWO(dst_row[col] + acc, 1)
                         : acc;
      pos_qn += x_step_qn;
    }

    src_row += src_stride;
    dst_row += dst_stride;
  }
}
void av1_convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
const InterpFilterParams filter_params,
......@@ -87,6 +120,41 @@ void av1_convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst,
}
}
// Vertical scaled convolution with a filter of filter_params.taps taps.
//
// The block is processed column by column. Positions carry
// SCALE_SUBPEL_BITS fractional bits; subpel_y_qn is the starting offset for
// every column and y_step_qn the step per output row. The kernel for each
// sample is looked up from filter_params using the fractional part reduced
// by SCALE_EXTRA_BITS. When conv_params->do_average is set the clipped result
// is averaged (with rounding) into dst, otherwise it overwrites dst.
void av1_convolve_vert_scale(const uint8_t *src, int src_stride, uint8_t *dst,
                             int dst_stride, int w, int h,
                             const InterpFilterParams filter_params,
                             const int subpel_y_qn, int y_step_qn,
                             ConvolveParams *conv_params) {
  const int taps = filter_params.taps;
  const uint8_t *src_col;
  uint8_t *dst_col = dst;
  int col;

  assert(conv_params->round == CONVOLVE_OPT_ROUND);

  // Begin taps / 2 - 1 rows above the nominal position so that the tap window
  // extends to both sides of it.
  src_col = src - src_stride * (taps / 2 - 1);

  for (col = 0; col < w; ++col) {
    int pos_qn = subpel_y_qn;
    int row;

    for (row = 0; row < h; ++row) {
      const int kernel_idx = (pos_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
      const uint8_t *const window =
          src_col + (pos_qn >> SCALE_SUBPEL_BITS) * src_stride;
      const int16_t *kernel;
      int acc = 0;
      int tap;

      assert(kernel_idx < SUBPEL_SHIFTS);
      kernel = av1_get_interp_filter_subpel_kernel(filter_params, kernel_idx);
      for (tap = 0; tap < taps; ++tap)
        acc += window[tap * src_stride] * kernel[tap];

      acc = clip_pixel(ROUND_POWER_OF_TWO(acc, FILTER_BITS));
      dst_col[row * dst_stride] =
          conv_params->do_average
              ? ROUND_POWER_OF_TWO(dst_col[row * dst_stride] + acc, 1)
              : acc;
      pos_qn += y_step_qn;
    }

    ++src_col;
    ++dst_col;
  }
}
static void convolve_copy(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
ConvolveParams *conv_params) {
......@@ -152,6 +220,28 @@ void av1_convolve_horiz_facade_c(const uint8_t *src, int src_stride,
}
}
// Dispatches the horizontal scaled convolution.
//
// Filters with SUBPEL_TAPS taps go to the aom_convolve8_*_horiz_scale
// routines (the averaging variant when conv_params->do_average is set); any
// other tap count falls back to av1_convolve_horiz_scale().
void av1_convolve_horiz_facade_scale(const uint8_t *src, int src_stride,
                                     uint8_t *dst, int dst_stride, int w, int h,
                                     const InterpFilterParams filter_params,
                                     const int subpel_x_qn, int x_step_qn,
                                     ConvolveParams *conv_params) {
  assert(conv_params->round == CONVOLVE_OPT_ROUND);

  if (filter_params.taps != SUBPEL_TAPS) {
    // Generic path for tap counts the 8-tap routines cannot handle.
    av1_convolve_horiz_scale(src, src_stride, dst, dst_stride, w, h,
                             filter_params, subpel_x_qn, x_step_qn,
                             conv_params);
    return;
  }

  // Kernel pointer for the starting offset, at SUBPEL_BITS precision.
  const int16_t *const kernel_x = av1_get_interp_filter_subpel_kernel(
      filter_params, subpel_x_qn >> SCALE_EXTRA_BITS);

  // The vertical filter, offset and step are passed as NULL, 0 and -1.
  if (conv_params->do_average) {
    aom_convolve8_avg_horiz_scale(src, src_stride, dst, dst_stride, kernel_x,
                                  subpel_x_qn, x_step_qn, NULL, 0, -1, w, h);
  } else {
    aom_convolve8_horiz_scale(src, src_stride, dst, dst_stride, kernel_x,
                              subpel_x_qn, x_step_qn, NULL, 0, -1, w, h);
  }
}
void av1_convolve_vert_facade(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
const InterpFilterParams filter_params,
......@@ -196,6 +286,28 @@ void av1_convolve_vert_facade_c(const uint8_t *src, int src_stride,
}
}
// Dispatches the vertical scaled convolution.
//
// Filters with SUBPEL_TAPS taps go to the aom_convolve8_*_vert_scale routines
// (the averaging variant when conv_params->do_average is set); any other tap
// count falls back to av1_convolve_vert_scale().
void av1_convolve_vert_facade_scale(const uint8_t *src, int src_stride,
                                    uint8_t *dst, int dst_stride, int w, int h,
                                    const InterpFilterParams filter_params,
                                    const int subpel_y_qn, int y_step_qn,
                                    ConvolveParams *conv_params) {
  assert(conv_params->round == CONVOLVE_OPT_ROUND);

  if (filter_params.taps != SUBPEL_TAPS) {
    // Generic path for tap counts the 8-tap routines cannot handle.
    av1_convolve_vert_scale(src, src_stride, dst, dst_stride, w, h,
                            filter_params, subpel_y_qn, y_step_qn, conv_params);
    return;
  }

  // Kernel pointer for the starting offset, at SUBPEL_BITS precision.
  const int16_t *const kernel_y = av1_get_interp_filter_subpel_kernel(
      filter_params, subpel_y_qn >> SCALE_EXTRA_BITS);

  // The horizontal filter, offset and step are passed as NULL, 0 and -1.
  if (conv_params->do_average) {
    aom_convolve8_avg_vert_scale(src, src_stride, dst, dst_stride, NULL, 0, -1,
                                 kernel_y, subpel_y_qn, y_step_qn, w, h);
  } else {
    aom_convolve8_vert_scale(src, src_stride, dst, dst_stride, NULL, 0, -1,
                             kernel_y, subpel_y_qn, y_step_qn, w, h);
  }
}
#if CONFIG_CONVOLVE_ROUND
void av1_convolve_rounding(const int32_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h, int bits) {
......@@ -587,8 +699,8 @@ static void convolve_helper(const uint8_t *src, int src_stride, uint8_t *dst,
ConvolveParams *conv_params,
ConvolveFunc convolve_horiz,
ConvolveFunc convolve_vert) {
int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0;
int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0;
int ignore_horiz = x_step_q4 == SUBPEL_SHIFTS && subpel_x_q4 == 0;
int ignore_vert = y_step_q4 == SUBPEL_SHIFTS && subpel_y_q4 == 0;
#if CONFIG_DUAL_FILTER
InterpFilterParams filter_params_x =
av1_get_interp_filter_params(interp_filter[1 + 2 * conv_params->ref]);
......@@ -701,6 +813,135 @@ static void convolve_helper(const uint8_t *src, int src_stride, uint8_t *dst,
}
}
static void convolve_scale_helper(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride, int w, int h,
#if CONFIG_DUAL_FILTER
const InterpFilter *interp_filter,
#else
const InterpFilter interp_filter,
#endif
const int subpel_x_qn, int x_step_qn,
const int subpel_y_qn, int y_step_qn,
ConvolveParams *conv_params,
ConvolveFunc convolve_horiz,
ConvolveFunc convolve_vert) {
int ignore_horiz = x_step_qn == SCALE_SUBPEL_SHIFTS && subpel_x_qn == 0;
int ignore_vert = y_step_qn == SCALE_SUBPEL_SHIFTS && subpel_y_qn == 0;
#if CONFIG_DUAL_FILTER
InterpFilterParams filter_params_x =
av1_get_interp_filter_params(interp_filter[1 + 2 * conv_params->ref]);
InterpFilterParams filter_params_y =
av1_get_interp_filter_params(interp_filter[0 + 2 * conv_params->ref]);
InterpFilterParams filter_params;
#else
InterpFilterParams filter_params =