Commit 97137443 authored by Debargha Mukherjee's avatar Debargha Mukherjee

Move superres 1D convolve functions to convolve.c

This is to ease integration with striped loop restoration

Change-Id: If10e16656a3fe42bcca3e7238e4e729c962f2bb8
parent 8fdcf6e3
......@@ -50,6 +50,15 @@ specialize qw/av1_convolve_horiz ssse3/;
add_proto qw/void av1_convolve_vert/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params";
specialize qw/av1_convolve_vert ssse3/;
if (aom_config("CONFIG_FRAME_SUPERRES") eq "yes") {
if (aom_config("CONFIG_LOOP_RESTORATION") eq "yes") {
add_proto qw/void av1_convolve_horiz_rs/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int interp_taps, const int x0_qn, const int x_step_qn";
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
add_proto qw/void av1_highbd_convolve_horiz_rs/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int interp_taps, const int x0_qn, const int x_step_qn, int bd";
}
}
}
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
add_proto qw/void av1_highbd_convolve_init/, "void";
specialize qw/av1_highbd_convolve_init sse4_1/;
......
......@@ -25,6 +25,62 @@
#define MAX_BLOCK_HEIGHT (MAX_SB_SIZE)
#define MAX_STEP (32)
#if CONFIG_FRAME_SUPERRES && CONFIG_LOOP_RESTORATION
#define UPSCALE_PROC_UNIT 64 // Source step (roughly)
#define UPSCALE_PROC_UNIT_SCALE (UPSCALE_PROC_UNIT / SCALE_NUMERATOR)
void av1_convolve_horiz_rs_c(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
const int16_t *x_filters, int interp_taps,
const int x0_qn, const int x_step_qn) {
int x, y;
src -= interp_taps / 2 - 1;
for (y = 0; y < h; ++y) {
int x_qn = x0_qn;
for (x = 0; x < w; ++x) {
const uint8_t *const src_x = &src[x_qn >> RS_SCALE_SUBPEL_BITS];
const int x_filter_idx =
(x_qn & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS;
assert(x_filter_idx <= RS_SUBPEL_MASK);
const int16_t *const x_filter = &x_filters[x_filter_idx * interp_taps];
int k, sum = 0;
for (k = 0; k < interp_taps; ++k) sum += src_x[k] * x_filter[k];
dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
x_qn += x_step_qn;
}
src += src_stride;
dst += dst_stride;
}
}
#if CONFIG_HIGHBITDEPTH
void av1_highbd_convolve_horiz_rs_c(const uint16_t *src, int src_stride,
uint16_t *dst, int dst_stride, int w, int h,
const int16_t *x_filters, int interp_taps,
int x0_qn, int x_step_qn, int bd) {
int x, y;
src -= interp_taps / 2 - 1;
for (y = 0; y < h; ++y) {
int x_qn = x0_qn;
for (x = 0; x < w; ++x) {
const uint16_t *const src_x = &src[x_qn >> RS_SCALE_SUBPEL_BITS];
const int x_filter_idx =
(x_qn & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS;
assert(x_filter_idx <= RS_SUBPEL_MASK);
const int16_t *const x_filter = &x_filters[x_filter_idx * interp_taps];
int k, sum = 0;
for (k = 0; k < interp_taps; ++k) sum += src_x[k] * x_filter[k];
dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
x_qn += x_step_qn;
}
src += src_stride;
dst += dst_stride;
}
}
#endif // CONFIG_HIGHBITDEPTH
#endif // CONFIG_FRAME_SUPERRES && CONFIG_LOOP_RESTORATION
void av1_convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
const InterpFilterParams filter_params,
......
......@@ -435,7 +435,7 @@ static void interpolate(const uint8_t *const input, int in_length,
#if CONFIG_FRAME_SUPERRES && CONFIG_LOOP_RESTORATION
#define UPSCALE_PROC_UNIT 64 // Source step (roughly)
#define UPSCALE_PROC_UNIT 0 // Source step (roughly), 0: do not use
#define UPSCALE_PROC_UNIT_SCALE (UPSCALE_PROC_UNIT / SCALE_NUMERATOR)
static int32_t get_upscale_convolve_step(int in_length, int out_length) {
......@@ -453,49 +453,37 @@ static int32_t get_upscale_convolve_x0(int in_length, int out_length,
return (int32_t)((uint32_t)x0 & RS_SCALE_SUBPEL_MASK);
}
static void convolve_horiz_superres_c(const uint8_t *src, uint8_t *dst,
const int16_t *x_filters, int interp_taps,
int x0_qn, int x_step_qn, int w) {
int x;
src -= interp_taps / 2 - 1;
int x_qn = x0_qn;
for (x = 0; x < w; ++x) {
const uint8_t *const src_x = &src[x_qn >> RS_SCALE_SUBPEL_BITS];
const int x_filter_idx =
(x_qn & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS;
assert(x_filter_idx <= RS_SUBPEL_MASK);
const int16_t *const x_filter = &x_filters[x_filter_idx * interp_taps];
int k, sum = 0;
for (k = 0; k < interp_taps; ++k) sum += src_x[k] * x_filter[k];
dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
x_qn += x_step_qn;
}
}
#if !CONFIG_HORZONLY_FRAME_SUPERRES
static void interpolate_normative_core(const uint8_t *const src, int in_length,
uint8_t *dst, int out_length,
int superres_denom,
const int16_t *interp_filters,
int interp_taps) {
(void)superres_denom;
assert(in_length < out_length);
const int oproc_unit = UPSCALE_PROC_UNIT_SCALE * superres_denom;
const int32_t x_step_qn = get_upscale_convolve_step(in_length, out_length);
const int32_t x0_qn =
get_upscale_convolve_x0(in_length, out_length, x_step_qn);
// Note since we are upscaling, the first output sample is located before
// the first input sample. Hence srcp = src - 1 + ... below
const uint8_t *srcp = src - 1;
#if UPSCALE_PROC_UNIT
int32_t x0 = x0_qn;
const int oproc_unit = UPSCALE_PROC_UNIT_SCALE * superres_denom;
int olen = oproc_unit;
for (int op = 0; op < out_length; op += olen) {
// Note since we are upscaling, the first output sample is located before
// the first input sample. Hence srcp = src - 1 + ... below
olen = AOMMIN(oproc_unit, out_length - op);
convolve_horiz_superres_c(srcp, dst + op, interp_filters, interp_taps, x0,
x_step_qn, olen);
av1_convolve_horiz_rs(srcp, 0, dst + op, 0, olen, 1, interp_filters,
interp_taps, x0, x_step_qn);
x0 += olen * x_step_qn;
// Note srcp may advance by UPSCALE_PROC_UNIT +/- 1
srcp += (x0 >> RS_SCALE_SUBPEL_BITS);
x0 &= RS_SCALE_SUBPEL_MASK;
}
#else
av1_convolve_horiz_rs(srcp, 0, dst, 0, out_length, 1, interp_filters,
interp_taps, x0_qn, x_step_qn);
#endif // UPSCALE_PROC_UNIT
}
static void interpolate_normative(const uint8_t *const input, int in_length,
......@@ -514,6 +502,7 @@ static void interpolate_normative(const uint8_t *const input, int in_length,
UPSCALE_NORMATIVE_TAPS);
aom_free(intbuf_alloc);
}
#endif // !CONFIG_HORZONLY_FRAME_SUPERRES
#endif // CONFIG_FRAME_SUPERRES && CONFIG_LOOP_RESTORATION
#ifndef __clang_analyzer__
......@@ -729,6 +718,7 @@ Error:
}
#if CONFIG_FRAME_SUPERRES
#if !(CONFIG_HORZONLY_FRAME_SUPERRES && CONFIG_LOOP_RESTORATION)
static void upscale_normative_1D(const uint8_t *const input, int length,
uint8_t *output, int olength,
int superres_denom) {
......@@ -739,12 +729,12 @@ static void upscale_normative_1D(const uint8_t *const input, int length,
interpolate(input, length, output, olength);
#endif // CONFIG_LOOP_RESTORATION
}
#endif // !(CONFIG_HORZONLY_FRAME_SUPERRES && CONFIG_LOOP_RESTORATION)
static void upscale_normative_plane(const uint8_t *const input, int height,
int width, int in_stride, uint8_t *output,
int height2, int width2, int out_stride,
int superres_denom) {
int i;
assert(width > 0);
assert(height > 0);
assert(width2 > 0);
......@@ -752,19 +742,28 @@ static void upscale_normative_plane(const uint8_t *const input, int height,
#if CONFIG_HORZONLY_FRAME_SUPERRES
(void)height;
(void)height2;
(void)superres_denom;
assert(height2 == height);
for (i = 0; i < height; ++i)
#if CONFIG_LOOP_RESTORATION
const int32_t x_step_qn = get_upscale_convolve_step(width, width2);
const int32_t x0_qn = get_upscale_convolve_x0(width, width2, x_step_qn);
av1_convolve_horiz_rs(input - 1, in_stride, output, out_stride, width2,
height2, &filter_normative[0][0],
UPSCALE_NORMATIVE_TAPS, x0_qn, x_step_qn);
#else
for (int i = 0; i < height; ++i)
upscale_normative_1D(input + in_stride * i, width, output + out_stride * i,
width2, superres_denom);
#endif // CONFIG_LOOP_RESTORATION
#else
uint8_t *intbuf = (uint8_t *)aom_malloc(sizeof(uint8_t) * width2 * height);
uint8_t *arrbuf = (uint8_t *)aom_malloc(sizeof(uint8_t) * height);
uint8_t *arrbuf2 = (uint8_t *)aom_malloc(sizeof(uint8_t) * height2);
if (intbuf == NULL || arrbuf == NULL || arrbuf2 == NULL) goto Error;
for (i = 0; i < height; ++i)
for (int i = 0; i < height; ++i)
upscale_normative_1D(input + in_stride * i, width, intbuf + width2 * i,
width2, superres_denom);
for (i = 0; i < width2; ++i) {
for (int i = 0; i < width2; ++i) {
fill_col_to_arr(intbuf + i, width2, height, arrbuf);
upscale_normative_1D(arrbuf, height, arrbuf2, height2, superres_denom);
fill_arr_to_col(output + i, out_stride, height2, arrbuf2);
......@@ -872,51 +871,38 @@ static void highbd_interpolate(const uint16_t *const input, int in_length,
}
#if CONFIG_FRAME_SUPERRES && CONFIG_LOOP_RESTORATION
static void highbd_convolve_horiz_superres_c(const uint16_t *src, uint16_t *dst,
const int16_t *x_filters,
int interp_taps, int x0_qn,
int x_step_qn, int w, int bd) {
int x;
src -= interp_taps / 2 - 1;
int x_qn = x0_qn;
for (x = 0; x < w; ++x) {
const uint16_t *const src_x = &src[x_qn >> RS_SCALE_SUBPEL_BITS];
const int x_filter_idx =
(x_qn & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS;
assert(x_filter_idx <= RS_SUBPEL_MASK);
const int16_t *const x_filter = &x_filters[x_filter_idx * interp_taps];
int k, sum = 0;
for (k = 0; k < interp_taps; ++k) sum += src_x[k] * x_filter[k];
dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
x_qn += x_step_qn;
}
}
#if !CONFIG_HORZONLY_FRAME_SUPERRES
static void highbd_interpolate_normative_core(const uint16_t *const src,
int in_length, uint16_t *dst,
int out_length,
int superres_denom, int bd,
const int16_t *interp_filters,
int interp_taps) {
(void)superres_denom;
assert(in_length < out_length);
const int oproc_unit = UPSCALE_PROC_UNIT_SCALE * superres_denom;
const int32_t x_step_qn = get_upscale_convolve_step(in_length, out_length);
const int32_t x0_qn =
get_upscale_convolve_x0(in_length, out_length, x_step_qn);
// Note since we are upscaling, the first output sample is located before
// the first input sample. Hence srcp = src - 1 + ... below
const uint16_t *srcp = src - 1;
#if UPSCALE_PROC_UNIT
int32_t x0 = x0_qn;
const int oproc_unit = UPSCALE_PROC_UNIT_SCALE * superres_denom;
int olen = oproc_unit;
for (int op = 0; op < out_length; op += olen) {
// Note since we are upscaling, the first output sample is located before
// the first input sample. Hence srcp = src - 1 + ... below
olen = AOMMIN(oproc_unit, out_length - op);
highbd_convolve_horiz_superres_c(srcp, dst + op, interp_filters,
interp_taps, x0, x_step_qn, olen, bd);
av1_highbd_convolve_horiz_rs(srcp, 0, dst + op, 0, olen, 1, interp_filters,
interp_taps, x0, x_step_qn, bd);
x0 += olen * x_step_qn;
// Note srcp may advance by UPSCALE_PROC_UNIT +/- 1
srcp += (x0 >> RS_SCALE_SUBPEL_BITS);
x0 &= RS_SCALE_SUBPEL_MASK;
}
#else
av1_highbd_convolve_horiz_rs(srcp, 0, dst, 0, out_length, 1, interp_filters,
interp_taps, x0_qn, x_step_qn, bd);
#endif // UPSCALE_PROC_UNIT
}
static void highbd_interpolate_normative(const uint16_t *const input,
......@@ -936,6 +922,7 @@ static void highbd_interpolate_normative(const uint16_t *const input,
UPSCALE_NORMATIVE_TAPS);
aom_free(intbuf_alloc);
}
#endif // !CONFIG_HORZONLY_FRAME_SUPERRES
#endif // CONFIG_FRAME_SUPERRES && CONFIG_LOOP_RESTORATION
#ifndef __clang_analyzer__
......@@ -1135,6 +1122,7 @@ Error:
}
#if CONFIG_FRAME_SUPERRES
#if !(CONFIG_HORZONLY_FRAME_SUPERRES && CONFIG_LOOP_RESTORATION)
static void highbd_upscale_normative_1D(const uint16_t *const input, int length,
uint16_t *output, int olength,
int superres_denom, int bd) {
......@@ -1146,13 +1134,13 @@ static void highbd_upscale_normative_1D(const uint16_t *const input, int length,
highbd_interpolate(input, length, output, olength, bd);
#endif // CONFIG_LOOP_RESTORATION
}
#endif // !(CONFIG_HORZONLY_FRAME_SUPERRES && CONFIG_LOOP_RESTORATION)
static void highbd_upscale_normative_plane(const uint8_t *const input,
int height, int width, int in_stride,
uint8_t *output, int height2,
int width2, int out_stride,
int superres_denom, int bd) {
int i;
assert(width > 0);
assert(height > 0);
assert(width2 > 0);
......@@ -1160,23 +1148,33 @@ static void highbd_upscale_normative_plane(const uint8_t *const input,
#if CONFIG_HORZONLY_FRAME_SUPERRES
(void)height;
(void)height2;
(void)superres_denom;
assert(height2 == height);
for (i = 0; i < height; ++i)
#if CONFIG_LOOP_RESTORATION
const int32_t x_step_qn = get_upscale_convolve_step(width, width2);
const int32_t x0_qn = get_upscale_convolve_x0(width, width2, x_step_qn);
av1_highbd_convolve_horiz_rs(CONVERT_TO_SHORTPTR(input - 1), in_stride,
CONVERT_TO_SHORTPTR(output), out_stride, width2,
height2, &filter_normative[0][0],
UPSCALE_NORMATIVE_TAPS, x0_qn, x_step_qn, bd);
#else
for (int i = 0; i < height; ++i)
highbd_upscale_normative_1D(CONVERT_TO_SHORTPTR(input + in_stride * i),
width,
CONVERT_TO_SHORTPTR(output + out_stride * i),
width2, superres_denom, bd);
#endif // CONFIG_LOOP_RESTORATION
#else
uint16_t *intbuf = (uint16_t *)aom_malloc(sizeof(uint16_t) * width2 * height);
uint16_t *arrbuf = (uint16_t *)aom_malloc(sizeof(uint16_t) * height);
uint16_t *arrbuf2 = (uint16_t *)aom_malloc(sizeof(uint16_t) * height2);
if (intbuf == NULL || arrbuf == NULL || arrbuf2 == NULL) goto Error;
for (i = 0; i < height; ++i) {
for (int i = 0; i < height; ++i) {
highbd_upscale_normative_1D(CONVERT_TO_SHORTPTR(input + in_stride * i),
width, intbuf + width2 * i, width2,
superres_denom, bd);
}
for (i = 0; i < width2; ++i) {
for (int i = 0; i < width2; ++i) {
highbd_fill_col_to_arr(intbuf + i, width2, height, arrbuf);
highbd_upscale_normative_1D(arrbuf, height, arrbuf2, height2,
superres_denom, bd);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment