Commit 1e403064 authored by Angie Chiang's avatar Angie Chiang

Fix 12 TAP convolution bug

Previously, we did 12-tap interpolation even when there was no sub-pixel
offset. This could cause a bug because the decoder doesn't extend the border
when there is no sub-pixel offset. In this situation, if we still do
interpolation, we will access the border extension, which doesn't exist,
and cause a memory error.

Change-Id: I55b879722f0a10c5d13261bd9617a75c826a2418
parent 961668c9
......@@ -60,38 +60,42 @@ TEST(VP10ConvolveTest, vp10_convolve) {
int dst_stride = 1;
int x_step_q4 = 16;
int y_step_q4 = 16;
int subpel_x_q4 = 3;
int subpel_y_q4 = 2;
int avg = 0;
int w = 1;
int h = 1;
int subpel_x_q4;
int subpel_y_q4;
for (int i = 0; i < filter_size * filter_size; i++) {
src[i] = rnd.Rand16() % (1 << 8);
}
vp10_convolve(src + src_stride * filter_center + filter_center, src_stride,
dst, dst_stride, w, h, filter_params, subpel_x_q4, x_step_q4,
subpel_y_q4, y_step_q4, avg);
const int16_t* x_filter =
vp10_get_interp_filter_kernel(filter_params, subpel_x_q4);
const int16_t* y_filter =
vp10_get_interp_filter_kernel(filter_params, subpel_y_q4);
int temp[12];
int dst_ref = 0;
for (int r = 0; r < filter_size; r++) {
temp[r] = 0;
for (int c = 0; c < filter_size; c++) {
temp[r] += x_filter[c] * src[r * filter_size + c];
for (subpel_x_q4 = 0; subpel_x_q4 < 16; subpel_x_q4++) {
for (subpel_y_q4 = 0; subpel_y_q4 < 16; subpel_y_q4++) {
vp10_convolve(src + src_stride * filter_center + filter_center,
src_stride, dst, dst_stride, w, h, filter_params,
subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg);
const int16_t* x_filter =
vp10_get_interp_filter_kernel(filter_params, subpel_x_q4);
const int16_t* y_filter =
vp10_get_interp_filter_kernel(filter_params, subpel_y_q4);
int temp[12];
int dst_ref = 0;
for (int r = 0; r < filter_size; r++) {
temp[r] = 0;
for (int c = 0; c < filter_size; c++) {
temp[r] += x_filter[c] * src[r * filter_size + c];
}
temp[r] = clip_pixel(ROUND_POWER_OF_TWO(temp[r], FILTER_BITS));
dst_ref += temp[r] * y_filter[r];
}
dst_ref = clip_pixel(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS));
EXPECT_EQ(dst[0], dst_ref);
}
temp[r] = clip_pixel(ROUND_POWER_OF_TWO(temp[r], FILTER_BITS));
dst_ref += temp[r] * y_filter[r];
}
dst_ref = clip_pixel(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS));
EXPECT_EQ(dst[0], dst_ref);
}
TEST(VP10ConvolveTest, vp10_convolve_avg) {
......@@ -110,13 +114,14 @@ TEST(VP10ConvolveTest, vp10_convolve_avg) {
int dst_stride = 1;
int x_step_q4 = 16;
int y_step_q4 = 16;
int subpel_x_q4 = 3;
int subpel_y_q4 = 2;
int avg = 0;
int w = 1;
int h = 1;
int subpel_x_q4;
int subpel_y_q4;
for (int i = 0; i < filter_size * filter_size; i++) {
src0[i] = rnd.Rand16() % (1 << 8);
src1[i] = rnd.Rand16() % (1 << 8);
......@@ -124,23 +129,29 @@ TEST(VP10ConvolveTest, vp10_convolve_avg) {
int offset = filter_size * filter_center + filter_center;
avg = 0;
vp10_convolve(src0 + offset, src_stride, dst0, dst_stride, w, h,
filter_params, subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4,
avg);
avg = 0;
vp10_convolve(src1 + offset, src_stride, dst1, dst_stride, w, h,
filter_params, subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4,
avg);
avg = 0;
vp10_convolve(src0 + offset, src_stride, dst, dst_stride, w, h, filter_params,
subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg);
avg = 1;
vp10_convolve(src1 + offset, src_stride, dst, dst_stride, w, h, filter_params,
subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg);
EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1));
for (subpel_x_q4 = 0; subpel_x_q4 < 16; subpel_x_q4++) {
for (subpel_y_q4 = 0; subpel_y_q4 < 16; subpel_y_q4++) {
avg = 0;
vp10_convolve(src0 + offset, src_stride, dst0, dst_stride, w, h,
filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
y_step_q4, avg);
avg = 0;
vp10_convolve(src1 + offset, src_stride, dst1, dst_stride, w, h,
filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
y_step_q4, avg);
avg = 0;
vp10_convolve(src0 + offset, src_stride, dst, dst_stride, w, h,
filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
y_step_q4, avg);
avg = 1;
vp10_convolve(src1 + offset, src_stride, dst, dst_stride, w, h,
filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
y_step_q4, avg);
EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1));
}
}
}
#if CONFIG_VP9_HIGHBITDEPTH
......@@ -157,40 +168,45 @@ TEST(VP10ConvolveTest, vp10_highbd_convolve) {
int dst_stride = 1;
int x_step_q4 = 16;
int y_step_q4 = 16;
int subpel_x_q4 = 8;
int subpel_y_q4 = 6;
int avg = 0;
int bd = 10;
int w = 1;
int h = 1;
int subpel_x_q4;
int subpel_y_q4;
for (int i = 0; i < filter_size * filter_size; i++) {
src[i] = rnd.Rand16() % (1 << bd);
}
vp10_highbd_convolve(
CONVERT_TO_BYTEPTR(src + src_stride * filter_center + filter_center),
src_stride, CONVERT_TO_BYTEPTR(dst), dst_stride, w, h, filter_params,
subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg, bd);
const int16_t* x_filter =
vp10_get_interp_filter_kernel(filter_params, subpel_x_q4);
const int16_t* y_filter =
vp10_get_interp_filter_kernel(filter_params, subpel_y_q4);
int temp[12];
int dst_ref = 0;
for (int r = 0; r < filter_size; r++) {
temp[r] = 0;
for (int c = 0; c < filter_size; c++) {
temp[r] += x_filter[c] * src[r * filter_size + c];
for (subpel_x_q4 = 0; subpel_x_q4 < 16; subpel_x_q4++) {
for (subpel_y_q4 = 0; subpel_y_q4 < 16; subpel_y_q4++) {
vp10_highbd_convolve(
CONVERT_TO_BYTEPTR(src + src_stride * filter_center + filter_center),
src_stride, CONVERT_TO_BYTEPTR(dst), dst_stride, w, h, filter_params,
subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg, bd);
const int16_t* x_filter =
vp10_get_interp_filter_kernel(filter_params, subpel_x_q4);
const int16_t* y_filter =
vp10_get_interp_filter_kernel(filter_params, subpel_y_q4);
int temp[12];
int dst_ref = 0;
for (int r = 0; r < filter_size; r++) {
temp[r] = 0;
for (int c = 0; c < filter_size; c++) {
temp[r] += x_filter[c] * src[r * filter_size + c];
}
temp[r] =
clip_pixel_highbd(ROUND_POWER_OF_TWO(temp[r], FILTER_BITS), bd);
dst_ref += temp[r] * y_filter[r];
}
dst_ref = clip_pixel_highbd(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS), bd);
EXPECT_EQ(dst[0], dst_ref);
}
temp[r] = clip_pixel_highbd(ROUND_POWER_OF_TWO(temp[r], FILTER_BITS), bd);
dst_ref += temp[r] * y_filter[r];
}
dst_ref = clip_pixel_highbd(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS), bd);
EXPECT_EQ(dst[0], dst_ref);
}
TEST(VP10ConvolveTest, vp10_highbd_convolve_avg) {
......@@ -209,42 +225,49 @@ TEST(VP10ConvolveTest, vp10_highbd_convolve_avg) {
int dst_stride = 1;
int x_step_q4 = 16;
int y_step_q4 = 16;
int subpel_x_q4 = 3;
int subpel_y_q4 = 2;
int avg = 0;
int bd = 10;
int w = 1;
int h = 1;
int subpel_x_q4;
int subpel_y_q4;
for (int i = 0; i < filter_size * filter_size; i++) {
src0[i] = rnd.Rand16() % (1 << bd);
src1[i] = rnd.Rand16() % (1 << bd);
}
int offset = filter_size * filter_center + filter_center;
avg = 0;
vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride,
CONVERT_TO_BYTEPTR(dst0), dst_stride, w, h,
filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
y_step_q4, avg, bd);
avg = 0;
vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride,
CONVERT_TO_BYTEPTR(dst1), dst_stride, w, h,
filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
y_step_q4, avg, bd);
avg = 0;
vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride,
CONVERT_TO_BYTEPTR(dst), dst_stride, w, h, filter_params,
subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg, bd);
avg = 1;
vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride,
CONVERT_TO_BYTEPTR(dst), dst_stride, w, h, filter_params,
subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg, bd);
EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1));
for (subpel_x_q4 = 0; subpel_x_q4 < 16; subpel_x_q4++) {
for (subpel_y_q4 = 0; subpel_y_q4 < 16; subpel_y_q4++) {
int offset = filter_size * filter_center + filter_center;
avg = 0;
vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride,
CONVERT_TO_BYTEPTR(dst0), dst_stride, w, h,
filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
y_step_q4, avg, bd);
avg = 0;
vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride,
CONVERT_TO_BYTEPTR(dst1), dst_stride, w, h,
filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
y_step_q4, avg, bd);
avg = 0;
vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride,
CONVERT_TO_BYTEPTR(dst), dst_stride, w, h,
filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
y_step_q4, avg, bd);
avg = 1;
vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride,
CONVERT_TO_BYTEPTR(dst), dst_stride, w, h,
filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
y_step_q4, avg, bd);
EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1));
}
}
}
#endif // CONFIG_VP9_HIGHBITDEPTH
} // namespace
#include <assert.h>
#include <string.h>
#include "vp10/common/filter.h"
#include "vpx_dsp/vpx_dsp_common.h"
......@@ -69,22 +70,35 @@ static void convolve_vert(const uint8_t *src, int src_stride, uint8_t *dst,
}
}
// Transfers a w x h block of 8-bit samples from src to dst without applying
// any interpolation filter.
//   avg == 0: every row of w bytes is copied straight from src to dst.
//   avg != 0: every dst sample is overwritten with
//             clip_pixel(ROUND_POWER_OF_TWO(dst + src, 1)), i.e. the existing
//             dst value is blended with the matching src sample.
// src_stride / dst_stride are the distances, in samples, between the starts
// of consecutive rows in the respective buffers.
static void convolve_copy(const uint8_t *src, int src_stride, uint8_t *dst,
                          int dst_stride, int w, int h, int avg) {
  const uint8_t *src_row = src;
  uint8_t *dst_row = dst;
  int row;

  for (row = 0; row < h; ++row) {
    if (avg == 0) {
      // Plain copy of one row.
      memcpy(dst_row, src_row, w);
    } else {
      // Blend the incoming row into what dst already holds.
      int col;
      for (col = 0; col < w; ++col) {
        const int sum = dst_row[col] + src_row[col];
        dst_row[col] = clip_pixel(ROUND_POWER_OF_TWO(sum, 1));
      }
    }
    src_row += src_stride;
    dst_row += dst_stride;
  }
}
void vp10_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
const InterpFilterParams filter_params,
const int subpel_x_q4, int x_step_q4, const int subpel_y_q4,
int y_step_q4, int avg) {
int filter_size = filter_params.tap;
// temp's size is set to (maximum possible intermediate_height) *
// MAX_BLOCK_WIDTH
uint8_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
MAX_FILTER_TAP) *
MAX_BLOCK_WIDTH];
int temp_stride = MAX_BLOCK_WIDTH;
int intermediate_height =
(((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0;
int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0;
assert(w <= MAX_BLOCK_WIDTH);
assert(h <= MAX_BLOCK_HEIGHT);
......@@ -92,11 +106,31 @@ void vp10_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
assert(x_step_q4 <= MAX_STEP);
assert(filter_params.tap <= MAX_FILTER_TAP);
convolve_horiz(src - src_stride * (filter_size / 2 - 1), src_stride, temp,
temp_stride, w, intermediate_height, filter_params,
subpel_x_q4, x_step_q4, 0);
convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride, dst,
dst_stride, w, h, filter_params, subpel_y_q4, y_step_q4, avg);
if (ignore_horiz && ignore_vert) {
convolve_copy(src, src_stride, dst, dst_stride, w, h, avg);
} else if (ignore_vert) {
convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params,
subpel_x_q4, x_step_q4, avg);
} else if (ignore_horiz) {
convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params,
subpel_y_q4, y_step_q4, avg);
} else {
// temp's size is set to (maximum possible intermediate_height) *
// MAX_BLOCK_WIDTH
uint8_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
MAX_FILTER_TAP) *
MAX_BLOCK_WIDTH];
int temp_stride = MAX_BLOCK_WIDTH;
int intermediate_height =
(((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
convolve_horiz(src - src_stride * (filter_size / 2 - 1), src_stride, temp,
temp_stride, w, intermediate_height, filter_params,
subpel_x_q4, x_step_q4, 0);
convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride, dst,
dst_stride, w, h, filter_params, subpel_y_q4, y_step_q4, avg);
}
}
#if CONFIG_VP9_HIGHBITDEPTH
......@@ -164,23 +198,39 @@ static void highbd_convolve_vert(const uint16_t *src, int src_stride,
}
}
// High-bit-depth counterpart of the unfiltered block copy: transfers a w x h
// block of 16-bit samples from src to dst.
//   avg == 0: every row (w samples, w * sizeof(*src) bytes) is copied as-is.
//   avg != 0: every dst sample is overwritten with
//             clip_pixel_highbd(ROUND_POWER_OF_TWO(dst + src, 1), bd), i.e.
//             the existing dst value is blended with the matching src sample.
// src_stride / dst_stride are the distances, in samples, between the starts
// of consecutive rows; bd is forwarded unchanged to clip_pixel_highbd.
static void highbd_convolve_copy(const uint16_t *src, int src_stride,
                                 uint16_t *dst, int dst_stride, int w, int h,
                                 int avg, int bd) {
  const uint16_t *src_row = src;
  uint16_t *dst_row = dst;
  int row;

  for (row = 0; row < h; ++row) {
    if (avg == 0) {
      // Plain copy of one row; size is in bytes, hence the sizeof scaling.
      memcpy(dst_row, src_row, w * sizeof(*src));
    } else {
      // Blend the incoming row into what dst already holds.
      int col;
      for (col = 0; col < w; ++col) {
        const int sum = dst_row[col] + src_row[col];
        dst_row[col] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, 1), bd);
      }
    }
    src_row += src_stride;
    dst_row += dst_stride;
  }
}
void vp10_highbd_convolve(const uint8_t *src8, int src_stride, uint8_t *dst8,
int dst_stride, int w, int h,
const InterpFilterParams filter_params,
const int subpel_x_q4, int x_step_q4,
const int subpel_y_q4, int y_step_q4, int avg,
int bd) {
uint16_t *src = CONVERT_TO_SHORTPTR(src8);
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
int filter_size = filter_params.tap;
// temp's size is set to (maximum possible intermediate_height) *
// MAX_BLOCK_WIDTH
uint16_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
MAX_FILTER_TAP) *
MAX_BLOCK_WIDTH];
int temp_stride = MAX_BLOCK_WIDTH;
int intermediate_height =
(((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0;
int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0;
assert(w <= MAX_BLOCK_WIDTH);
assert(h <= MAX_BLOCK_HEIGHT);
......@@ -188,12 +238,31 @@ void vp10_highbd_convolve(const uint8_t *src8, int src_stride, uint8_t *dst8,
assert(x_step_q4 <= MAX_STEP);
assert(filter_params.tap <= MAX_FILTER_TAP);
highbd_convolve_horiz(
CONVERT_TO_SHORTPTR(src8 - src_stride * (filter_size / 2 - 1)),
src_stride, temp, temp_stride, w, intermediate_height, filter_params,
subpel_x_q4, x_step_q4, 0, bd);
highbd_convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride,
CONVERT_TO_SHORTPTR(dst8), dst_stride, w, h,
filter_params, subpel_y_q4, y_step_q4, avg, bd);
if (ignore_horiz && ignore_vert) {
highbd_convolve_copy(src, src_stride, dst, dst_stride, w, h, avg, bd);
} else if (ignore_vert) {
highbd_convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params,
subpel_x_q4, x_step_q4, avg, bd);
} else if (ignore_horiz) {
highbd_convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params,
subpel_y_q4, y_step_q4, avg, bd);
} else {
// temp's size is set to (maximum possible intermediate_height) *
// MAX_BLOCK_WIDTH
uint16_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
MAX_FILTER_TAP) *
MAX_BLOCK_WIDTH];
int temp_stride = MAX_BLOCK_WIDTH;
int intermediate_height =
(((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
highbd_convolve_horiz(src - src_stride * (filter_size / 2 - 1), src_stride,
temp, temp_stride, w, intermediate_height,
filter_params, subpel_x_q4, x_step_q4, 0, bd);
highbd_convolve_vert(temp + temp_stride * (filter_size / 2 - 1),
temp_stride, dst, dst_stride, w, h, filter_params,
subpel_y_q4, y_step_q4, avg, bd);
}
}
#endif // CONFIG_VP9_HIGHBITDEPTH
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment