Commit 94493e60 authored by Angie Chiang, committed by Gerrit Code Review

Merge "Fix 12 TAP convolution bug" into nextgenv2

parents af3a8381 1e403064
......@@ -60,38 +60,42 @@ TEST(VP10ConvolveTest, vp10_convolve) {
int dst_stride = 1;
int x_step_q4 = 16;
int y_step_q4 = 16;
int subpel_x_q4 = 3;
int subpel_y_q4 = 2;
int avg = 0;
int w = 1;
int h = 1;
int subpel_x_q4;
int subpel_y_q4;
for (int i = 0; i < filter_size * filter_size; i++) {
src[i] = rnd.Rand16() % (1 << 8);
}
vp10_convolve(src + src_stride * filter_center + filter_center, src_stride,
dst, dst_stride, w, h, filter_params, subpel_x_q4, x_step_q4,
subpel_y_q4, y_step_q4, avg);
const int16_t* x_filter =
vp10_get_interp_filter_kernel(filter_params, subpel_x_q4);
const int16_t* y_filter =
vp10_get_interp_filter_kernel(filter_params, subpel_y_q4);
int temp[12];
int dst_ref = 0;
for (int r = 0; r < filter_size; r++) {
temp[r] = 0;
for (int c = 0; c < filter_size; c++) {
temp[r] += x_filter[c] * src[r * filter_size + c];
for (subpel_x_q4 = 0; subpel_x_q4 < 16; subpel_x_q4++) {
for (subpel_y_q4 = 0; subpel_y_q4 < 16; subpel_y_q4++) {
vp10_convolve(src + src_stride * filter_center + filter_center,
src_stride, dst, dst_stride, w, h, filter_params,
subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg);
const int16_t* x_filter =
vp10_get_interp_filter_kernel(filter_params, subpel_x_q4);
const int16_t* y_filter =
vp10_get_interp_filter_kernel(filter_params, subpel_y_q4);
int temp[12];
int dst_ref = 0;
for (int r = 0; r < filter_size; r++) {
temp[r] = 0;
for (int c = 0; c < filter_size; c++) {
temp[r] += x_filter[c] * src[r * filter_size + c];
}
temp[r] = clip_pixel(ROUND_POWER_OF_TWO(temp[r], FILTER_BITS));
dst_ref += temp[r] * y_filter[r];
}
dst_ref = clip_pixel(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS));
EXPECT_EQ(dst[0], dst_ref);
}
temp[r] = clip_pixel(ROUND_POWER_OF_TWO(temp[r], FILTER_BITS));
dst_ref += temp[r] * y_filter[r];
}
dst_ref = clip_pixel(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS));
EXPECT_EQ(dst[0], dst_ref);
}
TEST(VP10ConvolveTest, vp10_convolve_avg) {
......@@ -110,13 +114,14 @@ TEST(VP10ConvolveTest, vp10_convolve_avg) {
int dst_stride = 1;
int x_step_q4 = 16;
int y_step_q4 = 16;
int subpel_x_q4 = 3;
int subpel_y_q4 = 2;
int avg = 0;
int w = 1;
int h = 1;
int subpel_x_q4;
int subpel_y_q4;
for (int i = 0; i < filter_size * filter_size; i++) {
src0[i] = rnd.Rand16() % (1 << 8);
src1[i] = rnd.Rand16() % (1 << 8);
......@@ -124,23 +129,29 @@ TEST(VP10ConvolveTest, vp10_convolve_avg) {
int offset = filter_size * filter_center + filter_center;
avg = 0;
vp10_convolve(src0 + offset, src_stride, dst0, dst_stride, w, h,
filter_params, subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4,
avg);
avg = 0;
vp10_convolve(src1 + offset, src_stride, dst1, dst_stride, w, h,
filter_params, subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4,
avg);
avg = 0;
vp10_convolve(src0 + offset, src_stride, dst, dst_stride, w, h, filter_params,
subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg);
avg = 1;
vp10_convolve(src1 + offset, src_stride, dst, dst_stride, w, h, filter_params,
subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg);
EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1));
for (subpel_x_q4 = 0; subpel_x_q4 < 16; subpel_x_q4++) {
for (subpel_y_q4 = 0; subpel_y_q4 < 16; subpel_y_q4++) {
avg = 0;
vp10_convolve(src0 + offset, src_stride, dst0, dst_stride, w, h,
filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
y_step_q4, avg);
avg = 0;
vp10_convolve(src1 + offset, src_stride, dst1, dst_stride, w, h,
filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
y_step_q4, avg);
avg = 0;
vp10_convolve(src0 + offset, src_stride, dst, dst_stride, w, h,
filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
y_step_q4, avg);
avg = 1;
vp10_convolve(src1 + offset, src_stride, dst, dst_stride, w, h,
filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
y_step_q4, avg);
EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1));
}
}
}
#if CONFIG_VP9_HIGHBITDEPTH
......@@ -157,40 +168,45 @@ TEST(VP10ConvolveTest, vp10_highbd_convolve) {
int dst_stride = 1;
int x_step_q4 = 16;
int y_step_q4 = 16;
int subpel_x_q4 = 8;
int subpel_y_q4 = 6;
int avg = 0;
int bd = 10;
int w = 1;
int h = 1;
int subpel_x_q4;
int subpel_y_q4;
for (int i = 0; i < filter_size * filter_size; i++) {
src[i] = rnd.Rand16() % (1 << bd);
}
vp10_highbd_convolve(
CONVERT_TO_BYTEPTR(src + src_stride * filter_center + filter_center),
src_stride, CONVERT_TO_BYTEPTR(dst), dst_stride, w, h, filter_params,
subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg, bd);
const int16_t* x_filter =
vp10_get_interp_filter_kernel(filter_params, subpel_x_q4);
const int16_t* y_filter =
vp10_get_interp_filter_kernel(filter_params, subpel_y_q4);
int temp[12];
int dst_ref = 0;
for (int r = 0; r < filter_size; r++) {
temp[r] = 0;
for (int c = 0; c < filter_size; c++) {
temp[r] += x_filter[c] * src[r * filter_size + c];
for (subpel_x_q4 = 0; subpel_x_q4 < 16; subpel_x_q4++) {
for (subpel_y_q4 = 0; subpel_y_q4 < 16; subpel_y_q4++) {
vp10_highbd_convolve(
CONVERT_TO_BYTEPTR(src + src_stride * filter_center + filter_center),
src_stride, CONVERT_TO_BYTEPTR(dst), dst_stride, w, h, filter_params,
subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg, bd);
const int16_t* x_filter =
vp10_get_interp_filter_kernel(filter_params, subpel_x_q4);
const int16_t* y_filter =
vp10_get_interp_filter_kernel(filter_params, subpel_y_q4);
int temp[12];
int dst_ref = 0;
for (int r = 0; r < filter_size; r++) {
temp[r] = 0;
for (int c = 0; c < filter_size; c++) {
temp[r] += x_filter[c] * src[r * filter_size + c];
}
temp[r] =
clip_pixel_highbd(ROUND_POWER_OF_TWO(temp[r], FILTER_BITS), bd);
dst_ref += temp[r] * y_filter[r];
}
dst_ref = clip_pixel_highbd(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS), bd);
EXPECT_EQ(dst[0], dst_ref);
}
temp[r] = clip_pixel_highbd(ROUND_POWER_OF_TWO(temp[r], FILTER_BITS), bd);
dst_ref += temp[r] * y_filter[r];
}
dst_ref = clip_pixel_highbd(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS), bd);
EXPECT_EQ(dst[0], dst_ref);
}
TEST(VP10ConvolveTest, vp10_highbd_convolve_avg) {
......@@ -209,42 +225,49 @@ TEST(VP10ConvolveTest, vp10_highbd_convolve_avg) {
int dst_stride = 1;
int x_step_q4 = 16;
int y_step_q4 = 16;
int subpel_x_q4 = 3;
int subpel_y_q4 = 2;
int avg = 0;
int bd = 10;
int w = 1;
int h = 1;
int subpel_x_q4;
int subpel_y_q4;
for (int i = 0; i < filter_size * filter_size; i++) {
src0[i] = rnd.Rand16() % (1 << bd);
src1[i] = rnd.Rand16() % (1 << bd);
}
int offset = filter_size * filter_center + filter_center;
avg = 0;
vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride,
CONVERT_TO_BYTEPTR(dst0), dst_stride, w, h,
filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
y_step_q4, avg, bd);
avg = 0;
vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride,
CONVERT_TO_BYTEPTR(dst1), dst_stride, w, h,
filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
y_step_q4, avg, bd);
avg = 0;
vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride,
CONVERT_TO_BYTEPTR(dst), dst_stride, w, h, filter_params,
subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg, bd);
avg = 1;
vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride,
CONVERT_TO_BYTEPTR(dst), dst_stride, w, h, filter_params,
subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg, bd);
EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1));
for (subpel_x_q4 = 0; subpel_x_q4 < 16; subpel_x_q4++) {
for (subpel_y_q4 = 0; subpel_y_q4 < 16; subpel_y_q4++) {
int offset = filter_size * filter_center + filter_center;
avg = 0;
vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride,
CONVERT_TO_BYTEPTR(dst0), dst_stride, w, h,
filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
y_step_q4, avg, bd);
avg = 0;
vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride,
CONVERT_TO_BYTEPTR(dst1), dst_stride, w, h,
filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
y_step_q4, avg, bd);
avg = 0;
vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride,
CONVERT_TO_BYTEPTR(dst), dst_stride, w, h,
filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
y_step_q4, avg, bd);
avg = 1;
vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride,
CONVERT_TO_BYTEPTR(dst), dst_stride, w, h,
filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
y_step_q4, avg, bd);
EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1));
}
}
}
#endif // CONFIG_VP9_HIGHBITDEPTH
} // namespace
#include <assert.h>
#include <string.h>
#include "vp10/common/filter.h"
#include "vpx_dsp/vpx_dsp_common.h"
......@@ -69,22 +70,35 @@ static void convolve_vert(const uint8_t *src, int src_stride, uint8_t *dst,
}
}
// Handles a w x h block of 8-bit pixels without applying any filter taps.
//   avg == 0: each of the h rows is copied from src to dst (w bytes per row).
//   avg != 0: each dst pixel is replaced by
//             clip_pixel(ROUND_POWER_OF_TWO(dst + src, 1)), i.e. the existing
//             dst value is combined with the co-located src value.
// src_stride and dst_stride are the per-row pointer advances for src and dst.
static void convolve_copy(const uint8_t *src, int src_stride, uint8_t *dst,
                          int dst_stride, int w, int h, int avg) {
  int row;

  if (avg != 0) {
    // Blend path: dst is read before it is overwritten, one pixel at a time.
    for (row = 0; row < h; ++row) {
      int col;
      for (col = 0; col < w; ++col) {
        dst[col] = clip_pixel(ROUND_POWER_OF_TWO(dst[col] + src[col], 1));
      }
      src += src_stride;
      dst += dst_stride;
    }
    return;
  }

  // Plain copy path: one memcpy per row.
  for (row = 0; row < h; ++row) {
    memcpy(dst, src, w);
    src += src_stride;
    dst += dst_stride;
  }
}
void vp10_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
const InterpFilterParams filter_params,
const int subpel_x_q4, int x_step_q4, const int subpel_y_q4,
int y_step_q4, int avg) {
int filter_size = filter_params.tap;
// temp's size is set to (maximum possible intermediate_height) *
// MAX_BLOCK_WIDTH
uint8_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
MAX_FILTER_TAP) *
MAX_BLOCK_WIDTH];
int temp_stride = MAX_BLOCK_WIDTH;
int intermediate_height =
(((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0;
int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0;
assert(w <= MAX_BLOCK_WIDTH);
assert(h <= MAX_BLOCK_HEIGHT);
......@@ -92,11 +106,31 @@ void vp10_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
assert(x_step_q4 <= MAX_STEP);
assert(filter_params.tap <= MAX_FILTER_TAP);
convolve_horiz(src - src_stride * (filter_size / 2 - 1), src_stride, temp,
temp_stride, w, intermediate_height, filter_params,
subpel_x_q4, x_step_q4, 0);
convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride, dst,
dst_stride, w, h, filter_params, subpel_y_q4, y_step_q4, avg);
if (ignore_horiz && ignore_vert) {
convolve_copy(src, src_stride, dst, dst_stride, w, h, avg);
} else if (ignore_vert) {
convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params,
subpel_x_q4, x_step_q4, avg);
} else if (ignore_horiz) {
convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params,
subpel_y_q4, y_step_q4, avg);
} else {
// temp's size is set to (maximum possible intermediate_height) *
// MAX_BLOCK_WIDTH
uint8_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
MAX_FILTER_TAP) *
MAX_BLOCK_WIDTH];
int temp_stride = MAX_BLOCK_WIDTH;
int intermediate_height =
(((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
convolve_horiz(src - src_stride * (filter_size / 2 - 1), src_stride, temp,
temp_stride, w, intermediate_height, filter_params,
subpel_x_q4, x_step_q4, 0);
convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride, dst,
dst_stride, w, h, filter_params, subpel_y_q4, y_step_q4, avg);
}
}
#if CONFIG_VP9_HIGHBITDEPTH
......@@ -164,23 +198,39 @@ static void highbd_convolve_vert(const uint16_t *src, int src_stride,
}
}
// Handles a w x h block of 16-bit samples without applying any filter taps.
//   avg == 0: each of the h rows is copied from src to dst
//             (w * sizeof(*src) bytes per row); bd is not used on this path.
//   avg != 0: each dst sample is replaced by
//             clip_pixel_highbd(ROUND_POWER_OF_TWO(dst + src, 1), bd).
// src_stride and dst_stride are added to the uint16_t pointers once per row,
// so they are counted in samples rather than bytes.
static void highbd_convolve_copy(const uint16_t *src, int src_stride,
                                 uint16_t *dst, int dst_stride, int w, int h,
                                 int avg, int bd) {
  int row;

  if (avg != 0) {
    // Blend path: dst is read before it is overwritten, one sample at a time.
    for (row = 0; row < h; ++row) {
      int col;
      for (col = 0; col < w; ++col) {
        dst[col] =
            clip_pixel_highbd(ROUND_POWER_OF_TWO(dst[col] + src[col], 1), bd);
      }
      src += src_stride;
      dst += dst_stride;
    }
    return;
  }

  // Plain copy path: one memcpy per row.
  for (row = 0; row < h; ++row) {
    memcpy(dst, src, w * sizeof(*src));
    src += src_stride;
    dst += dst_stride;
  }
}
void vp10_highbd_convolve(const uint8_t *src8, int src_stride, uint8_t *dst8,
int dst_stride, int w, int h,
const InterpFilterParams filter_params,
const int subpel_x_q4, int x_step_q4,
const int subpel_y_q4, int y_step_q4, int avg,
int bd) {
uint16_t *src = CONVERT_TO_SHORTPTR(src8);
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
int filter_size = filter_params.tap;
// temp's size is set to (maximum possible intermediate_height) *
// MAX_BLOCK_WIDTH
uint16_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
MAX_FILTER_TAP) *
MAX_BLOCK_WIDTH];
int temp_stride = MAX_BLOCK_WIDTH;
int intermediate_height =
(((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0;
int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0;
assert(w <= MAX_BLOCK_WIDTH);
assert(h <= MAX_BLOCK_HEIGHT);
......@@ -188,12 +238,31 @@ void vp10_highbd_convolve(const uint8_t *src8, int src_stride, uint8_t *dst8,
assert(x_step_q4 <= MAX_STEP);
assert(filter_params.tap <= MAX_FILTER_TAP);
highbd_convolve_horiz(
CONVERT_TO_SHORTPTR(src8 - src_stride * (filter_size / 2 - 1)),
src_stride, temp, temp_stride, w, intermediate_height, filter_params,
subpel_x_q4, x_step_q4, 0, bd);
highbd_convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride,
CONVERT_TO_SHORTPTR(dst8), dst_stride, w, h,
filter_params, subpel_y_q4, y_step_q4, avg, bd);
if (ignore_horiz && ignore_vert) {
highbd_convolve_copy(src, src_stride, dst, dst_stride, w, h, avg, bd);
} else if (ignore_vert) {
highbd_convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params,
subpel_x_q4, x_step_q4, avg, bd);
} else if (ignore_horiz) {
highbd_convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params,
subpel_y_q4, y_step_q4, avg, bd);
} else {
// temp's size is set to (maximum possible intermediate_height) *
// MAX_BLOCK_WIDTH
uint16_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
MAX_FILTER_TAP) *
MAX_BLOCK_WIDTH];
int temp_stride = MAX_BLOCK_WIDTH;
int intermediate_height =
(((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
highbd_convolve_horiz(src - src_stride * (filter_size / 2 - 1), src_stride,
temp, temp_stride, w, intermediate_height,
filter_params, subpel_x_q4, x_step_q4, 0, bd);
highbd_convolve_vert(temp + temp_stride * (filter_size / 2 - 1),
temp_stride, dst, dst_stride, w, h, filter_params,
subpel_y_q4, y_step_q4, avg, bd);
}
}
#endif // CONFIG_VP9_HIGHBITDEPTH
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment