Commit 401204a5 authored by David Barker

Fix dering filter when using 4:2:2 or 4:4:0 subsampling

Change-Id: Ifa5bef5123e13df9cad59c7c870b58e18c2ce213
parent 4b4e5eef
......@@ -54,8 +54,10 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
unsigned char *bskip;
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
int stride;
int bsize[3];
int dec[3];
int bsize_x[3];
int bsize_y[3];
int dec_x[3];
int dec_y[3];
int pli;
int coeff_shift = AOMMAX(cm->bit_depth - 8, 0);
nvsb = (cm->mi_rows + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
......@@ -63,14 +65,16 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
bskip = aom_malloc(sizeof(*bskip) * cm->mi_rows * cm->mi_cols);
av1_setup_dst_planes(xd->plane, frame, 0, 0);
for (pli = 0; pli < 3; pli++) {
dec[pli] = xd->plane[pli].subsampling_x;
bsize[pli] = 8 >> dec[pli];
dec_x[pli] = xd->plane[pli].subsampling_x;
dec_y[pli] = xd->plane[pli].subsampling_y;
bsize_x[pli] = 8 >> dec_x[pli];
bsize_y[pli] = 8 >> dec_y[pli];
}
stride = bsize[0] * cm->mi_cols;
stride = bsize_x[0] * cm->mi_cols;
for (pli = 0; pli < 3; pli++) {
src[pli] = aom_malloc(sizeof(*src) * cm->mi_rows * cm->mi_cols * 64);
for (r = 0; r < bsize[pli] * cm->mi_rows; ++r) {
for (c = 0; c < bsize[pli] * cm->mi_cols; ++c) {
for (r = 0; r < bsize_y[pli] * cm->mi_rows; ++r) {
for (c = 0; c < bsize_x[pli] * cm->mi_cols; ++c) {
#if CONFIG_AOM_HIGHBITDEPTH
if (cm->use_highbitdepth) {
src[pli][r * stride + c] = CONVERT_TO_SHORTPTR(
......@@ -114,27 +118,29 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
else
threshold = level << coeff_shift;
if (threshold == 0) continue;
od_dering(dst, MAX_MIB_SIZE * bsize[pli],
&src[pli][sbr * stride * bsize[pli] * MAX_MIB_SIZE +
sbc * bsize[pli] * MAX_MIB_SIZE],
stride, nhb, nvb, sbc, sbr, nhsb, nvsb, dec[pli], dir, pli,
od_dering(dst, MAX_MIB_SIZE * bsize_x[pli],
&src[pli][sbr * stride * bsize_x[pli] * MAX_MIB_SIZE +
sbc * bsize_x[pli] * MAX_MIB_SIZE],
stride, nhb, nvb, sbc, sbr, nhsb, nvsb, dec_x[pli],
dec_y[pli], dir, pli,
&bskip[MAX_MIB_SIZE * sbr * cm->mi_cols + MAX_MIB_SIZE * sbc],
cm->mi_cols, threshold, coeff_shift);
for (r = 0; r < bsize[pli] * nvb; ++r) {
for (c = 0; c < bsize[pli] * nhb; ++c) {
for (r = 0; r < bsize_y[pli] * nvb; ++r) {
for (c = 0; c < bsize_x[pli] * nhb; ++c) {
#if CONFIG_AOM_HIGHBITDEPTH
if (cm->use_highbitdepth) {
CONVERT_TO_SHORTPTR(xd->plane[pli].dst.buf)
[xd->plane[pli].dst.stride *
(bsize[pli] * MAX_MIB_SIZE * sbr + r) +
sbc * bsize[pli] * MAX_MIB_SIZE + c] =
dst[r * MAX_MIB_SIZE * bsize[pli] + c];
(bsize_x[pli] * MAX_MIB_SIZE * sbr + r) +
sbc * bsize_x[pli] * MAX_MIB_SIZE + c] =
dst[r * MAX_MIB_SIZE * bsize_x[pli] + c];
} else {
#endif
xd->plane[pli].dst.buf[xd->plane[pli].dst.stride *
(bsize[pli] * MAX_MIB_SIZE * sbr + r) +
sbc * bsize[pli] * MAX_MIB_SIZE + c] =
dst[r * MAX_MIB_SIZE * bsize[pli] + c];
xd->plane[pli]
.dst.buf[xd->plane[pli].dst.stride *
(bsize_x[pli] * MAX_MIB_SIZE * sbr + r) +
sbc * bsize_x[pli] * MAX_MIB_SIZE + c] =
dst[r * MAX_MIB_SIZE * bsize_x[pli] + c];
#if CONFIG_AOM_HIGHBITDEPTH
}
#endif
......
......@@ -183,6 +183,19 @@ int od_filter_dering_direction_4x4_c(int16_t *y, int ystride, const int16_t *in,
return (total_abs + 2) >> 2;
}
/* Directional dering pass for a 4-wide, 8-tall block, built from two calls
   to the 4x4 directional filter. The second call is offset by four rows in
   both the output (4 * ystride) and the input (4 * OD_FILT_BSTRIDE).
   Returns the sum of the values returned by the two 4x4 calls. */
int od_filter_dering_direction_4x8(int16_t *y, int ystride, const int16_t *in,
                                   int threshold, int dir) {
  int half;
  int total = 0;
  /* half == 0 is the upper 4x4 sub-block, half == 1 the one four rows
     below it. */
  for (half = 0; half < 2; half++) {
    total += od_filter_dering_direction_4x4(y + 4 * half * ystride, ystride,
                                            in + 4 * half * OD_FILT_BSTRIDE,
                                            threshold, dir);
  }
  return total;
}
/* Directional dering pass for an 8-wide, 4-tall block, built from two calls
   to the 4x4 directional filter. The second call is offset by four columns
   in both the output and the input. Returns the sum of the values returned
   by the two 4x4 calls. */
int od_filter_dering_direction_8x4(int16_t *y, int ystride, const int16_t *in,
                                   int threshold, int dir) {
  int half;
  int total = 0;
  /* half == 0 is the left 4x4 sub-block, half == 1 the one four columns to
     its right. */
  for (half = 0; half < 2; half++) {
    total += od_filter_dering_direction_4x4(y + 4 * half, ystride,
                                            in + 4 * half, threshold, dir);
  }
  return total;
}
/* Smooth in the direction orthogonal to what was detected. */
void od_filter_dering_orthogonal_8x8_c(int16_t *y, int ystride,
const int16_t *in, int threshold,
......@@ -241,6 +254,21 @@ void od_filter_dering_orthogonal_4x4_c(int16_t *y, int ystride,
}
}
/* Orthogonal smoothing pass for a 4-wide, 8-tall block: applies the 4x4
   orthogonal filter to the upper sub-block, then to the sub-block four rows
   below it (4 * ystride in the output, 4 * OD_FILT_BSTRIDE in the input). */
void od_filter_dering_orthogonal_4x8(int16_t *y, int ystride,
                                     const int16_t *in, int threshold,
                                     int dir) {
  int16_t *const y_lower = y + 4 * ystride;
  const int16_t *const in_lower = in + 4 * OD_FILT_BSTRIDE;

  od_filter_dering_orthogonal_4x4(y, ystride, in, threshold, dir);
  od_filter_dering_orthogonal_4x4(y_lower, ystride, in_lower, threshold, dir);
}
/* Orthogonal smoothing pass for an 8-wide, 4-tall block: applies the 4x4
   orthogonal filter to the left sub-block, then to the sub-block four
   columns to its right in both the output and the input. */
void od_filter_dering_orthogonal_8x4(int16_t *y, int ystride,
                                     const int16_t *in, int threshold,
                                     int dir) {
  int16_t *const y_right = y + 4;
  const int16_t *const in_right = in + 4;

  od_filter_dering_orthogonal_4x4(y, ystride, in, threshold, dir);
  od_filter_dering_orthogonal_4x4(y_right, ystride, in_right, threshold, dir);
}
/* This table approximates x^0.16 with the index being log2(x). It is clamped
to [0.5, 3]. The table is computed as:
round(256*min(3, max(.5, 1.08*(sqrt(2)*2.^([0:17]+8)/256/256).^.16))) */
......@@ -264,7 +292,7 @@ static INLINE int od_adjust_thresh(int threshold, int32_t var) {
void od_dering(int16_t *y, int ystride, const od_dering_in *x, int xstride,
int nhb, int nvb, int sbx, int sby, int nhsb, int nvsb, int xdec,
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int pli,
int ydec, int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int pli,
unsigned char *bskip, int skip_stride, int threshold,
int coeff_shift) {
int i;
......@@ -273,32 +301,35 @@ void od_dering(int16_t *y, int ystride, const od_dering_in *x, int xstride,
int by;
int16_t inbuf[OD_DERING_INBUF_SIZE];
int16_t *in;
int bsize;
int bsize_x = 3 - xdec;
int bsize_y = 3 - ydec;
int32_t var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS];
int filter2_thresh[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS];
od_filter_dering_direction_func filter_dering_direction[OD_DERINGSIZES] = {
od_filter_dering_direction_4x4, od_filter_dering_direction_8x8
od_filter_dering_direction_8x8, od_filter_dering_direction_8x4,
od_filter_dering_direction_4x8, od_filter_dering_direction_4x4
};
od_filter_dering_orthogonal_func filter_dering_orthogonal[OD_DERINGSIZES] = {
od_filter_dering_orthogonal_4x4, od_filter_dering_orthogonal_8x8
od_filter_dering_orthogonal_8x8, od_filter_dering_orthogonal_8x4,
od_filter_dering_orthogonal_4x8, od_filter_dering_orthogonal_4x4
};
bsize = 3 - xdec;
int filter_idx = xdec*2 + ydec;
in = inbuf + OD_FILT_BORDER * OD_FILT_BSTRIDE + OD_FILT_BORDER;
/* We avoid filtering the pixels for which some of the pixels to average
are outside the frame. We could change the filter instead, but it would
add special cases for any future vectorization. */
for (i = 0; i < OD_DERING_INBUF_SIZE; i++) inbuf[i] = OD_DERING_VERY_LARGE;
for (i = -OD_FILT_BORDER * (sby != 0);
i < (nvb << bsize) + OD_FILT_BORDER * (sby != nvsb - 1); i++) {
i < (nvb << bsize_y) + OD_FILT_BORDER * (sby != nvsb - 1); i++) {
for (j = -OD_FILT_BORDER * (sbx != 0);
j < (nhb << bsize) + OD_FILT_BORDER * (sbx != nhsb - 1); j++) {
j < (nhb << bsize_x) + OD_FILT_BORDER * (sbx != nhsb - 1); j++) {
in[i * OD_FILT_BSTRIDE + j] = x[i * xstride + j];
}
}
/* Assume deringing filter is sparsely applied, so do one large copy rather
than small copies later if deringing is skipped. */
for (i = 0; i < nvb << bsize; i++) {
for (j = 0; j < nhb << bsize; j++) {
for (i = 0; i < nvb << bsize_y; i++) {
for (j = 0; j < nhb << bsize_x; j++) {
y[i * ystride + j] = in[i * OD_FILT_BSTRIDE + j];
}
}
......@@ -316,9 +347,9 @@ void od_dering(int16_t *y, int ystride, const od_dering_in *x, int xstride,
to be a little bit more aggressive on pure horizontal/vertical
since the ringing there tends to be directional, so it doesn't
get removed by the directional filtering. */
filter2_thresh[by][bx] = (filter_dering_direction[bsize - OD_LOG_BSIZE0])(
&y[(by * ystride << bsize) + (bx << bsize)], ystride,
&in[(by * OD_FILT_BSTRIDE << bsize) + (bx << bsize)],
filter2_thresh[by][bx] = (filter_dering_direction[filter_idx])(
&y[(by * ystride << bsize_y) + (bx << bsize_x)], ystride,
&in[(by * OD_FILT_BSTRIDE << bsize_y) + (bx << bsize_x)],
od_adjust_thresh(threshold, var[by][bx]), dir[by][bx]);
}
}
......@@ -326,25 +357,25 @@ void od_dering(int16_t *y, int ystride, const od_dering_in *x, int xstride,
for (by = 0; by < nvb; by++) {
for (bx = 0; bx < nhb; bx++) {
if (bskip[by * skip_stride + bx]) continue;
filter2_thresh[by][bx] = (filter_dering_direction[bsize - OD_LOG_BSIZE0])(
&y[(by * ystride << bsize) + (bx << bsize)], ystride,
&in[(by * OD_FILT_BSTRIDE << bsize) + (bx << bsize)], threshold,
filter2_thresh[by][bx] = (filter_dering_direction[filter_idx])(
&y[(by * ystride << bsize_y) + (bx << bsize_x)], ystride,
&in[(by * OD_FILT_BSTRIDE << bsize_y) + (bx << bsize_x)], threshold,
dir[by][bx]);
}
}
}
for (i = 0; i < nvb << bsize; i++) {
for (j = 0; j < nhb << bsize; j++) {
for (i = 0; i < nvb << bsize_y; i++) {
for (j = 0; j < nhb << bsize_x; j++) {
in[i * OD_FILT_BSTRIDE + j] = y[i * ystride + j];
}
}
for (by = 0; by < nvb; by++) {
for (bx = 0; bx < nhb; bx++) {
if (bskip[by * skip_stride + bx] || filter2_thresh[by][bx] == 0) continue;
(filter_dering_orthogonal[bsize - OD_LOG_BSIZE0])(
&y[(by * ystride << bsize) + (bx << bsize)], ystride,
&in[(by * OD_FILT_BSTRIDE << bsize) + (bx << bsize)], filter2_thresh[by][bx],
dir[by][bx]);
(filter_dering_orthogonal[filter_idx])(
&y[(by * ystride << bsize_y) + (bx << bsize_x)], ystride,
&in[(by * OD_FILT_BSTRIDE << bsize_y) + (bx << bsize_x)],
filter2_thresh[by][bx], dir[by][bx]);
}
}
}
......@@ -19,7 +19,7 @@
typedef int16_t od_dering_in;
#endif
#define OD_DERINGSIZES (2)
#define OD_DERINGSIZES (4)
#define OD_DERING_NBLOCKS (OD_BSIZE_MAX / 8)
......@@ -36,16 +36,24 @@ typedef void (*od_filter_dering_orthogonal_func)(int16_t *y, int ystride,
int threshold, int dir);
void od_dering(int16_t *y, int ystride, const od_dering_in *x, int xstride,
int nvb, int nhb, int sbx, int sby, int nhsb, int nvsb, int xdec,
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int pli,
int ydec, int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int pli,
unsigned char *bskip, int skip_stride, int threshold,
int coeff_shift);
int od_filter_dering_direction_4x4_c(int16_t *y, int ystride, const int16_t *in,
int threshold, int dir);
int od_filter_dering_direction_4x8(int16_t *y, int ystride, const int16_t *in,
int threshold, int dir);
int od_filter_dering_direction_8x4(int16_t *y, int ystride, const int16_t *in,
int threshold, int dir);
int od_filter_dering_direction_8x8_c(int16_t *y, int ystride, const int16_t *in,
int threshold, int dir);
void od_filter_dering_orthogonal_4x4_c(int16_t *y, int ystride,
const int16_t *in, int threshold,
int dir);
void od_filter_dering_orthogonal_4x8(int16_t *y, int ystride, const int16_t *in,
int threshold, int dir);
void od_filter_dering_orthogonal_8x4(int16_t *y, int ystride, const int16_t *in,
int threshold, int dir);
void od_filter_dering_orthogonal_8x8_c(int16_t *y, int ystride,
const int16_t *in, int threshold,
int dir);
......
......@@ -115,7 +115,7 @@ int av1_dering_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
od_dering(dst, MAX_MIB_SIZE * bsize[0],
&src[sbr * stride * bsize[0] * MAX_MIB_SIZE +
sbc * bsize[0] * MAX_MIB_SIZE],
cm->mi_cols * bsize[0], nhb, nvb, sbc, sbr, nhsb, nvsb, 0,
cm->mi_cols * bsize[0], nhb, nvb, sbc, sbr, nhsb, nvsb, 0, 0,
dir, 0,
&bskip[MAX_MIB_SIZE * sbr * cm->mi_cols + MAX_MIB_SIZE * sbc],
cm->mi_cols, threshold, coeff_shift);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment