Commit 302d2c18 authored by Steinar Midtskogen
Browse files

Let od_dering() handle 16 to 8 bit conversion

Change-Id: Ief5df3d5b1b81f09190d34022a3cb7d500992da2
parent 5fe79db3
......@@ -78,44 +78,6 @@ int sb_compute_dering_list(const AV1_COMMON *const cm, int mi_row, int mi_col,
return count;
}
/* Narrows one 8x8 block of 16-bit samples into an 8-bit destination.
 * dstride and sstride are the row pitches, in elements, of dst and src.
 * Every sample is cast to uint8_t, so only its low 8 bits are stored. */
static INLINE void copy_8x8_16bit_to_8bit(uint8_t *dst, int dstride,
                                          uint16_t *src, int sstride) {
  int row;
  for (row = 0; row < 8; row++) {
    uint8_t *out = &dst[row * dstride];
    const uint16_t *in = &src[row * sstride];
    int col;
    for (col = 0; col < 8; col++) {
      out[col] = (uint8_t)in[col];
    }
  }
}
/* Narrows one 4x4 block of 16-bit samples into an 8-bit destination.
 * dstride and sstride are the row pitches, in elements, of dst and src.
 * Every sample is cast to uint8_t, so only its low 8 bits are stored. */
static INLINE void copy_4x4_16bit_to_8bit(uint8_t *dst, int dstride,
                                          uint16_t *src, int sstride) {
  int row;
  for (row = 0; row < 4; row++) {
    uint8_t *out = &dst[row * dstride];
    const uint16_t *in = &src[row * sstride];
    out[0] = (uint8_t)in[0];
    out[1] = (uint8_t)in[1];
    out[2] = (uint8_t)in[2];
    out[3] = (uint8_t)in[3];
  }
}
/* TODO: Optimize this function for SSE. */
/* Copies every block listed in dlist from the 16-bit buffer src into the
 * 8-bit plane dst, truncating each sample to uint8_t.
 *
 * dst/dstride   destination plane and its row pitch in bytes.
 * src           source buffer; block number bi starts at element
 *               bi << (2 * bsize) and is read with a row pitch of 1 << bsize.
 * dlist         dering_count entries giving each block's (by, bx) position.
 * bsize         3 selects 8x8 blocks; any other value takes the 4x4 path. */
void copy_dering_16bit_to_8bit(uint8_t *dst, int dstride, uint16_t *src,
                               dering_list *dlist, int dering_count,
                               int bsize) {
  const int is_8x8 = (bsize == 3);
  /* Block positions are scaled by the block dimension of the chosen path. */
  const int pos_shift = is_8x8 ? 3 : 2;
  int idx;
  for (idx = 0; idx < dering_count; idx++) {
    const int by = dlist[idx].by;
    const int bx = dlist[idx].bx;
    uint8_t *out = &dst[(by << pos_shift) * dstride + (bx << pos_shift)];
    uint16_t *in = &src[idx << 2 * bsize];
    if (is_8x8) {
      copy_8x8_16bit_to_8bit(out, dstride, in, 1 << bsize);
    } else {
      copy_4x4_16bit_to_8bit(out, dstride, in, 1 << bsize);
    }
  }
}
/* TODO: Optimize this function for SSE. */
static void copy_sb8_16(UNUSED AV1_COMMON *cm, uint16_t *dst, int dstride,
const uint8_t *src, int src_voffset, int src_hoffset,
......@@ -384,27 +346,28 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
threshold = level << coeff_shift;
if (threshold == 0 && clpf_strength == 0) continue;
od_dering(dst,
&src[OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER],
dec[pli], dir, NULL, var, pli, dlist, dering_count, threshold,
clpf_strength, clpf_damping, coeff_shift, 0);
#if CONFIG_AOM_HIGHBITDEPTH
if (cm->use_highbitdepth) {
copy_dering_16bit_to_16bit(
&CONVERT_TO_SHORTPTR(
xd->plane[pli]
.dst.buf)[xd->plane[pli].dst.stride *
(MAX_MIB_SIZE * sbr << bsize[pli]) +
(sbc * MAX_MIB_SIZE << bsize[pli])],
xd->plane[pli].dst.stride, dst, dlist, dering_count,
3 - dec[pli]);
od_dering((uint8_t *)&CONVERT_TO_SHORTPTR(
xd->plane[pli]
.dst.buf)[xd->plane[pli].dst.stride *
(MAX_MIB_SIZE * sbr << bsize[pli]) +
(sbc * MAX_MIB_SIZE << bsize[pli])],
xd->plane[pli].dst.stride, dst,
&src[OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER],
dec[pli], dir, NULL, var, pli, dlist, dering_count,
threshold, clpf_strength, clpf_damping, coeff_shift, 0, 1);
} else {
#endif
copy_dering_16bit_to_8bit(
od_dering(
&xd->plane[pli].dst.buf[xd->plane[pli].dst.stride *
(MAX_MIB_SIZE * sbr << bsize[pli]) +
(sbc * MAX_MIB_SIZE << bsize[pli])],
xd->plane[pli].dst.stride, dst, dlist, dering_count, bsize[pli]);
xd->plane[pli].dst.stride, dst,
&src[OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER],
dec[pli], dir, NULL, var, pli, dlist, dering_count, threshold,
clpf_strength, clpf_damping, coeff_shift, 0, 0);
#if CONFIG_AOM_HIGHBITDEPTH
}
#endif
......
......@@ -239,12 +239,50 @@ void copy_dering_16bit_to_16bit(uint16_t *dst, int dstride, uint16_t *src,
}
}
void od_dering(uint16_t *y, uint16_t *in, int xdec,
/* Narrows one 8x8 block of 16-bit samples into an 8-bit destination.
 * dstride and sstride are the row pitches, in elements, of dst and src.
 * Every sample is cast to uint8_t, so only its low 8 bits are stored. */
static INLINE void copy_8x8_16bit_to_8bit(uint8_t *dst, int dstride,
                                          uint16_t *src, int sstride) {
  int row;
  for (row = 0; row < 8; row++) {
    uint8_t *out = &dst[row * dstride];
    const uint16_t *in = &src[row * sstride];
    int col;
    for (col = 0; col < 8; col++) {
      out[col] = (uint8_t)in[col];
    }
  }
}
/* Narrows one 4x4 block of 16-bit samples into an 8-bit destination.
 * dstride and sstride are the row pitches, in elements, of dst and src.
 * Every sample is cast to uint8_t, so only its low 8 bits are stored. */
static INLINE void copy_4x4_16bit_to_8bit(uint8_t *dst, int dstride,
                                          uint16_t *src, int sstride) {
  int row;
  for (row = 0; row < 4; row++) {
    uint8_t *out = &dst[row * dstride];
    const uint16_t *in = &src[row * sstride];
    out[0] = (uint8_t)in[0];
    out[1] = (uint8_t)in[1];
    out[2] = (uint8_t)in[2];
    out[3] = (uint8_t)in[3];
  }
}
/* TODO: Optimize this function for SSE. */
/* Copies every block listed in dlist from the 16-bit buffer src into the
 * 8-bit plane dst, truncating each sample to uint8_t.
 *
 * dst/dstride   destination plane and its row pitch in bytes.
 * src           source buffer; block number bi starts at element
 *               bi << (2 * bsize) and is read with a row pitch of 1 << bsize.
 * dlist         dering_count entries giving each block's (by, bx) position.
 * bsize         3 selects 8x8 blocks; any other value takes the 4x4 path. */
static void copy_dering_16bit_to_8bit(uint8_t *dst, int dstride, uint16_t *src,
                                      dering_list *dlist, int dering_count,
                                      int bsize) {
  const int is_8x8 = (bsize == 3);
  /* Block positions are scaled by the block dimension of the chosen path. */
  const int pos_shift = is_8x8 ? 3 : 2;
  int idx;
  for (idx = 0; idx < dering_count; idx++) {
    const int by = dlist[idx].by;
    const int bx = dlist[idx].bx;
    uint8_t *out = &dst[(by << pos_shift) * dstride + (bx << pos_shift)];
    uint16_t *in = &src[idx << 2 * bsize];
    if (is_8x8) {
      copy_8x8_16bit_to_8bit(out, dstride, in, 1 << bsize);
    } else {
      copy_4x4_16bit_to_8bit(out, dstride, in, 1 << bsize);
    }
  }
}
void od_dering(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in, int xdec,
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int *dirinit,
int var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int pli,
dering_list *dlist, int dering_count, int threshold,
int clpf_strength, int clpf_damping, int coeff_shift,
int skip_dering) {
int skip_dering, int hbd) {
int bi;
int bx;
int by;
......@@ -292,18 +330,27 @@ void od_dering(uint16_t *y, uint16_t *in, int xdec,
}
}
}
if (!clpf_strength) return;
if (threshold && !skip_dering)
copy_dering_16bit_to_16bit(in, OD_FILT_BSTRIDE, y, dlist, dering_count,
bsize);
for (bi = 0; bi < dering_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
if (clpf_strength) {
if (threshold && !skip_dering)
copy_dering_16bit_to_16bit(in, OD_FILT_BSTRIDE, y, dlist, dering_count,
bsize);
for (bi = 0; bi < dering_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
(!threshold || (dir[by][bx] < 4 && dir[by][bx]) ? aom_clpf_block_hbd
: aom_clpf_hblock_hbd)(
in, &y[((bi - by) << 2 * bsize) - (bx << bsize)], OD_FILT_BSTRIDE,
1 << bsize, bx << bsize, by << bsize, 1 << bsize, 1 << bsize,
clpf_strength << coeff_shift, clpf_damping + coeff_shift);
(!threshold || (dir[by][bx] < 4 && dir[by][bx]) ? aom_clpf_block_hbd
: aom_clpf_hblock_hbd)(
in, &y[((bi - by) << 2 * bsize) - (bx << bsize)], OD_FILT_BSTRIDE,
1 << bsize, bx << bsize, by << bsize, 1 << bsize, 1 << bsize,
clpf_strength << coeff_shift, clpf_damping + coeff_shift);
}
}
if (dst) {
if (hbd) {
copy_dering_16bit_to_16bit((uint16_t *)dst, dstride, y, dlist,
dering_count, 3 - xdec);
} else {
copy_dering_16bit_to_8bit(dst, dstride, y, dlist, dering_count, bsize);
}
}
}
......@@ -46,12 +46,12 @@ void copy_dering_16bit_to_16bit(uint16_t *dst, int dstride, uint16_t *src,
dering_list *dlist, int dering_count,
int bsize);
void od_dering(uint16_t *y, uint16_t *in, int xdec,
void od_dering(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in, int xdec,
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int *dirinit,
int var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int pli,
dering_list *dlist, int dering_count, int threshold,
int clpf_strength, int clpf_damping, int coeff_shift,
int skip_dering);
int skip_dering, int hbd);
int od_filter_dering_direction_4x4_c(uint16_t *y, int ystride,
const uint16_t *in, int threshold,
int dir);
......
......@@ -263,14 +263,11 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
src[pli], (sbr * MAX_MIB_SIZE << bsize[pli]) - yoff,
(sbc * MAX_MIB_SIZE << bsize[pli]) - xoff, stride[pli],
ysize, xsize);
od_dering(tmp_dst, in, dec[pli], dir, &dirinit, var, pli, dlist,
od_dering(clpf_strength ? NULL : (uint8_t *)in, OD_FILT_BSTRIDE,
tmp_dst, in, dec[pli], dir, &dirinit, var, pli, dlist,
dering_count, threshold,
clpf_strength + (clpf_strength == 3), clpf_damping,
coeff_shift, clpf_strength != 0);
if (clpf_strength == 0) {
copy_dering_16bit_to_16bit(in, OD_FILT_BSTRIDE, tmp_dst, dlist,
dering_count, bsize[pli]);
}
coeff_shift, clpf_strength != 0, 1);
mse[pli][sb_count][gi] = compute_dering_mse(
ref_coeff[pli] +
(sbr * MAX_MIB_SIZE << bsize[pli]) * stride[pli] +
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment