Commit 13b775a0, authored and committed by Jean-Marc Valin

De-sparsifying the deringing output buffer

No change in output

Change-Id: I940203975564aedca8734d6f74b013edb513f517
parent bc21cb21
......@@ -90,7 +90,7 @@ static INLINE void copy_4x4_16_8bit(uint8_t *dst, int dstride, int16_t *src, int
}
/* TODO: Optimize this function for SSE. */
void copy_blocks_16_8bit(uint8_t *dst, int dstride, int16_t *src, int sstride,
void copy_blocks_16_8bit(uint8_t *dst, int dstride, int16_t *src,
unsigned char (*bskip)[2], int dering_count, int bsize)
{
int bi, bx, by;
......@@ -100,7 +100,7 @@ void copy_blocks_16_8bit(uint8_t *dst, int dstride, int16_t *src, int sstride,
bx = bskip[bi][1];
copy_8x8_16_8bit(&dst[(by << 3) * dstride + (bx << 3)],
dstride,
&src[(by << 3) * sstride + (bx << 3)], sstride);
&src[bi << 2*bsize], 1 << bsize);
}
} else {
for (bi = 0; bi < dering_count; bi++) {
......@@ -108,7 +108,7 @@ void copy_blocks_16_8bit(uint8_t *dst, int dstride, int16_t *src, int sstride,
bx = bskip[bi][1];
copy_4x4_16_8bit(&dst[(by << 2) * dstride + (bx << 2)],
dstride,
&src[(by << 2) * sstride + (bx << 2)], sstride);
&src[bi << 2*bsize], 1 << bsize);
}
}
}
......@@ -182,7 +182,7 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
else
threshold = level << coeff_shift;
if (threshold == 0) continue;
od_dering(dst, MAX_MIB_SIZE * bsize[pli],
od_dering(dst,
&src[pli][sbr * stride * bsize[pli] * MAX_MIB_SIZE +
sbc * bsize[pli] * MAX_MIB_SIZE],
stride, nhb, nvb, sbc, sbr, nhsb, nvsb, dec[pli], dir, pli,
......@@ -194,7 +194,7 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
xd->plane[pli].dst.buf)[xd->plane[pli].dst.stride *
(bsize[pli] * MAX_MIB_SIZE * sbr) +
sbc * bsize[pli] * MAX_MIB_SIZE],
xd->plane[pli].dst.stride, dst, MAX_MIB_SIZE * bsize[pli], bskip,
xd->plane[pli].dst.stride, dst, bskip,
dering_count, 3 - dec[pli]);
} else {
#endif
......@@ -202,7 +202,7 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
&xd->plane[pli].dst.buf[xd->plane[pli].dst.stride *
(bsize[pli] * MAX_MIB_SIZE * sbr) +
sbc * bsize[pli] * MAX_MIB_SIZE],
xd->plane[pli].dst.stride, dst, MAX_MIB_SIZE * bsize[pli], bskip,
xd->plane[pli].dst.stride, dst, bskip,
dering_count, 3 - dec[pli]);
#if CONFIG_AOM_HIGHBITDEPTH
}
......
......@@ -277,7 +277,7 @@ static INLINE void copy_4x4_16bit(int16_t *dst, int dstride, int16_t *src, int s
}
/* TODO: Optimize this function for SSE. */
void copy_blocks_16bit(int16_t *dst, int dstride, int16_t *src, int sstride,
void copy_blocks_16bit(int16_t *dst, int dstride, int16_t *src,
unsigned char (*bskip)[2], int dering_count, int bsize)
{
int bi, bx, by;
......@@ -287,7 +287,7 @@ void copy_blocks_16bit(int16_t *dst, int dstride, int16_t *src, int sstride,
bx = bskip[bi][1];
copy_8x8_16bit(&dst[(by << 3) * dstride + (bx << 3)],
dstride,
&src[(by << 3) * sstride + (bx << 3)], sstride);
&src[bi << 2*bsize], 1 << bsize);
}
} else {
for (bi = 0; bi < dering_count; bi++) {
......@@ -295,12 +295,12 @@ void copy_blocks_16bit(int16_t *dst, int dstride, int16_t *src, int sstride,
bx = bskip[bi][1];
copy_4x4_16bit(&dst[(by << 2) * dstride + (bx << 2)],
dstride,
&src[(by << 2) * sstride + (bx << 2)], sstride);
&src[bi << 2*bsize], 1 << bsize);
}
}
}
void od_dering(int16_t *y, int ystride, const od_dering_in *x, int xstride,
void od_dering(int16_t *y, const od_dering_in *x, int xstride,
int nhb, int nvb, int sbx, int sby, int nhsb, int nvsb, int xdec,
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int pli,
unsigned char (*bskip)[2], int dering_count, int threshold,
......@@ -349,7 +349,7 @@ void od_dering(int16_t *y, int ystride, const od_dering_in *x, int xstride,
since the ringing there tends to be directional, so it doesn't
get removed by the directional filtering. */
filter2_thresh[by][bx] = (filter_dering_direction[bsize - OD_LOG_BSIZE0])(
&y[(by * ystride << bsize) + (bx << bsize)], ystride,
&y[bi << 2*bsize], 1 << bsize,
&in[(by * OD_FILT_BSTRIDE << bsize) + (bx << bsize)],
od_adjust_thresh(threshold, var), dir[by][bx]);
}
......@@ -358,19 +358,19 @@ void od_dering(int16_t *y, int ystride, const od_dering_in *x, int xstride,
by = bskip[bi][0];
bx = bskip[bi][1];
filter2_thresh[by][bx] = (filter_dering_direction[bsize - OD_LOG_BSIZE0])(
&y[(by * ystride << bsize) + (bx << bsize)], ystride,
&y[bi << 2*bsize], 1 << bsize,
&in[(by * OD_FILT_BSTRIDE << bsize) + (bx << bsize)], threshold,
dir[by][bx]);
}
}
copy_blocks_16bit(in, OD_FILT_BSTRIDE, y, ystride, bskip, dering_count,
copy_blocks_16bit(in, OD_FILT_BSTRIDE, y, bskip, dering_count,
bsize);
for (bi = 0; bi < dering_count; bi++) {
by = bskip[bi][0];
bx = bskip[bi][1];
if (filter2_thresh[by][bx] == 0) continue;
(filter_dering_orthogonal[bsize - OD_LOG_BSIZE0])(
&y[(by * ystride << bsize) + (bx << bsize)], ystride,
&y[bi << 2*bsize], 1 << bsize,
&in[(by * OD_FILT_BSTRIDE << bsize) + (bx << bsize)], filter2_thresh[by][bx],
dir[by][bx]);
}
......
......@@ -36,10 +36,10 @@ typedef int (*od_filter_dering_direction_func)(int16_t *y, int ystride,
typedef void (*od_filter_dering_orthogonal_func)(int16_t *y, int ystride,
const int16_t *in,
int threshold, int dir);
void copy_blocks_16bit(int16_t *dst, int dstride, int16_t *src, int sstride,
void copy_blocks_16bit(int16_t *dst, int dstride, int16_t *src,
unsigned char (*bskip)[2], int dering_count, int bsize);
void od_dering(int16_t *y, int ystride, const od_dering_in *x, int xstride,
void od_dering(int16_t *y, const od_dering_in *x, int xstride,
int nvb, int nhb, int sbx, int sby, int nhsb, int nvsb, int xdec,
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int pli,
unsigned char (*bskip)[2], int skip_stride, int threshold,
......
......@@ -98,6 +98,7 @@ int av1_dering_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
int best_gi;
int32_t best_mse = INT32_MAX;
int16_t dst[MAX_MIB_SIZE * MAX_MIB_SIZE * 8 * 8];
int16_t tmp_dst[MAX_MIB_SIZE * MAX_MIB_SIZE * 8 * 8];
nhb = AOMMIN(MAX_MIB_SIZE, cm->mi_cols - MAX_MIB_SIZE * sbc);
nvb = AOMMIN(MAX_MIB_SIZE, cm->mi_rows - MAX_MIB_SIZE * sbr);
if (sb_all_skip_out(cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE, bskip, &dering_count))
......@@ -115,13 +116,15 @@ int av1_dering_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
sbc * bsize[0] * MAX_MIB_SIZE + c];
}
}
od_dering(dst, MAX_MIB_SIZE * bsize[0],
od_dering(tmp_dst,
&src[sbr * stride * bsize[0] * MAX_MIB_SIZE +
sbc * bsize[0] * MAX_MIB_SIZE],
cm->mi_cols * bsize[0], nhb, nvb, sbc, sbr, nhsb, nvsb, 0,
dir, 0,
bskip,
dering_count, threshold, coeff_shift);
copy_blocks_16bit(dst, MAX_MIB_SIZE * bsize[0], tmp_dst, bskip,
dering_count, 3);
cur_mse = (int)compute_dist(
dst, MAX_MIB_SIZE * bsize[0],
&ref_coeff[sbr * stride * bsize[0] * MAX_MIB_SIZE +
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment