Commit c1ca945c authored by Yaowu Xu, committed by Gerrit Code Review

Merge changes from topic 'update_dering' into nextgenv2

* changes:
  Reformatting the deringing code
  Introducing OD_DERING_SIZE_LOG2 constant (3)
  Renaming deringing blockwise write-back functions to make code clearer
  Deringing refactoring: replace last_sbc with simpler dering_left flag
  Getting rid of the od_dering_in type
parents 7036aee1 39d92a07
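
The fourth change in the list above replaces the last_sbc bookkeeping with a dering_left flag: instead of remembering the column index of the last deringed superblock and testing sbc == last_sbc + 1, a boolean records whether the superblock immediately to the left was deringed. The following is a condensed, self-contained sketch of that control flow, not part of the commit; skip_superblock() and the column count are made-up stand-ins.

#include <stdio.h>

/* Hypothetical stand-in for "level == 0 || no blocks in this superblock
   need deringing". */
static int skip_superblock(int sbc) { return sbc == 2 || sbc == 3; }

int main(void) {
  const int nhsb = 6;  /* made-up number of superblock columns in a row */
  int dering_left = 1; /* at the row start there is nothing to reuse */
  for (int sbc = 0; sbc < nhsb; sbc++) {
    if (skip_superblock(sbc)) {
      dering_left = 0; /* the right neighbour cannot reuse saved pixels */
      continue;
    }
    /* In the real loop this decides cstart: when dering_left is 0 the left
       border must be re-read from the frame rather than from the column
       buffer saved by the previously deringed superblock. */
    printf("sbc %d: %s left border\n", sbc,
           dering_left ? "reuse saved" : "reload from frame");
    dering_left = 1; /* this superblock was deringed */
  }
  return 0;
}
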
@@ -9,8 +9,6 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
// clang-format off
#include <string.h>
#include <math.h>
@@ -48,7 +46,7 @@ int sb_all_skip(const AV1_COMMON *const cm, int mi_row, int mi_col) {
}
int sb_compute_dering_list(const AV1_COMMON *const cm, int mi_row, int mi_col,
dering_list *dlist) {
dering_list *dlist) {
int r, c;
int maxc, maxr;
MODE_INFO **grid;
@@ -72,58 +70,55 @@ int sb_compute_dering_list(const AV1_COMMON *const cm, int mi_row, int mi_col,
return count;
}
static INLINE void copy_8x8_16_8bit(uint8_t *dst, int dstride, int16_t *src, int sstride) {
static INLINE void copy_8x8_16bit_to_8bit(uint8_t *dst, int dstride,
int16_t *src, int sstride) {
int i, j;
for (i = 0; i < 8; i++)
for (j = 0; j < 8; j++)
dst[i * dstride + j] = src[i * sstride + j];
for (j = 0; j < 8; j++) dst[i * dstride + j] = src[i * sstride + j];
}
static INLINE void copy_4x4_16_8bit(uint8_t *dst, int dstride, int16_t *src, int sstride) {
static INLINE void copy_4x4_16bit_to_8bit(uint8_t *dst, int dstride,
int16_t *src, int sstride) {
int i, j;
for (i = 0; i < 4; i++)
for (j = 0; j < 4; j++)
dst[i * dstride + j] = src[i * sstride + j];
for (j = 0; j < 4; j++) dst[i * dstride + j] = src[i * sstride + j];
}
/* TODO: Optimize this function for SSE. */
void copy_blocks_16_8bit(uint8_t *dst, int dstride, int16_t *src,
dering_list *dlist, int dering_count, int bsize)
{
void copy_dering_16bit_to_8bit(uint8_t *dst, int dstride, int16_t *src,
dering_list *dlist, int dering_count,
int bsize) {
int bi, bx, by;
if (bsize == 3) {
for (bi = 0; bi < dering_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
copy_8x8_16_8bit(&dst[(by << 3) * dstride + (bx << 3)],
dstride,
&src[bi << 2*bsize], 1 << bsize);
copy_8x8_16bit_to_8bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
&src[bi << 2 * bsize], 1 << bsize);
}
} else {
for (bi = 0; bi < dering_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
copy_4x4_16_8bit(&dst[(by << 2) * dstride + (bx << 2)],
dstride,
&src[bi << 2*bsize], 1 << bsize);
copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
&src[bi << 2 * bsize], 1 << bsize);
}
}
}
/* TODO: Optimize this function for SSE. */
static void copy_sb8_16(AV1_COMMON *cm, int16_t *dst, int dstride,
const uint8_t *src, int src_voffset, int src_hoffset, int sstride,
int vsize, int hsize)
{
const uint8_t *src, int src_voffset, int src_hoffset,
int sstride, int vsize, int hsize) {
int r, c;
(void)cm;
#if CONFIG_AOM_HIGHBITDEPTH
if (cm->use_highbitdepth) {
const uint16_t *base = &CONVERT_TO_SHORTPTR(src)[src_voffset * sstride
+ src_hoffset];
const uint16_t *base =
&CONVERT_TO_SHORTPTR(src)[src_voffset * sstride + src_hoffset];
for (r = 0; r < vsize; r++) {
for (c = 0; c < hsize; c++) {
dst[r * dstride + c] = base[r*sstride + c];
dst[r * dstride + c] = base[r * sstride + c];
}
}
} else
@@ -132,7 +127,7 @@ static void copy_sb8_16(AV1_COMMON *cm, int16_t *dst, int dstride,
const uint8_t *base = &src[src_voffset * sstride + src_hoffset];
for (r = 0; r < vsize; r++) {
for (c = 0; c < hsize; c++) {
dst[r * dstride + c] = base[r*sstride + c];
dst[r * dstride + c] = base[r * sstride + c];
}
}
}
@@ -143,10 +138,10 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
int r, c;
int sbr, sbc;
int nhsb, nvsb;
od_dering_in src[OD_DERING_INBUF_SIZE];
int16_t src[OD_DERING_INBUF_SIZE];
int16_t *linebuf[3];
int16_t colbuf[3][OD_BSIZE_MAX + 2*OD_FILT_VBORDER][OD_FILT_HBORDER];
dering_list dlist[MAX_MIB_SIZE*MAX_MIB_SIZE];
int16_t colbuf[3][OD_BSIZE_MAX + 2 * OD_FILT_VBORDER][OD_FILT_HBORDER];
dering_list dlist[MAX_MIB_SIZE * MAX_MIB_SIZE];
unsigned char *row_dering, *prev_row_dering, *curr_row_dering;
int dering_count;
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
@@ -154,7 +149,7 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
int bsize[3];
int dec[3];
int pli;
int last_sbc;
int dering_left;
int coeff_shift = AOMMAX(cm->bit_depth - 8, 0);
int nplanes;
if (xd->plane[1].subsampling_x == xd->plane[1].subsampling_y &&
@@ -171,27 +166,26 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
curr_row_dering = prev_row_dering + nhsb + 2;
for (pli = 0; pli < nplanes; pli++) {
dec[pli] = xd->plane[pli].subsampling_x;
bsize[pli] = 3 - dec[pli];
bsize[pli] = OD_DERING_SIZE_LOG2 - dec[pli];
}
stride = (cm->mi_cols << bsize[0]) + 2*OD_FILT_HBORDER;
stride = (cm->mi_cols << bsize[0]) + 2 * OD_FILT_HBORDER;
for (pli = 0; pli < nplanes; pli++) {
linebuf[pli] = aom_malloc(sizeof(*linebuf) * OD_FILT_VBORDER * stride);
}
for (sbr = 0; sbr < nvsb; sbr++) {
last_sbc = -1;
for (pli = 0; pli < nplanes; pli++) {
for (r = 0; r < (MAX_MIB_SIZE << bsize[pli]) + 2*OD_FILT_VBORDER; r++) {
for (r = 0; r < (MAX_MIB_SIZE << bsize[pli]) + 2 * OD_FILT_VBORDER; r++) {
for (c = 0; c < OD_FILT_HBORDER; c++) {
colbuf[pli][r][c] = OD_DERING_VERY_LARGE;
}
}
}
dering_left = 1;
for (sbc = 0; sbc < nhsb; sbc++) {
int level;
int nhb, nvb;
int cstart = 0;
if (sbc != last_sbc + 1)
cstart = -OD_FILT_HBORDER;
if (!dering_left) cstart = -OD_FILT_HBORDER;
nhb = AOMMIN(MAX_MIB_SIZE, cm->mi_cols - MAX_MIB_SIZE * sbc);
nvb = AOMMIN(MAX_MIB_SIZE, cm->mi_rows - MAX_MIB_SIZE * sbr);
level = compute_level_from_index(
@@ -200,12 +194,14 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
->mbmi.dering_gain);
curr_row_dering[sbc] = 0;
if (level == 0 ||
(dering_count = sb_compute_dering_list(cm, sbr * MAX_MIB_SIZE,
sbc * MAX_MIB_SIZE, dlist)) == 0)
(dering_count = sb_compute_dering_list(
cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE, dlist)) == 0) {
dering_left = 0;
continue;
}
curr_row_dering[sbc] = 1;
for (pli = 0; pli < nplanes; pli++) {
int16_t dst[MAX_MIB_SIZE * MAX_MIB_SIZE * 8 * 8];
int16_t dst[OD_BSIZE_MAX * OD_BSIZE_MAX];
int threshold;
int coffset;
int rend, cend;
@@ -223,8 +219,8 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
OD_DERING_VERY_LARGE to avoid filtering with the outside. */
for (r = 0; r < rend + OD_FILT_VBORDER; r++) {
for (c = cend; c < (nhb << bsize[pli]) + OD_FILT_HBORDER; ++c) {
src[r * OD_FILT_BSTRIDE + c + OD_FILT_HBORDER]
= OD_DERING_VERY_LARGE;
src[r * OD_FILT_BSTRIDE + c + OD_FILT_HBORDER] =
OD_DERING_VERY_LARGE;
}
}
}
@@ -232,7 +228,7 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
/* On the last superblock row, fill in the bottom border with
OD_DERING_VERY_LARGE to avoid filtering with the outside. */
for (r = rend; r < rend + OD_FILT_VBORDER; r++) {
for (c = 0; c < (nhb << bsize[pli]) + 2*OD_FILT_HBORDER; c++) {
for (c = 0; c < (nhb << bsize[pli]) + 2 * OD_FILT_HBORDER; c++) {
src[(r + OD_FILT_VBORDER) * OD_FILT_BSTRIDE + c] =
OD_DERING_VERY_LARGE;
}
@@ -240,15 +236,18 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
}
/* Copy in the pixels we need from the current superblock for
deringing.*/
copy_sb8_16(cm, &src[OD_FILT_VBORDER*OD_FILT_BSTRIDE + OD_FILT_HBORDER
+ cstart], OD_FILT_BSTRIDE, xd->plane[pli].dst.buf,
copy_sb8_16(
cm,
&src[OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER + cstart],
OD_FILT_BSTRIDE, xd->plane[pli].dst.buf,
(MAX_MIB_SIZE << bsize[pli]) * sbr, coffset + cstart,
xd->plane[pli].dst.stride, rend, cend-cstart);
xd->plane[pli].dst.stride, rend, cend - cstart);
if (!prev_row_dering[sbc]) {
copy_sb8_16(cm, &src[OD_FILT_HBORDER], OD_FILT_BSTRIDE,
xd->plane[pli].dst.buf,
(MAX_MIB_SIZE << bsize[pli]) * sbr - OD_FILT_VBORDER, coffset,
xd->plane[pli].dst.stride, OD_FILT_VBORDER, nhb << bsize[pli]);
xd->plane[pli].dst.buf,
(MAX_MIB_SIZE << bsize[pli]) * sbr - OD_FILT_VBORDER,
coffset, xd->plane[pli].dst.stride, OD_FILT_VBORDER,
nhb << bsize[pli]);
} else if (sbr > 0) {
for (r = 0; r < OD_FILT_VBORDER; r++) {
for (c = 0; c < nhb << bsize[pli]; c++) {
@@ -265,11 +264,10 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
}
}
if (!prev_row_dering[sbc - 1]) {
copy_sb8_16(cm, src, OD_FILT_BSTRIDE,
xd->plane[pli].dst.buf,
(MAX_MIB_SIZE << bsize[pli]) * sbr - OD_FILT_VBORDER,
coffset - OD_FILT_HBORDER,
xd->plane[pli].dst.stride, OD_FILT_VBORDER, OD_FILT_HBORDER);
copy_sb8_16(cm, src, OD_FILT_BSTRIDE, xd->plane[pli].dst.buf,
(MAX_MIB_SIZE << bsize[pli]) * sbr - OD_FILT_VBORDER,
coffset - OD_FILT_HBORDER, xd->plane[pli].dst.stride,
OD_FILT_VBORDER, OD_FILT_HBORDER);
} else if (sbr > 0 && sbc > 0) {
for (r = 0; r < OD_FILT_VBORDER; r++) {
for (c = -OD_FILT_HBORDER; c < 0; c++) {
@@ -287,14 +285,14 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
}
if (!prev_row_dering[sbc + 1]) {
copy_sb8_16(cm, &src[OD_FILT_HBORDER + (nhb << bsize[pli])],
OD_FILT_BSTRIDE, xd->plane[pli].dst.buf,
(MAX_MIB_SIZE << bsize[pli]) * sbr - OD_FILT_VBORDER,
coffset + (nhb << bsize[pli]),
xd->plane[pli].dst.stride, OD_FILT_VBORDER, OD_FILT_HBORDER);
OD_FILT_BSTRIDE, xd->plane[pli].dst.buf,
(MAX_MIB_SIZE << bsize[pli]) * sbr - OD_FILT_VBORDER,
coffset + (nhb << bsize[pli]), xd->plane[pli].dst.stride,
OD_FILT_VBORDER, OD_FILT_HBORDER);
} else if (sbr > 0 && sbc < nhsb - 1) {
for (r = 0; r < OD_FILT_VBORDER; r++) {
for (c = nhb << bsize[pli];
c < (nhb << bsize[pli]) + OD_FILT_HBORDER; c++) {
c < (nhb << bsize[pli]) + OD_FILT_HBORDER; c++) {
src[r * OD_FILT_BSTRIDE + c + OD_FILT_HBORDER] =
linebuf[pli][r * stride + coffset + c];
}
@@ -302,13 +300,13 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
} else {
for (r = 0; r < OD_FILT_VBORDER; r++) {
for (c = nhb << bsize[pli];
c < (nhb << bsize[pli]) + OD_FILT_HBORDER; c++) {
c < (nhb << bsize[pli]) + OD_FILT_HBORDER; c++) {
src[r * OD_FILT_BSTRIDE + c + OD_FILT_HBORDER] =
OD_DERING_VERY_LARGE;
}
}
}
if (sbc == last_sbc + 1) {
if (dering_left) {
/* If we deringed the superblock on the left then we need to copy in
saved pixels. */
for (r = 0; r < rend + OD_FILT_VBORDER; r++) {
@@ -321,14 +319,14 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
for (c = 0; c < OD_FILT_HBORDER; c++) {
/* Saving pixels in case we need to dering the superblock on the
right. */
colbuf[pli][r][c] = src[r * OD_FILT_BSTRIDE + c
+ (nhb << bsize[pli])];
colbuf[pli][r][c] =
src[r * OD_FILT_BSTRIDE + c + (nhb << bsize[pli])];
}
}
copy_sb8_16(cm, &linebuf[pli][coffset], stride, xd->plane[pli].dst.buf,
(MAX_MIB_SIZE << bsize[pli]) * (sbr + 1) - OD_FILT_VBORDER, coffset,
xd->plane[pli].dst.stride, OD_FILT_VBORDER,
(nhb << bsize[pli]));
(MAX_MIB_SIZE << bsize[pli]) * (sbr + 1) - OD_FILT_VBORDER,
coffset, xd->plane[pli].dst.stride, OD_FILT_VBORDER,
(nhb << bsize[pli]));
/* FIXME: This is a temporary hack that uses more conservative
deringing for chroma. */
@@ -337,31 +335,31 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
else
threshold = level << coeff_shift;
if (threshold == 0) continue;
od_dering(dst, &src[OD_FILT_VBORDER * OD_FILT_BSTRIDE
+ OD_FILT_HBORDER],
od_dering(
dst, &src[OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER],
dec[pli], dir, pli, dlist, dering_count, threshold, coeff_shift);
#if CONFIG_AOM_HIGHBITDEPTH
if (cm->use_highbitdepth) {
copy_blocks_16bit(
(int16_t*)&CONVERT_TO_SHORTPTR(
xd->plane[pli].dst.buf)[xd->plane[pli].dst.stride *
(MAX_MIB_SIZE * sbr << bsize[pli]) +
(sbc * MAX_MIB_SIZE << bsize[pli])],
xd->plane[pli].dst.stride, dst, dlist,
dering_count, 3 - dec[pli]);
copy_dering_16bit_to_16bit(
(int16_t *)&CONVERT_TO_SHORTPTR(
xd->plane[pli]
.dst.buf)[xd->plane[pli].dst.stride *
(MAX_MIB_SIZE * sbr << bsize[pli]) +
(sbc * MAX_MIB_SIZE << bsize[pli])],
xd->plane[pli].dst.stride, dst, dlist, dering_count,
3 - dec[pli]);
} else {
#endif
copy_blocks_16_8bit(
copy_dering_16bit_to_8bit(
&xd->plane[pli].dst.buf[xd->plane[pli].dst.stride *
(MAX_MIB_SIZE * sbr << bsize[pli]) +
(sbc * MAX_MIB_SIZE << bsize[pli])],
xd->plane[pli].dst.stride, dst, dlist,
dering_count, 3 - dec[pli]);
(MAX_MIB_SIZE * sbr << bsize[pli]) +
(sbc * MAX_MIB_SIZE << bsize[pli])],
xd->plane[pli].dst.stride, dst, dlist, dering_count, bsize[pli]);
#if CONFIG_AOM_HIGHBITDEPTH
}
#endif
}
last_sbc = sbc;
dering_left = 1;
}
{
unsigned char *tmp;
......
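
A small self-contained illustration of how the new OD_DERING_SIZE_LOG2 constant (the second change in the list above) determines the per-plane block size computed above as bsize[pli] = OD_DERING_SIZE_LOG2 - dec[pli]. The 4:2:0 subsampling factors below are assumed for the example only.

#include <stdio.h>

#define OD_DERING_SIZE_LOG2 (3) /* log2 of the 8x8 luma deringing block */

int main(void) {
  /* Assumed 4:2:0 horizontal subsampling for planes Y, U, V. */
  const int subsampling_x[3] = { 0, 1, 1 };
  for (int pli = 0; pli < 3; pli++) {
    const int dec = subsampling_x[pli];
    const int bsize = OD_DERING_SIZE_LOG2 - dec; /* 3 for luma, 2 for chroma */
    printf("plane %d: %dx%d deringing blocks\n", pli, 1 << bsize, 1 << bsize);
  }
  return 0;
}
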
@@ -11,8 +11,6 @@
#ifndef AV1_COMMON_DERING_H_
#define AV1_COMMON_DERING_H_
// clang-format off
#include "av1/common/od_dering.h"
#include "av1/common/onyxc_int.h"
#include "aom/aom_integer.h"
@@ -33,7 +31,7 @@ extern "C" {
int compute_level_from_index(int global_level, int gi);
int sb_all_skip(const AV1_COMMON *const cm, int mi_row, int mi_col);
int sb_compute_dering_list(const AV1_COMMON *const cm, int mi_row, int mi_col,
dering_list *dlist);
dering_list *dlist);
void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
MACROBLOCKD *xd, int global_level);
......
@@ -12,8 +12,6 @@
#include "config.h"
#endif
// clang-format off
#include <stdlib.h>
#include <math.h>
#include "dering.h"
@@ -40,7 +38,7 @@ const int OD_DIRECTION_OFFSETS_TABLE[8][3] = {
in a particular direction. Since each direction has the same sum(x^2) term,
that term is never computed. See Section 2, step 2, of:
http://jmvalin.ca/notes/intra_paint.pdf */
int od_dir_find8_c(const od_dering_in *img, int stride, int32_t *var,
int od_dir_find8_c(const int16_t *img, int stride, int32_t *var,
int coeff_shift) {
int i;
int32_t cost[8] = { 0 };
@@ -258,40 +256,38 @@ static INLINE int od_adjust_thresh(int threshold, int32_t var) {
return (threshold * OD_THRESH_TABLE_Q8[OD_ILOG(v1)] + 128) >> 8;
}
static INLINE void copy_8x8_16bit(int16_t *dst, int dstride, int16_t *src, int sstride) {
static INLINE void copy_8x8_16bit_to_16bit(int16_t *dst, int dstride,
int16_t *src, int sstride) {
int i, j;
for (i = 0; i < 8; i++)
for (j = 0; j < 8; j++)
dst[i * dstride + j] = src[i * sstride + j];
for (j = 0; j < 8; j++) dst[i * dstride + j] = src[i * sstride + j];
}
static INLINE void copy_4x4_16bit(int16_t *dst, int dstride, int16_t *src, int sstride) {
static INLINE void copy_4x4_16bit_to_16bit(int16_t *dst, int dstride,
int16_t *src, int sstride) {
int i, j;
for (i = 0; i < 4; i++)
for (j = 0; j < 4; j++)
dst[i * dstride + j] = src[i * sstride + j];
for (j = 0; j < 4; j++) dst[i * dstride + j] = src[i * sstride + j];
}
/* TODO: Optimize this function for SSE. */
void copy_blocks_16bit(int16_t *dst, int dstride, int16_t *src,
dering_list *dlist, int dering_count, int bsize)
{
void copy_dering_16bit_to_16bit(int16_t *dst, int dstride, int16_t *src,
dering_list *dlist, int dering_count,
int bsize) {
int bi, bx, by;
if (bsize == 3) {
for (bi = 0; bi < dering_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
copy_8x8_16bit(&dst[(by << 3) * dstride + (bx << 3)],
dstride,
&src[bi << 2*bsize], 1 << bsize);
copy_8x8_16bit_to_16bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
&src[bi << 2 * bsize], 1 << bsize);
}
} else {
for (bi = 0; bi < dering_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
copy_4x4_16bit(&dst[(by << 2) * dstride + (bx << 2)],
dstride,
&src[bi << 2*bsize], 1 << bsize);
copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
&src[bi << 2 * bsize], 1 << bsize);
}
}
}
@@ -311,14 +307,14 @@ void od_dering(int16_t *y, int16_t *in, int xdec,
od_filter_dering_orthogonal_func filter_dering_orthogonal[OD_DERINGSIZES] = {
od_filter_dering_orthogonal_4x4, od_filter_dering_orthogonal_8x8
};
bsize = 3 - xdec;
bsize = OD_DERING_SIZE_LOG2 - xdec;
if (pli == 0) {
for (bi = 0; bi < dering_count; bi++) {
int32_t var;
by = dlist[bi].by;
bx = dlist[bi].bx;
dir[by][bx] = od_dir_find8(&in[8 * by * OD_FILT_BSTRIDE + 8 * bx], OD_FILT_BSTRIDE,
&var, coeff_shift);
dir[by][bx] = od_dir_find8(&in[8 * by * OD_FILT_BSTRIDE + 8 * bx],
OD_FILT_BSTRIDE, &var, coeff_shift);
/* Deringing orthogonal to the direction uses a tighter threshold
because we want to be conservative. We've presumably already
achieved some deringing, so the amount of change is expected
@@ -328,7 +324,7 @@ void od_dering(int16_t *y, int16_t *in, int xdec,
since the ringing there tends to be directional, so it doesn't
get removed by the directional filtering. */
filter2_thresh[by][bx] = (filter_dering_direction[bsize - OD_LOG_BSIZE0])(
&y[bi << 2*bsize], 1 << bsize,
&y[bi << 2 * bsize], 1 << bsize,
&in[(by * OD_FILT_BSTRIDE << bsize) + (bx << bsize)],
od_adjust_thresh(threshold, var), dir[by][bx]);
}
@@ -337,20 +333,20 @@ void od_dering(int16_t *y, int16_t *in, int xdec,
by = dlist[bi].by;
bx = dlist[bi].bx;
filter2_thresh[by][bx] = (filter_dering_direction[bsize - OD_LOG_BSIZE0])(
&y[bi << 2*bsize], 1 << bsize,
&y[bi << 2 * bsize], 1 << bsize,
&in[(by * OD_FILT_BSTRIDE << bsize) + (bx << bsize)], threshold,
dir[by][bx]);
}
}
copy_blocks_16bit(in, OD_FILT_BSTRIDE, y, dlist, dering_count,
bsize);
copy_dering_16bit_to_16bit(in, OD_FILT_BSTRIDE, y, dlist, dering_count,
bsize);
for (bi = 0; bi < dering_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
if (filter2_thresh[by][bx] == 0) continue;
(filter_dering_orthogonal[bsize - OD_LOG_BSIZE0])(
&y[bi << 2*bsize], 1 << bsize,
&in[(by * OD_FILT_BSTRIDE << bsize) + (bx << bsize)], filter2_thresh[by][bx],
dir[by][bx]);
&y[bi << 2 * bsize], 1 << bsize,
&in[(by * OD_FILT_BSTRIDE << bsize) + (bx << bsize)],
filter2_thresh[by][bx], dir[by][bx]);
}
}
@@ -12,17 +12,12 @@
#if !defined(_dering_H)
#define _dering_H (1)
// clang-format off
#include "odintrin.h"
#if defined(DAALA_ODINTRIN)
#include "filter.h"
typedef int16_t od_dering_in;
#endif
#define OD_DERINGSIZES (2)
#define OD_DERING_SIZE_LOG2 (3)
#define OD_DERING_NBLOCKS (OD_BSIZE_MAX / 8)
/* We need to buffer three vertical lines. */
@@ -49,8 +44,9 @@ typedef int (*od_filter_dering_direction_func)(int16_t *y, int ystride,
typedef void (*od_filter_dering_orthogonal_func)(int16_t *y, int ystride,
const int16_t *in,
int threshold, int dir);
void copy_blocks_16bit(int16_t *dst, int dstride, int16_t *src,
dering_list *dlist, int dering_count, int bsize);
void copy_dering_16bit_to_16bit(int16_t *dst, int dstride, int16_t *src,
dering_list *dlist, int dering_count,
int bsize);
void od_dering(int16_t *y, int16_t *in, int xdec,
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int pli,
......
@@ -31,8 +31,6 @@ extern "C" {
typedef int od_coeff;
typedef int16_t od_dering_in;
#define OD_DIVU_DMAX (1024)
extern uint32_t OD_DIVU_SMALL_CONSTS[OD_DIVU_DMAX][2];
......
@@ -9,8 +9,6 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
// clang-format off
#include <string.h>
#include <math.h>
@@ -41,9 +39,9 @@ int av1_dering_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
int r, c;
int sbr, sbc;
int nhsb, nvsb;
od_dering_in *src;
int16_t *src;
int16_t *ref_coeff;
dering_list dlist[MAX_MIB_SIZE*MAX_MIB_SIZE];
dering_list dlist[MAX_MIB_SIZE * MAX_MIB_SIZE];
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
int stride;
int bsize[3];
@@ -58,7 +56,7 @@ int av1_dering_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
av1_setup_dst_planes(xd->plane, frame, 0, 0);
for (pli = 0; pli < 3; pli++) {
dec[pli] = xd->plane[pli].subsampling_x;
bsize[pli] = 3 - dec[pli];
bsize[pli] = OD_DERING_SIZE_LOG2 - dec[pli];
}
stride = cm->mi_cols << bsize[0];
for (r = 0; r < cm->mi_rows << bsize[0]; ++r) {
@@ -101,10 +99,9 @@ int av1_dering_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
int16_t tmp_dst[MAX_MIB_SIZE * MAX_MIB_SIZE * 8 * 8];
nhb = AOMMIN(MAX_MIB_SIZE, cm->mi_cols - MAX_MIB_SIZE * sbc);
nvb = AOMMIN(MAX_MIB_SIZE, cm->mi_rows - MAX_MIB_SIZE * sbr);
dering_count = sb_compute_dering_list(cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE,
dlist);
if (dering_count == 0)
continue;
dering_count = sb_compute_dering_list(cm, sbr * MAX_MIB_SIZE,
sbc * MAX_MIB_SIZE, dlist);
if (dering_count == 0) continue;
best_gi = 0;
for (gi = 0; gi < DERING_REFINEMENT_LEVELS; gi++) {
int cur_mse;
@@ -123,13 +120,16 @@ int av1_dering_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
}
in = inbuf + OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER;
/* We avoid filtering the pixels for which some of the pixels to average
are outside the frame. We could change the filter instead, but it would
are outside the frame. We could change the filter instead, but it
would
add special cases for any future vectorization. */
for (i = 0; i < OD_DERING_INBUF_SIZE; i++) inbuf[i] = OD_DERING_VERY_LARGE;
for (i = 0; i < OD_DERING_INBUF_SIZE; i++)
inbuf[i] = OD_DERING_VERY_LARGE;
for (i = -OD_FILT_VBORDER * (sbr != 0);
i < (nvb << bsize[0]) + OD_FILT_VBORDER * (sbr != nvsb - 1); i++) {
for (j = -OD_FILT_HBORDER * (sbc != 0);
j < (nhb << bsize[0]) + OD_FILT_HBORDER * (sbc != nhsb - 1); j++) {
j < (nhb << bsize[0]) + OD_FILT_HBORDER * (sbc != nhsb - 1);
j++) {
int16_t *x;
x = &src[(sbr * stride * MAX_MIB_SIZE << bsize[0]) +
(sbc * MAX_MIB_SIZE << bsize[0])];
@@ -137,9 +137,9 @@ int av1_dering_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
}
}
od_dering(tmp_dst, in, 0, dir, 0, dlist, dering_count, threshold,
coeff_shift);
copy_blocks_16bit(dst, MAX_MIB_SIZE << bsize[0], tmp_dst, dlist,
dering_count, 3);
coeff_shift);
copy_dering_16bit_to_16bit(dst, MAX_MIB_SIZE << bsize[0], tmp_dst,
dlist, dering_count, bsize[0]);
cur_mse = (int)compute_dist(
dst, MAX_MIB_SIZE << bsize[0],
&ref_coeff[(sbr * stride * MAX_MIB_SIZE << bsize[0]) +
......