Commit 1af3d516, authored by Yaowu Xu and committed by Gerrit Code Review

Merge changes I313bde67,I2ddc2d70,Ifb9094c3,I9051ed6e,I5681e332, ... into nextgenv2

* changes:
  Avoid the "initial copy" in the deringing filter
  Only copy the deringed blocks back into the buffer
  Reducing copies in deringing filter
  sb_all_skip_out() now computes a list of deringed blocks
  compute bskip as we go
  Revert "Fix dering filter when using 4:2:2 or 4:4:0 subsampling"
parents bd163bc1 bcf3580b
......@@ -9,6 +9,8 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
// clang-format off
#include <string.h>
#include <math.h>
......@@ -45,19 +47,84 @@ int sb_all_skip(const AV1_COMMON *const cm, int mi_row, int mi_col) {
return skip;
}
/* Lists the blocks of one superblock that are not marked as skipped.
   The scan starts at mode-info position (mi_row, mi_col) and covers at most
   MAX_MIB_SIZE rows and MAX_MIB_SIZE columns, clipped so that it does not go
   past cm->mi_rows / cm->mi_cols.
   For every scanned block whose mbmi.skip flag is zero, the block's
   (row, column) offset inside the superblock is appended to bskip, in raster
   order; bskip therefore needs room for one entry per scanned block.
   The number of entries written is stored in *count_ptr.
   Returns 1 when nothing was written (every scanned block is skipped) and 0
   otherwise. */
int sb_all_skip_out(const AV1_COMMON *const cm, int mi_row, int mi_col,
    unsigned char (*bskip)[2], int *count_ptr) {
  MODE_INFO **const grid = cm->mi_grid_visible;
  int rows = cm->mi_rows - mi_row;
  int cols = cm->mi_cols - mi_col;
  int found = 0;
  int r, c;
  /* Clip the scan window to one superblock. */
  if (rows > MAX_MIB_SIZE) rows = MAX_MIB_SIZE;
  if (cols > MAX_MIB_SIZE) cols = MAX_MIB_SIZE;
  for (r = 0; r < rows; r++) {
    MODE_INFO **const row = grid + (mi_row + r) * cm->mi_stride + mi_col;
    for (c = 0; c < cols; c++) {
      if (row[c]->mbmi.skip) continue;
      bskip[found][0] = (unsigned char)r;
      bskip[found][1] = (unsigned char)c;
      found++;
    }
  }
  *count_ptr = found;
  /* "All skipped" is exactly the case where nothing was recorded. */
  return found == 0;
}
/* Copies one 8x8 block of 16-bit samples from src into the 8-bit buffer dst.
   dstride and sstride are the distances, in elements, between the starts of
   consecutive rows of dst and src. Samples are narrowed by plain conversion
   to uint8_t (no clamping is performed here). */
static INLINE void copy_8x8_16_8bit(uint8_t *dst, int dstride, int16_t *src, int sstride) {
  int row;
  for (row = 0; row < 8; row++) {
    uint8_t *out = dst + row * dstride;
    const int16_t *inp = src + row * sstride;
    int col;
    for (col = 0; col < 8; col++) out[col] = (uint8_t)inp[col];
  }
}
/* Copies one 4x4 block of 16-bit samples from src into the 8-bit buffer dst.
   dstride and sstride are the distances, in elements, between the starts of
   consecutive rows of dst and src. Samples are narrowed by plain conversion
   to uint8_t (no clamping is performed here). */
static INLINE void copy_4x4_16_8bit(uint8_t *dst, int dstride, int16_t *src, int sstride) {
  int row;
  for (row = 0; row < 4; row++) {
    uint8_t *out = dst + row * dstride;
    const int16_t *inp = src + row * sstride;
    int col;
    for (col = 0; col < 4; col++) out[col] = (uint8_t)inp[col];
  }
}
/* Copies a list of blocks from the 16-bit buffer src to the 8-bit buffer dst.
   Each of the dering_count entries of bskip holds the (row, column) of one
   block, in block units; the same block position is used in both buffers.
   bsize == 3 selects 8x8 blocks, any other value selects 4x4 blocks.
   dstride / sstride are the row pitches of dst / src in elements.
   TODO: Optimize this function for SSE. */
void copy_blocks_16_8bit(uint8_t *dst, int dstride, int16_t *src, int sstride,
    unsigned char (*bskip)[2], int dering_count, int bsize)
{
  const int is_8x8 = (bsize == 3);
  /* log2 of the block edge: converts block units to sample units. */
  const int shift = is_8x8 ? 3 : 2;
  int n;
  for (n = 0; n < dering_count; n++) {
    const int row = bskip[n][0] << shift;
    const int col = bskip[n][1] << shift;
    uint8_t *out = &dst[row * dstride + col];
    int16_t *inp = &src[row * sstride + col];
    if (is_8x8) {
      copy_8x8_16_8bit(out, dstride, inp, sstride);
    } else {
      copy_4x4_16_8bit(out, dstride, inp, sstride);
    }
  }
}
void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
MACROBLOCKD *xd, int global_level) {
int r, c;
int sbr, sbc;
int nhsb, nvsb;
od_dering_in *src[3];
unsigned char *bskip;
unsigned char bskip[MAX_MIB_SIZE*MAX_MIB_SIZE][2];
int dering_count;
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
int stride;
int bsize_x[3];
int bsize_y[3];
int dec_x[3];
int dec_y[3];
int bsize[3];
int dec[3];
int pli;
int coeff_shift = AOMMAX(cm->bit_depth - 8, 0);
int nplanes;
......@@ -68,19 +135,16 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
nplanes = 1;
nvsb = (cm->mi_rows + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
nhsb = (cm->mi_cols + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
bskip = aom_malloc(sizeof(*bskip) * cm->mi_rows * cm->mi_cols);
av1_setup_dst_planes(xd->plane, frame, 0, 0);
for (pli = 0; pli < nplanes; pli++) {
dec_x[pli] = xd->plane[pli].subsampling_x;
dec_y[pli] = xd->plane[pli].subsampling_y;
bsize_x[pli] = 8 >> dec_x[pli];
bsize_y[pli] = 8 >> dec_y[pli];
for (pli = 0; pli < 3; pli++) {
dec[pli] = xd->plane[pli].subsampling_x;
bsize[pli] = 8 >> dec[pli];
}
stride = bsize_x[0] * cm->mi_cols;
for (pli = 0; pli < nplanes; pli++) {
stride = bsize[0] * cm->mi_cols;
for (pli = 0; pli < 3; pli++) {
src[pli] = aom_malloc(sizeof(*src) * cm->mi_rows * cm->mi_cols * 64);
for (r = 0; r < bsize_y[pli] * cm->mi_rows; ++r) {
for (c = 0; c < bsize_x[pli] * cm->mi_cols; ++c) {
for (r = 0; r < bsize[pli] * cm->mi_rows; ++r) {
for (c = 0; c < bsize[pli] * cm->mi_cols; ++c) {
#if CONFIG_AOM_HIGHBITDEPTH
if (cm->use_highbitdepth) {
src[pli][r * stride + c] = CONVERT_TO_SHORTPTR(
......@@ -95,13 +159,6 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
}
}
}
for (r = 0; r < cm->mi_rows; ++r) {
for (c = 0; c < cm->mi_cols; ++c) {
const MB_MODE_INFO *mbmi =
&cm->mi_grid_visible[r * cm->mi_stride + c]->mbmi;
bskip[r * cm->mi_cols + c] = mbmi->skip;
}
}
for (sbr = 0; sbr < nvsb; sbr++) {
for (sbc = 0; sbc < nhsb; sbc++) {
int level;
......@@ -112,7 +169,8 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
global_level, cm->mi_grid_visible[MAX_MIB_SIZE * sbr * cm->mi_stride +
MAX_MIB_SIZE * sbc]
->mbmi.dering_gain);
if (level == 0 || sb_all_skip(cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE))
if (level == 0 ||
sb_all_skip_out(cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE, bskip, &dering_count))
continue;
for (pli = 0; pli < nplanes; pli++) {
int16_t dst[MAX_MIB_SIZE * MAX_MIB_SIZE * 8 * 8];
......@@ -124,39 +182,35 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
else
threshold = level << coeff_shift;
if (threshold == 0) continue;
od_dering(dst, MAX_MIB_SIZE * bsize_x[pli],
&src[pli][sbr * stride * bsize_x[pli] * MAX_MIB_SIZE +
sbc * bsize_x[pli] * MAX_MIB_SIZE],
stride, nhb, nvb, sbc, sbr, nhsb, nvsb, dec_x[pli],
dec_y[pli], dir, pli,
&bskip[MAX_MIB_SIZE * sbr * cm->mi_cols + MAX_MIB_SIZE * sbc],
cm->mi_cols, threshold, coeff_shift);
for (r = 0; r < bsize_y[pli] * nvb; ++r) {
for (c = 0; c < bsize_x[pli] * nhb; ++c) {
od_dering(dst, MAX_MIB_SIZE * bsize[pli],
&src[pli][sbr * stride * bsize[pli] * MAX_MIB_SIZE +
sbc * bsize[pli] * MAX_MIB_SIZE],
stride, nhb, nvb, sbc, sbr, nhsb, nvsb, dec[pli], dir, pli,
bskip, dering_count, threshold, coeff_shift);
#if CONFIG_AOM_HIGHBITDEPTH
if (cm->use_highbitdepth) {
CONVERT_TO_SHORTPTR(xd->plane[pli].dst.buf)
[xd->plane[pli].dst.stride *
(bsize_x[pli] * MAX_MIB_SIZE * sbr + r) +
sbc * bsize_x[pli] * MAX_MIB_SIZE + c] =
dst[r * MAX_MIB_SIZE * bsize_x[pli] + c];
} else {
if (cm->use_highbitdepth) {
copy_blocks_16bit(
(int16_t*)&CONVERT_TO_SHORTPTR(
xd->plane[pli].dst.buf)[xd->plane[pli].dst.stride *
(bsize[pli] * MAX_MIB_SIZE * sbr) +
sbc * bsize[pli] * MAX_MIB_SIZE],
xd->plane[pli].dst.stride, dst, MAX_MIB_SIZE * bsize[pli], bskip,
dering_count, 3 - dec[pli]);
} else {
#endif
xd->plane[pli]
.dst.buf[xd->plane[pli].dst.stride *
(bsize_x[pli] * MAX_MIB_SIZE * sbr + r) +
sbc * bsize_x[pli] * MAX_MIB_SIZE + c] =
dst[r * MAX_MIB_SIZE * bsize_x[pli] + c];
copy_blocks_16_8bit(
&xd->plane[pli].dst.buf[xd->plane[pli].dst.stride *
(bsize[pli] * MAX_MIB_SIZE * sbr) +
sbc * bsize[pli] * MAX_MIB_SIZE],
xd->plane[pli].dst.stride, dst, MAX_MIB_SIZE * bsize[pli], bskip,
dering_count, 3 - dec[pli]);
#if CONFIG_AOM_HIGHBITDEPTH
}
#endif
}
}
#endif
}
}
}
for (pli = 0; pli < nplanes; pli++) {
aom_free(src[pli]);
}
aom_free(bskip);
}
......@@ -11,6 +11,8 @@
#ifndef AV1_COMMON_DERING_H_
#define AV1_COMMON_DERING_H_
// clang-format off
#include "av1/common/od_dering.h"
#include "av1/common/onyxc_int.h"
#include "aom/aom_integer.h"
......@@ -29,6 +31,8 @@ extern "C" {
int compute_level_from_index(int global_level, int gi);
int sb_all_skip(const AV1_COMMON *const cm, int mi_row, int mi_col);
/* Scans the superblock starting at mode-info position (mi_row, mi_col) and
   appends to bskip the (row, column) offset, within the superblock, of every
   block whose skip flag is not set. The number of entries written is stored
   in *count_ptr. Returns 1 if no entry was written, 0 otherwise. */
int sb_all_skip_out(const AV1_COMMON *const cm, int mi_row, int mi_col,
unsigned char (*bskip)[2], int *count_ptr);
void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
MACROBLOCKD *xd, int global_level);
......
......@@ -183,19 +183,6 @@ int od_filter_dering_direction_4x4_c(int16_t *y, int ystride, const int16_t *in,
return (total_abs + 2) >> 2;
}
/* Runs the 4x4 direction filter on two vertically adjacent 4x4 blocks: the
   one at y / in, and the one four rows further down (y advanced by
   4 * ystride, in advanced by 4 * OD_FILT_BSTRIDE).
   Returns the sum of the two values returned by the 4x4 filter. */
int od_filter_dering_direction_4x8(int16_t *y, int ystride, const int16_t *in,
                                   int threshold, int dir) {
  int16_t *const y_lower = y + 4 * ystride;
  const int16_t *const in_lower = in + 4 * OD_FILT_BSTRIDE;
  const int upper =
      od_filter_dering_direction_4x4(y, ystride, in, threshold, dir);
  const int lower = od_filter_dering_direction_4x4(y_lower, ystride, in_lower,
                                                   threshold, dir);
  return upper + lower;
}
/* Runs the 4x4 direction filter on two horizontally adjacent 4x4 blocks: the
   one at y / in, and the one four columns to the right (both pointers
   advanced by 4).
   Returns the sum of the two values returned by the 4x4 filter. */
int od_filter_dering_direction_8x4(int16_t *y, int ystride, const int16_t *in,
                                   int threshold, int dir) {
  const int left =
      od_filter_dering_direction_4x4(y, ystride, in, threshold, dir);
  const int right =
      od_filter_dering_direction_4x4(y + 4, ystride, in + 4, threshold, dir);
  return left + right;
}
/* Smooth in the direction orthogonal to what was detected. */
void od_filter_dering_orthogonal_8x8_c(int16_t *y, int ystride,
const int16_t *in, int threshold,
......@@ -254,21 +241,6 @@ void od_filter_dering_orthogonal_4x4_c(int16_t *y, int ystride,
}
}
/* Applies the 4x4 orthogonal filter to two vertically adjacent 4x4 blocks:
   first the one at y / in, then the one four rows further down (y advanced
   by 4 * ystride, in advanced by 4 * OD_FILT_BSTRIDE). */
void od_filter_dering_orthogonal_4x8(int16_t *y, int ystride,
                                     const int16_t *in, int threshold,
                                     int dir) {
  int half;
  for (half = 0; half < 2; half++) {
    od_filter_dering_orthogonal_4x4(y + half * (4 * ystride), ystride,
                                    in + half * (4 * OD_FILT_BSTRIDE),
                                    threshold, dir);
  }
}
/* Applies the 4x4 orthogonal filter to two horizontally adjacent 4x4 blocks:
   first the one at y / in, then the one four columns to the right (both
   pointers advanced by 4). */
void od_filter_dering_orthogonal_8x4(int16_t *y, int ystride,
                                     const int16_t *in, int threshold,
                                     int dir) {
  int half;
  for (half = 0; half < 2; half++) {
    const int xoff = 4 * half;
    od_filter_dering_orthogonal_4x4(y + xoff, ystride, in + xoff, threshold,
                                    dir);
  }
}
/* This table approximates x^0.16 with the index being log2(x). It is clamped
to [.5, 3]. The table is computed as:
round(256*min(3, max(.5, 1.08*(sqrt(2)*2.^([0:17]+8)/256/256).^.16))) */
......@@ -290,92 +262,116 @@ static INLINE int od_adjust_thresh(int threshold, int32_t var) {
return (threshold * OD_THRESH_TABLE_Q8[OD_ILOG(v1)] + 128) >> 8;
}
/* Copies one 8x8 block of 16-bit samples from src to dst.
   dstride and sstride are the distances, in elements, between the starts of
   consecutive rows of dst and src. */
static INLINE void copy_8x8_16bit(int16_t *dst, int dstride, int16_t *src, int sstride) {
  int row;
  for (row = 0; row < 8; row++) {
    int16_t *out = dst + row * dstride;
    const int16_t *inp = src + row * sstride;
    int col;
    for (col = 0; col < 8; col++) out[col] = inp[col];
  }
}
/* Copies one 4x4 block of 16-bit samples from src to dst.
   dstride and sstride are the distances, in elements, between the starts of
   consecutive rows of dst and src. */
static INLINE void copy_4x4_16bit(int16_t *dst, int dstride, int16_t *src, int sstride) {
  int row;
  for (row = 0; row < 4; row++) {
    int16_t *out = dst + row * dstride;
    const int16_t *inp = src + row * sstride;
    int col;
    for (col = 0; col < 4; col++) out[col] = inp[col];
  }
}
/* Copies a list of blocks from src to dst, both holding 16-bit samples.
   Each of the dering_count entries of bskip holds the (row, column) of one
   block, in block units; the same block position is used in both buffers.
   bsize == 3 selects 8x8 blocks, any other value selects 4x4 blocks.
   dstride / sstride are the row pitches of dst / src in elements.
   TODO: Optimize this function for SSE. */
void copy_blocks_16bit(int16_t *dst, int dstride, int16_t *src, int sstride,
    unsigned char (*bskip)[2], int dering_count, int bsize)
{
  const int is_8x8 = (bsize == 3);
  /* log2 of the block edge: converts block units to sample units. */
  const int shift = is_8x8 ? 3 : 2;
  int n;
  for (n = 0; n < dering_count; n++) {
    const int row = bskip[n][0] << shift;
    const int col = bskip[n][1] << shift;
    int16_t *out = &dst[row * dstride + col];
    int16_t *inp = &src[row * sstride + col];
    if (is_8x8) {
      copy_8x8_16bit(out, dstride, inp, sstride);
    } else {
      copy_4x4_16bit(out, dstride, inp, sstride);
    }
  }
}
void od_dering(int16_t *y, int ystride, const od_dering_in *x, int xstride,
int nhb, int nvb, int sbx, int sby, int nhsb, int nvsb, int xdec,
int ydec, int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int pli,
unsigned char *bskip, int skip_stride, int threshold,
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int pli,
unsigned char (*bskip)[2], int dering_count, int threshold,
int coeff_shift) {
int i;
int j;
int bi;
int bx;
int by;
int16_t inbuf[OD_DERING_INBUF_SIZE];
int16_t *in;
int bsize_x = 3 - xdec;
int bsize_y = 3 - ydec;
int bsize;
int32_t var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS];
int filter2_thresh[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS];
od_filter_dering_direction_func filter_dering_direction[OD_DERINGSIZES] = {
od_filter_dering_direction_8x8, od_filter_dering_direction_8x4,
od_filter_dering_direction_4x8, od_filter_dering_direction_4x4
od_filter_dering_direction_4x4, od_filter_dering_direction_8x8
};
od_filter_dering_orthogonal_func filter_dering_orthogonal[OD_DERINGSIZES] = {
od_filter_dering_orthogonal_8x8, od_filter_dering_orthogonal_8x4,
od_filter_dering_orthogonal_4x8, od_filter_dering_orthogonal_4x4
od_filter_dering_orthogonal_4x4, od_filter_dering_orthogonal_8x8
};
int filter_idx = xdec*2 + ydec;
bsize = 3 - xdec;
in = inbuf + OD_FILT_BORDER * OD_FILT_BSTRIDE + OD_FILT_BORDER;
/* We avoid filtering the pixels for which some of the pixels to average
are outside the frame. We could change the filter instead, but it would
add special cases for any future vectorization. */
for (i = 0; i < OD_DERING_INBUF_SIZE; i++) inbuf[i] = OD_DERING_VERY_LARGE;
for (i = -OD_FILT_BORDER * (sby != 0);
i < (nvb << bsize_y) + OD_FILT_BORDER * (sby != nvsb - 1); i++) {
i < (nvb << bsize) + OD_FILT_BORDER * (sby != nvsb - 1); i++) {
for (j = -OD_FILT_BORDER * (sbx != 0);
j < (nhb << bsize_x) + OD_FILT_BORDER * (sbx != nhsb - 1); j++) {
j < (nhb << bsize) + OD_FILT_BORDER * (sbx != nhsb - 1); j++) {
in[i * OD_FILT_BSTRIDE + j] = x[i * xstride + j];
}
}
/* Assume deringing filter is sparsely applied, so do one large copy rather
than small copies later if deringing is skipped. */
for (i = 0; i < nvb << bsize_y; i++) {
for (j = 0; j < nhb << bsize_x; j++) {
y[i * ystride + j] = in[i * OD_FILT_BSTRIDE + j];
}
}
if (pli == 0) {
for (by = 0; by < nvb; by++) {
for (bx = 0; bx < nhb; bx++) {
if (bskip[by * skip_stride + bx]) continue;
dir[by][bx] = od_dir_find8(&x[8 * by * xstride + 8 * bx], xstride,
&var[by][bx], coeff_shift);
/* Deringing orthogonal to the direction uses a tighter threshold
because we want to be conservative. We've presumably already
achieved some deringing, so the amount of change is expected
to be low. Also, since we might be filtering across an edge, we
want to make sure not to blur it. That being said, we might want
to be a little bit more aggressive on pure horizontal/vertical
since the ringing there tends to be directional, so it doesn't
get removed by the directional filtering. */
filter2_thresh[by][bx] = (filter_dering_direction[filter_idx])(
&y[(by * ystride << bsize_y) + (bx << bsize_x)], ystride,
&in[(by * OD_FILT_BSTRIDE << bsize_y) + (bx << bsize_x)],
od_adjust_thresh(threshold, var[by][bx]), dir[by][bx]);
}
for (bi = 0; bi < dering_count; bi++) {
by = bskip[bi][0];
bx = bskip[bi][1];
dir[by][bx] = od_dir_find8(&x[8 * by * xstride + 8 * bx], xstride,
&var[by][bx], coeff_shift);
/* Deringing orthogonal to the direction uses a tighter threshold
because we want to be conservative. We've presumably already
achieved some deringing, so the amount of change is expected
to be low. Also, since we might be filtering across an edge, we
want to make sure not to blur it. That being said, we might want
to be a little bit more aggressive on pure horizontal/vertical
since the ringing there tends to be directional, so it doesn't
get removed by the directional filtering. */
filter2_thresh[by][bx] = (filter_dering_direction[bsize - OD_LOG_BSIZE0])(
&y[(by * ystride << bsize) + (bx << bsize)], ystride,
&in[(by * OD_FILT_BSTRIDE << bsize) + (bx << bsize)],
od_adjust_thresh(threshold, var[by][bx]), dir[by][bx]);
}
} else {
for (by = 0; by < nvb; by++) {
for (bx = 0; bx < nhb; bx++) {
if (bskip[by * skip_stride + bx]) continue;
filter2_thresh[by][bx] = (filter_dering_direction[filter_idx])(
&y[(by * ystride << bsize_y) + (bx << bsize_x)], ystride,
&in[(by * OD_FILT_BSTRIDE << bsize_y) + (bx << bsize_x)], threshold,
dir[by][bx]);
}
for (bi = 0; bi < dering_count; bi++) {
by = bskip[bi][0];
bx = bskip[bi][1];
filter2_thresh[by][bx] = (filter_dering_direction[bsize - OD_LOG_BSIZE0])(
&y[(by * ystride << bsize) + (bx << bsize)], ystride,
&in[(by * OD_FILT_BSTRIDE << bsize) + (bx << bsize)], threshold,
dir[by][bx]);
}
}
for (i = 0; i < nvb << bsize_y; i++) {
for (j = 0; j < nhb << bsize_x; j++) {
in[i * OD_FILT_BSTRIDE + j] = y[i * ystride + j];
}
}
for (by = 0; by < nvb; by++) {
for (bx = 0; bx < nhb; bx++) {
if (bskip[by * skip_stride + bx] || filter2_thresh[by][bx] == 0) continue;
(filter_dering_orthogonal[filter_idx])(
&y[(by * ystride << bsize_y) + (bx << bsize_x)], ystride,
&in[(by * OD_FILT_BSTRIDE << bsize_y) + (bx << bsize_x)],
filter2_thresh[by][bx], dir[by][bx]);
}
copy_blocks_16bit(in, OD_FILT_BSTRIDE, y, ystride, bskip, dering_count,
bsize);
for (bi = 0; bi < dering_count; bi++) {
by = bskip[bi][0];
bx = bskip[bi][1];
if (filter2_thresh[by][bx] == 0) continue;
(filter_dering_orthogonal[bsize - OD_LOG_BSIZE0])(
&y[(by * ystride << bsize) + (bx << bsize)], ystride,
&in[(by * OD_FILT_BSTRIDE << bsize) + (bx << bsize)], filter2_thresh[by][bx],
dir[by][bx]);
}
}
......@@ -12,6 +12,8 @@
#if !defined(_dering_H)
#define _dering_H (1)
// clang-format off
#include "odintrin.h"
#if defined(DAALA_ODINTRIN)
......@@ -19,7 +21,7 @@
typedef int16_t od_dering_in;
#endif
#define OD_DERINGSIZES (4)
#define OD_DERINGSIZES (2)
#define OD_DERING_NBLOCKS (OD_BSIZE_MAX / 8)
......@@ -34,26 +36,21 @@ typedef int (*od_filter_dering_direction_func)(int16_t *y, int ystride,
typedef void (*od_filter_dering_orthogonal_func)(int16_t *y, int ystride,
const int16_t *in,
int threshold, int dir);
/* Copies the dering_count blocks listed in bskip from src to dst (16-bit
   samples in both). Each bskip entry is the (row, column) of a block in block
   units. bsize == 3 copies 8x8 blocks; any other value copies 4x4 blocks.
   dstride and sstride are the row pitches of dst and src in elements. */
void copy_blocks_16bit(int16_t *dst, int dstride, int16_t *src, int sstride,
unsigned char (*bskip)[2], int dering_count, int bsize);
void od_dering(int16_t *y, int ystride, const od_dering_in *x, int xstride,
int nvb, int nhb, int sbx, int sby, int nhsb, int nvsb, int xdec,
int ydec, int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int pli,
unsigned char *bskip, int skip_stride, int threshold,
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int pli,
unsigned char (*bskip)[2], int skip_stride, int threshold,
int coeff_shift);
int od_filter_dering_direction_4x4_c(int16_t *y, int ystride, const int16_t *in,
int threshold, int dir);
int od_filter_dering_direction_4x8(int16_t *y, int ystride, const int16_t *in,
int threshold, int dir);
int od_filter_dering_direction_8x4(int16_t *y, int ystride, const int16_t *in,
int threshold, int dir);
int od_filter_dering_direction_8x8_c(int16_t *y, int ystride, const int16_t *in,
int threshold, int dir);
void od_filter_dering_orthogonal_4x4_c(int16_t *y, int ystride,
const int16_t *in, int threshold,
int dir);
void od_filter_dering_orthogonal_4x8(int16_t *y, int ystride, const int16_t *in,
int threshold, int dir);
void od_filter_dering_orthogonal_8x4(int16_t *y, int ystride, const int16_t *in,
int threshold, int dir);
void od_filter_dering_orthogonal_8x8_c(int16_t *y, int ystride,
const int16_t *in, int threshold,
int dir);
......
......@@ -9,6 +9,8 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
// clang-format off
#include <string.h>
#include <math.h>
......@@ -41,7 +43,7 @@ int av1_dering_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
int nhsb, nvsb;
od_dering_in *src;
int16_t *ref_coeff;
unsigned char *bskip;
unsigned char bskip[MAX_MIB_SIZE*MAX_MIB_SIZE][2];
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
int stride;
int bsize[3];
......@@ -49,10 +51,10 @@ int av1_dering_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
int pli;
int level;
int best_level;
int dering_count;
int coeff_shift = AOMMAX(cm->bit_depth - 8, 0);
src = aom_malloc(sizeof(*src) * cm->mi_rows * cm->mi_cols * 64);
ref_coeff = aom_malloc(sizeof(*ref_coeff) * cm->mi_rows * cm->mi_cols * 64);
bskip = aom_malloc(sizeof(*bskip) * cm->mi_rows * cm->mi_cols);
av1_setup_dst_planes(xd->plane, frame, 0, 0);
for (pli = 0; pli < 3; pli++) {
dec[pli] = xd->plane[pli].subsampling_x;
......@@ -77,13 +79,6 @@ int av1_dering_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
#endif
}
}
for (r = 0; r < cm->mi_rows; ++r) {
for (c = 0; c < cm->mi_cols; ++c) {
const MB_MODE_INFO *mbmi =
&cm->mi_grid_visible[r * cm->mi_stride + c]->mbmi;
bskip[r * cm->mi_cols + c] = mbmi->skip;
}
}
nvsb = (cm->mi_rows + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
nhsb = (cm->mi_cols + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
/* Pick a base threshold based on the quantizer. The threshold will then be
......@@ -105,20 +100,28 @@ int av1_dering_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
int16_t dst[MAX_MIB_SIZE * MAX_MIB_SIZE * 8 * 8];
nhb = AOMMIN(MAX_MIB_SIZE, cm->mi_cols - MAX_MIB_SIZE * sbc);
nvb = AOMMIN(MAX_MIB_SIZE, cm->mi_rows - MAX_MIB_SIZE * sbr);
if (sb_all_skip(cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE)) continue;
if (sb_all_skip_out(cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE, bskip, &dering_count))
continue;
best_gi = 0;
for (gi = 0; gi < DERING_REFINEMENT_LEVELS; gi++) {
int cur_mse;
int threshold;
level = compute_level_from_index(best_level, gi);
threshold = level << coeff_shift;
for (r = 0; r < bsize[0] * nvb; r++) {
for (c = 0; c < bsize[0] * nhb; c++) {
dst[r * MAX_MIB_SIZE * bsize[0] + c] =
src[(sbr * bsize[0] * MAX_MIB_SIZE + r) * stride +
sbc * bsize[0] * MAX_MIB_SIZE + c];
}
}
od_dering(dst, MAX_MIB_SIZE * bsize[0],
&src[sbr * stride * bsize[0] * MAX_MIB_SIZE +
sbc * bsize[0] * MAX_MIB_SIZE],
cm->mi_cols * bsize[0], nhb, nvb, sbc, sbr, nhsb, nvsb, 0, 0,
cm->mi_cols * bsize[0], nhb, nvb, sbc, sbr, nhsb, nvsb, 0,
dir, 0,
&bskip[MAX_MIB_SIZE * sbr * cm->mi_cols + MAX_MIB_SIZE * sbc],
cm->mi_cols, threshold, coeff_shift);
bskip,
dering_count, threshold, coeff_shift);
cur_mse = (int)compute_dist(
dst, MAX_MIB_SIZE * bsize[0],
&ref_coeff[sbr * stride * bsize[0] * MAX_MIB_SIZE +
......@@ -136,6 +139,5 @@ int av1_dering_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
}
aom_free(src);
aom_free(ref_coeff);
aom_free(bskip);
return best_level;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment