Commit 253c001f authored by Yaowu Xu

Port dering experiment from aom

Manually cherry-picked:
15791332 Use OD_DIVU for small divisions in temporal_filter.
03122298 Replace divides by small values with multiplies.
9c48eec7 Removing divisions from od_dir_find8()
0950ed82 Merge "Port active map / cyclic refresh fixes to vp10."
efefdad7 Port active map / cyclic refresh fixes to vp10.
1eaf748c Port switch to 9-bit rate cost to aom.
0b1606e7 Only build deringing code when --enable-dering.
e2511e15 Deringing cleanup: don't hardcode the number of levels
8fe5c5d6 Rename dering_in to od_dering_in to sync with Daala
4eb1380d Makes second filters for 45-degree directions horizontal
7f4c3f58 Removes the superblock variance contribution to the threshold
3dc56f93 Simplifying arithmetic by using multiply+shift
cf2aaba9 Return 0 explicitly for OD_ILOG(0).
49ca22aa Use the Daala implementation of OD_ILOG().
85187243 Fix compiler warning in od_dering.c.
485d6a69 Prevent multiple inclusion of odintrin.h.
51b7a998 Adds the Daala deringing filter as experimental

Note that a few of the changes were already in the libvpx codebase. (A brief sketch of the divide-by-multiply idea behind several of these changes follows the commit header below.)

Change-Id: I1c32ee7694e5ad22c98b06ff97737cd792cd88ae
parent 0818a7c8
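Several of the cherry-picks above (using OD_DIVU for small divisions, replacing divides by small values with multiplies, removing divisions from od_dir_find8) rely on the classic reciprocal-multiply trick. The standalone sketch below only illustrates the general idea; it is not the OD_DIVU_SMALL table/macro this commit adds to odintrin.h, and the helper name divu_by_multiply is made up for illustration. It restricts itself to 16-bit numerators so that a single 64-bit multiply is provably exact.

#include <assert.h>
#include <stdint.h>

/* Exact unsigned division by a small constant d via one multiply and a
   shift. Exact whenever x*d < 2^32, e.g. for x < 2^16 and d <= 2^16. */
static uint32_t divu_by_multiply(uint32_t x, uint32_t d) {
  uint64_t m = ((1ULL << 32) + d - 1) / d;  /* m = ceil(2^32 / d) */
  return (uint32_t)(((uint64_t)x * m) >> 32);
}

int main(void) {
  uint32_t x, d;
  /* Brute-force self-check against the '/' operator. */
  for (d = 1; d <= 64; d++) {
    for (x = 0; x < (1u << 16); x++) {
      assert(divu_by_multiply(x, d) == x / d);
    }
  }
  return 0;
}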
@@ -253,6 +253,7 @@ EXPERIMENT_LIST="
fp_mb_stats
emulate_hardware
clpf
dering
var_tx
rect_tx
ref_mv
......
@@ -24,6 +24,11 @@ class ACMRandom {
explicit ACMRandom(int seed) : random_(seed) {}
void Reset(int seed) { random_.Reseed(seed); }
uint32_t Rand31(void) {
return random_.Generate(testing::internal::Random::kMaxRange);
}
uint16_t Rand16(void) {
const uint32_t value =
random_.Generate(testing::internal::Random::kMaxRange);
......
/*Daala video codec
Copyright (c) 2013 Daala project contributors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
- Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
#include <stdlib.h>
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "test/acm_random.h"
#include "vp10/common/odintrin.h"
using libvpx_test::ACMRandom;
TEST(Daala, TestDIVUuptoMAX) {
for (int d = 1; d <= OD_DIVU_DMAX; d++) {
for (uint32_t x = 1; x <= 1000000; x++) {
GTEST_ASSERT_EQ(x/d, OD_DIVU_SMALL(x, d)) << "x=" << x << " d=" << d <<
" x/d=" << (x/d) << " != " << OD_DIVU_SMALL(x, d);
}
}
}
TEST(Daala, TestDIVUrandI31) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
for (int d = 1; d < OD_DIVU_DMAX; d++) {
for (int i = 0; i < 1000000; i++) {
uint32_t x = rnd.Rand31();
GTEST_ASSERT_EQ(x/d, OD_DIVU_SMALL(x, d)) << "x=" << x << " d=" << d <<
" x/d=" << (x/d) << " != " << OD_DIVU_SMALL(x, d);
}
}
}
@@ -102,6 +102,7 @@ LIBVPX_TEST_SRCS-yes += partial_idct_test.cc
LIBVPX_TEST_SRCS-yes += superframe_test.cc
LIBVPX_TEST_SRCS-yes += tile_independence_test.cc
LIBVPX_TEST_SRCS-yes += boolcoder_test.cc
LIBVPX_TEST_SRCS-yes += divu_small_test.cc
#LIBVPX_TEST_SRCS-yes += encoder_parms_get_to_decoder.cc
endif
......
@@ -239,6 +239,8 @@ typedef struct {
int dq_off_index;
int send_dq_bit;
#endif // CONFIG_NEW_QUANT
/* deringing gain *per-superblock* */
int8_t dering_gain;
} MB_MODE_INFO;
typedef struct MODE_INFO {
......
/*
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <string.h>
#include <math.h>
#include "./vpx_scale_rtcd.h"
#include "vpx/vpx_integer.h"
#include "vp10/common/dering.h"
#include "vp10/common/onyxc_int.h"
#include "vp10/common/reconinter.h"
#include "vp10/common/od_dering.h"
int compute_level_from_index(int global_level, int gi) {
static const int dering_gains[DERING_REFINEMENT_LEVELS] = {0, 11, 16, 22};
int level;
if (global_level == 0) return 0;
level = (global_level*dering_gains[gi] + 8) >> 4;
return clamp(level, gi, MAX_DERING_LEVEL-1);
}
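/* Illustrative note, not part of this commit: with the {0, 11, 16, 22} gain
   table the 2-bit per-superblock index scales the frame-level strength by
   roughly {0, 11/16, 1, 22/16}. For example, with global_level = 32:
     gi = 0: 0
     gi = 1: (32*11 + 8) >> 4 == 22
     gi = 2: (32*16 + 8) >> 4 == 32
     gi = 3: (32*22 + 8) >> 4 == 44
   each result then being clamped to [gi, MAX_DERING_LEVEL - 1]. */
/* Returns 1 if every mode-info unit in the 64x64 superblock starting at
   (mi_row, mi_col) is coded as skip, in which case deringing is disabled
   for that superblock. */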
int sb_all_skip(const VP10_COMMON *const cm, int mi_row, int mi_col) {
int r, c;
int maxc, maxr;
int skip = 1;
maxc = cm->mi_cols - mi_col;
maxr = cm->mi_rows - mi_row;
if (maxr > MI_BLOCK_SIZE) maxr = MI_BLOCK_SIZE;
if (maxc > MI_BLOCK_SIZE) maxc = MI_BLOCK_SIZE;
for (r = 0; r < maxr; r++) {
for (c = 0; c < maxc; c++) {
skip = skip &&
cm->mi_grid_visible[(mi_row + r)*cm->mi_stride + mi_col + c]->
mbmi.skip;
}
}
return skip;
}
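/* Added comment (not in the original change): applies the deringing filter
   to a decoded frame. Each plane is copied into a 16-bit working buffer, a
   per-MI skip map is built, a filter level is derived per 64x64 superblock
   from the frame-level strength and the superblock's dering_gain, and
   od_dering() is run on each superblock before the filtered samples are
   written back to the frame buffer. */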
void vp10_dering_frame(YV12_BUFFER_CONFIG *frame, VP10_COMMON *cm,
MACROBLOCKD *xd, int global_level) {
int r, c;
int sbr, sbc;
int nhsb, nvsb;
od_dering_in *src[3];
unsigned char *bskip;
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = {{0}};
int stride;
int bsize[3];
int dec[3];
int pli;
int coeff_shift = VPXMAX(cm->bit_depth - 8, 0);
nvsb = (cm->mi_rows + MI_BLOCK_SIZE - 1)/MI_BLOCK_SIZE;
nhsb = (cm->mi_cols + MI_BLOCK_SIZE - 1)/MI_BLOCK_SIZE;
bskip = vpx_malloc(sizeof(*bskip)*cm->mi_rows*cm->mi_cols);
vp10_setup_dst_planes(xd->plane, frame, 0, 0);
for (pli = 0; pli < 3; pli++) {
dec[pli] = xd->plane[pli].subsampling_x;
bsize[pli] = 8 >> dec[pli];
}
stride = bsize[0]*cm->mi_cols;
for (pli = 0; pli < 3; pli++) {
src[pli] = vpx_malloc(sizeof(*src[pli])*cm->mi_rows*cm->mi_cols*64);
for (r = 0; r < bsize[pli]*cm->mi_rows; ++r) {
for (c = 0; c < bsize[pli]*cm->mi_cols; ++c) {
#if CONFIG_VPX_HIGHBITDEPTH
if (cm->use_highbitdepth) {
src[pli][r * stride + c] =
CONVERT_TO_SHORTPTR(xd->plane[pli].dst.buf)
[r * xd->plane[pli].dst.stride + c];
} else {
#endif
src[pli][r * stride + c] =
xd->plane[pli].dst.buf[r * xd->plane[pli].dst.stride + c];
#if CONFIG_VPX_HIGHBITDEPTH
}
#endif
}
}
}
for (r = 0; r < cm->mi_rows; ++r) {
for (c = 0; c < cm->mi_cols; ++c) {
const MB_MODE_INFO *mbmi =
&cm->mi_grid_visible[r * cm->mi_stride + c]->mbmi;
bskip[r * cm->mi_cols + c] = mbmi->skip;
}
}
for (sbr = 0; sbr < nvsb; sbr++) {
for (sbc = 0; sbc < nhsb; sbc++) {
int level;
int nhb, nvb;
nhb = VPXMIN(MI_BLOCK_SIZE, cm->mi_cols - MI_BLOCK_SIZE*sbc);
nvb = VPXMIN(MI_BLOCK_SIZE, cm->mi_rows - MI_BLOCK_SIZE*sbr);
for (pli = 0; pli < 3; pli++) {
int16_t dst[MI_BLOCK_SIZE*MI_BLOCK_SIZE*8*8];
int threshold;
#if DERING_REFINEMENT
level = compute_level_from_index(
global_level,
cm->mi_grid_visible[MI_BLOCK_SIZE*sbr*cm->mi_stride +
MI_BLOCK_SIZE*sbc]->mbmi.dering_gain);
#else
level = global_level;
#endif
/* FIXME: This is a temporary hack that uses more conservative
deringing for chroma. */
if (pli) level = (level*5 + 4) >> 3;
if (sb_all_skip(cm, sbr*MI_BLOCK_SIZE, sbc*MI_BLOCK_SIZE)) level = 0;
threshold = level << coeff_shift;
od_dering(
&OD_DERING_VTBL_C,
dst,
MI_BLOCK_SIZE*bsize[pli],
&src[pli][sbr*stride*bsize[pli]*MI_BLOCK_SIZE +
sbc*bsize[pli]*MI_BLOCK_SIZE],
stride, nhb, nvb, sbc, sbr, nhsb, nvsb, dec[pli], dir, pli,
&bskip[MI_BLOCK_SIZE*sbr*cm->mi_cols + MI_BLOCK_SIZE*sbc],
cm->mi_cols, threshold, OD_DERING_NO_CHECK_OVERLAP, coeff_shift);
for (r = 0; r < bsize[pli]*nvb; ++r) {
for (c = 0; c < bsize[pli]*nhb; ++c) {
#if CONFIG_VPX_HIGHBITDEPTH
if (cm->use_highbitdepth) {
CONVERT_TO_SHORTPTR(xd->plane[pli].dst.buf)
[xd->plane[pli].dst.stride*(bsize[pli]*MI_BLOCK_SIZE*sbr + r)
+ sbc*bsize[pli]*MI_BLOCK_SIZE + c] =
dst[r * MI_BLOCK_SIZE * bsize[pli] + c];
} else {
#endif
xd->plane[pli].dst.buf[xd->plane[pli].dst.stride*
(bsize[pli]*MI_BLOCK_SIZE*sbr + r) +
sbc*bsize[pli]*MI_BLOCK_SIZE + c] =
dst[r * MI_BLOCK_SIZE * bsize[pli] + c];
#if CONFIG_VPX_HIGHBITDEPTH
}
#endif
}
}
}
}
}
for (pli = 0; pli < 3; pli++) {
vpx_free(src[pli]);
}
vpx_free(bskip);
}
#ifndef VP10_COMMON_DERING_H_
#define VP10_COMMON_DERING_H_
#include "vp10/common/od_dering.h"
#include "vp10/common/onyxc_int.h"
#include "vpx/vpx_integer.h"
#include "./vpx_config.h"
#include "vpx_ports/mem.h"
#ifdef __cplusplus
extern "C" {
#endif
#define DERING_LEVEL_BITS 6
#define MAX_DERING_LEVEL (1 << DERING_LEVEL_BITS)
#define DERING_REFINEMENT 1
#define DERING_REFINEMENT_BITS 2
#define DERING_REFINEMENT_LEVELS 4
int compute_level_from_index(int global_level, int gi);
int sb_all_skip(const VP10_COMMON *const cm, int mi_row, int mi_col);
void vp10_dering_frame(YV12_BUFFER_CONFIG *frame, VP10_COMMON *cm,
MACROBLOCKD *xd, int global_level);
int vp10_dering_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
VP10_COMMON *cm,
MACROBLOCKD *xd);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP10_COMMON_DERING_H_
/*Daala video codec
Copyright (c) 2014-2016 Daala project contributors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
- Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <stdlib.h>
#include <math.h>
#include "dering.h"
const od_dering_opt_vtbl OD_DERING_VTBL_C = {
{od_filter_dering_direction_4x4_c, od_filter_dering_direction_8x8_c},
{od_filter_dering_orthogonal_4x4_c, od_filter_dering_orthogonal_8x8_c}
};
/* Generated from gen_filter_tables.c. */
const int OD_DIRECTION_OFFSETS_TABLE[8][3] = {
{-1*OD_FILT_BSTRIDE + 1, -2*OD_FILT_BSTRIDE + 2, -3*OD_FILT_BSTRIDE + 3 },
{ 0*OD_FILT_BSTRIDE + 1, -1*OD_FILT_BSTRIDE + 2, -1*OD_FILT_BSTRIDE + 3 },
{ 0*OD_FILT_BSTRIDE + 1, 0*OD_FILT_BSTRIDE + 2, 0*OD_FILT_BSTRIDE + 3 },
{ 0*OD_FILT_BSTRIDE + 1, 1*OD_FILT_BSTRIDE + 2, 1*OD_FILT_BSTRIDE + 3 },
{ 1*OD_FILT_BSTRIDE + 1, 2*OD_FILT_BSTRIDE + 2, 3*OD_FILT_BSTRIDE + 3 },
{ 1*OD_FILT_BSTRIDE + 0, 2*OD_FILT_BSTRIDE + 1, 3*OD_FILT_BSTRIDE + 1 },
{ 1*OD_FILT_BSTRIDE + 0, 2*OD_FILT_BSTRIDE + 0, 3*OD_FILT_BSTRIDE + 0 },
{ 1*OD_FILT_BSTRIDE + 0, 2*OD_FILT_BSTRIDE - 1, 3*OD_FILT_BSTRIDE - 1 },
};
const double OD_DERING_GAIN_TABLE[OD_DERING_LEVELS] = {
0, 0.5, 0.707, 1, 1.41, 2
};
/* Detect direction. 0 means 45-degree up-right, 2 is horizontal, and so on.
The search minimizes the weighted variance along all the lines in a
particular direction, i.e. the squared error between the input and a
"predicted" block where each pixel is replaced by the average along a line
in a particular direction. Since each direction has the same sum(x^2) term,
that term is never computed. See Section 2, step 2, of:
http://jmvalin.ca/notes/intra_paint.pdf */
static int od_dir_find8(const od_dering_in *img, int stride, int32_t *var,
int coeff_shift) {
int i;
int32_t cost[8] = {0};
int partial[8][15] = {{0}};
int32_t best_cost = 0;
int best_dir = 0;
/* Instead of dividing by n between 2 and 8, we multiply by 3*5*7*8/n.
The output is then 840 times larger, but that does not matter since we
are only comparing the costs to find the max. */
static const int div_table[] = {0, 840, 420, 280, 210, 168, 140, 120, 105};
for (i = 0; i < 8; i++) {
int j;
for (j = 0; j < 8; j++) {
int x;
/* We subtract 128 here to reduce the maximum range of the squared
partial sums. */
x = (img[i*stride + j] >> coeff_shift) - 128;
partial[0][i + j] += x;
partial[1][i + j/2] += x;
partial[2][i] += x;
partial[3][3 + i - j/2] += x;
partial[4][7 + i - j] += x;
partial[5][3 - i/2 + j] += x;
partial[6][j] += x;
partial[7][i/2 + j] += x;
}
}
for (i = 0; i < 8; i++) {
cost[2] += partial[2][i]*partial[2][i];
cost[6] += partial[6][i]*partial[6][i];
}
cost[2] *= div_table[8];
cost[6] *= div_table[8];
for (i = 0; i < 7; i++) {
cost[0] += (partial[0][i]*partial[0][i]
+ partial[0][14 - i]*partial[0][14 - i])*div_table[i + 1];
cost[4] += (partial[4][i]*partial[4][i]
+ partial[4][14 - i]*partial[4][14 - i])*div_table[i + 1];
}
cost[0] += partial[0][7]*partial[0][7]*div_table[8];
cost[4] += partial[4][7]*partial[4][7]*div_table[8];
for (i = 1; i < 8; i += 2) {
int j;
for (j = 0; j < 4 + 1; j++) {
cost[i] += partial[i][3 + j]*partial[i][3 + j];
}
cost[i] *= div_table[8];
for (j = 0; j < 4 - 1; j++) {
cost[i] += (partial[i][j]*partial[i][j]
+ partial[i][10 - j]*partial[i][10 - j])*div_table[2*j + 2];
}
}
for (i = 0; i < 8; i++) {
if (cost[i] > best_cost) {
best_cost = cost[i];
best_dir = i;
}
}
/* Difference between the optimal variance and the variance along the
orthogonal direction. Again, the sum(x^2) terms cancel out. */
*var = best_cost - cost[(best_dir + 4) & 7];
/* We'd normally divide by 840, but dividing by 1024 is close enough
for what we're going to do with this. */
*var >>= 10;
return best_dir;
}
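/* Worked derivation (added for clarity, not in the original change): for a
   line of N pixels with sum s, replacing every pixel by the line mean s/N
   gives a squared error of sum(x^2) - s^2/N. Summing over all the lines of
   one direction covers each of the 64 pixels exactly once, so the total
   error is sum(x^2) - sum_lines(s^2/N), where the sum(x^2) term is the same
   for every direction. Minimizing the error is therefore the same as
   maximizing sum_lines(s^2/N), and since 840 = 3*5*7*8 is divisible by every
   line length N in 1..8, multiplying by div_table[N] = 840/N scales every
   cost by the same factor of 840 and preserves the ordering. */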
#define OD_DERING_VERY_LARGE (30000)
#define OD_DERING_INBUF_SIZE ((OD_BSIZE_MAX + 2*OD_FILT_BORDER)*\
(OD_BSIZE_MAX + 2*OD_FILT_BORDER))
/* Smooth in the direction detected. */
void od_filter_dering_direction_c(int16_t *y, int ystride, const int16_t *in,
int ln, int threshold, int dir) {
int i;
int j;
int k;
static const int taps[3] = {3, 2, 2};
for (i = 0; i < 1 << ln; i++) {
for (j = 0; j < 1 << ln; j++) {
int16_t sum;
int16_t xx;
int16_t yy;
xx = in[i*OD_FILT_BSTRIDE + j];
sum = 0;
for (k = 0; k < 3; k++) {
int16_t p0;
int16_t p1;
p0 = in[i*OD_FILT_BSTRIDE + j + OD_DIRECTION_OFFSETS_TABLE[dir][k]]
- xx;
p1 = in[i*OD_FILT_BSTRIDE + j - OD_DIRECTION_OFFSETS_TABLE[dir][k]]
- xx;
if (abs(p0) < threshold) sum += taps[k]*p0;
if (abs(p1) < threshold) sum += taps[k]*p1;
}
yy = xx + ((sum + 8) >> 4);
y[i*ystride + j] = yy;
}
}
}
void od_filter_dering_direction_4x4_c(int16_t *y, int ystride,
const int16_t *in, int threshold, int dir) {
od_filter_dering_direction_c(y, ystride, in, 2, threshold, dir);
}
void od_filter_dering_direction_8x8_c(int16_t *y, int ystride,
const int16_t *in, int threshold, int dir) {
od_filter_dering_direction_c(y, ystride, in, 3, threshold, dir);
}
/* Smooth in the direction orthogonal to what was detected. */
void od_filter_dering_orthogonal_c(int16_t *y, int ystride, const int16_t *in,
const od_dering_in *x, int xstride, int ln, int threshold, int dir) {
int i;
int j;
int offset;
if (dir > 0 && dir < 4) offset = OD_FILT_BSTRIDE;
else offset = 1;
for (i = 0; i < 1 << ln; i++) {
for (j = 0; j < 1 << ln; j++) {
int16_t athresh;
int16_t yy;
int16_t sum;
int16_t p;
/* Deringing orthogonal to the direction uses a tighter threshold
because we want to be conservative. We've presumably already
achieved some deringing, so the amount of change is expected
to be low. Also, since we might be filtering across an edge, we
want to make sure not to blur it. That being said, we might want
to be a little bit more aggressive on pure horizontal/vertical
since the ringing there tends to be directional, so it doesn't
get removed by the directional filtering. */
athresh = OD_MINI(threshold, threshold/3
+ abs(in[i*OD_FILT_BSTRIDE + j] - x[i*xstride + j]));
yy = in[i*OD_FILT_BSTRIDE + j];
sum = 0;
p = in[i*OD_FILT_BSTRIDE + j + offset] - yy;
if (abs(p) < athresh) sum += p;
p = in[i*OD_FILT_BSTRIDE + j - offset] - yy;
if (abs(p) < athresh) sum += p;
p = in[i*OD_FILT_BSTRIDE + j + 2*offset] - yy;
if (abs(p) < athresh) sum += p;
p = in[i*OD_FILT_BSTRIDE + j - 2*offset] - yy;
if (abs(p) < athresh) sum += p;
y[i*ystride + j] = yy + ((3*sum + 8) >> 4);
}
}
}
void od_filter_dering_orthogonal_4x4_c(int16_t *y, int ystride,
const int16_t *in, const od_dering_in *x, int xstride, int threshold,
int dir) {
od_filter_dering_orthogonal_c(y, ystride, in, x, xstride, 2, threshold, dir);
}
void od_filter_dering_orthogonal_8x8_c(int16_t *y, int ystride,
const int16_t *in, const od_dering_in *x, int xstride, int threshold,
int dir) {
od_filter_dering_orthogonal_c(y, ystride, in, x, xstride, 3, threshold, dir);
}
/* This table approximates x^0.16 with the index being log2(x). It is clamped
to [.5, 3]. The table is computed as:
round(256*min(3, max(.5, 1.08*(sqrt(2)*2.^([0:17]+8)/256/256).^.16))) */
static const int16_t OD_THRESH_TABLE_Q8[18] = {
128, 134, 150, 168, 188, 210, 234, 262,
292, 327, 365, 408, 455, 509, 569, 635,
710, 768,
};
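/* Illustrative sketch (not part of this commit): the 18 entries above can be
   regenerated from the formula in the comment with the standalone program
   below. */
#include <math.h>
#include <stdio.h>

int main(void) {
  int i;
  for (i = 0; i < 18; i++) {
    /* x = sqrt(2)*2^(i + 8)/256/256, i.e. sqrt(2)*2^(i - 8). */
    double x = sqrt(2) * pow(2, i + 8) / (256.0 * 256.0);
    double v = 1.08 * pow(x, 0.16);
    if (v < 0.5) v = 0.5;  /* clamp to [.5, 3] */
    if (v > 3.0) v = 3.0;
    printf("%d,%c", (int)floor(256 * v + 0.5), i % 8 == 7 ? '\n' : ' ');
  }
  return 0;
}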
/* Compute deringing filter threshold for each 8x8 block based on the
directional variance difference. A high variance difference means that we
have a highly directional pattern (e.g. a high contrast edge), so we can
apply more deringing. A low variance means that we either have a low
contrast edge, or a non-directional texture, so we want to be careful not
to blur. */
static void od_compute_thresh(int thresh[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
int threshold, int32_t var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
int nhb, int nvb) {
int bx;
int by;
for (by = 0; by < nvb; by++) {
for (bx = 0; bx < nhb; bx++) {
int v1;
/* We use the variance of 8x8 blocks to adjust the threshold. */
v1 = OD_MINI(32767, var[by][bx] >> 6);
thresh[by][bx] = (threshold*OD_THRESH_TABLE_Q8[OD_ILOG(v1)] + 128) >> 8;
}
}
}
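/* Worked example (added for clarity, not in the original change): v1 is
   clamped to at most 32767, so OD_ILOG(v1) is at most 15 and the Q8 factor
   ranges from 128 to 635. A flat block (v1 == 0) therefore gets about half
   the nominal threshold, while a strongly directional block (v1 == 32767)
   gets roughly 2.5x the nominal threshold. */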
void od_dering(const od_dering_opt_vtbl *vtbl, int16_t *y, int ystride,
const od_dering_in *x, int xstride, int nhb, int nvb, int sbx, int sby,
int nhsb, int nvsb, int xdec, int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
int pli, unsigned char *bskip, int skip_stride, int threshold, int overlap,
int coeff_shift) {
int i;
int j;
int bx;
int by;
int16_t inbuf[OD_DERING_INBUF_SIZE];
int16_t *in;
int bsize;
int32_t var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS];
int thresh[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS];
bsize = 3 - xdec;
in = inbuf + OD_FILT_BORDER*OD_FILT_BSTRIDE + OD_FILT_BORDER;
/* We avoid filtering the pixels for which some of the pixels to average
are outside the frame. We could change the filter instead, but it would
add special cases for any future vectorization. */
for (i = 0; i < OD_DERING_INBUF_SIZE; i++) inbuf[i] = OD_DERING_VERY_LARGE;
for (i = -OD_FILT_BORDER*(sby != 0); i < (nvb << bsize)
+ OD_FILT_BORDER*(sby != nvsb - 1); i++) {
for (j = -OD_FILT_BORDER*(sbx != 0); j < (nhb << bsize)
+ OD_FILT_BORDER*(sbx != nhsb - 1); j++) {
in[i*OD_FILT_BSTRIDE + j] = x[i*xstride + j];
}
}
if (pli == 0) {
for (by = 0; by < nvb; by++) {
for (bx = 0; bx < nhb; bx++) {
dir[by][bx] = od_dir_find8(&x[8*by*xstride + 8*bx], xstride,
&var[by][bx], coeff_shift);
}
}
od_compute_thresh(thresh, threshold, var, nhb, nvb);
}
else {
for (by = 0; by < nvb; by++) {
for (bx = 0; bx < nhb; bx++) {
thresh[by][bx] = threshold;
}
}
}
for (by = 0; by < nvb; by++) {
for (bx = 0; bx < nhb; bx++) {
int skip;
# if defined(DAALA_ODINTRIN)
int xstart;
int ystart;
int xend;
int yend;