Commit 94de0aaa authored by Steinar Midtskogen
Browse files

CDEF cleanup

Name changes and code moves to bring code more in line with the
design doc and an upcoming single-pass patch.  No functional changes.

Change-Id: I2bccd58c644e534b139f420b623390aa971fbdb0
parent 3cb5e39c
......@@ -236,12 +236,11 @@ if (CONFIG_CDEF)
${AOM_AV1_COMMON_SOURCES}
"${AOM_ROOT}/av1/common/clpf.c"
"${AOM_ROOT}/av1/common/clpf_simd.h"
"${AOM_ROOT}/av1/common/cdef_simd.h"
"${AOM_ROOT}/av1/common/cdef.c"
"${AOM_ROOT}/av1/common/cdef.h"
"${AOM_ROOT}/av1/common/od_dering.c"
"${AOM_ROOT}/av1/common/od_dering.h"
"${AOM_ROOT}/av1/common/od_dering_simd.h")
"${AOM_ROOT}/av1/common/cdef_block.c"
"${AOM_ROOT}/av1/common/cdef_block.h"
"${AOM_ROOT}/av1/common/cdef_block_simd.h")
set(AOM_AV1_ENCODER_SOURCES
${AOM_AV1_ENCODER_SOURCES}
......@@ -250,22 +249,22 @@ if (CONFIG_CDEF)
set(AOM_AV1_COMMON_INTRIN_SSE2
${AOM_AV1_COMMON_INTRIN_SSE2}
"${AOM_ROOT}/av1/common/clpf_sse2.c"
"${AOM_ROOT}/av1/common/od_dering_sse2.c")
"${AOM_ROOT}/av1/common/cdef_block_sse2.c")
set(AOM_AV1_COMMON_INTRIN_SSSE3
${AOM_AV1_COMMON_INTRIN_SSSE3}
"${AOM_ROOT}/av1/common/clpf_ssse3.c"
"${AOM_ROOT}/av1/common/od_dering_ssse3.c")
"${AOM_ROOT}/av1/common/cdef_block_ssse3.c")
set(AOM_AV1_COMMON_INTRIN_SSE4_1
${AOM_AV1_COMMON_INTRIN_SSE4_1}
"${AOM_ROOT}/av1/common/clpf_sse4.c"
"${AOM_ROOT}/av1/common/od_dering_sse4.c")
"${AOM_ROOT}/av1/common/cdef_block_sse4.c")
set(AOM_AV1_COMMON_INTRIN_NEON
${AOM_AV1_COMMON_INTRIN_NEON}
"${AOM_ROOT}/av1/common/clpf_neon.c"
"${AOM_ROOT}/av1/common/od_dering_neon.c")
"${AOM_ROOT}/av1/common/cdef_block_neon.c")
endif ()
if (CONFIG_CONVOLVE_ROUND)
......
......@@ -92,18 +92,17 @@ endif
ifeq ($(CONFIG_CDEF),yes)
AV1_COMMON_SRCS-yes += common/clpf.c
AV1_COMMON_SRCS-yes += common/clpf_simd.h
AV1_COMMON_SRCS-yes += common/cdef_simd.h
AV1_COMMON_SRCS-$(HAVE_SSE2) += common/clpf_sse2.c
AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/clpf_ssse3.c
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/clpf_sse4.c
AV1_COMMON_SRCS-$(HAVE_NEON) += common/clpf_neon.c
AV1_COMMON_SRCS-$(HAVE_SSE2) += common/od_dering_sse2.c
AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/od_dering_ssse3.c
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/od_dering_sse4.c
AV1_COMMON_SRCS-$(HAVE_NEON) += common/od_dering_neon.c
AV1_COMMON_SRCS-yes += common/od_dering.c
AV1_COMMON_SRCS-yes += common/od_dering.h
AV1_COMMON_SRCS-yes += common/od_dering_simd.h
AV1_COMMON_SRCS-$(HAVE_SSE2) += common/cdef_block_sse2.c
AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/cdef_block_ssse3.c
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/cdef_block_sse4.c
AV1_COMMON_SRCS-$(HAVE_NEON) += common/cdef_block_neon.c
AV1_COMMON_SRCS-yes += common/cdef_block.c
AV1_COMMON_SRCS-yes += common/cdef_block.h
AV1_COMMON_SRCS-yes += common/cdef_block_simd.h
AV1_COMMON_SRCS-yes += common/cdef.c
AV1_COMMON_SRCS-yes += common/cdef.h
endif
......
......@@ -24,7 +24,6 @@ struct search_site_config;
struct mv;
union int_mv;
struct yv12_buffer_config;
typedef uint16_t od_dering_in;
EOF
}
forward_decls qw/av1_common_forward_decls/;
......@@ -561,9 +560,9 @@ if (aom_config("CONFIG_CDEF") eq "yes") {
add_proto qw/void aom_clpf_hblock_hbd/, "uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
add_proto qw/void aom_clpf_block/, "uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
add_proto qw/void aom_clpf_hblock/, "uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
add_proto qw/int od_dir_find8/, "const od_dering_in *img, int stride, int32_t *var, int coeff_shift";
add_proto qw/void od_filter_dering_direction_4x4/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping";
add_proto qw/void od_filter_dering_direction_8x8/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping";
add_proto qw/int cdef_find_dir/, "const uint16_t *img, int stride, int32_t *var, int coeff_shift";
add_proto qw/void cdef_direction_4x4/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping";
add_proto qw/void cdef_direction_8x8/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping";
add_proto qw/void copy_8x8_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride";
add_proto qw/void copy_4x4_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride";
......@@ -580,9 +579,9 @@ if (aom_config("CONFIG_CDEF") eq "yes") {
specialize qw/aom_clpf_hblock_hbd sse2 ssse3 sse4_1 neon/;
specialize qw/aom_clpf_block sse2 ssse3 sse4_1 neon/;
specialize qw/aom_clpf_hblock sse2 ssse3 sse4_1 neon/;
specialize qw/od_dir_find8 sse2 ssse3 sse4_1 neon/;
specialize qw/od_filter_dering_direction_4x4 sse2 ssse3 sse4_1 neon/;
specialize qw/od_filter_dering_direction_8x8 sse2 ssse3 sse4_1 neon/;
specialize qw/cdef_find_dir sse2 ssse3 sse4_1 neon/;
specialize qw/cdef_direction_4x4 sse2 ssse3 sse4_1 neon/;
specialize qw/cdef_direction_8x8 sse2 ssse3 sse4_1 neon/;
specialize qw/copy_8x8_16bit_to_8bit sse2 ssse3 sse4_1 neon/;
specialize qw/copy_4x4_16bit_to_8bit sse2 ssse3 sse4_1 neon/;
......
This diff is collapsed.
......@@ -8,20 +8,19 @@
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#ifndef AV1_COMMON_DERING_H_
#define AV1_COMMON_DERING_H_
#ifndef AV1_COMMON_CDEF_H_
#define AV1_COMMON_CDEF_H_
#define CDEF_STRENGTH_BITS 7
#define DERING_STRENGTHS 32
#define CLPF_STRENGTHS 4
#define CDEF_PRI_STRENGTHS 32
#define CDEF_SEC_STRENGTHS 4
#include "./aom_config.h"
#include "aom/aom_integer.h"
#include "aom_ports/mem.h"
#include "av1/common/od_dering.h"
#include "av1/common/cdef_block.h"
#include "av1/common/onyxc_int.h"
#include "./od_dering.h"
static INLINE int sign(int i) { return i < 0 ? -1 : 1; }
......@@ -40,8 +39,8 @@ extern "C" {
#endif
int sb_all_skip(const AV1_COMMON *const cm, int mi_row, int mi_col);
int sb_compute_dering_list(const AV1_COMMON *const cm, int mi_row, int mi_col,
dering_list *dlist, int filter_skip);
int sb_compute_cdef_list(const AV1_COMMON *const cm, int mi_row, int mi_col,
cdef_list *dlist, int filter_skip);
void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd);
void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
......@@ -50,4 +49,4 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
#ifdef __cplusplus
} // extern "C"
#endif
#endif // AV1_COMMON_DERING_H_
#endif // AV1_COMMON_CDEF_H_
......@@ -21,17 +21,15 @@
#include "./cdef.h"
/* Generated from gen_filter_tables.c. */
const int OD_DIRECTION_OFFSETS_TABLE[8][3] = {
{ -1 * OD_FILT_BSTRIDE + 1, -2 * OD_FILT_BSTRIDE + 2,
-3 * OD_FILT_BSTRIDE + 3 },
{ 0 * OD_FILT_BSTRIDE + 1, -1 * OD_FILT_BSTRIDE + 2,
-1 * OD_FILT_BSTRIDE + 3 },
{ 0 * OD_FILT_BSTRIDE + 1, 0 * OD_FILT_BSTRIDE + 2, 0 * OD_FILT_BSTRIDE + 3 },
{ 0 * OD_FILT_BSTRIDE + 1, 1 * OD_FILT_BSTRIDE + 2, 1 * OD_FILT_BSTRIDE + 3 },
{ 1 * OD_FILT_BSTRIDE + 1, 2 * OD_FILT_BSTRIDE + 2, 3 * OD_FILT_BSTRIDE + 3 },
{ 1 * OD_FILT_BSTRIDE + 0, 2 * OD_FILT_BSTRIDE + 1, 3 * OD_FILT_BSTRIDE + 1 },
{ 1 * OD_FILT_BSTRIDE + 0, 2 * OD_FILT_BSTRIDE + 0, 3 * OD_FILT_BSTRIDE + 0 },
{ 1 * OD_FILT_BSTRIDE + 0, 2 * OD_FILT_BSTRIDE - 1, 3 * OD_FILT_BSTRIDE - 1 },
/* Pixel offsets for the 8 filter directions (first index), with three tap
   positions per direction (second index). Each entry is written as
   row * CDEF_BSTRIDE + column, i.e. a displacement inside a buffer whose
   line stride is CDEF_BSTRIDE. The directional filters read the input at
   both +offset and -offset relative to the current pixel. Entry 0 steps one
   row up and one column right per tap; entry 2 stays on the same row. */
const int cdef_directions[8][3] = {
{ -1 * CDEF_BSTRIDE + 1, -2 * CDEF_BSTRIDE + 2, -3 * CDEF_BSTRIDE + 3 },
{ 0 * CDEF_BSTRIDE + 1, -1 * CDEF_BSTRIDE + 2, -1 * CDEF_BSTRIDE + 3 },
{ 0 * CDEF_BSTRIDE + 1, 0 * CDEF_BSTRIDE + 2, 0 * CDEF_BSTRIDE + 3 },
{ 0 * CDEF_BSTRIDE + 1, 1 * CDEF_BSTRIDE + 2, 1 * CDEF_BSTRIDE + 3 },
{ 1 * CDEF_BSTRIDE + 1, 2 * CDEF_BSTRIDE + 2, 3 * CDEF_BSTRIDE + 3 },
{ 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 1, 3 * CDEF_BSTRIDE + 1 },
{ 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 0, 3 * CDEF_BSTRIDE + 0 },
{ 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE - 1, 3 * CDEF_BSTRIDE - 1 }
};
/* Detect direction. 0 means 45-degree up-right, 2 is horizontal, and so on.
......@@ -41,8 +39,8 @@ const int OD_DIRECTION_OFFSETS_TABLE[8][3] = {
in a particular direction. Since each direction has the same sum(x^2) term,
that term is never computed. See Section 2, step 2, of:
http://jmvalin.ca/notes/intra_paint.pdf */
int od_dir_find8_c(const uint16_t *img, int stride, int32_t *var,
int coeff_shift) {
int cdef_find_dir_c(const uint16_t *img, int stride, int32_t *var,
int coeff_shift) {
int i;
int32_t cost[8] = { 0 };
int partial[8][15] = { { 0 } };
......@@ -113,9 +111,8 @@ int od_dir_find8_c(const uint16_t *img, int stride, int32_t *var,
}
/* Smooth in the direction detected. */
void od_filter_dering_direction_8x8_c(uint16_t *y, int ystride,
const uint16_t *in, int threshold,
int dir, int damping) {
void cdef_direction_8x8_c(uint16_t *y, int ystride, const uint16_t *in,
int threshold, int dir, int damping) {
int i;
int j;
int k;
......@@ -125,15 +122,13 @@ void od_filter_dering_direction_8x8_c(uint16_t *y, int ystride,
int16_t sum;
int16_t xx;
int16_t yy;
xx = in[i * OD_FILT_BSTRIDE + j];
xx = in[i * CDEF_BSTRIDE + j];
sum = 0;
for (k = 0; k < 3; k++) {
int16_t p0;
int16_t p1;
p0 = in[i * OD_FILT_BSTRIDE + j + OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
xx;
p1 = in[i * OD_FILT_BSTRIDE + j - OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
xx;
p0 = in[i * CDEF_BSTRIDE + j + cdef_directions[dir][k]] - xx;
p1 = in[i * CDEF_BSTRIDE + j - cdef_directions[dir][k]] - xx;
sum += taps[k] * constrain(p0, threshold, damping);
sum += taps[k] * constrain(p1, threshold, damping);
}
......@@ -145,9 +140,8 @@ void od_filter_dering_direction_8x8_c(uint16_t *y, int ystride,
}
/* Smooth in the direction detected. */
void od_filter_dering_direction_4x4_c(uint16_t *y, int ystride,
const uint16_t *in, int threshold,
int dir, int damping) {
void cdef_direction_4x4_c(uint16_t *y, int ystride, const uint16_t *in,
int threshold, int dir, int damping) {
int i;
int j;
int k;
......@@ -157,15 +151,13 @@ void od_filter_dering_direction_4x4_c(uint16_t *y, int ystride,
int16_t sum;
int16_t xx;
int16_t yy;
xx = in[i * OD_FILT_BSTRIDE + j];
xx = in[i * CDEF_BSTRIDE + j];
sum = 0;
for (k = 0; k < 2; k++) {
int16_t p0;
int16_t p1;
p0 = in[i * OD_FILT_BSTRIDE + j + OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
xx;
p1 = in[i * OD_FILT_BSTRIDE + j - OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
xx;
p0 = in[i * CDEF_BSTRIDE + j + cdef_directions[dir][k]] - xx;
p1 = in[i * CDEF_BSTRIDE + j - cdef_directions[dir][k]] - xx;
sum += taps[k] * constrain(p0, threshold, damping);
sum += taps[k] * constrain(p1, threshold, damping);
}
......@@ -176,16 +168,16 @@ void od_filter_dering_direction_4x4_c(uint16_t *y, int ystride,
}
}
/* Compute deringing filter threshold for an 8x8 block based on the
directional variance difference. A high variance difference means that we
have a highly directional pattern (e.g. a high contrast edge), so we can
apply more deringing. A low variance means that we either have a low
contrast edge, or a non-directional texture, so we want to be careful not
to blur. */
static INLINE int od_adjust_thresh(int threshold, int32_t var) {
/* Compute the primary filter strength for an 8x8 block based on the
directional variance difference. A high variance difference means
that we have a highly directional pattern (e.g. a high contrast
edge), so we can apply more deringing. A low variance means that we
either have a low contrast edge, or a non-directional texture, so
we want to be careful not to blur. */
static INLINE int adjust_strength(int strength, int32_t var) {
const int i = var >> 6 ? AOMMIN(get_msb(var >> 6), 12) : 0;
/* We use the variance of 8x8 blocks to adjust the threshold. */
return var ? (threshold * (4 + i) + 8) >> 4 : 0;
/* We use the variance of 8x8 blocks to adjust the strength. */
return var ? (strength * (4 + i) + 8) >> 4 : 0;
}
void copy_8x8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src,
......@@ -202,20 +194,20 @@ void copy_4x4_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src,
for (j = 0; j < 4; j++) dst[i * dstride + j] = src[i * sstride + j];
}
static void copy_dering_16bit_to_16bit(uint16_t *dst, int dstride,
uint16_t *src, dering_list *dlist,
int dering_count, int bsize) {
static void copy_block_16bit_to_16bit(uint16_t *dst, int dstride, uint16_t *src,
cdef_list *dlist, int cdef_count,
int bsize) {
int bi, bx, by;
if (bsize == BLOCK_8X8) {
for (bi = 0; bi < dering_count; bi++) {
for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
copy_8x8_16bit_to_16bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
&src[bi << (3 + 3)], 8);
}
} else if (bsize == BLOCK_4X8) {
for (bi = 0; bi < dering_count; bi++) {
for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
copy_4x4_16bit_to_16bit(&dst[(by << 3) * dstride + (bx << 2)], dstride,
......@@ -224,7 +216,7 @@ static void copy_dering_16bit_to_16bit(uint16_t *dst, int dstride,
dstride, &src[(bi << (3 + 2)) + 4 * 4], 4);
}
} else if (bsize == BLOCK_8X4) {
for (bi = 0; bi < dering_count; bi++) {
for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 3)], dstride,
......@@ -234,7 +226,7 @@ static void copy_dering_16bit_to_16bit(uint16_t *dst, int dstride,
}
} else {
assert(bsize == BLOCK_4X4);
for (bi = 0; bi < dering_count; bi++) {
for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
......@@ -259,19 +251,19 @@ void copy_4x4_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src,
dst[i * dstride + j] = (uint8_t)src[i * sstride + j];
}
static void copy_dering_16bit_to_8bit(uint8_t *dst, int dstride,
const uint16_t *src, dering_list *dlist,
int dering_count, int bsize) {
static void copy_block_16bit_to_8bit(uint8_t *dst, int dstride,
const uint16_t *src, cdef_list *dlist,
int cdef_count, int bsize) {
int bi, bx, by;
if (bsize == BLOCK_8X8) {
for (bi = 0; bi < dering_count; bi++) {
for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
copy_8x8_16bit_to_8bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
&src[bi << (3 + 3)], 8);
}
} else if (bsize == BLOCK_4X8) {
for (bi = 0; bi < dering_count; bi++) {
for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
copy_4x4_16bit_to_8bit(&dst[(by << 3) * dstride + (bx << 2)], dstride,
......@@ -280,7 +272,7 @@ static void copy_dering_16bit_to_8bit(uint8_t *dst, int dstride,
dstride, &src[(bi << (3 + 2)) + 4 * 4], 4);
}
} else if (bsize == BLOCK_8X4) {
for (bi = 0; bi < dering_count; bi++) {
for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 3)], dstride,
......@@ -290,7 +282,7 @@ static void copy_dering_16bit_to_8bit(uint8_t *dst, int dstride,
}
} else {
assert(bsize == BLOCK_4X4);
for (bi = 0; bi < dering_count; bi++) {
for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
......@@ -305,12 +297,12 @@ int get_filter_skip(int level) {
return filter_skip;
}
void od_dering(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in, int xdec,
int ydec, int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
int *dirinit, int var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
int pli, dering_list *dlist, int dering_count, int level,
int clpf_strength, int clpf_damping, int dering_damping,
int coeff_shift, int skip_dering, int hbd) {
void cdef_filter_fb(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in,
int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],
int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli,
cdef_list *dlist, int cdef_count, int level,
int sec_strength, int sec_damping, int pri_damping,
int coeff_shift, int skip_dering, int hbd) {
int bi;
int bx;
int by;
......@@ -320,11 +312,10 @@ void od_dering(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in, int xdec,
int filter_skip = get_filter_skip(level);
if (level == 1) threshold = 31 << coeff_shift;
od_filter_dering_direction_func filter_dering_direction[] = {
od_filter_dering_direction_4x4, od_filter_dering_direction_8x8
};
clpf_damping += coeff_shift - (pli != AOM_PLANE_Y);
dering_damping += coeff_shift - (pli != AOM_PLANE_Y);
cdef_direction_func cdef_direction[] = { cdef_direction_4x4,
cdef_direction_8x8 };
sec_damping += coeff_shift - (pli != AOM_PLANE_Y);
pri_damping += coeff_shift - (pli != AOM_PLANE_Y);
bsize =
ydec ? (xdec ? BLOCK_4X4 : BLOCK_8X4) : (xdec ? BLOCK_4X8 : BLOCK_8X8);
bsizex = 3 - xdec;
......@@ -333,12 +324,11 @@ void od_dering(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in, int xdec,
if (!skip_dering) {
if (pli == 0) {
if (!dirinit || !*dirinit) {
for (bi = 0; bi < dering_count; bi++) {
for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
dir[by][bx] =
od_dir_find8(&in[8 * by * OD_FILT_BSTRIDE + 8 * bx],
OD_FILT_BSTRIDE, &var[by][bx], coeff_shift);
dir[by][bx] = cdef_find_dir(&in[8 * by * CDEF_BSTRIDE + 8 * bx],
CDEF_BSTRIDE, &var[by][bx], coeff_shift);
}
if (dirinit) *dirinit = 1;
}
......@@ -348,24 +338,23 @@ void od_dering(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in, int xdec,
// something out in y[] later.
if (threshold != 0) {
assert(bsize == BLOCK_8X8 || bsize == BLOCK_4X4);
for (bi = 0; bi < dering_count; bi++) {
for (bi = 0; bi < cdef_count; bi++) {
int t = !filter_skip && dlist[bi].skip ? 0 : threshold;
by = dlist[bi].by;
bx = dlist[bi].bx;
(filter_dering_direction[bsize == BLOCK_8X8])(
(cdef_direction[bsize == BLOCK_8X8])(
&y[bi << (bsizex + bsizey)], 1 << bsizex,
&in[(by * OD_FILT_BSTRIDE << bsizey) + (bx << bsizex)],
pli ? t : od_adjust_thresh(t, var[by][bx]), dir[by][bx],
dering_damping);
&in[(by * CDEF_BSTRIDE << bsizey) + (bx << bsizex)],
pli ? t : adjust_strength(t, var[by][bx]), dir[by][bx],
pri_damping);
}
}
}
if (clpf_strength) {
if (sec_strength) {
if (threshold && !skip_dering)
copy_dering_16bit_to_16bit(in, OD_FILT_BSTRIDE, y, dlist, dering_count,
bsize);
for (bi = 0; bi < dering_count; bi++) {
copy_block_16bit_to_16bit(in, CDEF_BSTRIDE, y, dlist, cdef_count, bsize);
for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
int py = by << bsizey;
......@@ -378,31 +367,31 @@ void od_dering(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in, int xdec,
: aom_clpf_hblock_hbd)(
dst ? (uint16_t *)dst + py * dstride + px
: &y[bi << (bsizex + bsizey)],
in + py * OD_FILT_BSTRIDE + px, dst && hbd ? dstride : 1 << bsizex,
OD_FILT_BSTRIDE, 1 << bsizex, 1 << bsizey,
clpf_strength << coeff_shift, clpf_damping);
in + py * CDEF_BSTRIDE + px, dst && hbd ? dstride : 1 << bsizex,
CDEF_BSTRIDE, 1 << bsizex, 1 << bsizey, sec_strength << coeff_shift,
sec_damping);
} else {
// Do clpf and write the result to an 8 bit destination
(!threshold || (dir[by][bx] < 4 && dir[by][bx]) ? aom_clpf_block
: aom_clpf_hblock)(
dst + py * dstride + px, in + py * OD_FILT_BSTRIDE + px, dstride,
OD_FILT_BSTRIDE, 1 << bsizex, 1 << bsizey,
clpf_strength << coeff_shift, clpf_damping);
dst + py * dstride + px, in + py * CDEF_BSTRIDE + px, dstride,
CDEF_BSTRIDE, 1 << bsizex, 1 << bsizey, sec_strength << coeff_shift,
sec_damping);
}
}
} else if (threshold != 0) {
// No clpf, so copy instead
if (hbd) {
copy_dering_16bit_to_16bit((uint16_t *)dst, dstride, y, dlist,
dering_count, bsize);
copy_block_16bit_to_16bit((uint16_t *)dst, dstride, y, dlist, cdef_count,
bsize);
} else {
copy_dering_16bit_to_8bit(dst, dstride, y, dlist, dering_count, bsize);
copy_block_16bit_to_8bit(dst, dstride, y, dlist, cdef_count, bsize);
}
} else if (dirinit) {
// If we're here, both dering and clpf are off, and we still haven't written
// anything to y[] yet, so we just copy the input to y[]. This is necessary
// only for av1_cdef_search() and only av1_cdef_search() sets dirinit.
for (bi = 0; bi < dering_count; bi++) {
for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
int iy, ix;
......@@ -410,7 +399,7 @@ void od_dering(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in, int xdec,
for (iy = 0; iy < 1 << bsizey; iy++)
for (ix = 0; ix < 1 << bsizex; ix++)
y[(bi << (bsizex + bsizey)) + (iy << bsizex) + ix] =
in[((by << bsizey) + iy) * OD_FILT_BSTRIDE + (bx << bsizex) + ix];
in[((by << bsizey) + iy) * CDEF_BSTRIDE + (bx << bsizex) + ix];
}
}
}
......@@ -9,43 +9,43 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#if !defined(_dering_H)
#define _dering_H (1)
#if !defined(_CDEF_BLOCK_H)
#define _CDEF_BLOCK_H (1)
#include "odintrin.h"
#include "./odintrin.h"
#define OD_DERING_NBLOCKS (MAX_SB_SIZE / 8)
#define CDEF_BLOCKSIZE 64
#define CDEF_BLOCKSIZE_LOG2 6
#define CDEF_NBLOCKS (CDEF_BLOCKSIZE / 8)
/* We need to buffer three vertical lines. */
#define OD_FILT_VBORDER (3)
#define CDEF_VBORDER (3)
/* We only need to buffer three horizontal pixels too, but let's align to
16 bytes (8 x 16 bits) to make vectorization easier. */
#define OD_FILT_HBORDER (8)
#define OD_FILT_BSTRIDE ALIGN_POWER_OF_TWO(MAX_SB_SIZE + 2 * OD_FILT_HBORDER, 3)
#define CDEF_HBORDER (8)
#define CDEF_BSTRIDE ALIGN_POWER_OF_TWO(CDEF_BLOCKSIZE + 2 * CDEF_HBORDER, 3)
#define OD_DERING_VERY_LARGE (30000)
#define OD_DERING_INBUF_SIZE \
(OD_FILT_BSTRIDE * (MAX_SB_SIZE + 2 * OD_FILT_VBORDER))
#define CDEF_VERY_LARGE (30000)
#define CDEF_INBUF_SIZE (CDEF_BSTRIDE * (CDEF_BLOCKSIZE + 2 * CDEF_VBORDER))
extern const int OD_DIRECTION_OFFSETS_TABLE[8][3];
extern const int cdef_directions[8][3];
typedef struct {
uint8_t by;
uint8_t bx;
uint8_t skip;
} dering_list;
} cdef_list;
typedef void (*od_filter_dering_direction_func)(uint16_t *y, int ystride,
const uint16_t *in,
int threshold, int dir,
int damping);
/* Signature shared by the directional filter kernels (cdef_direction_4x4 and
   cdef_direction_8x8): filter the block read from `in` along direction `dir`
   using `threshold` and `damping`, and write the result to `y`, whose line
   stride is `ystride`. */
typedef void (*cdef_direction_func)(uint16_t *y, int ystride,
const uint16_t *in, int threshold, int dir,
int damping);
int get_filter_skip(int level);
void od_dering(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in, int xdec,
int ydec, int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
int *dirinit, int var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
int pli, dering_list *dlist, int dering_count, int level,
int clpf_strength, int clpf_damping, int dering_damping,
int coeff_shift, int skip_dering, int hbd);
/* Filters the blocks listed in dlist[0 .. cdef_count - 1] for plane `pli`.

   dst, dstride  Destination picture and its stride; dst is accessed as
                 uint16_t when hbd is set.
                 NOTE(review): dst appears to be allowed to be NULL, in which
                 case output goes to y -- confirm against the full body.
   y             16-bit output buffer holding one packed block per dlist
                 entry.
   in            16-bit input buffer with a line stride of CDEF_BSTRIDE.
   xdec, ydec    Subsampling flags selecting the block size (BLOCK_8X8 when
                 both are 0, BLOCK_4X4 when both are set).
   dir, var      Per-8x8-block direction and variance. For plane 0 they are
                 filled through cdef_find_dir() unless *dirinit is already
                 non-zero.
   dirinit       Optional (may be NULL); set to 1 once dir/var are computed.
   dlist         Block coordinates (by, bx) and skip flags to process.
   level         Passed to get_filter_skip(); the primary filter threshold
                 is presumably derived from it -- TODO confirm.
   sec_strength  Secondary filter strength, applied as
                 sec_strength << coeff_shift; 0 disables that stage.
   sec_damping, pri_damping
                 Damping values; both are increased by coeff_shift and
                 reduced by one for planes other than AOM_PLANE_Y.
   skip_dering   Non-zero skips the direction search and directional filter.
   hbd           Non-zero selects a 16-bit destination. */
void cdef_filter_fb(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in,
int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],
int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli,
cdef_list *dlist, int cdef_count, int level,
int sec_strength, int sec_damping, int pri_damping,
int coeff_shift, int skip_dering, int hbd);
#endif
......@@ -11,4 +11,4 @@
#include "aom_dsp/aom_simd.h"
#define SIMD_FUNC(name) name##_neon
#include "./od_dering_simd.h"
#include "./cdef_block_simd.h"
......@@ -10,8 +10,7 @@
*/
#include "./av1_rtcd.h"
#include "./cdef_simd.h"
#include "./od_dering.h"
#include "./cdef_block.h"
/* partial A is a 16-bit vector of the form:
[x8 x7 x6 x5 x4 x3 x2 x1] and partial B has the form:
......@@ -155,8 +154,8 @@ static INLINE void array_reverse_transpose_8x8(v128 *in, v128 *res) {
res[0] = v128_ziphi_64(tr1_7, tr1_6);
}
int SIMD_FUNC(od_dir_find8)(const od_dering_in *img, int stride, int32_t *var,
int coeff_shift) {
int SIMD_FUNC(cdef_find_dir)(const uint16_t *img, int stride, int32_t *var,
int coeff_shift) {
int i;
int32_t cost[8];
int32_t best_cost = 0;
......@@ -211,42 +210,51 @@ int SIMD_FUNC(od_dir_find8)(const od_dering_in *img, int stride, int32_t *var,
return best_dir;
}
void SIMD_FUNC(od_filter_dering_direction_4x4)(uint16_t *y, int ystride,
const uint16_t *in,
int threshold, int dir,
int damping) {
// sign(a-b) * min(abs(a-b), max(0, threshold - (abs(a-b) >> adjdamp)))