Commit 94de0aaa authored by Steinar Midtskogen

CDEF cleanup

Name changes and code moves to bring code more in line with the
design doc and an upcoming single-pass patch.  No functional changes.

Change-Id: I2bccd58c644e534b139f420b623390aa971fbdb0
parent 3cb5e39c
......@@ -236,12 +236,11 @@ if (CONFIG_CDEF)
${AOM_AV1_COMMON_SOURCES}
"${AOM_ROOT}/av1/common/clpf.c"
"${AOM_ROOT}/av1/common/clpf_simd.h"
"${AOM_ROOT}/av1/common/cdef_simd.h"
"${AOM_ROOT}/av1/common/cdef.c"
"${AOM_ROOT}/av1/common/cdef.h"
"${AOM_ROOT}/av1/common/od_dering.c"
"${AOM_ROOT}/av1/common/od_dering.h"
"${AOM_ROOT}/av1/common/od_dering_simd.h")
"${AOM_ROOT}/av1/common/cdef_block.c"
"${AOM_ROOT}/av1/common/cdef_block.h"
"${AOM_ROOT}/av1/common/cdef_block_simd.h")
set(AOM_AV1_ENCODER_SOURCES
${AOM_AV1_ENCODER_SOURCES}
......@@ -250,22 +249,22 @@ if (CONFIG_CDEF)
set(AOM_AV1_COMMON_INTRIN_SSE2
${AOM_AV1_COMMON_INTRIN_SSE2}
"${AOM_ROOT}/av1/common/clpf_sse2.c"
"${AOM_ROOT}/av1/common/od_dering_sse2.c")
"${AOM_ROOT}/av1/common/cdef_block_sse2.c")
set(AOM_AV1_COMMON_INTRIN_SSSE3
${AOM_AV1_COMMON_INTRIN_SSSE3}
"${AOM_ROOT}/av1/common/clpf_ssse3.c"
"${AOM_ROOT}/av1/common/od_dering_ssse3.c")
"${AOM_ROOT}/av1/common/cdef_block_ssse3.c")
set(AOM_AV1_COMMON_INTRIN_SSE4_1
${AOM_AV1_COMMON_INTRIN_SSE4_1}
"${AOM_ROOT}/av1/common/clpf_sse4.c"
"${AOM_ROOT}/av1/common/od_dering_sse4.c")
"${AOM_ROOT}/av1/common/cdef_block_sse4.c")
set(AOM_AV1_COMMON_INTRIN_NEON
${AOM_AV1_COMMON_INTRIN_NEON}
"${AOM_ROOT}/av1/common/clpf_neon.c"
"${AOM_ROOT}/av1/common/od_dering_neon.c")
"${AOM_ROOT}/av1/common/cdef_block_neon.c")
endif ()
if (CONFIG_CONVOLVE_ROUND)
......
......@@ -92,18 +92,17 @@ endif
ifeq ($(CONFIG_CDEF),yes)
AV1_COMMON_SRCS-yes += common/clpf.c
AV1_COMMON_SRCS-yes += common/clpf_simd.h
AV1_COMMON_SRCS-yes += common/cdef_simd.h
AV1_COMMON_SRCS-$(HAVE_SSE2) += common/clpf_sse2.c
AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/clpf_ssse3.c
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/clpf_sse4.c
AV1_COMMON_SRCS-$(HAVE_NEON) += common/clpf_neon.c
AV1_COMMON_SRCS-$(HAVE_SSE2) += common/od_dering_sse2.c
AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/od_dering_ssse3.c
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/od_dering_sse4.c
AV1_COMMON_SRCS-$(HAVE_NEON) += common/od_dering_neon.c
AV1_COMMON_SRCS-yes += common/od_dering.c
AV1_COMMON_SRCS-yes += common/od_dering.h
AV1_COMMON_SRCS-yes += common/od_dering_simd.h
AV1_COMMON_SRCS-$(HAVE_SSE2) += common/cdef_block_sse2.c
AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/cdef_block_ssse3.c
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/cdef_block_sse4.c
AV1_COMMON_SRCS-$(HAVE_NEON) += common/cdef_block_neon.c
AV1_COMMON_SRCS-yes += common/cdef_block.c
AV1_COMMON_SRCS-yes += common/cdef_block.h
AV1_COMMON_SRCS-yes += common/cdef_block_simd.h
AV1_COMMON_SRCS-yes += common/cdef.c
AV1_COMMON_SRCS-yes += common/cdef.h
endif
......
......@@ -24,7 +24,6 @@ struct search_site_config;
struct mv;
union int_mv;
struct yv12_buffer_config;
typedef uint16_t od_dering_in;
EOF
}
forward_decls qw/av1_common_forward_decls/;
......@@ -561,9 +560,9 @@ if (aom_config("CONFIG_CDEF") eq "yes") {
add_proto qw/void aom_clpf_hblock_hbd/, "uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
add_proto qw/void aom_clpf_block/, "uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
add_proto qw/void aom_clpf_hblock/, "uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
add_proto qw/int od_dir_find8/, "const od_dering_in *img, int stride, int32_t *var, int coeff_shift";
add_proto qw/void od_filter_dering_direction_4x4/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping";
add_proto qw/void od_filter_dering_direction_8x8/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping";
add_proto qw/int cdef_find_dir/, "const uint16_t *img, int stride, int32_t *var, int coeff_shift";
add_proto qw/void cdef_direction_4x4/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping";
add_proto qw/void cdef_direction_8x8/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping";
add_proto qw/void copy_8x8_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride";
add_proto qw/void copy_4x4_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride";
......@@ -580,9 +579,9 @@ if (aom_config("CONFIG_CDEF") eq "yes") {
specialize qw/aom_clpf_hblock_hbd sse2 ssse3 sse4_1 neon/;
specialize qw/aom_clpf_block sse2 ssse3 sse4_1 neon/;
specialize qw/aom_clpf_hblock sse2 ssse3 sse4_1 neon/;
specialize qw/od_dir_find8 sse2 ssse3 sse4_1 neon/;
specialize qw/od_filter_dering_direction_4x4 sse2 ssse3 sse4_1 neon/;
specialize qw/od_filter_dering_direction_8x8 sse2 ssse3 sse4_1 neon/;
specialize qw/cdef_find_dir sse2 ssse3 sse4_1 neon/;
specialize qw/cdef_direction_4x4 sse2 ssse3 sse4_1 neon/;
specialize qw/cdef_direction_8x8 sse2 ssse3 sse4_1 neon/;
specialize qw/copy_8x8_16bit_to_8bit sse2 ssse3 sse4_1 neon/;
specialize qw/copy_4x4_16bit_to_8bit sse2 ssse3 sse4_1 neon/;
......
This diff is collapsed.
......@@ -8,20 +8,19 @@
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#ifndef AV1_COMMON_DERING_H_
#define AV1_COMMON_DERING_H_
#ifndef AV1_COMMON_CDEF_H_
#define AV1_COMMON_CDEF_H_
#define CDEF_STRENGTH_BITS 7
#define DERING_STRENGTHS 32
#define CLPF_STRENGTHS 4
#define CDEF_PRI_STRENGTHS 32
#define CDEF_SEC_STRENGTHS 4
#include "./aom_config.h"
#include "aom/aom_integer.h"
#include "aom_ports/mem.h"
#include "av1/common/od_dering.h"
#include "av1/common/cdef_block.h"
#include "av1/common/onyxc_int.h"
#include "./od_dering.h"
static INLINE int sign(int i) { return i < 0 ? -1 : 1; }
......@@ -40,8 +39,8 @@ extern "C" {
#endif
int sb_all_skip(const AV1_COMMON *const cm, int mi_row, int mi_col);
int sb_compute_dering_list(const AV1_COMMON *const cm, int mi_row, int mi_col,
dering_list *dlist, int filter_skip);
int sb_compute_cdef_list(const AV1_COMMON *const cm, int mi_row, int mi_col,
cdef_list *dlist, int filter_skip);
void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd);
void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
......@@ -50,4 +49,4 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
#ifdef __cplusplus
} // extern "C"
#endif
#endif // AV1_COMMON_DERING_H_
#endif // AV1_COMMON_CDEF_H_
......@@ -9,43 +9,43 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#if !defined(_dering_H)
#define _dering_H (1)
#if !defined(_CDEF_BLOCK_H)
#define _CDEF_BLOCK_H (1)
#include "odintrin.h"
#include "./odintrin.h"
#define OD_DERING_NBLOCKS (MAX_SB_SIZE / 8)
#define CDEF_BLOCKSIZE 64
#define CDEF_BLOCKSIZE_LOG2 6
#define CDEF_NBLOCKS (CDEF_BLOCKSIZE / 8)
/* We need to buffer three vertical lines. */
#define OD_FILT_VBORDER (3)
#define CDEF_VBORDER (3)
/* We only need to buffer three horizontal pixels too, but let's align to
16 bytes (8 x 16 bits) to make vectorization easier. */
#define OD_FILT_HBORDER (8)
#define OD_FILT_BSTRIDE ALIGN_POWER_OF_TWO(MAX_SB_SIZE + 2 * OD_FILT_HBORDER, 3)
#define CDEF_HBORDER (8)
#define CDEF_BSTRIDE ALIGN_POWER_OF_TWO(CDEF_BLOCKSIZE + 2 * CDEF_HBORDER, 3)
#define OD_DERING_VERY_LARGE (30000)
#define OD_DERING_INBUF_SIZE \
(OD_FILT_BSTRIDE * (MAX_SB_SIZE + 2 * OD_FILT_VBORDER))
#define CDEF_VERY_LARGE (30000)
#define CDEF_INBUF_SIZE (CDEF_BSTRIDE * (CDEF_BLOCKSIZE + 2 * CDEF_VBORDER))
extern const int OD_DIRECTION_OFFSETS_TABLE[8][3];
extern const int cdef_directions[8][3];
typedef struct {
uint8_t by;
uint8_t bx;
uint8_t skip;
} dering_list;
} cdef_list;
typedef void (*od_filter_dering_direction_func)(uint16_t *y, int ystride,
const uint16_t *in,
int threshold, int dir,
int damping);
typedef void (*cdef_direction_func)(uint16_t *y, int ystride,
const uint16_t *in, int threshold, int dir,
int damping);
int get_filter_skip(int level);
void od_dering(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in, int xdec,
int ydec, int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
int *dirinit, int var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
int pli, dering_list *dlist, int dering_count, int level,
int clpf_strength, int clpf_damping, int dering_damping,
int coeff_shift, int skip_dering, int hbd);
void cdef_filter_fb(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in,
int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],
int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli,
cdef_list *dlist, int cdef_count, int level,
int sec_strength, int sec_damping, int pri_damping,
int coeff_shift, int skip_dering, int hbd);
#endif
......@@ -11,4 +11,4 @@
#include "aom_dsp/aom_simd.h"
#define SIMD_FUNC(name) name##_neon
#include "./od_dering_simd.h"
#include "./cdef_block_simd.h"
......@@ -10,8 +10,7 @@
*/
#include "./av1_rtcd.h"
#include "./cdef_simd.h"
#include "./od_dering.h"
#include "./cdef_block.h"
/* partial A is a 16-bit vector of the form:
[x8 x7 x6 x5 x4 x3 x2 x1] and partial B has the form:
......@@ -155,8 +154,8 @@ static INLINE void array_reverse_transpose_8x8(v128 *in, v128 *res) {
res[0] = v128_ziphi_64(tr1_7, tr1_6);
}
int SIMD_FUNC(od_dir_find8)(const od_dering_in *img, int stride, int32_t *var,
int coeff_shift) {
int SIMD_FUNC(cdef_find_dir)(const uint16_t *img, int stride, int32_t *var,
int coeff_shift) {
int i;
int32_t cost[8];
int32_t best_cost = 0;
......@@ -211,42 +210,51 @@ int SIMD_FUNC(od_dir_find8)(const od_dering_in *img, int stride, int32_t *var,
return best_dir;
}
void SIMD_FUNC(od_filter_dering_direction_4x4)(uint16_t *y, int ystride,
const uint16_t *in,
int threshold, int dir,
int damping) {
// Per 16-bit lane, constrains the difference between a and b:
//   sign(a-b) * min(abs(a-b), max(0, threshold - (abs(a-b) >> adjdamp)))
// The magnitude is limited first and the sign is re-applied at the end.
SIMD_INLINE v128 constrain16(v128 a, v128 b, unsigned int threshold,
                             unsigned int adjdamp) {
  const v128 delta = v128_sub_16(a, b);
  // Shifting by 15 leaves only the sign bit replicated in each lane
  // (presumably 0 for non-negative lanes, all-ones for negative ones).
  const v128 neg_mask = v128_shr_n_s16(delta, 15);
  const v128 magnitude = v128_abs_s16(delta);
  // threshold - (magnitude >> adjdamp); the saturating subtract supplies the
  // max(0, ...) part of the formula.
  const v128 limit =
      v128_ssub_u16(v128_dup_16(threshold), v128_shr_u16(magnitude, adjdamp));
  const v128 clamped = v128_min_s16(magnitude, limit);
  // (x + mask) ^ mask negates x in lanes where mask is all-ones and leaves
  // it unchanged where mask is zero.
  return v128_xor(v128_add_16(neg_mask, clamped), neg_mask);
}
void SIMD_FUNC(cdef_direction_4x4)(uint16_t *y, int ystride, const uint16_t *in,
int threshold, int dir, int damping) {
int i;
v128 p0, p1, sum, row, res;
int o1 = OD_DIRECTION_OFFSETS_TABLE[dir][0];
int o2 = OD_DIRECTION_OFFSETS_TABLE[dir][1];
int o1 = cdef_directions[dir][0];
int o2 = cdef_directions[dir][1];
if (threshold) damping -= get_msb(threshold);
for (i = 0; i < 4; i += 2) {
sum = v128_zero();
row = v128_from_v64(v64_load_aligned(&in[i * OD_FILT_BSTRIDE]),
v64_load_aligned(&in[(i + 1) * OD_FILT_BSTRIDE]));
row = v128_from_v64(v64_load_aligned(&in[i * CDEF_BSTRIDE]),
v64_load_aligned(&in[(i + 1) * CDEF_BSTRIDE]));
// p0 = constrain16(in[i*OD_FILT_BSTRIDE + offset], row, threshold, damping)
p0 = v128_from_v64(v64_load_unaligned(&in[i * OD_FILT_BSTRIDE + o1]),
v64_load_unaligned(&in[(i + 1) * OD_FILT_BSTRIDE + o1]));
// p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping)
p0 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + o1]),
v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + o1]));
p0 = constrain16(p0, row, threshold, damping);
// p1 = constrain16(in[i*OD_FILT_BSTRIDE - offset], row, threshold, damping)
p1 = v128_from_v64(v64_load_unaligned(&in[i * OD_FILT_BSTRIDE - o1]),
v64_load_unaligned(&in[(i + 1) * OD_FILT_BSTRIDE - o1]));
// p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping)
p1 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - o1]),
v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - o1]));
p1 = constrain16(p1, row, threshold, damping);
// sum += 4 * (p0 + p1)
sum = v128_add_16(sum, v128_shl_n_16(v128_add_16(p0, p1), 2));
// p0 = constrain16(in[i*OD_FILT_BSTRIDE + offset], row, threshold, damping)
p0 = v128_from_v64(v64_load_unaligned(&in[i * OD_FILT_BSTRIDE + o2]),
v64_load_unaligned(&in[(i + 1) * OD_FILT_BSTRIDE + o2]));
// p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping)
p0 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + o2]),
v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + o2]));
p0 = constrain16(p0, row, threshold, damping);
// p1 = constrain16(in[i*OD_FILT_BSTRIDE - offset], row, threshold, damping)
p1 = v128_from_v64(v64_load_unaligned(&in[i * OD_FILT_BSTRIDE - o2]),
v64_load_unaligned(&in[(i + 1) * OD_FILT_BSTRIDE - o2]));
// p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping)
p1 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - o2]),
v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - o2]));
p1 = constrain16(p1, row, threshold, damping);
// sum += 1 * (p0 + p1)
......@@ -261,27 +269,25 @@ void SIMD_FUNC(od_filter_dering_direction_4x4)(uint16_t *y, int ystride,
}
}
void SIMD_FUNC(od_filter_dering_direction_8x8)(uint16_t *y, int ystride,
const uint16_t *in,
int threshold, int dir,
int damping) {
void SIMD_FUNC(cdef_direction_8x8)(uint16_t *y, int ystride, const uint16_t *in,
int threshold, int dir, int damping) {
int i;
v128 sum, p0, p1, row, res;
int o1 = OD_DIRECTION_OFFSETS_TABLE[dir][0];
int o2 = OD_DIRECTION_OFFSETS_TABLE[dir][1];
int o3 = OD_DIRECTION_OFFSETS_TABLE[dir][2];
int o1 = cdef_directions[dir][0];
int o2 = cdef_directions[dir][1];
int o3 = cdef_directions[dir][2];
if (threshold) damping -= get_msb(threshold);
for (i = 0; i < 8; i++) {
sum = v128_zero();
row = v128_load_aligned(&in[i * OD_FILT_BSTRIDE]);
row = v128_load_aligned(&in[i * CDEF_BSTRIDE]);
// p0 = constrain16(in[i*OD_FILT_BSTRIDE + offset], row, threshold, damping)
p0 = v128_load_unaligned(&in[i * OD_FILT_BSTRIDE + o1]);
// p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping)
p0 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + o1]);
p0 = constrain16(p0, row, threshold, damping);
// p1 = constrain16(in[i*OD_FILT_BSTRIDE - offset], row, threshold, damping)
p1 = v128_load_unaligned(&in[i * OD_FILT_BSTRIDE - o1]);
// p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping)
p1 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - o1]);
p1 = constrain16(p1, row, threshold, damping);
// sum += 3 * (p0 + p1)
......@@ -289,24 +295,24 @@ void SIMD_FUNC(od_filter_dering_direction_8x8)(uint16_t *y, int ystride,
p0 = v128_add_16(p0, v128_shl_n_16(p0, 1));
sum = v128_add_16(sum, p0);
// p0 = constrain16(in[i*OD_FILT_BSTRIDE + offset], row, threshold, damping)
p0 = v128_load_unaligned(&in[i * OD_FILT_BSTRIDE + o2]);
// p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping)
p0 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + o2]);
p0 = constrain16(p0, row, threshold, damping);
// p1 = constrain16(in[i*OD_FILT_BSTRIDE - offset], row, threshold, damping)
p1 = v128_load_unaligned(&in[i * OD_FILT_BSTRIDE - o2]);
// p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping)
p1 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - o2]);
p1 = constrain16(p1, row, threshold, damping);
// sum += 2 * (p0 + p1)
p0 = v128_shl_n_16(v128_add_16(p0, p1), 1);
sum = v128_add_16(sum, p0);
// p0 = constrain16(in[i*OD_FILT_BSTRIDE + offset], row, threshold, damping)
p0 = v128_load_unaligned(&in[i * OD_FILT_BSTRIDE + o3]);
// p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping)
p0 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + o3]);
p0 = constrain16(p0, row, threshold, damping);
// p1 = constrain16(in[i*OD_FILT_BSTRIDE - offset], row, threshold, damping)
p1 = v128_load_unaligned(&in[i * OD_FILT_BSTRIDE - o3]);
// p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping)
p1 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - o3]);
p1 = constrain16(p1, row, threshold, damping);
// sum += (p0 + p1)
......
......@@ -11,4 +11,4 @@
#include "aom_dsp/aom_simd.h"
#define SIMD_FUNC(name) name##_sse2
#include "./od_dering_simd.h"
#include "./cdef_block_simd.h"
......@@ -11,4 +11,4 @@
#include "aom_dsp/aom_simd.h"
#define SIMD_FUNC(name) name##_sse4_1
#include "./od_dering_simd.h"
#include "./cdef_block_simd.h"
......@@ -11,4 +11,4 @@
#include "aom_dsp/aom_simd.h"
#define SIMD_FUNC(name) name##_ssse3
#include "./od_dering_simd.h"
#include "./cdef_block_simd.h"
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#ifndef AV1_COMMON_CDEF_SIMD_H_
#define AV1_COMMON_CDEF_SIMD_H_
#include "aom_dsp/aom_simd.h"
// Per 16-bit lane, constrains the difference between a and b:
//   sign(a-b) * min(abs(a-b), max(0, threshold - (abs(a-b) >> adjdamp)))
// The magnitude is limited first and the sign is re-applied at the end.
SIMD_INLINE v128 constrain16(v128 a, v128 b, unsigned int threshold,
                             unsigned int adjdamp) {
  const v128 delta = v128_sub_16(a, b);
  // Shifting by 15 leaves only the sign bit replicated in each lane
  // (presumably 0 for non-negative lanes, all-ones for negative ones).
  const v128 neg_mask = v128_shr_n_s16(delta, 15);
  const v128 magnitude = v128_abs_s16(delta);
  // threshold - (magnitude >> adjdamp); the saturating subtract supplies the
  // max(0, ...) part of the formula.
  const v128 limit =
      v128_ssub_u16(v128_dup_16(threshold), v128_shr_u16(magnitude, adjdamp));
  const v128 clamped = v128_min_s16(magnitude, limit);
  // (x + mask) ^ mask negates x in lanes where mask is all-ones and leaves
  // it unchanged where mask is zero.
  return v128_xor(v128_add_16(neg_mask, clamped), neg_mask);
}
#endif // AV1_COMMON_CDEF_SIMD_H_
......@@ -10,10 +10,20 @@
*/
#include "./av1_rtcd.h"
#include "./cdef_simd.h"
#include "aom_ports/bitops.h"
#include "aom_ports/mem.h"
// Per 16-bit lane, constrains the difference between a and b:
//   sign(a-b) * min(abs(a-b), max(0, threshold - (abs(a-b) >> adjdamp)))
// The magnitude is limited first and the sign is re-applied at the end.
SIMD_INLINE v128 constrain16(v128 a, v128 b, unsigned int threshold,
                             unsigned int adjdamp) {
  const v128 delta = v128_sub_16(a, b);
  // Shifting by 15 leaves only the sign bit replicated in each lane
  // (presumably 0 for non-negative lanes, all-ones for negative ones).
  const v128 neg_mask = v128_shr_n_s16(delta, 15);
  const v128 magnitude = v128_abs_s16(delta);
  // threshold - (magnitude >> adjdamp); the saturating subtract supplies the
  // max(0, ...) part of the formula.
  const v128 limit =
      v128_ssub_u16(v128_dup_16(threshold), v128_shr_u16(magnitude, adjdamp));
  const v128 clamped = v128_min_s16(magnitude, limit);
  // (x + mask) ^ mask negates x in lanes where mask is all-ones and leaves
  // it unchanged where mask is zero.
  return v128_xor(v128_add_16(neg_mask, clamped), neg_mask);
}
// sign(a - b) * min(abs(a - b), max(0, strength - (abs(a - b) >> adjdamp)))
SIMD_INLINE v128 constrain(v256 a, v256 b, unsigned int strength,
unsigned int adjdamp) {
......
......@@ -425,8 +425,8 @@ typedef struct AV1Common {
int mib_size; // Size of the superblock in units of MI blocks
int mib_size_log2; // Log 2 of above.
#if CONFIG_CDEF
int cdef_dering_damping;
int cdef_clpf_damping;
int cdef_pri_damping;
int cdef_sec_damping;
int nb_cdef_strengths;
int cdef_strengths[CDEF_MAX_STRENGTHS];
int cdef_uv_strengths[CDEF_MAX_STRENGTHS];
......
......@@ -3015,8 +3015,8 @@ static void setup_loopfilter(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
#if CONFIG_CDEF
static void setup_cdef(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
int i;
cm->cdef_dering_damping = aom_rb_read_literal(rb, 1) + 5;
cm->cdef_clpf_damping = aom_rb_read_literal(rb, 2) + 3;
cm->cdef_pri_damping = aom_rb_read_literal(rb, 1) + 5;
cm->cdef_sec_damping = aom_rb_read_literal(rb, 2) + 3;
cm->cdef_bits = aom_rb_read_literal(rb, 2);
cm->nb_cdef_strengths = 1 << cm->cdef_bits;
for (i = 0; i < cm->nb_cdef_strengths; i++) {
......
......@@ -101,9 +101,10 @@ int ifd_inspect(insp_frame_data *fd, void *decoder) {
mi->tx_size = mbmi->tx_size;
#if CONFIG_CDEF
mi->cdef_level = cm->cdef_strengths[mbmi->cdef_strength] / CLPF_STRENGTHS;
mi->cdef_level =
cm->cdef_strengths[mbmi->cdef_strength] / CDEF_SEC_STRENGTHS;
mi->cdef_strength =
cm->cdef_strengths[mbmi->cdef_strength] % CLPF_STRENGTHS;
cm->cdef_strengths[mbmi->cdef_strength] % CDEF_SEC_STRENGTHS;
mi->cdef_strength += mi->cdef_strength == 3;
#endif
#if CONFIG_CFL
......
......@@ -3474,8 +3474,8 @@ static void encode_loopfilter(AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
#if CONFIG_CDEF
static void encode_cdef(const AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
int i;
aom_wb_write_literal(wb, cm->cdef_dering_damping - 5, 1);
aom_wb_write_literal(wb, cm->cdef_clpf_damping - 3, 2);
aom_wb_write_literal(wb, cm->cdef_pri_damping - 5, 1);
aom_wb_write_literal(wb, cm->cdef_sec_damping - 3, 2);
aom_wb_write_literal(wb, cm->cdef_bits, 2);
for (i = 0; i < cm->nb_cdef_strengths; i++) {
aom_wb_write_literal(wb, cm->cdef_strengths[i], CDEF_STRENGTH_BITS);
......
This diff is collapsed.
......@@ -17,7 +17,7 @@
#include "./aom_config.h"
#include "./av1_rtcd.h"
#include "aom_ports/aom_timer.h"
#include "av1/common/od_dering.h"
#include "av1/common/cdef_block.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
......@@ -128,22 +128,22 @@ void test_clpf(int w, int h, unsigned int depth, unsigned int iterations,
if (boundary & 1) { // Left
for (int i = 0; i < size; i++)
for (int j = 0; j < xpos; j++)
s[i * size + j] = OD_DERING_VERY_LARGE;
s[i * size + j] = CDEF_VERY_LARGE;
}
if (boundary & 2) { // Right
for (int i = 0; i < size; i++)
for (int j = xpos + w; j < size; j++)
s[i * size + j] = OD_DERING_VERY_LARGE;
s[i * size + j] = CDEF_VERY_LARGE;
}
if (boundary & 4) { // Above
for (int i = 0; i < ypos; i++)
for (int j = 0; j < size; j++)
s[i * size + j] = OD_DERING_VERY_LARGE;
s[i * size + j] = CDEF_VERY_LARGE;
}
if (boundary & 8) { // Below
for (int i = ypos + h; i < size; i++)
for (int j = 0; j < size; j++)
s[i * size + j] = OD_DERING_VERY_LARGE;
s[i * size + j] = CDEF_VERY_LARGE;
}
}
for (strength = depth - 8; strength < depth - 5 && !error;
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment