Commit a9d41e88, authored by Steinar Midtskogen and committed by Jean-Marc Valin

Merge dering/clpf rdo and filtering

* Dering and clpf were merged into a single pass.
* 32x32 and 128x128 filter block sizes for clpf were removed.
* RDO for dering and clpf merged and improved:
  - "0" no longer required to be in the strength selection
  - Dering strength can now be 0, 1 or 2 bits per block

              LL    HL
PSNR:       -0.04 -0.01
PSNR HVS:   -0.27 -0.18
SSIM:       -0.15 +0.01
CIEDE 2000: -0.11 -0.03
APSNR:      -0.03 -0.00
MS SSIM:    -0.18 -0.11

Change-Id: I9f002a16ad218eab6007f90f1f176232443495f0
parent f5931e5e
......@@ -852,19 +852,21 @@ add_proto qw/void aom_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint
specialize qw/aom_lpf_horizontal_4_dual sse2 neon dspr2 msa/;
if (aom_config("CONFIG_CDEF") eq "yes") {
add_proto qw/void aom_clpf_block_hbd/, "const uint16_t *src, uint16_t *dst, int sstride, int dstride, int x0, int y0, int sizex, int sizey, unsigned int strength, BOUNDARY_TYPE bt, unsigned int bd";
if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
add_proto qw/void aom_clpf_block_hbd/, "const uint16_t *src, uint16_t *dst, int sstride, int dstride, int x0, int y0, int sizex, int sizey, unsigned int strength, BOUNDARY_TYPE bt, unsigned int bd";
add_proto qw/void aom_clpf_detect_hbd/, "const uint16_t *rec, const uint16_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum0, int *sum1, unsigned int strength, int size, unsigned int bd, unsigned int dmp";
add_proto qw/void aom_clpf_detect_multi_hbd/, "const uint16_t *rec, const uint16_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum, int size, unsigned int bd, unsigned int dmp";
# VS compiling for 32 bit targets does not support vector types in
# structs as arguments, which makes the v256 type of the intrinsics
# hard to support, so optimizations for this target are disabled.
if ($opts{config} !~ /libs-x86-win32-vs.*/) {
specialize qw/aom_clpf_block_hbd sse2 ssse3 sse4_1 neon/;
specialize qw/aom_clpf_detect_hbd sse2 ssse3 sse4_1 neon/;
specialize qw/aom_clpf_detect_multi_hbd sse2 ssse3 sse4_1 neon/;
}
}
if ($opts{config} !~ /libs-x86-win32-vs.*/) {
specialize qw/aom_clpf_block_hbd sse2 ssse3 sse4_1 neon/;
}
add_proto qw/void aom_clpf_block/, "const uint8_t *src, uint8_t *dst, int sstride, int dstride, int x0, int y0, int sizex, int sizey, unsigned int strength, BOUNDARY_TYPE bt, unsigned int bd";
add_proto qw/void aom_clpf_detect/, "const uint8_t *rec, const uint8_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum0, int *sum1, unsigned int strength, int size, unsigned int dmp";
add_proto qw/void aom_clpf_detect_multi/, "const uint8_t *rec, const uint8_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum, int size, unsigned int dmp";
......
......@@ -215,8 +215,8 @@ if (CONFIG_CDEF)
"${AOM_ROOT}/av1/common/clpf.h"
"${AOM_ROOT}/av1/common/clpf_simd.h"
"${AOM_ROOT}/av1/common/clpf_simd_kernel.h"
"${AOM_ROOT}/av1/common/dering.c"
"${AOM_ROOT}/av1/common/dering.h"
"${AOM_ROOT}/av1/common/cdef.c"
"${AOM_ROOT}/av1/common/cdef.h"
"${AOM_ROOT}/av1/common/od_dering.c"
"${AOM_ROOT}/av1/common/od_dering.h")
......@@ -224,7 +224,7 @@ if (CONFIG_CDEF)
${AOM_AV1_ENCODER_SOURCES}
"${AOM_ROOT}/av1/encoder/clpf_rdo.c"
"${AOM_ROOT}/av1/encoder/clpf_rdo.h"
"${AOM_ROOT}/av1/encoder/pickdering.c")
"${AOM_ROOT}/av1/encoder/pickcdef.c")
set(AOM_AV1_COMMON_SSE2_INTRIN
${AOM_AV1_COMMON_SSE2_INTRIN}
......
......@@ -97,8 +97,8 @@ AV1_COMMON_SRCS-yes += common/od_dering.c
AV1_COMMON_SRCS-yes += common/od_dering.h
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/od_dering_sse4.c
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/od_dering_sse4.h
AV1_COMMON_SRCS-yes += common/dering.c
AV1_COMMON_SRCS-yes += common/dering.h
AV1_COMMON_SRCS-yes += common/cdef.c
AV1_COMMON_SRCS-yes += common/cdef.h
endif
ifeq ($(CONFIG_ACCOUNTING),yes)
AV1_COMMON_SRCS-yes += common/accounting.h
......
......@@ -109,7 +109,7 @@ AV1_CX_SRCS-yes += encoder/temporal_filter.h
AV1_CX_SRCS-yes += encoder/mbgraph.c
AV1_CX_SRCS-yes += encoder/mbgraph.h
ifeq ($(CONFIG_CDEF),yes)
AV1_CX_SRCS-yes += encoder/pickdering.c
AV1_CX_SRCS-yes += encoder/pickcdef.c
AV1_CX_SRCS-yes += encoder/clpf_rdo.c
AV1_CX_SRCS-yes += encoder/clpf_rdo.h
AV1_CX_SRCS-yes += encoder/clpf_rdo_simd.h
......
......@@ -21,7 +21,7 @@ struct search_site_config;
struct mv;
union int_mv;
struct yv12_buffer_config;
typedef int16_t od_dering_in;
typedef uint16_t od_dering_in;
EOF
}
forward_decls qw/av1_common_forward_decls/;
......@@ -755,10 +755,10 @@ if (aom_config("CONFIG_CDEF") eq "yes") {
add_proto qw/int od_dir_find8/, "const od_dering_in *img, int stride, int32_t *var, int coeff_shift";
specialize qw/od_dir_find8 sse4_1/;
add_proto qw/int od_filter_dering_direction_4x4/, "int16_t *y, int ystride, const int16_t *in, int threshold, int dir";
add_proto qw/int od_filter_dering_direction_4x4/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir";
specialize qw/od_filter_dering_direction_4x4 sse4_1/;
add_proto qw/int od_filter_dering_direction_8x8/, "int16_t *y, int ystride, const int16_t *in, int threshold, int dir";
add_proto qw/int od_filter_dering_direction_8x8/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir";
specialize qw/od_filter_dering_direction_8x8 sse4_1/;
}
......
......@@ -370,6 +370,7 @@ typedef struct {
#endif // CONFIG_NEW_QUANT
/* deringing gain *per-superblock* */
int8_t dering_gain;
int8_t clpf_strength;
#if CONFIG_DELTA_Q
int current_q_index;
#endif
......
......@@ -9,22 +9,87 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include <string.h>
#include <assert.h>
#include <math.h>
#include <string.h>
#include "./aom_scale_rtcd.h"
#include "aom/aom_integer.h"
#include "av1/common/dering.h"
#include "av1/common/cdef.h"
#include "av1/common/od_dering.h"
#include "av1/common/onyxc_int.h"
#include "av1/common/reconinter.h"
#include "av1/common/od_dering.h"
int compute_level_from_index(int global_level, int gi) {
static const int dering_gains[DERING_REFINEMENT_LEVELS] = { 0, 11, 16, 22 };
int level;
if (global_level == 0) return 0;
level = (global_level * dering_gains[gi] + 8) >> 4;
return clamp(level, gi, MAX_DERING_LEVEL - 1);
/* Lookup table with DERING_STRENGTHS entries, in increasing order.
   av1_cdef_frame() indexes it with the per-superblock entry of the decoded
   lev[] table to obtain the dering level that is later turned into the filter
   threshold. Steps are 1 up to level 8 and grow progressively larger up to
   the maximum of 63. */
int dering_level_table[DERING_STRENGTHS] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 17, 20, 24, 28, 33, 39, 46, 54, 63
};
#ifndef NDEBUG
/* Returns 1 when arr[0..num-1] is in non-decreasing order and 0 otherwise.
   Arrays with fewer than two elements are always reported as sorted. The
   scan runs from the back of the array towards the front and stops at the
   first out-of-order pair. */
static int is_sorted(const int *arr, int num) {
  int idx;
  for (idx = num - 1; idx >= 1; idx--) {
    if (arr[idx] < arr[idx - 1]) return 0;
  }
  return 1;
}
#endif
/* Packs the dering level table and the CLPF strength table into a single
   mixed-radix number. Each lev[] entry is a base-DERING_STRENGTHS digit and
   each str[] entry a base-CLPF_STRENGTHS digit; lev[0] ends up as the most
   significant digit and the last str[] entry as the least significant one.
   Both tables must be in non-decreasing order (checked in debug builds).
   id_to_levels() decodes the digits in the reverse order. */
uint32_t levels_to_id(const int lev[DERING_REFINEMENT_LEVELS],
                      const int str[CLPF_REFINEMENT_LEVELS]) {
  uint32_t packed = 0;
  int k;

  assert(is_sorted(lev, DERING_REFINEMENT_LEVELS));
  assert(is_sorted(str, CLPF_REFINEMENT_LEVELS));

  k = 0;
  while (k < DERING_REFINEMENT_LEVELS) {
    packed = packed * DERING_STRENGTHS + lev[k];
    k++;
  }

  k = 0;
  while (k < CLPF_REFINEMENT_LEVELS) {
    packed = packed * CLPF_STRENGTHS + str[k];
    k++;
  }

  return packed;
}
/* Removes equal neighbouring entries from tab[0..n-1] in place. Each time
   tab[j - 1] equals tab[j], the tail starting at tab[j] is shifted down by one
   slot; tab[n - 1] is left untouched, so the last value ends up repeated at
   the end of the table. */
static void pack_level_table(int *tab, int n) {
  int i;
  int j = 1;
  /* j advances at most once per iteration, so j <= i < n always holds and
     tab[j] stays inside the table. */
  for (i = 1; i < n; i++) {
    if (tab[j - 1] == tab[j])
      memmove(&tab[j - 1], &tab[j], (n - j) * sizeof(*tab));
    else
      j++;
  }
}

/* Decodes the mixed-radix number produced by levels_to_id().
   The CLPF strengths are the least significant base-CLPF_STRENGTHS digits
   and are extracted first (last entry first); the dering levels are the
   remaining base-DERING_STRENGTHS digits. Afterwards both tables are packed
   so that duplicate neighbouring entries are squeezed out towards the end.

   lev: receives DERING_REFINEMENT_LEVELS dering level indices.
   str: receives CLPF_REFINEMENT_LEVELS CLPF strength indices.
   id:  packed identifier to decode.

   Each table is packed using its own size. The original CLPF loop bounded
   its write index with DERING_REFINEMENT_LEVELS, which only worked because
   the read index was bounded correctly. */
void id_to_levels(int lev[DERING_REFINEMENT_LEVELS],
                  int str[CLPF_REFINEMENT_LEVELS], uint32_t id) {
  int i;
  for (i = CLPF_REFINEMENT_LEVELS - 1; i >= 0; i--) {
    str[i] = id % CLPF_STRENGTHS;
    id /= CLPF_STRENGTHS;
  }
  for (i = DERING_REFINEMENT_LEVELS - 1; i >= 0; i--) {
    lev[i] = id % DERING_STRENGTHS;
    id /= DERING_STRENGTHS;
  }

  // Pack tables
  pack_level_table(lev, DERING_REFINEMENT_LEVELS);
  pack_level_table(str, CLPF_REFINEMENT_LEVELS);

  assert(is_sorted(lev, DERING_REFINEMENT_LEVELS));
  assert(is_sorted(str, CLPF_REFINEMENT_LEVELS));
}
/* Derives, from the dering level table and the CLPF strength table, the
   values stored in *dering_bits and *clpf_bits.
   For each table the function counts 1 plus the number of positions whose
   entry differs from its predecessor (for a sorted table this is the number
   of distinct entries) and then passes that count through get_msb().
   NOTE(review): get_msb() is defined elsewhere; presumably it returns the
   index of the highest set bit, which would make the outputs the number of
   bits needed per block to select an entry -- confirm. */
void cdef_get_bits(const int *lev, const int *str, int *dering_bits,
                   int *clpf_bits) {
  int k;

  *clpf_bits = 1;
  *dering_bits = 1;

  for (k = 1; k < DERING_REFINEMENT_LEVELS; k++) {
    if (lev[k] != lev[k - 1]) *dering_bits += 1;
  }
  for (k = 1; k < CLPF_REFINEMENT_LEVELS; k++) {
    if (str[k] != str[k - 1]) *clpf_bits += 1;
  }

  *dering_bits = get_msb(*dering_bits);
  *clpf_bits = get_msb(*clpf_bits);
}
int sb_all_skip(const AV1_COMMON *const cm, int mi_row, int mi_col) {
......@@ -82,7 +147,7 @@ int sb_compute_dering_list(const AV1_COMMON *const cm, int mi_row, int mi_col,
}
static INLINE void copy_8x8_16bit_to_8bit(uint8_t *dst, int dstride,
int16_t *src, int sstride) {
uint16_t *src, int sstride) {
int i, j;
for (i = 0; i < 8; i++)
for (j = 0; j < 8; j++)
......@@ -90,7 +155,7 @@ static INLINE void copy_8x8_16bit_to_8bit(uint8_t *dst, int dstride,
}
static INLINE void copy_4x4_16bit_to_8bit(uint8_t *dst, int dstride,
int16_t *src, int sstride) {
uint16_t *src, int sstride) {
int i, j;
for (i = 0; i < 4; i++)
for (j = 0; j < 4; j++)
......@@ -98,7 +163,7 @@ static INLINE void copy_4x4_16bit_to_8bit(uint8_t *dst, int dstride,
}
/* TODO: Optimize this function for SSE. */
void copy_dering_16bit_to_8bit(uint8_t *dst, int dstride, int16_t *src,
void copy_dering_16bit_to_8bit(uint8_t *dst, int dstride, uint16_t *src,
dering_list *dlist, int dering_count,
int bsize) {
int bi, bx, by;
......@@ -120,11 +185,10 @@ void copy_dering_16bit_to_8bit(uint8_t *dst, int dstride, int16_t *src,
}
/* TODO: Optimize this function for SSE. */
static void copy_sb8_16(AV1_COMMON *cm, int16_t *dst, int dstride,
static void copy_sb8_16(UNUSED AV1_COMMON *cm, uint16_t *dst, int dstride,
const uint8_t *src, int src_voffset, int src_hoffset,
int sstride, int vsize, int hsize) {
int r, c;
(void)cm;
#if CONFIG_AOM_HIGHBITDEPTH
if (cm->use_highbitdepth) {
const uint16_t *base =
......@@ -134,26 +198,28 @@ static void copy_sb8_16(AV1_COMMON *cm, int16_t *dst, int dstride,
dst[r * dstride + c] = base[r * sstride + c];
}
}
} else
} else {
#endif
{
const uint8_t *base = &src[src_voffset * sstride + src_hoffset];
for (r = 0; r < vsize; r++) {
for (c = 0; c < hsize; c++) {
dst[r * dstride + c] = base[r * sstride + c];
}
}
#if CONFIG_AOM_HIGHBITDEPTH
}
#endif
}
void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
MACROBLOCKD *xd, int global_level) {
void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd,
uint32_t global_level, int clpf_strength_u,
int clpf_strength_v) {
int r, c;
int sbr, sbc;
int nhsb, nvsb;
int16_t src[OD_DERING_INBUF_SIZE];
int16_t *linebuf[3];
int16_t colbuf[3][OD_BSIZE_MAX + 2 * OD_FILT_VBORDER][OD_FILT_HBORDER];
uint16_t src[OD_DERING_INBUF_SIZE];
uint16_t *linebuf[3];
uint16_t colbuf[3][OD_BSIZE_MAX + 2 * OD_FILT_VBORDER][OD_FILT_HBORDER];
dering_list dlist[MAX_MIB_SIZE * MAX_MIB_SIZE];
unsigned char *row_dering, *prev_row_dering, *curr_row_dering;
int dering_count;
......@@ -164,12 +230,13 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
int pli;
int dering_left;
int coeff_shift = AOMMAX(cm->bit_depth - 8, 0);
int nplanes;
if (xd->plane[1].subsampling_x == xd->plane[1].subsampling_y &&
xd->plane[2].subsampling_x == xd->plane[2].subsampling_y)
nplanes = 3;
else
nplanes = 1;
int nplanes = 3;
int lev[DERING_REFINEMENT_LEVELS];
int str[CLPF_REFINEMENT_LEVELS];
int chroma_dering =
xd->plane[1].subsampling_x == xd->plane[1].subsampling_y &&
xd->plane[2].subsampling_x == xd->plane[2].subsampling_y;
id_to_levels(lev, str, global_level);
nvsb = (cm->mi_rows + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
nhsb = (cm->mi_cols + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
av1_setup_dst_planes(xd->plane, frame, 0, 0);
......@@ -195,29 +262,46 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
}
dering_left = 1;
for (sbc = 0; sbc < nhsb; sbc++) {
int level;
int level, clpf_strength;
int nhb, nvb;
int cstart = 0;
BOUNDARY_TYPE boundary_type =
cm->mi_grid_visible[MAX_MIB_SIZE * sbr * cm->mi_stride +
MAX_MIB_SIZE * sbc]
->mbmi.boundary_info;
if (!dering_left) cstart = -OD_FILT_HBORDER;
nhb = AOMMIN(MAX_MIB_SIZE, cm->mi_cols - MAX_MIB_SIZE * sbc);
nvb = AOMMIN(MAX_MIB_SIZE, cm->mi_rows - MAX_MIB_SIZE * sbr);
level = compute_level_from_index(
global_level, cm->mi_grid_visible[MAX_MIB_SIZE * sbr * cm->mi_stride +
MAX_MIB_SIZE * sbc]
->mbmi.dering_gain);
level = dering_level_table
[lev[cm->mi_grid_visible[MAX_MIB_SIZE * sbr * cm->mi_stride +
MAX_MIB_SIZE * sbc]
->mbmi.dering_gain]];
clpf_strength =
str[cm->mi_grid_visible[MAX_MIB_SIZE * sbr * cm->mi_stride +
MAX_MIB_SIZE * sbc]
->mbmi.clpf_strength];
clpf_strength += clpf_strength == 3;
curr_row_dering[sbc] = 0;
if (level == 0 ||
if ((level == 0 && clpf_strength == 0) ||
(dering_count = sb_compute_dering_list(
cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE, dlist)) == 0) {
dering_left = 0;
continue;
}
curr_row_dering[sbc] = 1;
for (pli = 0; pli < nplanes; pli++) {
int16_t dst[OD_BSIZE_MAX * OD_BSIZE_MAX];
uint16_t dst[OD_BSIZE_MAX * OD_BSIZE_MAX];
int threshold;
int coffset;
int rend, cend;
int clpf_damping = 3 - (pli != AOM_PLANE_Y) + (cm->base_qindex >> 6);
if (pli) {
if (!chroma_dering) level = 0;
clpf_strength = pli == 1 ? clpf_strength_u : clpf_strength_v;
clpf_strength += clpf_strength == 3;
}
if (sbc == nhsb - 1)
cend = (nhb << bsize[pli]);
else
......@@ -347,14 +431,15 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
threshold = (level * 5 + 4) >> 3 << coeff_shift;
else
threshold = level << coeff_shift;
if (threshold == 0) continue;
od_dering(
dst, &src[OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER],
dec[pli], dir, pli, dlist, dering_count, threshold, coeff_shift);
if (threshold == 0 && clpf_strength == 0) continue;
od_dering(dst,
&src[OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER],
dec[pli], dir, pli, dlist, dering_count, threshold,
clpf_strength, clpf_damping, coeff_shift, boundary_type);
#if CONFIG_AOM_HIGHBITDEPTH
if (cm->use_highbitdepth) {
copy_dering_16bit_to_16bit(
(int16_t *)&CONVERT_TO_SHORTPTR(
&CONVERT_TO_SHORTPTR(
xd->plane[pli]
.dst.buf)[xd->plane[pli].dst.stride *
(MAX_MIB_SIZE * sbr << bsize[pli]) +
......
......@@ -11,32 +11,48 @@
#ifndef AV1_COMMON_DERING_H_
#define AV1_COMMON_DERING_H_
#include "av1/common/od_dering.h"
#include "av1/common/onyxc_int.h"
#include "aom/aom_integer.h"
// ceil(log2(DERING_STRENGTHS^DERING_REFINEMENT_LEVELS *
// CLPF_STRENGTHS^CLPF_REFINEMENT_LEVELS))
#define DERING_LEVEL_BITS (22)
#define MAX_DERING_LEVEL (1LL << DERING_LEVEL_BITS)
#define DERING_REFINEMENT_BITS 2
#define DERING_REFINEMENT_LEVELS 4
#define CLPF_REFINEMENT_BITS 1
#define CLPF_REFINEMENT_LEVELS 2
#define DERING_STRENGTHS 21
#define CLPF_STRENGTHS 4
#include "./aom_config.h"
#include "aom/aom_integer.h"
#include "aom_ports/mem.h"
#include "od_dering.h"
#include "av1/common/od_dering.h"
#include "av1/common/onyxc_int.h"
#include "./od_dering.h"
#ifdef __cplusplus
extern "C" {
#endif
#define DERING_LEVEL_BITS 6
#define MAX_DERING_LEVEL (1 << DERING_LEVEL_BITS)
extern int dering_level_table[DERING_STRENGTHS];
#define DERING_REFINEMENT_BITS 2
#define DERING_REFINEMENT_LEVELS 4
uint32_t levels_to_id(const int lev[DERING_REFINEMENT_LEVELS],
const int str[CLPF_REFINEMENT_LEVELS]);
void id_to_levels(int lev[DERING_REFINEMENT_LEVELS],
int str[CLPF_REFINEMENT_LEVELS], uint32_t id);
void cdef_get_bits(const int *lev, const int *str, int *dering_bits,
int *clpf_bits);
int compute_level_from_index(int global_level, int gi);
int sb_all_skip(const AV1_COMMON *const cm, int mi_row, int mi_col);
int sb_compute_dering_list(const AV1_COMMON *const cm, int mi_row, int mi_col,
dering_list *dlist);
void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
MACROBLOCKD *xd, int global_level);
void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd,
uint32_t global_level, int clpf_strength_u,
int clpf_strength_v);
int av1_dering_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
AV1_COMMON *cm, MACROBLOCKD *xd);
void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
AV1_COMMON *cm, MACROBLOCKD *xd);
#ifdef __cplusplus
} // extern "C"
......
This diff is collapsed.
......@@ -13,20 +13,6 @@
#include "av1/common/reconinter.h"
#define MAX_FB_SIZE_LOG2 7
#define MIN_FB_SIZE_LOG2 5
#define MAX_FB_SIZE (1 << MAX_FB_SIZE_LOG2)
#define MIN_FB_SIZE (1 << MIN_FB_SIZE_LOG2)
int av1_clpf_sample(int X, int A, int B, int C, int D, int E, int F, int G,
int H, int b, unsigned int dmp);
void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
const YV12_BUFFER_CONFIG *org, AV1_COMMON *cm,
int enable_fb_flag, unsigned int strength,
unsigned int fb_size_log2, int plane,
int (*decision)(int, int, const YV12_BUFFER_CONFIG *,
const YV12_BUFFER_CONFIG *,
const AV1_COMMON *cm, int, int, int,
unsigned int, unsigned int, int8_t *, int));
#endif
......@@ -263,7 +263,7 @@ void SIMD_FUNC(aom_clpf_block)(const uint8_t *src, uint8_t *dst, int sstride,
}
}
#if CONFIG_AOM_HIGHBITDEPTH
#if defined(CONFIG_AOM_HIGHBITDEPTH)
// sign(a - b) * max(0, abs(a - b) - max(0, abs(a - b) -
// strength + (abs(a - b) >> (dmp - log2(s)))))
SIMD_INLINE v128 constrain_hbd(v128 a, v128 b, unsigned int strength,
......
......@@ -295,16 +295,6 @@ typedef enum {
} PALETTE_COLOR;
#endif // CONFIG_PALETTE
#ifdef CONFIG_CDEF
#define CLPF_NOFLAG -1
typedef enum {
CLPF_NOSIZE = 0,
CLPF_32X32 = 1,
CLPF_64X64 = 2,
CLPF_128X128 = 3
} CLPF_BLOCK_SIZE;
#endif
typedef enum ATTRIBUTE_PACKED {
DC_PRED, // Average of above and left pixels
V_PRED, // Vertical
......
......@@ -8,14 +8,17 @@
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include <math.h>
#include <stdlib.h>
#ifdef HAVE_CONFIG_H
#include "config.h"
#include "./config.h"
#endif
#include <stdlib.h>
#include <math.h>
#include "dering.h"
#include "./aom_dsp_rtcd.h"
#include "./av1_rtcd.h"
#include "./cdef.h"
/* Generated from gen_filter_tables.c. */
const int OD_DIRECTION_OFFSETS_TABLE[8][3] = {
......@@ -38,7 +41,7 @@ const int OD_DIRECTION_OFFSETS_TABLE[8][3] = {
in a particular direction. Since each direction have the same sum(x^2) term,
that term is never computed. See Section 2, step 2, of:
http://jmvalin.ca/notes/intra_paint.pdf */
int od_dir_find8_c(const int16_t *img, int stride, int32_t *var,
int od_dir_find8_c(const uint16_t *img, int stride, int32_t *var,
int coeff_shift) {
int i;
int32_t cost[8] = { 0 };
......@@ -110,8 +113,9 @@ int od_dir_find8_c(const int16_t *img, int stride, int32_t *var,
}
/* Smooth in the direction detected. */
int od_filter_dering_direction_8x8_c(int16_t *y, int ystride, const int16_t *in,
int threshold, int dir) {
int od_filter_dering_direction_8x8_c(uint16_t *y, int ystride,
const uint16_t *in, int threshold,
int dir) {
int i;
int j;
int k;
......@@ -144,8 +148,9 @@ int od_filter_dering_direction_8x8_c(int16_t *y, int ystride, const int16_t *in,
}
/* Smooth in the direction detected. */
int od_filter_dering_direction_4x4_c(int16_t *y, int ystride, const int16_t *in,
int threshold, int dir) {
int od_filter_dering_direction_4x4_c(uint16_t *y, int ystride,
const uint16_t *in, int threshold,
int dir) {
int i;
int j;
int k;
......@@ -198,22 +203,22 @@ static INLINE int od_adjust_thresh(int threshold, int32_t var) {
return (threshold * OD_THRESH_TABLE_Q8[OD_ILOG(v1)] + 128) >> 8;
}
static INLINE void copy_8x8_16bit_to_16bit(int16_t *dst, int dstride,
int16_t *src, int sstride) {
static INLINE void copy_8x8_16bit_to_16bit(uint16_t *dst, int dstride,
uint16_t *src, int sstride) {
int i, j;
for (i = 0; i < 8; i++)
for (j = 0; j < 8; j++) dst[i * dstride + j] = src[i * sstride + j];
}
static INLINE void copy_4x4_16bit_to_16bit(int16_t *dst, int dstride,
int16_t *src, int sstride) {
static INLINE void copy_4x4_16bit_to_16bit(uint16_t *dst, int dstride,
uint16_t *src, int sstride) {
int i, j;
for (i = 0; i < 4; i++)
for (j = 0; j < 4; j++) dst[i * dstride + j] = src[i * sstride + j];
}
/* TODO: Optimize this function for SSE. */
void copy_dering_16bit_to_16bit(int16_t *dst, int dstride, int16_t *src,
void copy_dering_16bit_to_16bit(uint16_t *dst, int dstride, uint16_t *src,
dering_list *dlist, int dering_count,
int bsize) {
int bi, bx, by;
......@@ -234,10 +239,11 @@ void copy_dering_16bit_to_16bit(int16_t *dst, int dstride, int16_t *src,
}
}
void od_dering(int16_t *y, int16_t *in, int xdec,
void od_dering(uint16_t *y, uint16_t *in, int xdec,
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int pli,
dering_list *dlist, int dering_count, int threshold,
int coeff_shift) {
int clpf_strength, int clpf_damping, int coeff_shift,
BOUNDARY_TYPE bt) {
int bi;
int bx;
int by;
......@@ -276,6 +282,21 @@ void od_dering(int16_t *y, int16_t *in, int xdec,
dir[by][bx]);
}
}
if (!clpf_strength) return;
copy_dering_16bit_to_16bit(in, OD_FILT_BSTRIDE, y, dlist, dering_count,
bsize);
for (bi = 0; bi < dering_count; bi++) {
BOUNDARY_TYPE bt2 = 0;
by = dlist[bi].by;
bx = dlist[bi].bx;
// Prevent CLPF from reading across superblock boundaries
if (!by) bt2 |= TILE_ABOVE_BOUNDARY;
if (by == (1 << bsize) - 1) bt2 |= TILE_BOTTOM_BOUNDARY;
aom_clpf_block_hbd(in, &y[((bi - by) << 2 * bsize) - (bx << bsize)],
OD_FILT_BSTRIDE, 1 << bsize, bx << bsize, by << bsize,
1 << bsize, 1 << bsize, clpf_strength << coeff_shift,
bt | bt2, clpf_damping + coeff_shift);
}
}
......@@ -24,8 +24,9 @@
#define OD_FILT_VBORDER (3)
/* We only need to buffer three horizontal lines too, but let's make it four
to make vectorization easier. */
#define OD_FILT_HBORDER (4)
#define OD_FILT_BSTRIDE (OD_BSIZE_MAX + 2 * OD_FILT_HBORDER)
#define OD_FILT_HBORDER (32)
#define OD_FILT_BSTRIDE \
ALIGN_POWER_OF_TWO(OD_BSIZE_MAX + 2 * OD_FILT_HBORDER, 5)
#define OD_DERING_VERY_LARGE (30000)
#define OD_DERING_INBUF_SIZE \
......@@ -38,19 +39,22 @@ typedef struct {
unsigned char bx;
} dering_list;
typedef int (*od_filter_dering_direction_func)(int16_t *y, int ystride,
const int16_t *in, int threshold,
int dir);
void copy_dering_16bit_to_16bit(int16_t *dst, int dstride, int16_t *src,
typedef int (*od_filter_dering_direction_func)(uint16_t *y, int ystride,
const uint16_t *in,
int threshold, int dir);
void copy_dering_16bit_to_16bit(uint16_t *dst, int dstride, uint16_t *src,
dering_list *dlist, int dering_count,
int bsize);
void od_dering(int16_t *y, int16_t *in, int xdec,
void od_dering(uint16_t *y, uint16_t *in, int xdec,
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int pli,
dering_list *dlist, int skip_stride, int threshold,
int coeff_shift);
int od_filter_dering_direction_4x4_c(int16_t *y, int ystride, const int16_t *in,
int threshold, int dir);
int od_filter_dering_direction_8x8_c(int16_t *y, int ystride, const int16_t *in,
int threshold, int dir);
dering_list *dlist, int dering_count, int threshold,
int clpf_strength, int clpf_damping, int coeff_shift,
BOUNDARY_TYPE bt);
int od_filter_dering_direction_4x4_c(uint16_t *y, int ystride,
const uint16_t *in, int threshold,
int dir);
int od_filter_dering_direction_8x8_c(uint16_t *y, int ystride,
const uint16_t *in, int threshold,
int dir);
#endif
......@@ -35,7 +35,11 @@
#if CONFIG_PVQ
#include "av1/common/pvq.h"
#endif
#if CONFIG_CDEF
struct AV1Common;
typedef struct AV1Common AV1_COMMON;
#include "av1/common/cdef.h"
#endif
#ifdef __cplusplus
extern "C" {
#endif
......@@ -166,30 +170,6 @@ typedef struct AV1Common {
// Marks if we need to use 16bit frame buffers (1: yes, 0: no).
int use_highbitdepth;
#endif
#if CONFIG_CDEF
// Two bits are used to signal the strength for all blocks and the
// valid values are:
// 0: no filtering
// 1: strength = 1
// 2: strength = 2
// 3: strength = 4
int clpf_strength_y;
int clpf_strength_u;
int clpf_strength_v;
// If clpf_strength_y is not 0, another two bits are used to signal
// the filter block size. The valid values for clfp_size are:
// 0: no block signalling
// 1: 32x32
// 2: 64x64
// 3: 128x128
CLPF_BLOCK_SIZE clpf_size;
// Buffer for storing whether to filter individual blocks.
int8_t *clpf_blocks;
int clpf_stride;
#endif
YV12_BUFFER_CONFIG *frame_to_show;
RefCntBuffer *prev_frame;
......@@ -417,7 +397,13 @@ typedef struct AV1Common {
int mib_size; // Size of the superblock in units of MI blocks
int mib_size_log2; // Log 2 of above.
#if CONFIG_CDEF
int dering_level;
uint32_t dering_level;
int dering_lev[DERING_REFINEMENT_LEVELS];
int clpf_str[CLPF_REFINEMENT_LEVELS];
int dering_bits;
int clpf_bits;
int clpf_strength_u;
int clpf_strength_v;
#endif
#if CONFIG_DELTA_Q
......
......@@ -228,8 +228,8 @@ static INLINE __m128i od_cmplt_abs_epi16(__m128i in, __m128i threshold) {
return _mm_cmplt_epi16(_mm_abs_epi16(in), threshold);
}
int od_filter_dering_direction_4x4_sse4_1(int16_t *y, int ystride,
const int16_t *in, int threshold,
int od_filter_dering_direction_4x4_sse4_1(uint16_t *y, int ystride,
const uint16_t *in, int threshold,
int dir) {
int i;