Commit e9f77424 authored by Jean-Marc Valin's avatar Jean-Marc Valin Committed by Jean-Marc Valin

Do real chroma RDO search for CDEF

Chroma now has a list of strengths too, with the superblock signalling
shared between luma and chroma.

low-latency, cpu=4:

   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |   SSIM | MS SSIM | CIEDE 2000
-0.0114 | -1.4626 | -1.4745 |  -0.0423 | 0.0430 | -0.0001 |    -0.7416

Change-Id: I389c77f1d80020f810e45f8502c656ad9d397c8c
parent b9370acd
......@@ -854,31 +854,14 @@ specialize qw/aom_lpf_horizontal_4_dual sse2 neon dspr2 msa/;
if (aom_config("CONFIG_CDEF") eq "yes") {
add_proto qw/void aom_clpf_block_hbd/, "const uint16_t *src, uint16_t *dst, int sstride, int dstride, int x0, int y0, int sizex, int sizey, unsigned int strength, unsigned int bd";
add_proto qw/void aom_clpf_hblock_hbd/, "const uint16_t *src, uint16_t *dst, int sstride, int dstride, int x0, int y0, int sizex, int sizey, unsigned int strength, unsigned int bd";
if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
add_proto qw/void aom_clpf_detect_hbd/, "const uint16_t *rec, const uint16_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum0, int *sum1, unsigned int strength, int size, unsigned int bd, unsigned int dmp";
add_proto qw/void aom_clpf_detect_multi_hbd/, "const uint16_t *rec, const uint16_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum, int size, unsigned int bd, unsigned int dmp";
# VS compiling for 32 bit targets does not support vector types in
# structs as arguments, which makes the v256 type of the intrinsics
# hard to support, so optimizations for this target are disabled.
if ($opts{config} !~ /libs-x86-win32-vs.*/) {
specialize qw/aom_clpf_detect_hbd sse2 ssse3 sse4_1 neon/;
specialize qw/aom_clpf_detect_multi_hbd sse2 ssse3 sse4_1 neon/;
}
}
if ($opts{config} !~ /libs-x86-win32-vs.*/) {
specialize qw/aom_clpf_block_hbd sse2 ssse3 sse4_1 neon/;
specialize qw/aom_clpf_hblock_hbd sse2 ssse3 sse4_1 neon/;
}
add_proto qw/void aom_clpf_block/, "const uint8_t *src, uint8_t *dst, int sstride, int dstride, int x0, int y0, int sizex, int sizey, unsigned int strength, unsigned int bd";
add_proto qw/void aom_clpf_detect/, "const uint8_t *rec, const uint8_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum0, int *sum1, unsigned int strength, int size, unsigned int dmp";
add_proto qw/void aom_clpf_detect_multi/, "const uint8_t *rec, const uint8_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum, int size, unsigned int dmp";
# VS compiling for 32 bit targets does not support vector types in
# structs as arguments, which makes the v256 type of the intrinsics
# hard to support, so optimizations for this target are disabled.
if ($opts{config} !~ /libs-x86-win32-vs.*/) {
specialize qw/aom_clpf_block_hbd sse2 ssse3 sse4_1 neon/;
specialize qw/aom_clpf_hblock_hbd sse2 ssse3 sse4_1 neon/;
specialize qw/aom_clpf_block sse2 ssse3 sse4_1 neon/;
specialize qw/aom_clpf_detect sse2 ssse3 sse4_1 neon/;
specialize qw/aom_clpf_detect_multi sse2 ssse3 sse4_1 neon/;
}
}
......
......@@ -110,13 +110,6 @@ AV1_CX_SRCS-yes += encoder/mbgraph.c
AV1_CX_SRCS-yes += encoder/mbgraph.h
ifeq ($(CONFIG_CDEF),yes)
AV1_CX_SRCS-yes += encoder/pickcdef.c
AV1_CX_SRCS-yes += encoder/clpf_rdo.c
AV1_CX_SRCS-yes += encoder/clpf_rdo.h
AV1_CX_SRCS-yes += encoder/clpf_rdo_simd.h
AV1_CX_SRCS-$(HAVE_SSE2) += encoder/clpf_rdo_sse2.c
AV1_CX_SRCS-$(HAVE_SSSE3) += encoder/clpf_rdo_ssse3.c
AV1_CX_SRCS-$(HAVE_SSE4_1) += encoder/clpf_rdo_sse4.c
AV1_CX_SRCS-$(HAVE_NEON) += encoder/clpf_rdo_neon.c
endif
ifeq ($(CONFIG_PVQ),yes)
# PVQ from daala
......
......@@ -143,8 +143,8 @@ static void copy_sb8_16(UNUSED AV1_COMMON *cm, uint16_t *dst, int dstride,
#endif
}
void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd,
int clpf_strength_u, int clpf_strength_v) {
void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
MACROBLOCKD *xd) {
int r, c;
int sbr, sbc;
int nhsb, nvsb;
......@@ -162,11 +162,9 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd,
int dering_left;
int coeff_shift = AOMMAX(cm->bit_depth - 8, 0);
int nplanes = 3;
int *lev;
int chroma_dering =
xd->plane[1].subsampling_x == xd->plane[1].subsampling_y &&
xd->plane[2].subsampling_x == xd->plane[2].subsampling_y;
lev = cm->cdef_strengths;
nvsb = (cm->mi_rows + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
nhsb = (cm->mi_cols + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
av1_setup_dst_planes(xd->plane, frame, 0, 0);
......@@ -193,6 +191,7 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd,
dering_left = 1;
for (sbc = 0; sbc < nhsb; sbc++) {
int level, clpf_strength;
int uv_level, uv_clpf_strength;
int nhb, nvb;
int cstart = 0;
#if 0 // TODO(stemidts/jmvalin): Handle tile borders correctly
......@@ -205,18 +204,34 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd,
nhb = AOMMIN(MAX_MIB_SIZE, cm->mi_cols - MAX_MIB_SIZE * sbc);
nvb = AOMMIN(MAX_MIB_SIZE, cm->mi_rows - MAX_MIB_SIZE * sbr);
level = dering_level_table
[lev[cm->mi_grid_visible[MAX_MIB_SIZE * sbr * cm->mi_stride +
MAX_MIB_SIZE * sbc]
->mbmi.cdef_strength] /
[cm->cdef_strengths[cm->mi_grid_visible[MAX_MIB_SIZE * sbr *
cm->mi_stride +
MAX_MIB_SIZE * sbc]
->mbmi.cdef_strength] /
CLPF_STRENGTHS];
clpf_strength =
lev[cm->mi_grid_visible[MAX_MIB_SIZE * sbr * cm->mi_stride +
MAX_MIB_SIZE * sbc]
->mbmi.cdef_strength] %
cm->cdef_strengths[cm->mi_grid_visible[MAX_MIB_SIZE * sbr *
cm->mi_stride +
MAX_MIB_SIZE * sbc]
->mbmi.cdef_strength] %
CLPF_STRENGTHS;
clpf_strength += clpf_strength == 3;
uv_level = dering_level_table
[cm->cdef_uv_strengths[cm->mi_grid_visible[MAX_MIB_SIZE * sbr *
cm->mi_stride +
MAX_MIB_SIZE * sbc]
->mbmi.cdef_strength] /
CLPF_STRENGTHS];
uv_clpf_strength =
cm->cdef_uv_strengths[cm->mi_grid_visible[MAX_MIB_SIZE * sbr *
cm->mi_stride +
MAX_MIB_SIZE * sbc]
->mbmi.cdef_strength] %
CLPF_STRENGTHS;
uv_clpf_strength += uv_clpf_strength == 3;
curr_row_dering[sbc] = 0;
if ((level == 0 && clpf_strength == 0) ||
if ((level == 0 && clpf_strength == 0 && uv_level == 0 &&
uv_clpf_strength == 0) ||
(dering_count = sb_compute_dering_list(
cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE, dlist)) == 0) {
dering_left = 0;
......@@ -232,9 +247,11 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd,
int clpf_damping = 3 - (pli != AOM_PLANE_Y) + (cm->base_qindex >> 6);
if (pli) {
if (!chroma_dering) level = 0;
clpf_strength = pli == 1 ? clpf_strength_u : clpf_strength_v;
clpf_strength += clpf_strength == 3;
if (chroma_dering)
level = uv_level;
else
level = 0;
clpf_strength = uv_clpf_strength;
}
if (sbc == nhsb - 1)
cend = (nhb << bsize[pli]);
......@@ -359,12 +376,7 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd,
coffset, xd->plane[pli].dst.stride, OD_FILT_VBORDER,
(nhb << bsize[pli]));
/* FIXME: This is a temporary hack that uses more conservative
deringing for chroma. */
if (pli)
threshold = (level * 5 + 4) >> 3 << coeff_shift;
else
threshold = level << coeff_shift;
threshold = level << coeff_shift;
if (threshold == 0 && clpf_strength == 0) continue;
od_dering(dst,
&src[OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER],
......
......@@ -33,8 +33,7 @@ extern int dering_level_table[DERING_STRENGTHS];
int sb_all_skip(const AV1_COMMON *const cm, int mi_row, int mi_col);
int sb_compute_dering_list(const AV1_COMMON *const cm, int mi_row, int mi_col,
dering_list *dlist);
void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd,
int clpf_strength_u, int clpf_strength_v);
void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd);
void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
AV1_COMMON *cm, MACROBLOCKD *xd);
......
......@@ -404,9 +404,8 @@ typedef struct AV1Common {
#if CONFIG_CDEF
int nb_cdef_strengths;
int cdef_strengths[CDEF_MAX_STRENGTHS];
int cdef_uv_strengths[CDEF_MAX_STRENGTHS];
int cdef_bits;
int clpf_strength_u;
int clpf_strength_v;
#endif
#if CONFIG_DELTA_Q
......
......@@ -2672,9 +2672,8 @@ static void setup_cdef(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
cm->nb_cdef_strengths = 1 << cm->cdef_bits;
for (i = 0; i < cm->nb_cdef_strengths; i++) {
cm->cdef_strengths[i] = aom_rb_read_literal(rb, CDEF_STRENGTH_BITS);
cm->cdef_uv_strengths[i] = aom_rb_read_literal(rb, CDEF_STRENGTH_BITS);
}
cm->clpf_strength_u = aom_rb_read_literal(rb, 2);
cm->clpf_strength_v = aom_rb_read_literal(rb, 2);
}
#endif // CONFIG_CDEF
......@@ -4948,8 +4947,7 @@ void av1_decode_frame(AV1Decoder *pbi, const uint8_t *data,
#if CONFIG_CDEF
if (!cm->skip_loop_filter) {
av1_cdef_frame(&pbi->cur_buf->buf, cm, &pbi->mb, cm->clpf_strength_u,
cm->clpf_strength_v);
av1_cdef_frame(&pbi->cur_buf->buf, cm, &pbi->mb);
}
#endif // CONFIG_CDEF
......
......@@ -3496,9 +3496,8 @@ static void encode_cdef(const AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
aom_wb_write_literal(wb, cm->cdef_bits, 2);
for (i = 0; i < cm->nb_cdef_strengths; i++) {
aom_wb_write_literal(wb, cm->cdef_strengths[i], CDEF_STRENGTH_BITS);
aom_wb_write_literal(wb, cm->cdef_uv_strengths[i], CDEF_STRENGTH_BITS);
}
aom_wb_write_literal(wb, cm->clpf_strength_u, 2);
aom_wb_write_literal(wb, cm->clpf_strength_v, 2);
}
#endif
......
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "av1/common/clpf.h"
#include "./aom_dsp_rtcd.h"
#include "aom/aom_image.h"
#include "aom/aom_integer.h"
#include "av1/common/quant_common.h"
// Accumulates the squared error of one size x size block against "org",
// once for the unfiltered reconstruction (*sum0) and once with CLPF applied
// at the given strength (*sum1). Filter taps that would fall outside the
// width x height area are clamped to the nearest valid row/column.
void aom_clpf_detect_c(const uint8_t *rec, const uint8_t *org, int rstride,
                       int ostride, int x0, int y0, int width, int height,
                       int *sum0, int *sum1, unsigned int strength, int size,
                       unsigned int dmp) {
  const int x_end = x0 + size;
  const int y_end = y0 + size;
  int err_plain = 0;
  int err_filtered = 0;
  int row, col;

  for (row = y0; row < y_end; row++) {
    // Row offsets of the current line and its clamped vertical neighbours.
    const int cur = row * rstride;
    const int up2 = AOMMAX(0, row - 2) * rstride;
    const int up1 = AOMMAX(0, row - 1) * rstride;
    const int dn1 = AOMMIN(height - 1, row + 1) * rstride;
    const int dn2 = AOMMIN(height - 1, row + 2) * rstride;

    for (col = x0; col < x_end; col++) {
      // Clamped horizontal neighbour columns.
      const int lf2 = AOMMAX(0, col - 2);
      const int lf1 = AOMMAX(0, col - 1);
      const int rt1 = AOMMIN(width - 1, col + 1);
      const int rt2 = AOMMIN(width - 1, col + 2);

      const int source = org[row * ostride + col];
      const int X = rec[cur + col];
      const int A = rec[up2 + col];
      const int B = rec[up1 + col];
      const int C = rec[cur + lf2];
      const int D = rec[cur + lf1];
      const int E = rec[cur + rt1];
      const int F = rec[cur + rt2];
      const int G = rec[dn1 + col];
      const int H = rec[dn2 + col];
      const int filtered =
          X + av1_clpf_sample(X, A, B, C, D, E, F, G, H, strength, dmp);
      const int diff_plain = source - X;
      const int diff_filtered = source - filtered;

      err_plain += diff_plain * diff_plain;
      err_filtered += diff_filtered * diff_filtered;
    }
  }

  *sum0 += err_plain;
  *sum1 += err_filtered;
}
// Accumulates the squared error of one size x size block against "org" for
// every candidate setting at once: sum[0] is the unfiltered error and
// sum[1], sum[2], sum[3] are the errors with CLPF strength 1, 2 and 4.
// Filter taps outside the width x height area are clamped to the edge.
void aom_clpf_detect_multi_c(const uint8_t *rec, const uint8_t *org,
                             int rstride, int ostride, int x0, int y0,
                             int width, int height, int *sum, int size,
                             unsigned int dmp) {
  static const int strengths[3] = { 1, 2, 4 };
  const int x_end = x0 + size;
  const int y_end = y0 + size;
  int err[4] = { 0, 0, 0, 0 };
  int row, col, k;

  for (row = y0; row < y_end; row++) {
    // Row offsets of the current line and its clamped vertical neighbours.
    const int cur = row * rstride;
    const int up2 = AOMMAX(0, row - 2) * rstride;
    const int up1 = AOMMAX(0, row - 1) * rstride;
    const int dn1 = AOMMIN(height - 1, row + 1) * rstride;
    const int dn2 = AOMMIN(height - 1, row + 2) * rstride;

    for (col = x0; col < x_end; col++) {
      // Clamped horizontal neighbour columns.
      const int lf2 = AOMMAX(0, col - 2);
      const int lf1 = AOMMAX(0, col - 1);
      const int rt1 = AOMMIN(width - 1, col + 1);
      const int rt2 = AOMMIN(width - 1, col + 2);

      const int source = org[row * ostride + col];
      const int X = rec[cur + col];
      const int A = rec[up2 + col];
      const int B = rec[up1 + col];
      const int C = rec[cur + lf2];
      const int D = rec[cur + lf1];
      const int E = rec[cur + rt1];
      const int F = rec[cur + rt2];
      const int G = rec[dn1 + col];
      const int H = rec[dn2 + col];

      err[0] += (source - X) * (source - X);
      for (k = 0; k < 3; k++) {
        const int filtered =
            X + av1_clpf_sample(X, A, B, C, D, E, F, G, H, strengths[k], dmp);
        err[k + 1] += (source - filtered) * (source - filtered);
      }
    }
  }

  for (k = 0; k < 4; k++) sum[k] += err[k];
}
#if CONFIG_AOM_HIGHBITDEPTH
// High bit depth counterpart of aom_clpf_detect_c(): "rec" and "org" hold
// 16 bit samples. Every sample, the strength and the damping are scaled
// down by (bd - 8) bits before filtering, so the errors added to *sum0
// (unfiltered) and *sum1 (filtered) are measured at 8 bit precision.
void aom_clpf_detect_hbd_c(const uint16_t *rec, const uint16_t *org,
                           int rstride, int ostride, int x0, int y0, int width,
                           int height, int *sum0, int *sum1,
                           unsigned int strength, int size, unsigned int bd,
                           unsigned int dmp) {
  const int shift = bd - 8;
  const unsigned int scaled_strength = strength >> shift;
  const unsigned int scaled_dmp = dmp - shift;
  const int x_end = x0 + size;
  const int y_end = y0 + size;
  int err_plain = 0;
  int err_filtered = 0;
  int row, col;

  for (row = y0; row < y_end; row++) {
    // Row offsets of the current line and its clamped vertical neighbours.
    const int cur = row * rstride;
    const int up2 = AOMMAX(0, row - 2) * rstride;
    const int up1 = AOMMAX(0, row - 1) * rstride;
    const int dn1 = AOMMIN(height - 1, row + 1) * rstride;
    const int dn2 = AOMMIN(height - 1, row + 2) * rstride;

    for (col = x0; col < x_end; col++) {
      // Clamped horizontal neighbour columns.
      const int lf2 = AOMMAX(0, col - 2);
      const int lf1 = AOMMAX(0, col - 1);
      const int rt1 = AOMMIN(width - 1, col + 1);
      const int rt2 = AOMMIN(width - 1, col + 2);

      const int source = org[row * ostride + col] >> shift;
      const int X = rec[cur + col] >> shift;
      const int A = rec[up2 + col] >> shift;
      const int B = rec[up1 + col] >> shift;
      const int C = rec[cur + lf2] >> shift;
      const int D = rec[cur + lf1] >> shift;
      const int E = rec[cur + rt1] >> shift;
      const int F = rec[cur + rt2] >> shift;
      const int G = rec[dn1 + col] >> shift;
      const int H = rec[dn2 + col] >> shift;
      const int filtered =
          X + av1_clpf_sample(X, A, B, C, D, E, F, G, H, scaled_strength,
                              scaled_dmp);
      const int diff_plain = source - X;
      const int diff_filtered = source - filtered;

      err_plain += diff_plain * diff_plain;
      err_filtered += diff_filtered * diff_filtered;
    }
  }

  *sum0 += err_plain;
  *sum1 += err_filtered;
}
// High bit depth counterpart of aom_clpf_detect_multi_c(): "rec" and "org"
// hold 16 bit samples. Samples and damping are scaled down by (bd - 8) bits
// before filtering. sum[0] receives the unfiltered error and sum[1..3] the
// errors with CLPF strength 1, 2 and 4.
void aom_clpf_detect_multi_hbd_c(const uint16_t *rec, const uint16_t *org,
                                 int rstride, int ostride, int x0, int y0,
                                 int width, int height, int *sum, int size,
                                 unsigned int bd, unsigned int dmp) {
  static const int strengths[3] = { 1, 2, 4 };
  const int shift = bd - 8;
  const unsigned int scaled_dmp = dmp - shift;
  const int x_end = x0 + size;
  const int y_end = y0 + size;
  int err[4] = { 0, 0, 0, 0 };
  int row, col, k;

  for (row = y0; row < y_end; row++) {
    // Row offsets of the current line and its clamped vertical neighbours.
    const int cur = row * rstride;
    const int up2 = AOMMAX(0, row - 2) * rstride;
    const int up1 = AOMMAX(0, row - 1) * rstride;
    const int dn1 = AOMMIN(height - 1, row + 1) * rstride;
    const int dn2 = AOMMIN(height - 1, row + 2) * rstride;

    for (col = x0; col < x_end; col++) {
      // Clamped horizontal neighbour columns.
      const int lf2 = AOMMAX(0, col - 2);
      const int lf1 = AOMMAX(0, col - 1);
      const int rt1 = AOMMIN(width - 1, col + 1);
      const int rt2 = AOMMIN(width - 1, col + 2);

      const int source = org[row * ostride + col] >> shift;
      const int X = rec[cur + col] >> shift;
      const int A = rec[up2 + col] >> shift;
      const int B = rec[up1 + col] >> shift;
      const int C = rec[cur + lf2] >> shift;
      const int D = rec[cur + lf1] >> shift;
      const int E = rec[cur + rt1] >> shift;
      const int F = rec[cur + rt2] >> shift;
      const int G = rec[dn1 + col] >> shift;
      const int H = rec[dn2 + col] >> shift;

      err[0] += (source - X) * (source - X);
      for (k = 0; k < 3; k++) {
        const int filtered =
            X + av1_clpf_sample(X, A, B, C, D, E, F, G, H, strengths[k],
                                scaled_dmp);
        err[k + 1] += (source - filtered) * (source - filtered);
      }
    }
  }

  for (k = 0; k < 4; k++) sum[k] += err[k];
}
#endif
// Calculate the square error of all filter settings for one plane and add
// it to res[]:
//   res[0]   : unfiltered
//   res[1-3] : CLPF strength 1, 2 and 4 respectively
//
// rec/org    : reconstructed and original frames
// block_size : side length, in samples, of the blocks that are evaluated
// w, h       : number of blocks to visit horizontally and vertically
// plane      : AOM_PLANE_Y, AOM_PLANE_U or AOM_PLANE_V
//
// Blocks whose mode info is flagged as skipped do not contribute. res[] is
// accumulated into, not overwritten, so the caller must initialise it.
static void clpf_rdo(const YV12_BUFFER_CONFIG *rec,
                     const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
                     unsigned int block_size, int w, int h, uint64_t res[4],
                     int plane) {
  int m, n;
  const int subx = plane != AOM_PLANE_Y && rec->subsampling_x;
  const int suby = plane != AOM_PLANE_Y && rec->subsampling_y;
  uint8_t *rec_buffer =
      plane != AOM_PLANE_Y
          ? (plane == AOM_PLANE_U ? rec->u_buffer : rec->v_buffer)
          : rec->y_buffer;
  uint8_t *org_buffer =
      plane != AOM_PLANE_Y
          ? (plane == AOM_PLANE_U ? org->u_buffer : org->v_buffer)
          : org->y_buffer;
  int rec_width = plane != AOM_PLANE_Y ? rec->uv_crop_width : rec->y_crop_width;
  int rec_height =
      plane != AOM_PLANE_Y ? rec->uv_crop_height : rec->y_crop_height;
  int rec_stride = plane != AOM_PLANE_Y ? rec->uv_stride : rec->y_stride;
  int org_stride = plane != AOM_PLANE_Y ? org->uv_stride : org->y_stride;
  int damping =
      cm->bit_depth - 5 - (plane != AOM_PLANE_Y) + (cm->base_qindex >> 6);

  for (m = 0; m < h; m++) {
    for (n = 0; n < w; n++) {
      int xpos = n * block_size;
      int ypos = m * block_size;
      // The detect functions accumulate into plain ints. Keep those sums
      // per block and fold them into the 64 bit totals after every block,
      // so that a large or badly reconstructed plane cannot overflow them.
      int sum[4] = { 0, 0, 0, 0 };
      int i;

      // Map the (possibly subsampled) position back to the luma mode info
      // grid and ignore blocks that were coded as skipped.
      if (cm->mi_grid_visible[(ypos << suby) / MI_SIZE * cm->mi_stride +
                              (xpos << subx) / MI_SIZE]
              ->mbmi.skip) {
        continue;
      }
#if CONFIG_AOM_HIGHBITDEPTH
      if (cm->use_highbitdepth) {
        aom_clpf_detect_multi_hbd(
            CONVERT_TO_SHORTPTR(rec_buffer), CONVERT_TO_SHORTPTR(org_buffer),
            rec_stride, org_stride, xpos, ypos, rec_width, rec_height, sum,
            block_size, cm->bit_depth, damping);
      } else {
        aom_clpf_detect_multi(rec_buffer, org_buffer, rec_stride, org_stride,
                              xpos, ypos, rec_width, rec_height, sum,
                              block_size, damping);
      }
#else
      aom_clpf_detect_multi(rec_buffer, org_buffer, rec_stride, org_stride,
                            xpos, ypos, rec_width, rec_height, sum,
                            block_size, damping);
#endif
      for (i = 0; i < 4; i++) res[i] += (uint64_t)sum[i];
    }
  }
}
// Picks the CLPF strength for one plane. The squared error of every
// candidate (off, 1, 2, 4) is gathered over the plane, slightly biased
// towards the more conservative settings, and the candidate with the
// smallest biased error is written to *best_strength as 0, 1, 2 or 4.
void av1_clpf_test_plane(const YV12_BUFFER_CONFIG *rec,
                         const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
                         int *best_strength, int plane) {
  const int is_chroma = plane != AOM_PLANE_Y;
  const int plane_w = is_chroma ? rec->uv_crop_width : rec->y_crop_width;
  const int plane_h = is_chroma ? rec->uv_crop_height : rec->y_crop_height;
  const int block = MI_SIZE;
  const int block_log2 = get_msb(block);
  uint64_t err[4] = { 0, 0, 0, 0 };
  uint64_t winner = (uint64_t)1 << 63;
  int k, winner_idx;

  clpf_rdo(rec, org, cm, block, plane_w >> block_log2, plane_h >> block_log2,
           err, plane);

  for (k = 0; k < 4; k++) {
    // Favourable bias for conservative strengths: the reduction shrinks as
    // the candidate index grows.
    uint64_t tagged = err[k] - (err[k] >> (7 + k));
    // Store the candidate index in the two low bits so that it survives
    // the minimum search below.
    tagged = (tagged << 2) + k;
    if (tagged < winner) winner = tagged;
  }

  // Candidate index 0 means "off"; indices 1..3 map to strengths 1, 2, 4.
  winner_idx = (int)(winner & 3);
  if (winner_idx)
    *best_strength = 1 << (winner_idx - 1);
  else
    *best_strength = 0;
}
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#ifndef AV1_ENCODER_CLPF_H_
#define AV1_ENCODER_CLPF_H_
#include "av1/common/reconinter.h"
// Compares the reconstructed frame "rec" with the original "org" for the
// given plane and stores the selected CLPF strength in *best_strength.
// The stored value is 0 (no filtering), 1, 2 or 4.
void av1_clpf_test_plane(const YV12_BUFFER_CONFIG *rec,
const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
int *best_strength, int plane);
#endif
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "aom_dsp/aom_simd.h"
#define SIMD_FUNC(name) name##_neon
#include "./clpf_rdo_simd.h"
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "aom_dsp/aom_simd.h"
#define SIMD_FUNC(name) name##_sse2
#include "./clpf_rdo_simd.h"
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "aom_dsp/aom_simd.h"
#define SIMD_FUNC(name) name##_sse4_1
#include "./clpf_rdo_simd.h"
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "aom_dsp/aom_simd.h"
#define SIMD_FUNC(name) name##_ssse3
#include "./clpf_rdo_simd.h"
......@@ -19,7 +19,6 @@
#if CONFIG_CDEF
#include "av1/common/cdef.h"
#include "av1/common/clpf.h"
#include "av1/encoder/clpf_rdo.h"
#endif // CONFIG_CDEF
#include "av1/common/filter.h"
#include "av1/common/idct.h"
......@@ -3522,7 +3521,6 @@ static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) {
}
#if CONFIG_CDEF
if (is_lossless_requested(&cpi->oxcf)) {
cm->clpf_strength_u = cm->clpf_strength_v = 0;
cm->cdef_bits = 0;
cm->cdef_strengths[0] = 0;
cm->nb_cdef_strengths = 1;
......@@ -3531,12 +3529,7 @@ static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) {
av1_cdef_search(cm->frame_to_show, cpi->Source, cm, xd);
// Apply the filter
av1_cdef_frame(cm->frame_to_show, cm, xd, cm->clpf_strength_u,
cm->clpf_strength_v);
// Pack the clpf chroma strengths into two bits each
cm->clpf_strength_u -= cm->clpf_strength_u == 4;
cm->clpf_strength_v -= cm->clpf_strength_v == 4;
av1_cdef_frame(cm->frame_to_show, cm, xd);
}
#endif
#if CONFIG_LOOP_RESTORATION
......
......@@ -17,7 +17,6 @@
#include "av1/common/cdef.h"
#include "av1/common/onyxc_int.h"
#include "av1/common/reconinter.h"
#include "av1/encoder/clpf_rdo.h"
#include "av1/encoder/encoder.h"
#define TOTAL_STRENGTHS (DERING_STRENGTHS * CLPF_STRENGTHS)
......@@ -79,12 +78,12 @@ static uint64_t joint_strength_search(int *best_lev, int nb_strengths,
}
static double compute_dist(uint16_t *x, int xstride, uint16_t *y, int ystride,
int nhb, int nvb, int coeff_shift) {
int nhb, int nvb, int coeff_shift, int bsize) {
int i, j;
double sum;
sum = 0;
for (i = 0; i < nvb << 3; i++) {
for (j = 0; j < nhb << 3; j++) {
for (i = 0; i < nvb << bsize; i++) {
for (j = 0; j < nhb << bsize; j++) {
double tmp;
tmp = x[i * xstride + j] - y[i * ystride + j];
sum += tmp * tmp;
......@@ -97,11 +96,11 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
AV1_COMMON *cm, MACROBLOCKD *xd) {
int r, c;
int sbr, sbc;
uint16_t *src;
uint16_t *ref_coeff;
uint16_t *src[3];
uint16_t *ref_coeff[3];
dering_list dlist[MAX_MIB_SIZE * MAX_MIB_SIZE];
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
int stride;
int stride[3];
int bsize[3];
int dec[3];
int pli;
......@@ -114,8 +113,8 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
int nvsb = (cm->mi_rows + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
int nhsb = (cm->mi_cols + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
int *sb_index = aom_malloc(nvsb * nhsb * sizeof(*sb_index));
uint64_t(*mse)[DERING_STRENGTHS * CLPF_STRENGTHS] =
aom_malloc(sizeof(*mse) * nvsb * nhsb);
int *selected_strength = aom_malloc(nvsb * nhsb * sizeof(*sb_index));
uint64_t(*mse[3])[TOTAL_STRENGTHS];
int clpf_damping = 3 + (cm->base_qindex >> 6);
int i;
int best_lev[CDEF_MAX_STRENGTHS];
......@@ -123,35 +122,56 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
int nb_strength_bits;
int quantizer;
double lambda;
int nplanes = 3;
int chroma_dering =
xd->plane[1].subsampling_x == xd->plane[1].subsampling_y &&
xd->plane[2].subsampling_x == xd->plane[2].subsampling_y;
quantizer =
av1_ac_quant(cm->base_qindex, 0, cm->bit_depth) >> (cm->bit_depth - 8);
lambda = .12 * quantizer * quantizer / 256.;
src = aom_memalign(32, sizeof(*src) * cm->mi_rows * cm->mi_cols * 64);
ref_coeff =
aom_memalign(32, sizeof(*ref_coeff) * cm->mi_rows