Commit 5f5c132f, authored and committed by Jean-Marc Valin

Jointly optimizing deringing and clpf

We now signal joint strengths and use a greedy algorithm for the search.

low-latency, cpu-used=4:

ll4-cdef@2017-03-22T03:42:10.815Z -> ll4-cdef-newsearch-var-header-newlambda-refine4@2017-03-22T15:56:46.471Z

   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
-0.0792 |  0.3551 |  0.4393 |  -0.0108 | -0.1338 | -0.0141 |     0.1452

Change-Id: I619ae1c7c7d7ec04fe993cabc5773b07c3f5b201
parent 800df032
...@@ -366,8 +366,7 @@ typedef struct { ...@@ -366,8 +366,7 @@ typedef struct {
int send_dq_bit; int send_dq_bit;
#endif // CONFIG_NEW_QUANT #endif // CONFIG_NEW_QUANT
/* deringing gain *per-superblock* */ /* deringing gain *per-superblock* */
int8_t dering_gain; int8_t cdef_strength;
int8_t clpf_strength;
#if CONFIG_DELTA_Q #if CONFIG_DELTA_Q
int current_q_index; int current_q_index;
#endif #endif
......
...@@ -24,74 +24,6 @@ int dering_level_table[DERING_STRENGTHS] = { ...@@ -24,74 +24,6 @@ int dering_level_table[DERING_STRENGTHS] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 17, 20, 24, 28, 33, 39, 46, 54, 63 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 17, 20, 24, 28, 33, 39, 46, 54, 63
}; };
#ifndef NDEBUG
/* Returns 1 when the first `num` entries of `arr` are in non-decreasing
 * order, and 0 otherwise. Arrays with fewer than two entries count as
 * sorted. */
static int is_sorted(const int *arr, int num) {
  int idx;
  for (idx = 1; idx < num; idx++) {
    /* A single descending neighbour pair is enough to reject. */
    if (arr[idx] < arr[idx - 1]) return 0;
  }
  return 1;
}
#endif
/* Folds the deringing level table and the CLPF strength table into one
 * integer id using mixed-radix positional encoding: each deringing entry is
 * a base-DERING_STRENGTHS digit and each CLPF entry is a
 * base-CLPF_STRENGTHS digit. lev[0] ends up as the most significant digit
 * and str[CLPF_REFINEMENT_LEVELS - 1] as the least significant one.
 *
 * Both tables are asserted to be sorted in non-decreasing order (the check
 * compiles away when NDEBUG is defined). */
uint32_t levels_to_id(const int lev[DERING_REFINEMENT_LEVELS],
                      const int str[CLPF_REFINEMENT_LEVELS]) {
  uint32_t packed = 0;
  int k;
  assert(is_sorted(lev, DERING_REFINEMENT_LEVELS));
  assert(is_sorted(str, CLPF_REFINEMENT_LEVELS));
  /* Deringing digits first ... */
  k = 0;
  while (k < DERING_REFINEMENT_LEVELS) {
    packed *= DERING_STRENGTHS;
    packed += lev[k];
    k++;
  }
  /* ... then the CLPF digits. */
  k = 0;
  while (k < CLPF_REFINEMENT_LEVELS) {
    packed *= CLPF_STRENGTHS;
    packed += str[k];
    k++;
  }
  return packed;
}
/* Expands a combined id into the deringing level table `lev` and the CLPF
 * strength table `str`, then packs each table so that its distinct values
 * sit at the front.
 *
 * The id is read as a mixed-radix number: the least significant
 * CLPF_REFINEMENT_LEVELS digits (base CLPF_STRENGTHS) fill `str` from its
 * last entry backwards, and the following DERING_REFINEMENT_LEVELS digits
 * (base DERING_STRENGTHS) fill `lev` the same way. This is the inverse of
 * the digit order used by levels_to_id().
 *
 * Packing removes adjacent duplicates by shifting the tail of the table
 * down one slot; the last value is left repeated at the end, so the table
 * stays sorted if it was sorted before. Both tables are asserted to be
 * sorted on exit (the check compiles away when NDEBUG is defined). */
void id_to_levels(int lev[DERING_REFINEMENT_LEVELS],
                  int str[CLPF_REFINEMENT_LEVELS], uint32_t id) {
  int i;
  int j;
  /* CLPF digits are the least significant ones, so they come out first. */
  for (i = CLPF_REFINEMENT_LEVELS - 1; i >= 0; i--) {
    str[i] = id % CLPF_STRENGTHS;
    id /= CLPF_STRENGTHS;
  }
  for (i = DERING_REFINEMENT_LEVELS - 1; i >= 0; i--) {
    lev[i] = id % DERING_STRENGTHS;
    id /= DERING_STRENGTHS;
  }

  /* Pack the deringing table. `i` only bounds the number of passes; `j` is
   * the position being compared and advances only when no duplicate was
   * removed there. */
  for (i = j = 1; i < DERING_REFINEMENT_LEVELS && j < DERING_REFINEMENT_LEVELS;
       i++) {
    if (lev[j - 1] == lev[j]) {
      /* Entries j..end (DERING_REFINEMENT_LEVELS - j of them) move down. */
      memmove(&lev[j - 1], &lev[j],
              (DERING_REFINEMENT_LEVELS - j) * sizeof(*lev));
    } else {
      j++;
    }
  }

  /* Pack the CLPF table the same way. The bound on `j` and the element
   * count must both be expressed in terms of the CLPF table size and `j`,
   * mirroring the deringing loop, so that the shift always covers exactly
   * the entries from j to the end of `str`. */
  for (i = j = 1; i < CLPF_REFINEMENT_LEVELS && j < CLPF_REFINEMENT_LEVELS;
       i++) {
    if (str[j - 1] == str[j]) {
      memmove(&str[j - 1], &str[j],
              (CLPF_REFINEMENT_LEVELS - j) * sizeof(*str));
    } else {
      j++;
    }
  }

  assert(is_sorted(lev, DERING_REFINEMENT_LEVELS));
  assert(is_sorted(str, CLPF_REFINEMENT_LEVELS));
}
/* Derives the per-superblock signalling widths for the deringing and CLPF
 * tables.
 *
 * For each table the number of runs of equal adjacent values is counted
 * (one plus the number of positions where an entry differs from its
 * predecessor), and that count is passed through get_msb(). The results are
 * stored in *dering_bits and *clpf_bits.
 *
 * NOTE(review): get_msb() is defined elsewhere; presumably it returns the
 * index of the highest set bit — confirm how a count of 3 is meant to be
 * handled. */
void cdef_get_bits(const int *lev, const int *str, int *dering_bits,
                   int *clpf_bits) {
  int k;
  /* Start both counters at one: a table always has at least one run. */
  *clpf_bits = 1;
  *dering_bits = 1;
  for (k = 1; k < DERING_REFINEMENT_LEVELS; k++) {
    if (lev[k] != lev[k - 1]) ++*dering_bits;
  }
  for (k = 1; k < CLPF_REFINEMENT_LEVELS; k++) {
    if (str[k] != str[k - 1]) ++*clpf_bits;
  }
  /* Convert the run counts into bit widths. */
  *dering_bits = get_msb(*dering_bits);
  *clpf_bits = get_msb(*clpf_bits);
}
int sb_all_skip(const AV1_COMMON *const cm, int mi_row, int mi_col) { int sb_all_skip(const AV1_COMMON *const cm, int mi_row, int mi_col) {
int r, c; int r, c;
int maxc, maxr; int maxc, maxr;
...@@ -212,8 +144,7 @@ static void copy_sb8_16(UNUSED AV1_COMMON *cm, uint16_t *dst, int dstride, ...@@ -212,8 +144,7 @@ static void copy_sb8_16(UNUSED AV1_COMMON *cm, uint16_t *dst, int dstride,
} }
void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd, void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd,
uint32_t global_level, int clpf_strength_u, int clpf_strength_u, int clpf_strength_v) {
int clpf_strength_v) {
int r, c; int r, c;
int sbr, sbc; int sbr, sbc;
int nhsb, nvsb; int nhsb, nvsb;
...@@ -231,12 +162,11 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd, ...@@ -231,12 +162,11 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd,
int dering_left; int dering_left;
int coeff_shift = AOMMAX(cm->bit_depth - 8, 0); int coeff_shift = AOMMAX(cm->bit_depth - 8, 0);
int nplanes = 3; int nplanes = 3;
int lev[DERING_REFINEMENT_LEVELS]; int *lev;
int str[CLPF_REFINEMENT_LEVELS];
int chroma_dering = int chroma_dering =
xd->plane[1].subsampling_x == xd->plane[1].subsampling_y && xd->plane[1].subsampling_x == xd->plane[1].subsampling_y &&
xd->plane[2].subsampling_x == xd->plane[2].subsampling_y; xd->plane[2].subsampling_x == xd->plane[2].subsampling_y;
id_to_levels(lev, str, global_level); lev = cm->cdef_strengths;
nvsb = (cm->mi_rows + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE; nvsb = (cm->mi_rows + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
nhsb = (cm->mi_cols + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE; nhsb = (cm->mi_cols + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
av1_setup_dst_planes(xd->plane, frame, 0, 0); av1_setup_dst_planes(xd->plane, frame, 0, 0);
...@@ -277,11 +207,13 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd, ...@@ -277,11 +207,13 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd,
level = dering_level_table level = dering_level_table
[lev[cm->mi_grid_visible[MAX_MIB_SIZE * sbr * cm->mi_stride + [lev[cm->mi_grid_visible[MAX_MIB_SIZE * sbr * cm->mi_stride +
MAX_MIB_SIZE * sbc] MAX_MIB_SIZE * sbc]
->mbmi.dering_gain]]; ->mbmi.cdef_strength] /
CLPF_STRENGTHS];
clpf_strength = clpf_strength =
str[cm->mi_grid_visible[MAX_MIB_SIZE * sbr * cm->mi_stride + lev[cm->mi_grid_visible[MAX_MIB_SIZE * sbr * cm->mi_stride +
MAX_MIB_SIZE * sbc] MAX_MIB_SIZE * sbc]
->mbmi.clpf_strength]; ->mbmi.cdef_strength] %
CLPF_STRENGTHS;
clpf_strength += clpf_strength == 3; clpf_strength += clpf_strength == 3;
curr_row_dering[sbc] = 0; curr_row_dering[sbc] = 0;
if ((level == 0 && clpf_strength == 0) || if ((level == 0 && clpf_strength == 0) ||
......
...@@ -11,15 +11,8 @@ ...@@ -11,15 +11,8 @@
#ifndef AV1_COMMON_DERING_H_ #ifndef AV1_COMMON_DERING_H_
#define AV1_COMMON_DERING_H_ #define AV1_COMMON_DERING_H_
// ceil(log2(DERING_STRENGTHS^DERING_REFINEMENT_LEVELS * #define CDEF_MAX_STRENGTHS 16
// CLPF_STRENGTHS^CLPF_REFINEMENT_LEVELS)) #define CDEF_STRENGTH_BITS 7
#define DERING_LEVEL_BITS (22)
#define MAX_DERING_LEVEL (1LL << DERING_LEVEL_BITS)
#define DERING_REFINEMENT_BITS 2
#define DERING_REFINEMENT_LEVELS 4
#define CLPF_REFINEMENT_BITS 1
#define CLPF_REFINEMENT_LEVELS 2
#define DERING_STRENGTHS 21 #define DERING_STRENGTHS 21
#define CLPF_STRENGTHS 4 #define CLPF_STRENGTHS 4
...@@ -37,19 +30,11 @@ extern "C" { ...@@ -37,19 +30,11 @@ extern "C" {
extern int dering_level_table[DERING_STRENGTHS]; extern int dering_level_table[DERING_STRENGTHS];
uint32_t levels_to_id(const int lev[DERING_REFINEMENT_LEVELS],
const int str[CLPF_REFINEMENT_LEVELS]);
void id_to_levels(int lev[DERING_REFINEMENT_LEVELS],
int str[CLPF_REFINEMENT_LEVELS], uint32_t id);
void cdef_get_bits(const int *lev, const int *str, int *dering_bits,
int *clpf_bits);
int sb_all_skip(const AV1_COMMON *const cm, int mi_row, int mi_col); int sb_all_skip(const AV1_COMMON *const cm, int mi_row, int mi_col);
int sb_compute_dering_list(const AV1_COMMON *const cm, int mi_row, int mi_col, int sb_compute_dering_list(const AV1_COMMON *const cm, int mi_row, int mi_col,
dering_list *dlist); dering_list *dlist);
void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd, void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd,
uint32_t global_level, int clpf_strength_u, int clpf_strength_u, int clpf_strength_v);
int clpf_strength_v);
void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref, void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
AV1_COMMON *cm, MACROBLOCKD *xd); AV1_COMMON *cm, MACROBLOCKD *xd);
......
...@@ -397,11 +397,9 @@ typedef struct AV1Common { ...@@ -397,11 +397,9 @@ typedef struct AV1Common {
int mib_size; // Size of the superblock in units of MI blocks int mib_size; // Size of the superblock in units of MI blocks
int mib_size_log2; // Log 2 of above. int mib_size_log2; // Log 2 of above.
#if CONFIG_CDEF #if CONFIG_CDEF
uint32_t dering_level; int nb_cdef_strengths;
int dering_lev[DERING_REFINEMENT_LEVELS]; int cdef_strengths[CDEF_MAX_STRENGTHS];
int clpf_str[CLPF_REFINEMENT_LEVELS]; int cdef_bits;
int dering_bits;
int clpf_bits;
int clpf_strength_u; int clpf_strength_u;
int clpf_strength_v; int clpf_strength_v;
#endif #endif
......
...@@ -2402,14 +2402,11 @@ static void decode_partition(AV1Decoder *const pbi, MACROBLOCKD *const xd, ...@@ -2402,14 +2402,11 @@ static void decode_partition(AV1Decoder *const pbi, MACROBLOCKD *const xd,
if (bsize == BLOCK_64X64) { if (bsize == BLOCK_64X64) {
#endif #endif
if (!sb_all_skip(cm, mi_row, mi_col)) { if (!sb_all_skip(cm, mi_row, mi_col)) {
cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]->mbmi.dering_gain = cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]->mbmi.cdef_strength =
aom_read_literal(r, cm->dering_bits, ACCT_STR); aom_read_literal(r, cm->cdef_bits, ACCT_STR);
cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]->mbmi.clpf_strength =
aom_read_literal(r, cm->clpf_bits, ACCT_STR);
} else { } else {
cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]->mbmi.dering_gain = cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]->mbmi.cdef_strength =
cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col] 0;
->mbmi.clpf_strength = 0;
} }
} }
#endif // CONFIG_CDEF #endif // CONFIG_CDEF
...@@ -2673,11 +2670,14 @@ static void setup_loopfilter(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) { ...@@ -2673,11 +2670,14 @@ static void setup_loopfilter(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
#if CONFIG_CDEF #if CONFIG_CDEF
static void setup_cdef(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) { static void setup_cdef(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
cm->dering_level = aom_rb_read_literal(rb, DERING_LEVEL_BITS); int i;
cm->cdef_bits = aom_rb_read_literal(rb, 2);
cm->nb_cdef_strengths = 1 << cm->cdef_bits;
for (i = 0; i < cm->nb_cdef_strengths; i++) {
cm->cdef_strengths[i] = aom_rb_read_literal(rb, CDEF_STRENGTH_BITS);
}
cm->clpf_strength_u = aom_rb_read_literal(rb, 2); cm->clpf_strength_u = aom_rb_read_literal(rb, 2);
cm->clpf_strength_v = aom_rb_read_literal(rb, 2); cm->clpf_strength_v = aom_rb_read_literal(rb, 2);
id_to_levels(cm->dering_lev, cm->clpf_str, cm->dering_level);
cdef_get_bits(cm->dering_lev, cm->clpf_str, &cm->dering_bits, &cm->clpf_bits);
} }
#endif // CONFIG_CDEF #endif // CONFIG_CDEF
...@@ -4950,10 +4950,9 @@ void av1_decode_frame(AV1Decoder *pbi, const uint8_t *data, ...@@ -4950,10 +4950,9 @@ void av1_decode_frame(AV1Decoder *pbi, const uint8_t *data,
} }
#if CONFIG_CDEF #if CONFIG_CDEF
if ((cm->dering_level || cm->clpf_strength_u || cm->clpf_strength_v) && if (!cm->skip_loop_filter) {
!cm->skip_loop_filter) { av1_cdef_frame(&pbi->cur_buf->buf, cm, &pbi->mb, cm->clpf_strength_u,
av1_cdef_frame(&pbi->cur_buf->buf, cm, &pbi->mb, cm->dering_level, cm->clpf_strength_v);
cm->clpf_strength_u, cm->clpf_strength_v);
} }
#endif // CONFIG_CDEF #endif // CONFIG_CDEF
......
...@@ -2785,14 +2785,10 @@ static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile, ...@@ -2785,14 +2785,10 @@ static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile,
if (bsize == BLOCK_64X64 && if (bsize == BLOCK_64X64 &&
#endif // CONFIG_EXT_PARTITION #endif // CONFIG_EXT_PARTITION
!sb_all_skip(cm, mi_row, mi_col)) { !sb_all_skip(cm, mi_row, mi_col)) {
if (cm->dering_bits) if (cm->cdef_bits != 0)
aom_write_literal(w, cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col] aom_write_literal(w, cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]
->mbmi.dering_gain, ->mbmi.cdef_strength,
cm->dering_bits); cm->cdef_bits);
if (cm->clpf_bits)
aom_write_literal(w, cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]
->mbmi.clpf_strength,
cm->clpf_bits);
} }
#endif #endif
} }
...@@ -3496,7 +3492,11 @@ static void encode_loopfilter(AV1_COMMON *cm, struct aom_write_bit_buffer *wb) { ...@@ -3496,7 +3492,11 @@ static void encode_loopfilter(AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
#if CONFIG_CDEF #if CONFIG_CDEF
static void encode_cdef(const AV1_COMMON *cm, struct aom_write_bit_buffer *wb) { static void encode_cdef(const AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
aom_wb_write_literal(wb, cm->dering_level, DERING_LEVEL_BITS); int i;
aom_wb_write_literal(wb, cm->cdef_bits, 2);
for (i = 0; i < cm->nb_cdef_strengths; i++) {
aom_wb_write_literal(wb, cm->cdef_strengths[i], CDEF_STRENGTH_BITS);
}
aom_wb_write_literal(wb, cm->clpf_strength_u, 2); aom_wb_write_literal(wb, cm->clpf_strength_u, 2);
aom_wb_write_literal(wb, cm->clpf_strength_v, 2); aom_wb_write_literal(wb, cm->clpf_strength_v, 2);
} }
......
...@@ -3522,14 +3522,17 @@ static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) { ...@@ -3522,14 +3522,17 @@ static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) {
} }
#if CONFIG_CDEF #if CONFIG_CDEF
if (is_lossless_requested(&cpi->oxcf)) { if (is_lossless_requested(&cpi->oxcf)) {
cm->dering_level = cm->clpf_strength_u = cm->clpf_strength_v = 0; cm->clpf_strength_u = cm->clpf_strength_v = 0;
cm->cdef_bits = 0;
cm->cdef_strengths[0] = 0;
cm->nb_cdef_strengths = 1;
} else { } else {
// Find cm->dering_level, cm->clpf_strength_u and cm->clpf_strength_v // Find cm->dering_level, cm->clpf_strength_u and cm->clpf_strength_v
av1_cdef_search(cm->frame_to_show, cpi->Source, cm, xd); av1_cdef_search(cm->frame_to_show, cpi->Source, cm, xd);
// Apply the filter // Apply the filter
av1_cdef_frame(cm->frame_to_show, cm, xd, cm->dering_level, av1_cdef_frame(cm->frame_to_show, cm, xd, cm->clpf_strength_u,
cm->clpf_strength_u, cm->clpf_strength_v); cm->clpf_strength_v);
// Pack the clpf chroma strengths into two bits each // Pack the clpf chroma strengths into two bits each
cm->clpf_strength_u -= cm->clpf_strength_u == 4; cm->clpf_strength_u -= cm->clpf_strength_u == 4;
......
...@@ -20,6 +20,64 @@ ...@@ -20,6 +20,64 @@
#include "av1/encoder/clpf_rdo.h" #include "av1/encoder/clpf_rdo.h"
#include "av1/encoder/encoder.h" #include "av1/encoder/encoder.h"
#define TOTAL_STRENGTHS (DERING_STRENGTHS * CLPF_STRENGTHS)
/* Picks the single strength that, added to the nb_strengths entries already
 * present in lev[0..nb_strengths-1], gives the lowest total error.
 *
 * mse[sb][s] is the error of superblock `sb` when filtered with strength
 * index `s`; sb_count is the number of rows in mse. For every candidate
 * strength, each superblock is charged the smaller of (a) its best error
 * among the already-selected strengths and (b) its error with the
 * candidate; the per-superblock charges are summed.
 *
 * The winning candidate (lowest index on ties) is written to
 * lev[nb_strengths], so lev must have room for nb_strengths + 1 entries.
 * Returns the winning total. */
static uint64_t search_one(int *lev, int nb_strengths,
                           uint64_t mse[][TOTAL_STRENGTHS], int sb_count) {
  const uint64_t worst = (uint64_t)1 << 63;
  uint64_t candidate_total[TOTAL_STRENGTHS];
  uint64_t winner_total = worst;
  int winner = 0;
  int sb;
  int cand;
  memset(candidate_total, 0, sizeof(candidate_total));
  for (sb = 0; sb < sb_count; sb++) {
    const uint64_t *row = mse[sb];
    uint64_t floor_mse = worst;
    int sel;
    /* Best this superblock can already do with the selected strengths. */
    for (sel = 0; sel < nb_strengths; sel++) {
      const uint64_t current = row[lev[sel]];
      if (current < floor_mse) floor_mse = current;
    }
    /* A candidate can only help: charge min(existing best, candidate). */
    for (cand = 0; cand < TOTAL_STRENGTHS; cand++) {
      candidate_total[cand] += (row[cand] < floor_mse) ? row[cand] : floor_mse;
    }
  }
  /* Strict '<' keeps the lowest-index candidate when totals tie. */
  for (cand = 0; cand < TOTAL_STRENGTHS; cand++) {
    if (candidate_total[cand] < winner_total) {
      winner_total = candidate_total[cand];
      winner = cand;
    }
  }
  lev[nb_strengths] = winner;
  return winner_total;
}
/* Chooses nb_strengths strength indices that together give a low total
 * error over all superblocks, writing them to best_lev[0..nb_strengths-1].
 *
 * mse[sb][s] is the error of superblock `sb` at strength index `s`, and
 * sb_count is the number of superblocks. Returns the total error reported
 * by the final search_one() call, or (uint64_t)1 << 63 if no search was
 * run at all (nb_strengths <= 0). */
static uint64_t joint_strength_search(int *best_lev, int nb_strengths,
                                      uint64_t mse[][TOTAL_STRENGTHS],
                                      int sb_count) {
  uint64_t total = (uint64_t)1 << 63;
  int chosen;
  int pass;
  /* Greedy phase: add one strength option at a time. */
  for (chosen = 0; chosen < nb_strengths; chosen++) {
    total = search_one(best_lev, chosen, mse, sb_count);
  }
  /* Refinement phase: repeatedly drop the oldest option (slot 0), slide the
     others down, and re-search the freed last slot given the remaining
     ones. 4 * nb_strengths passes revisit every slot four times. */
  for (pass = 4 * nb_strengths; pass > 0; pass--) {
    int slot = 0;
    while (slot < nb_strengths - 1) {
      best_lev[slot] = best_lev[slot + 1];
      slot++;
    }
    total = search_one(best_lev, nb_strengths - 1, mse, sb_count);
  }
  return total;
}
static double compute_dist(uint16_t *x, int xstride, uint16_t *y, int ystride, static double compute_dist(uint16_t *x, int xstride, uint16_t *y, int ystride,
int nhb, int nvb, int coeff_shift) { int nhb, int nvb, int coeff_shift) {
int i, j; int i, j;
...@@ -50,21 +108,24 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref, ...@@ -50,21 +108,24 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
int level; int level;
int dering_count; int dering_count;
int coeff_shift = AOMMAX(cm->bit_depth - 8, 0); int coeff_shift = AOMMAX(cm->bit_depth - 8, 0);
uint64_t best_tot_mse = 0; uint64_t best_tot_mse = (uint64_t)1 << 63;
uint64_t tot_mse;
int sb_count; int sb_count;
int nvsb = (cm->mi_rows + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE; int nvsb = (cm->mi_rows + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
int nhsb = (cm->mi_cols + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE; int nhsb = (cm->mi_cols + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
int *sb_index = aom_malloc(nvsb * nhsb * sizeof(*sb_index)); int *sb_index = aom_malloc(nvsb * nhsb * sizeof(*sb_index));
uint64_t(*mse)[DERING_STRENGTHS][CLPF_STRENGTHS] = uint64_t(*mse)[DERING_STRENGTHS * CLPF_STRENGTHS] =
aom_malloc(sizeof(*mse) * nvsb * nhsb); aom_malloc(sizeof(*mse) * nvsb * nhsb);
int clpf_damping = 3 + (cm->base_qindex >> 6); int clpf_damping = 3 + (cm->base_qindex >> 6);
int i; int i;
int lev[DERING_REFINEMENT_LEVELS]; int best_lev[CDEF_MAX_STRENGTHS];
int best_lev[DERING_REFINEMENT_LEVELS]; int nb_strengths;
int str[CLPF_REFINEMENT_LEVELS]; int nb_strength_bits;
int best_str[CLPF_REFINEMENT_LEVELS]; int quantizer;
double lambda = exp(cm->base_qindex / 36.0); double lambda;
static int log2[] = { 0, 1, 2, 2 }; quantizer =
av1_ac_quant(cm->base_qindex, 0, cm->bit_depth) >> (cm->bit_depth - 8);
lambda = .12 * quantizer * quantizer / 256.;
src = aom_memalign(32, sizeof(*src) * cm->mi_rows * cm->mi_cols * 64); src = aom_memalign(32, sizeof(*src) * cm->mi_rows * cm->mi_cols * 64);
ref_coeff = ref_coeff =
...@@ -143,7 +204,7 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref, ...@@ -143,7 +204,7 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
i + (i == 3), clpf_damping, coeff_shift); i + (i == 3), clpf_damping, coeff_shift);
copy_dering_16bit_to_16bit(dst, MAX_MIB_SIZE << bsize[0], tmp_dst, copy_dering_16bit_to_16bit(dst, MAX_MIB_SIZE << bsize[0], tmp_dst,
dlist, dering_count, bsize[0]); dlist, dering_count, bsize[0]);
mse[sb_count][gi][i] = (int)compute_dist( mse[sb_count][gi * CLPF_STRENGTHS + i] = (int)compute_dist(
dst, MAX_MIB_SIZE << bsize[0], dst, MAX_MIB_SIZE << bsize[0],
&ref_coeff[(sbr * stride * MAX_MIB_SIZE << bsize[0]) + &ref_coeff[(sbr * stride * MAX_MIB_SIZE << bsize[0]) +
(sbc * MAX_MIB_SIZE << bsize[0])], (sbc * MAX_MIB_SIZE << bsize[0])],
...@@ -155,85 +216,38 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref, ...@@ -155,85 +216,38 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
sb_count++; sb_count++;
} }
} }
best_tot_mse = (uint64_t)1 << 63;
int l0;
for (l0 = 0; l0 < DERING_STRENGTHS; l0++) {
int l1;
lev[0] = l0;
for (l1 = l0; l1 < DERING_STRENGTHS; l1++) {
int l2;
lev[1] = l1;
for (l2 = l1; l2 < DERING_STRENGTHS; l2++) {
int l3;
lev[2] = l2;
for (l3 = l2; l3 < DERING_STRENGTHS; l3++) {
int cs0;
lev[3] = l3;
for (cs0 = 0; cs0 < CLPF_STRENGTHS; cs0++) {
int cs1;
str[0] = cs0;
for (cs1 = cs0; cs1 < CLPF_STRENGTHS; cs1++) {
uint64_t tot_mse = 0;
str[1] = cs1;
for (i = 0; i < sb_count; i++) {
int gi;
int cs;
uint64_t best_mse = (uint64_t)1 << 63;
for (gi = 0; gi < DERING_REFINEMENT_LEVELS; gi++) {
for (cs = 0; cs < CLPF_REFINEMENT_LEVELS; cs++) {
if (mse[i][lev[gi]][str[cs]] < best_mse) {
best_mse = mse[i][lev[gi]][str[cs]];
}
}
}
tot_mse += best_mse;
}
// Add the bit cost nb_strength_bits = 0;
int dering_diffs = 0, clpf_diffs = 0; /* Search for different number of signalling bits. */
for (i = 1; i < DERING_REFINEMENT_LEVELS; i++) for (i = 0; i <= 3; i++) {
dering_diffs += lev[i] != lev[i - 1]; nb_strengths = 1 << i;
for (i = 1; i < CLPF_REFINEMENT_LEVELS; i++) tot_mse = joint_strength_search(best_lev, nb_strengths, mse, sb_count);
clpf_diffs += str[i] != str[i - 1]; /* Count superblock signalling cost. */
tot_mse += (uint64_t)(sb_count * lambda * tot_mse += (uint64_t)(sb_count * lambda * i);
(log2[dering_diffs] + log2[clpf_diffs])); /* Count header signalling cost. */
tot_mse += (uint64_t)(nb_strengths * lambda * CDEF_STRENGTH_BITS);
if (tot_mse < best_tot_mse) { if (tot_mse < best_tot_mse) {
for (i = 0; i < DERING_REFINEMENT_LEVELS; i++) best_tot_mse = tot_mse;
best_lev[i] = lev[i]; nb_strength_bits = i;
for (i = 0; i < CLPF_REFINEMENT_LEVELS; i++)
best_str[i] = str[i];
best_tot_mse = tot_mse;
}
}
}
}
}
} }
} }
for (i = 0; i < DERING_REFINEMENT_LEVELS; i++) lev[i] = best_lev[i]; nb_strengths = 1 << nb_strength_bits;
for (i = 0; i < CLPF_REFINEMENT_LEVELS; i++) str[i] = best_str[i];
id_to_levels(lev, str, levels_to_id(lev, str)); // Pack tables
cdef_get_bits(lev, str, &cm->dering_bits, &cm->clpf_bits);
cm->cdef_bits = nb_strength_bits;
cm->nb_cdef_strengths = nb_strengths;
for (i = 0; i < nb_strengths; i++) cm->cdef_strengths[i] = best_lev[i];
for (i = 0; i < sb_count; i++) { for (i = 0; i < sb_count; i++) {
int gi, cs; int gi;
int best_gi, best_clpf; int best_gi;
uint64_t best_mse = (uint64_t)1 << 63; uint64_t best_mse = (uint64_t)1 << 63;
best_gi = best_clpf = 0; best_gi = 0;
for (gi = 0; gi < (1 << cm->dering_bits); gi++) { for (gi = 0; gi < cm->nb_cdef_strengths; gi++) {
for (cs = 0; cs < (1 << cm->clpf_bits); cs++) { if (mse[i][best_lev[gi]] < best_mse) {
if (mse[i][lev[gi]][str[cs]] < best_mse) { best_gi = gi;
best_gi = gi; best_mse = mse[i][best_lev[gi]];
best_clpf = cs;
best_mse = mse[i][lev[gi]][str[cs]];
}
} }
} }