Commit b1555c93 authored by Steinar Midtskogen's avatar Steinar Midtskogen

Speed up CDEF parameter selection for cpu-used > 0

High delay cpu-used=4
   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
 0.0579 |  0.1380 | -0.1975 |   0.0361 |  0.0226 |  0.0072 |     0.0470

Low delay cpu-used=4
   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
-0.1016 | -0.0695 |  0.1013 |  -0.1324 | -0.0903 | -0.1260 |    -0.1710

Change-Id: I5a66a1ffc2d1fb2a203065b7fbb2fd2bd2b281ad
parent 6ae7b3d9
......@@ -45,7 +45,7 @@ int sb_compute_dering_list(const AV1_COMMON *const cm, int mi_row, int mi_col,
void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd);
void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
AV1_COMMON *cm, MACROBLOCKD *xd);
AV1_COMMON *cm, MACROBLOCKD *xd, int fast);
#ifdef __cplusplus
} // extern "C"
......
......@@ -3381,7 +3381,8 @@ static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) {
cm->nb_cdef_strengths = 1;
} else {
// Find cm->dering_level, cm->clpf_strength_u and cm->clpf_strength_v
av1_cdef_search(cm->frame_to_show, cpi->source, cm, xd);
av1_cdef_search(cm->frame_to_show, cpi->source, cm, xd,
cpi->oxcf.speed > 0);
// Apply the filter
av1_cdef_frame(cm->frame_to_show, cm, xd);
......
......@@ -19,13 +19,19 @@
#include "av1/common/reconinter.h"
#include "av1/encoder/encoder.h"
#define REDUCED_STRENGTHS 8
#define REDUCED_TOTAL_STRENGTHS (REDUCED_STRENGTHS * CLPF_STRENGTHS)
#define TOTAL_STRENGTHS (DERING_STRENGTHS * CLPF_STRENGTHS)
static int priconv[REDUCED_STRENGTHS] = { 0, 1, 2, 3, 4, 7, 12, 25 };
/* Search for the best strength to add as an option, knowing we
already selected nb_strengths options. */
static uint64_t search_one(int *lev, int nb_strengths,
uint64_t mse[][TOTAL_STRENGTHS], int sb_count) {
uint64_t mse[][TOTAL_STRENGTHS], int sb_count,
int fast) {
uint64_t tot_mse[TOTAL_STRENGTHS];
const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
int i, j;
uint64_t best_tot_mse = (uint64_t)1 << 63;
int best_id = 0;
......@@ -40,13 +46,13 @@ static uint64_t search_one(int *lev, int nb_strengths,
}
}
/* Find best mse when adding each possible new option. */
for (j = 0; j < TOTAL_STRENGTHS; j++) {
for (j = 0; j < total_strengths; j++) {
uint64_t best = best_mse;
if (mse[i][j] < best) best = mse[i][j];
tot_mse[j] += best;
}
}
for (j = 0; j < TOTAL_STRENGTHS; j++) {
for (j = 0; j < total_strengths; j++) {
if (tot_mse[j] < best_tot_mse) {
best_tot_mse = tot_mse[j];
best_id = j;
......@@ -59,9 +65,10 @@ static uint64_t search_one(int *lev, int nb_strengths,
/* Search for the best luma+chroma strength to add as an option, knowing we
already selected nb_strengths options. */
static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths,
uint64_t (**mse)[TOTAL_STRENGTHS],
int sb_count) {
uint64_t (**mse)[TOTAL_STRENGTHS], int sb_count,
int fast) {
uint64_t tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS];
const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
int i, j;
uint64_t best_tot_mse = (uint64_t)1 << 63;
int best_id0 = 0;
......@@ -79,9 +86,9 @@ static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths,
}
}
/* Find best mse when adding each possible new option. */
for (j = 0; j < TOTAL_STRENGTHS; j++) {
for (j = 0; j < total_strengths; j++) {
int k;
for (k = 0; k < TOTAL_STRENGTHS; k++) {
for (k = 0; k < total_strengths; k++) {
uint64_t best = best_mse;
uint64_t curr = mse[0][i][j];
curr += mse[1][i][k];
......@@ -90,9 +97,9 @@ static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths,
}
}
}
for (j = 0; j < TOTAL_STRENGTHS; j++) {
for (j = 0; j < total_strengths; j++) {
int k;
for (k = 0; k < TOTAL_STRENGTHS; k++) {
for (k = 0; k < total_strengths; k++) {
if (tot_mse[j][k] < best_tot_mse) {
best_tot_mse = tot_mse[j][k];
best_id0 = j;
......@@ -108,20 +115,23 @@ static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths,
/* Search for the set of strengths that minimizes mse. */
static uint64_t joint_strength_search(int *best_lev, int nb_strengths,
uint64_t mse[][TOTAL_STRENGTHS],
int sb_count) {
int sb_count, int fast) {
uint64_t best_tot_mse;
int i;
best_tot_mse = (uint64_t)1 << 63;
/* Greedy search: add one strength options at a time. */
for (i = 0; i < nb_strengths; i++) {
best_tot_mse = search_one(best_lev, i, mse, sb_count);
best_tot_mse = search_one(best_lev, i, mse, sb_count, fast);
}
/* Trying to refine the greedy search by reconsidering each
already-selected option. */
for (i = 0; i < 4 * nb_strengths; i++) {
int j;
for (j = 0; j < nb_strengths - 1; j++) best_lev[j] = best_lev[j + 1];
best_tot_mse = search_one(best_lev, nb_strengths - 1, mse, sb_count);
if (!fast) {
for (i = 0; i < 4 * nb_strengths; i++) {
int j;
for (j = 0; j < nb_strengths - 1; j++) best_lev[j] = best_lev[j + 1];
best_tot_mse =
search_one(best_lev, nb_strengths - 1, mse, sb_count, fast);
}
}
return best_tot_mse;
}
......@@ -130,13 +140,14 @@ static uint64_t joint_strength_search(int *best_lev, int nb_strengths,
static uint64_t joint_strength_search_dual(int *best_lev0, int *best_lev1,
int nb_strengths,
uint64_t (**mse)[TOTAL_STRENGTHS],
int sb_count) {
int sb_count, int fast) {
uint64_t best_tot_mse;
int i;
best_tot_mse = (uint64_t)1 << 63;
/* Greedy search: add one strength options at a time. */
for (i = 0; i < nb_strengths; i++) {
best_tot_mse = search_one_dual(best_lev0, best_lev1, i, mse, sb_count);
best_tot_mse =
search_one_dual(best_lev0, best_lev1, i, mse, sb_count, fast);
}
/* Trying to refine the greedy search by reconsidering each
already-selected option. */
......@@ -146,8 +157,8 @@ static uint64_t joint_strength_search_dual(int *best_lev0, int *best_lev1,
best_lev0[j] = best_lev0[j + 1];
best_lev1[j] = best_lev1[j + 1];
}
best_tot_mse =
search_one_dual(best_lev0, best_lev1, nb_strengths - 1, mse, sb_count);
best_tot_mse = search_one_dual(best_lev0, best_lev1, nb_strengths - 1, mse,
sb_count, fast);
}
return best_tot_mse;
}
......@@ -269,7 +280,7 @@ uint64_t compute_dering_dist(uint16_t *dst, int dstride, uint16_t *src,
}
void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
AV1_COMMON *cm, MACROBLOCKD *xd) {
AV1_COMMON *cm, MACROBLOCKD *xd, int fast) {
int r, c;
int sbr, sbc;
uint16_t *src[3];
......@@ -302,6 +313,7 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
int quantizer;
double lambda;
int nplanes = 3;
const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
DECLARE_ALIGNED(32, uint16_t, inbuf[OD_DERING_INBUF_SIZE]);
uint16_t *in;
DECLARE_ALIGNED(32, uint16_t, tmp_dst[MAX_SB_SQUARE]);
......@@ -386,11 +398,12 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
for (pli = 0; pli < nplanes; pli++) {
for (i = 0; i < OD_DERING_INBUF_SIZE; i++)
inbuf[i] = OD_DERING_VERY_LARGE;
for (gi = 0; gi < TOTAL_STRENGTHS; gi++) {
for (gi = 0; gi < total_strengths; gi++) {
int threshold;
uint64_t curr_mse;
int clpf_strength;
threshold = gi / CLPF_STRENGTHS;
if (fast) threshold = priconv[threshold];
if (pli > 0 && !chroma_dering) threshold = 0;
/* We avoid filtering the pixels for which some of the pixels to
average
......@@ -440,10 +453,10 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
nb_strengths = 1 << i;
if (nplanes >= 3)
tot_mse = joint_strength_search_dual(best_lev0, best_lev1, nb_strengths,
mse, sb_count);
mse, sb_count, fast);
else
tot_mse =
joint_strength_search(best_lev0, nb_strengths, mse[0], sb_count);
tot_mse = joint_strength_search(best_lev0, nb_strengths, mse[0], sb_count,
fast);
/* Count superblock signalling cost. */
tot_mse += (uint64_t)(sb_count * lambda * i);
/* Count header signalling cost. */
......@@ -477,6 +490,17 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
selected_strength[i] = best_gi;
cm->mi_grid_visible[sb_index[i]]->mbmi.cdef_strength = best_gi;
}
if (fast) {
for (int j = 0; j < nb_strengths; j++) {
cm->cdef_strengths[j] =
priconv[cm->cdef_strengths[j] / CLPF_STRENGTHS] * CLPF_STRENGTHS +
(cm->cdef_strengths[j] % CLPF_STRENGTHS);
cm->cdef_uv_strengths[j] =
priconv[cm->cdef_uv_strengths[j] / CLPF_STRENGTHS] * CLPF_STRENGTHS +
(cm->cdef_uv_strengths[j] % CLPF_STRENGTHS);
}
}
cm->cdef_dering_damping = dering_damping;
cm->cdef_clpf_damping = clpf_damping;
aom_free(mse[0]);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment