Commit 33f3bfde authored by David Barker, committed by Debargha Mukherjee
Browse files

Optimize Wiener filter selection

* Change the behaviour of search_wiener at borders to match
  the behaviour of the Wiener filter itself
* Reorder the calculation in compute_stats, saving ~5% of
  encode time at low bitrates (tested on bus_cif.y4m at 200kbps)

Change-Id: I5f649d77fd66584451aaf37697ce9c9af69524e4
parent 6928a5d2
......@@ -105,7 +105,7 @@ static void loop_restoration_init(RestorationInternal *rst, int kf, int width,
&rst->nhtiles, &rst->nvtiles);
}
static void extend_frame(uint8_t *data, int width, int height, int stride) {
void extend_frame(uint8_t *data, int width, int height, int stride) {
uint8_t *data_p;
int i;
for (i = 0; i < height; ++i) {
......@@ -842,8 +842,7 @@ static void loop_switchable_filter(uint8_t *data, int width, int height,
}
#if CONFIG_AOM_HIGHBITDEPTH
static void extend_frame_highbd(uint16_t *data, int width, int height,
int stride) {
void extend_frame_highbd(uint16_t *data, int width, int height, int stride) {
uint16_t *data_p;
int i, j;
for (i = 0; i < height; ++i) {
......
......@@ -222,12 +222,14 @@ int av1_alloc_restoration_struct(RestorationInfo *rst_info, int width,
int height);
void av1_free_restoration_struct(RestorationInfo *rst_info);
void extend_frame(uint8_t *data, int width, int height, int stride);
void av1_selfguided_restoration(int32_t *dgd, int width, int height, int stride,
int bit_depth, int r, int eps, int32_t *tmpbuf);
void av1_domaintxfmrf_restoration(uint8_t *dgd, int width, int height,
int stride, int param, uint8_t *dst,
int dst_stride, int32_t *tmpbuf);
#if CONFIG_AOM_HIGHBITDEPTH
void extend_frame_highbd(uint16_t *data, int width, int height, int stride);
void av1_domaintxfmrf_restoration_highbd(uint16_t *dgd, int width, int height,
int stride, int param, int bit_depth,
uint16_t *dst, int dst_stride,
......
......@@ -654,13 +654,19 @@ static void compute_stats(uint8_t *dgd, uint8_t *src, int h_start, int h_end,
M[k] += Y[k] * X;
H[k * WIENER_WIN2 + k] += Y[k] * Y[k];
for (l = k + 1; l < WIENER_WIN2; ++l) {
double value = Y[k] * Y[l];
H[k * WIENER_WIN2 + l] += value;
H[l * WIENER_WIN2 + k] += value;
// H is a symmetric matrix, so we only need to fill out the upper
// triangle here. We can copy it down to the lower triangle outside
// the (i, j) loops.
H[k * WIENER_WIN2 + l] += Y[k] * Y[l];
}
}
}
}
for (k = 0; k < WIENER_WIN2; ++k) {
for (l = k + 1; l < WIENER_WIN2; ++l) {
H[l * WIENER_WIN2 + k] = H[k * WIENER_WIN2 + l];
}
}
}
#if CONFIG_AOM_HIGHBITDEPTH
......@@ -702,13 +708,19 @@ static void compute_stats_highbd(uint8_t *dgd8, uint8_t *src8, int h_start,
M[k] += Y[k] * X;
H[k * WIENER_WIN2 + k] += Y[k] * Y[k];
for (l = k + 1; l < WIENER_WIN2; ++l) {
double value = Y[k] * Y[l];
H[k * WIENER_WIN2 + l] += value;
H[l * WIENER_WIN2 + k] += value;
// H is a symmetric matrix, so we only need to fill out the upper
// triangle here. We can copy it down to the lower triangle outside
// the (i, j) loops.
H[k * WIENER_WIN2 + l] += Y[k] * Y[l];
}
}
}
}
for (k = 0; k < WIENER_WIN2; ++k) {
for (l = k + 1; l < WIENER_WIN2; ++l) {
H[l * WIENER_WIN2 + k] = H[k * WIENER_WIN2 + l];
}
}
}
#endif // CONFIG_AOM_HIGHBITDEPTH
......@@ -939,7 +951,6 @@ static double search_wiener_uv(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
const int dgd_stride = dgd->uv_stride;
double score;
int tile_idx, tile_width, tile_height, nhtiles, nvtiles;
int h_start, h_end, v_start, v_end;
const int ntiles = av1_get_rest_ntiles(cm->width, cm->height, &tile_width,
&tile_height, &nhtiles, &nvtiles);
......@@ -963,30 +974,39 @@ static double search_wiener_uv(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
cost_norestore = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err);
rsi[plane].frame_restoration_type = RESTORE_WIENER;
h_start = v_start = WIENER_HALFWIN;
h_end = width - WIENER_HALFWIN;
v_end = height - WIENER_HALFWIN;
if (plane == AOM_PLANE_U) {
#if CONFIG_AOM_HIGHBITDEPTH
if (cm->use_highbitdepth)
compute_stats_highbd(dgd->u_buffer, src->u_buffer, h_start, h_end,
v_start, v_end, dgd_stride, src_stride, M, H);
else
#endif // CONFIG_AOM_HIGHBITDEPTH
compute_stats(dgd->u_buffer, src->u_buffer, h_start, h_end, v_start,
v_end, dgd_stride, src_stride, M, H);
} else if (plane == AOM_PLANE_V) {
#if CONFIG_AOM_HIGHBITDEPTH
if (cm->use_highbitdepth)
compute_stats_highbd(dgd->v_buffer, src->v_buffer, h_start, h_end,
v_start, v_end, dgd_stride, src_stride, M, H);
else
#endif // CONFIG_AOM_HIGHBITDEPTH
compute_stats(dgd->v_buffer, src->v_buffer, h_start, h_end, v_start,
v_end, dgd_stride, src_stride, M, H);
if (cm->use_highbitdepth) {
if (plane == AOM_PLANE_U) {
extend_frame_highbd(CONVERT_TO_SHORTPTR(dgd->u_buffer), width, height,
dgd_stride);
compute_stats_highbd(dgd->u_buffer, src->u_buffer, 0, width, 0, height,
dgd_stride, src_stride, M, H);
} else if (plane == AOM_PLANE_V) {
extend_frame_highbd(CONVERT_TO_SHORTPTR(dgd->v_buffer), width, height,
dgd_stride);
compute_stats_highbd(dgd->v_buffer, src->v_buffer, 0, width, 0, height,
dgd_stride, src_stride, M, H);
} else {
assert(0);
}
} else {
assert(0);
#endif
if (plane == AOM_PLANE_U) {
extend_frame(dgd->u_buffer, width, height, dgd_stride);
compute_stats(dgd->u_buffer, src->u_buffer, 0, width, 0, height,
dgd_stride, src_stride, M, H);
} else if (plane == AOM_PLANE_V) {
extend_frame(dgd->v_buffer, width, height, dgd_stride);
compute_stats(dgd->v_buffer, src->v_buffer, 0, width, 0, height,
dgd_stride, src_stride, M, H);
} else {
assert(0);
}
#if CONFIG_AOM_HIGHBITDEPTH
}
#endif
if (!wiener_decompose_sep_sym(M, H, vfilterd, hfilterd)) {
info->frame_restoration_type = RESTORE_NONE;
aom_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
......@@ -1080,6 +1100,15 @@ static double search_wiener(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
for (tile_idx = 0; tile_idx < ntiles; ++tile_idx)
rsi->wiener_info[tile_idx].level = 0;
// Construct a (WIENER_HALFWIN)-pixel border around the frame
#if CONFIG_AOM_HIGHBITDEPTH
if (cm->use_highbitdepth)
extend_frame_highbd(CONVERT_TO_SHORTPTR(dgd->y_buffer), width, height,
dgd_stride);
else
#endif
extend_frame(dgd->y_buffer, width, height, dgd_stride);
// Compute best Wiener filters for each tile
for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) {
av1_get_rest_tile_limits(tile_idx, 0, 0, nhtiles, nvtiles, tile_width,
......@@ -1093,9 +1122,8 @@ static double search_wiener(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
best_tile_cost[tile_idx] = DBL_MAX;
av1_get_rest_tile_limits(tile_idx, 0, 0, nhtiles, nvtiles, tile_width,
tile_height, width, height, WIENER_HALFWIN,
WIENER_HALFWIN, &h_start, &h_end, &v_start,
&v_end);
tile_height, width, height, 0, 0, &h_start, &h_end,
&v_start, &v_end);
#if CONFIG_AOM_HIGHBITDEPTH
if (cm->use_highbitdepth)
compute_stats_highbd(dgd->y_buffer, src->y_buffer, h_start, h_end,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment