pickrst.c 59 KB
Newer Older
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3
 *
4 5 6 7 8 9
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 11 12
 */

#include <assert.h>
13
#include <float.h>
14 15 16
#include <limits.h>
#include <math.h>

Yaowu Xu's avatar
Yaowu Xu committed
17
#include "./aom_scale_rtcd.h"
18

Yaowu Xu's avatar
Yaowu Xu committed
19
#include "aom_dsp/aom_dsp_common.h"
20 21
#include "aom_dsp/binary_codes_writer.h"
#include "aom_dsp/psnr.h"
Yaowu Xu's avatar
Yaowu Xu committed
22
#include "aom_mem/aom_mem.h"
23
#include "aom_ports/mem.h"
24
#include "aom_ports/system_state.h"
25

26 27
#include "av1/common/onyxc_int.h"
#include "av1/common/quant_common.h"
28
#include "av1/common/restoration.h"
29

30
#include "av1/encoder/av1_quantize.h"
31
#include "av1/encoder/encoder.h"
32
#include "av1/encoder/mathutils.h"
33 34
#include "av1/encoder/picklpf.h"
#include "av1/encoder/pickrst.h"
35

36
// When set to RESTORE_WIENER or RESTORE_SGRPROJ only those are allowed.
37
// When set to RESTORE_TYPES we allow switchable.
38
static const RestorationType force_restore_type = RESTORE_TYPES;
39 40

// Number of Wiener iterations
41
#define NUM_WIENER_ITERS 5
42

43
typedef double (*search_restore_type)(const YV12_BUFFER_CONFIG *src,
44
                                      AV1_COMP *cpi, int partial_frame,
45
                                      int plane, RestorationInfo *info,
46
                                      RestorationType *rest_level,
47
                                      int64_t *best_tile_cost,
48
                                      YV12_BUFFER_CONFIG *dst_frame);
49

50
const int frame_level_restore_bits[RESTORE_TYPES] = { 2, 2, 2, 2 };
51 52

static int64_t sse_restoration_tile(const YV12_BUFFER_CONFIG *src,
53 54
                                    const YV12_BUFFER_CONFIG *dst,
                                    const AV1_COMMON *cm, int h_start,
55
                                    int width, int v_start, int height,
56 57
                                    int components_pattern) {
  int64_t filt_err = 0;
58 59 60 61
  (void)cm;
  // Y and UV components cannot be mixed
  assert(components_pattern == 1 || components_pattern == 2 ||
         components_pattern == 4 || components_pattern == 6);
62
#if CONFIG_HIGHBITDEPTH
63
  if (cm->use_highbitdepth) {
64 65 66 67 68
    if ((components_pattern >> AOM_PLANE_Y) & 1) {
      filt_err +=
          aom_highbd_get_y_sse_part(src, dst, h_start, width, v_start, height);
    }
    if ((components_pattern >> AOM_PLANE_U) & 1) {
69 70
      filt_err +=
          aom_highbd_get_u_sse_part(src, dst, h_start, width, v_start, height);
71 72
    }
    if ((components_pattern >> AOM_PLANE_V) & 1) {
73 74
      filt_err +=
          aom_highbd_get_v_sse_part(src, dst, h_start, width, v_start, height);
75 76
    }
    return filt_err;
77
  }
78
#endif  // CONFIG_HIGHBITDEPTH
79 80 81 82
  if ((components_pattern >> AOM_PLANE_Y) & 1) {
    filt_err += aom_get_y_sse_part(src, dst, h_start, width, v_start, height);
  }
  if ((components_pattern >> AOM_PLANE_U) & 1) {
83
    filt_err += aom_get_u_sse_part(src, dst, h_start, width, v_start, height);
84 85
  }
  if ((components_pattern >> AOM_PLANE_V) & 1) {
86
    filt_err += aom_get_v_sse_part(src, dst, h_start, width, v_start, height);
87
  }
88 89 90
  return filt_err;
}

91 92
static int64_t sse_restoration_frame(AV1_COMMON *const cm,
                                     const YV12_BUFFER_CONFIG *src,
93 94 95
                                     const YV12_BUFFER_CONFIG *dst,
                                     int components_pattern) {
  int64_t filt_err = 0;
96
#if CONFIG_HIGHBITDEPTH
97 98 99 100 101 102 103 104 105 106 107 108
  if (cm->use_highbitdepth) {
    if ((components_pattern >> AOM_PLANE_Y) & 1) {
      filt_err += aom_highbd_get_y_sse(src, dst);
    }
    if ((components_pattern >> AOM_PLANE_U) & 1) {
      filt_err += aom_highbd_get_u_sse(src, dst);
    }
    if ((components_pattern >> AOM_PLANE_V) & 1) {
      filt_err += aom_highbd_get_v_sse(src, dst);
    }
    return filt_err;
  }
109 110
#else
  (void)cm;
111
#endif  // CONFIG_HIGHBITDEPTH
112 113 114 115 116 117 118 119 120 121 122 123
  if ((components_pattern >> AOM_PLANE_Y) & 1) {
    filt_err = aom_get_y_sse(src, dst);
  }
  if ((components_pattern >> AOM_PLANE_U) & 1) {
    filt_err += aom_get_u_sse(src, dst);
  }
  if ((components_pattern >> AOM_PLANE_V) & 1) {
    filt_err += aom_get_v_sse(src, dst);
  }
  return filt_err;
}

124 125
static int64_t try_restoration_tile(const YV12_BUFFER_CONFIG *src,
                                    AV1_COMP *const cpi, RestorationInfo *rsi,
126
                                    int components_pattern, int partial_frame,
127
                                    int tile_idx,
128
                                    YV12_BUFFER_CONFIG *dst_frame) {
129
  AV1_COMMON *const cm = &cpi->common;
130 131 132 133 134

  // Y and UV components cannot be mixed
  assert(components_pattern == 1 || components_pattern == 2 ||
         components_pattern == 4 || components_pattern == 6);

135 136 137 138 139 140 141 142 143
  const int is_uv = components_pattern > 1;
  const int width = src->crop_widths[is_uv];
  const int height = src->crop_heights[is_uv];

  const int rtile_size = cm->rst_info[is_uv].restoration_tilesize;
  const int ss_y = is_uv && cm->subsampling_y;

  int nhtiles, nvtiles;
  av1_get_rest_ntiles(width, height, rtile_size, &nhtiles, &nvtiles);
144

145 146
  av1_loop_restoration_frame(cm->frame_to_show, cm, rsi, components_pattern,
                             partial_frame, dst_frame);
147
  RestorationTileLimits limits = av1_get_rest_tile_limits(
148 149
      tile_idx, nhtiles, nvtiles, rtile_size, width, height, ss_y);
  int64_t filt_err = sse_restoration_tile(
150 151
      src, dst_frame, cm, limits.h_start, limits.h_end - limits.h_start,
      limits.v_start, limits.v_end - limits.v_start, components_pattern);
152 153 154 155 156

  return filt_err;
}

static int64_t try_restoration_frame(const YV12_BUFFER_CONFIG *src,
Yaowu Xu's avatar
Yaowu Xu committed
157
                                     AV1_COMP *const cpi, RestorationInfo *rsi,
158
                                     int components_pattern, int partial_frame,
159
                                     YV12_BUFFER_CONFIG *dst_frame) {
Yaowu Xu's avatar
Yaowu Xu committed
160
  AV1_COMMON *const cm = &cpi->common;
161
  int64_t filt_err;
162 163
  av1_loop_restoration_frame(cm->frame_to_show, cm, rsi, components_pattern,
                             partial_frame, dst_frame);
164
  filt_err = sse_restoration_frame(cm, src, dst_frame, components_pattern);
165 166 167
  return filt_err;
}

168 169
static int64_t get_pixel_proj_error(const uint8_t *src8, int width, int height,
                                    int src_stride, const uint8_t *dat8,
170
                                    int dat_stride, int use_highbitdepth,
171 172
                                    int32_t *flt1, int flt1_stride,
                                    int32_t *flt2, int flt2_stride, int *xqd) {
173 174 175 176
  int i, j;
  int64_t err = 0;
  int xq[2];
  decode_xq(xqd, xq);
177
  if (!use_highbitdepth) {
178 179 180 181 182 183 184 185
    const uint8_t *src = src8;
    const uint8_t *dat = dat8;
    for (i = 0; i < height; ++i) {
      for (j = 0; j < width; ++j) {
        const int32_t u =
            (int32_t)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS);
        const int32_t f1 = (int32_t)flt1[i * flt1_stride + j] - u;
        const int32_t f2 = (int32_t)flt2[i * flt2_stride + j] - u;
David Barker's avatar
David Barker committed
186
        const int32_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS);
187 188 189 190 191 192 193 194 195 196 197 198 199 200 201
        const int32_t e =
            ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) -
            src[i * src_stride + j];
        err += e * e;
      }
    }
  } else {
    const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
    const uint16_t *dat = CONVERT_TO_SHORTPTR(dat8);
    for (i = 0; i < height; ++i) {
      for (j = 0; j < width; ++j) {
        const int32_t u =
            (int32_t)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS);
        const int32_t f1 = (int32_t)flt1[i * flt1_stride + j] - u;
        const int32_t f2 = (int32_t)flt2[i * flt2_stride + j] - u;
David Barker's avatar
David Barker committed
202
        const int32_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS);
203 204 205 206 207
        const int32_t e =
            ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) -
            src[i * src_stride + j];
        err += e * e;
      }
208 209 210 211 212
    }
  }
  return err;
}

213 214
#define USE_SGRPROJ_REFINEMENT_SEARCH 1
static int64_t finer_search_pixel_proj_error(
215
    const uint8_t *src8, int width, int height, int src_stride,
216
    const uint8_t *dat8, int dat_stride, int use_highbitdepth, int32_t *flt1,
217
    int flt1_stride, int32_t *flt2, int flt2_stride, int start_step, int *xqd) {
218
  int64_t err = get_pixel_proj_error(src8, width, height, src_stride, dat8,
219 220
                                     dat_stride, use_highbitdepth, flt1,
                                     flt1_stride, flt2, flt2_stride, xqd);
221 222 223 224 225 226 227 228 229 230 231 232
  (void)start_step;
#if USE_SGRPROJ_REFINEMENT_SEARCH
  int64_t err2;
  int tap_min[] = { SGRPROJ_PRJ_MIN0, SGRPROJ_PRJ_MIN1 };
  int tap_max[] = { SGRPROJ_PRJ_MAX0, SGRPROJ_PRJ_MAX1 };
  for (int s = start_step; s >= 1; s >>= 1) {
    for (int p = 0; p < 2; ++p) {
      int skip = 0;
      do {
        if (xqd[p] - s >= tap_min[p]) {
          xqd[p] -= s;
          err2 = get_pixel_proj_error(src8, width, height, src_stride, dat8,
233 234
                                      dat_stride, use_highbitdepth, flt1,
                                      flt1_stride, flt2, flt2_stride, xqd);
235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250
          if (err2 > err) {
            xqd[p] += s;
          } else {
            err = err2;
            skip = 1;
            // At the highest step size continue moving in the same direction
            if (s == start_step) continue;
          }
        }
        break;
      } while (1);
      if (skip) break;
      do {
        if (xqd[p] + s <= tap_max[p]) {
          xqd[p] += s;
          err2 = get_pixel_proj_error(src8, width, height, src_stride, dat8,
251 252
                                      dat_stride, use_highbitdepth, flt1,
                                      flt1_stride, flt2, flt2_stride, xqd);
253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268
          if (err2 > err) {
            xqd[p] -= s;
          } else {
            err = err2;
            // At the highest step size continue moving in the same direction
            if (s == start_step) continue;
          }
        }
        break;
      } while (1);
    }
  }
#endif  // USE_SGRPROJ_REFINEMENT_SEARCH
  return err;
}

269
static void get_proj_subspace(const uint8_t *src8, int width, int height,
270
                              int src_stride, uint8_t *dat8, int dat_stride,
271 272 273
                              int use_highbitdepth, int32_t *flt1,
                              int flt1_stride, int32_t *flt2, int flt2_stride,
                              int *xq) {
274 275 276 277 278 279 280
  int i, j;
  double H[2][2] = { { 0, 0 }, { 0, 0 } };
  double C[2] = { 0, 0 };
  double Det;
  double x[2];
  const int size = width * height;

281 282
  aom_clear_system_state();

283 284 285
  // Default
  xq[0] = 0;
  xq[1] = 0;
286
  if (!use_highbitdepth) {
287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318
    const uint8_t *src = src8;
    const uint8_t *dat = dat8;
    for (i = 0; i < height; ++i) {
      for (j = 0; j < width; ++j) {
        const double u = (double)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS);
        const double s =
            (double)(src[i * src_stride + j] << SGRPROJ_RST_BITS) - u;
        const double f1 = (double)flt1[i * flt1_stride + j] - u;
        const double f2 = (double)flt2[i * flt2_stride + j] - u;
        H[0][0] += f1 * f1;
        H[1][1] += f2 * f2;
        H[0][1] += f1 * f2;
        C[0] += f1 * s;
        C[1] += f2 * s;
      }
    }
  } else {
    const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
    const uint16_t *dat = CONVERT_TO_SHORTPTR(dat8);
    for (i = 0; i < height; ++i) {
      for (j = 0; j < width; ++j) {
        const double u = (double)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS);
        const double s =
            (double)(src[i * src_stride + j] << SGRPROJ_RST_BITS) - u;
        const double f1 = (double)flt1[i * flt1_stride + j] - u;
        const double f2 = (double)flt2[i * flt2_stride + j] - u;
        H[0][0] += f1 * f1;
        H[1][1] += f2 * f2;
        H[0][1] += f1 * f2;
        C[0] += f1 * s;
        C[1] += f2 * s;
      }
319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335
    }
  }
  H[0][0] /= size;
  H[0][1] /= size;
  H[1][1] /= size;
  H[1][0] = H[0][1];
  C[0] /= size;
  C[1] /= size;
  Det = (H[0][0] * H[1][1] - H[0][1] * H[1][0]);
  if (Det < 1e-8) return;  // ill-posed, return default values
  x[0] = (H[1][1] * C[0] - H[0][1] * C[1]) / Det;
  x[1] = (H[0][0] * C[1] - H[1][0] * C[0]) / Det;
  xq[0] = (int)rint(x[0] * (1 << SGRPROJ_PRJ_BITS));
  xq[1] = (int)rint(x[1] * (1 << SGRPROJ_PRJ_BITS));
}

void encode_xq(int *xq, int *xqd) {
336
  xqd[0] = xq[0];
337
  xqd[0] = clamp(xqd[0], SGRPROJ_PRJ_MIN0, SGRPROJ_PRJ_MAX0);
338
  xqd[1] = (1 << SGRPROJ_PRJ_BITS) - xqd[0] - xq[1];
339 340 341 342
  xqd[1] = clamp(xqd[1], SGRPROJ_PRJ_MIN1, SGRPROJ_PRJ_MAX1);
}

static void search_selfguided_restoration(uint8_t *dat8, int width, int height,
343
                                          int dat_stride, const uint8_t *src8,
344
                                          int src_stride, int use_highbitdepth,
345 346
                                          int bit_depth, int pu_width,
                                          int pu_height, int *eps, int *xqd,
347
                                          int32_t *rstbuf) {
348
  int32_t *flt1 = rstbuf;
349
  int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
350
  int ep, bestep = 0;
351 352
  int64_t err, besterr = -1;
  int exqd[2], bestxqd[2] = { 0, 0 };
353 354
  int flt1_stride = ((width + 7) & ~7) + 8;
  int flt2_stride = ((width + 7) & ~7) + 8;
355 356 357 358
  assert(pu_width == (RESTORATION_PROC_UNIT_SIZE >> 1) ||
         pu_width == RESTORATION_PROC_UNIT_SIZE);
  assert(pu_height == (RESTORATION_PROC_UNIT_SIZE >> 1) ||
         pu_height == RESTORATION_PROC_UNIT_SIZE);
359 360 361
#if !CONFIG_HIGHBITDEPTH
  (void)bit_depth;
#endif
362

363 364
  for (ep = 0; ep < SGRPROJ_PARAMS; ep++) {
    int exq[2];
365
#if CONFIG_HIGHBITDEPTH
366
    if (use_highbitdepth) {
367
      uint16_t *dat = CONVERT_TO_SHORTPTR(dat8);
368 369 370 371 372 373 374
      for (int i = 0; i < height; i += pu_height)
        for (int j = 0; j < width; j += pu_width) {
          const int w = AOMMIN(pu_width, width - j);
          const int h = AOMMIN(pu_height, height - i);
          uint16_t *dat_p = dat + i * dat_stride + j;
          int32_t *flt1_p = flt1 + i * flt1_stride + j;
          int32_t *flt2_p = flt2 + i * flt2_stride + j;
375
#if USE_HIGHPASS_IN_SGRPROJ
376 377 378
          av1_highpass_filter_highbd(dat_p, w, h, dat_stride, flt1_p,
                                     flt1_stride, sgr_params[ep].corner,
                                     sgr_params[ep].edge);
379
#else
380
          av1_selfguided_restoration_highbd(
381
              dat_p, w, h, dat_stride, flt1_p, flt1_stride, bit_depth,
382
              sgr_params[ep].r1, sgr_params[ep].e1);
383
#endif  // USE_HIGHPASS_IN_SGRPROJ
384
          av1_selfguided_restoration_highbd(
385
              dat_p, w, h, dat_stride, flt2_p, flt2_stride, bit_depth,
386
              sgr_params[ep].r2, sgr_params[ep].e2);
387
        }
388
    } else {
389
#endif
390 391 392 393 394 395 396
      for (int i = 0; i < height; i += pu_height)
        for (int j = 0; j < width; j += pu_width) {
          const int w = AOMMIN(pu_width, width - j);
          const int h = AOMMIN(pu_height, height - i);
          uint8_t *dat_p = dat8 + i * dat_stride + j;
          int32_t *flt1_p = flt1 + i * flt1_stride + j;
          int32_t *flt2_p = flt2 + i * flt2_stride + j;
397
#if USE_HIGHPASS_IN_SGRPROJ
398 399
          av1_highpass_filter(dat_p, w, h, dat_stride, flt1_p, flt1_stride,
                              sgr_params[ep].corner, sgr_params[ep].edge);
400
#else
401
        av1_selfguided_restoration(dat_p, w, h, dat_stride, flt1_p, flt1_stride,
402
                                   sgr_params[ep].r1, sgr_params[ep].e1);
403
#endif  // USE_HIGHPASS_IN_SGRPROJ
404 405
          av1_selfguided_restoration(dat_p, w, h, dat_stride, flt2_p,
                                     flt2_stride, sgr_params[ep].r2,
406
                                     sgr_params[ep].e2);
407
        }
408
#if CONFIG_HIGHBITDEPTH
409
    }
410
#endif
411
    aom_clear_system_state();
412
    get_proj_subspace(src8, width, height, src_stride, dat8, dat_stride,
413 414
                      use_highbitdepth, flt1, flt1_stride, flt2, flt2_stride,
                      exq);
415
    aom_clear_system_state();
416
    encode_xq(exq, exqd);
417 418 419
    err = finer_search_pixel_proj_error(
        src8, width, height, src_stride, dat8, dat_stride, use_highbitdepth,
        flt1, flt1_stride, flt2, flt2_stride, 2, exqd);
420 421 422 423 424 425 426 427 428 429 430 431
    if (besterr == -1 || err < besterr) {
      bestep = ep;
      besterr = err;
      bestxqd[0] = exqd[0];
      bestxqd[1] = exqd[1];
    }
  }
  *eps = bestep;
  xqd[0] = bestxqd[0];
  xqd[1] = bestxqd[1];
}

432 433 434 435 436 437 438 439 440 441 442 443 444 445
static int count_sgrproj_bits(SgrprojInfo *sgrproj_info,
                              SgrprojInfo *ref_sgrproj_info) {
  int bits = SGRPROJ_PARAMS_BITS;
  bits += aom_count_primitive_refsubexpfin(
      SGRPROJ_PRJ_MAX0 - SGRPROJ_PRJ_MIN0 + 1, SGRPROJ_PRJ_SUBEXP_K,
      ref_sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0,
      sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0);
  bits += aom_count_primitive_refsubexpfin(
      SGRPROJ_PRJ_MAX1 - SGRPROJ_PRJ_MIN1 + 1, SGRPROJ_PRJ_SUBEXP_K,
      ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1,
      sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1);
  return bits;
}

446 447 448 449 450 451 452 453 454 455
struct rest_search_ctxt {
  const YV12_BUFFER_CONFIG *src;
  AV1_COMP *cpi;
  uint8_t *dgd_buffer;
  const uint8_t *src_buffer;
  int dgd_stride;
  int src_stride;
  int partial_frame;
  RestorationInfo *info;
  RestorationType *type;
456
  int64_t *best_tile_cost;
457 458 459 460 461 462 463 464 465 466 467
  int plane;
  int plane_width;
  int plane_height;
  int nrtiles_x;
  int nrtiles_y;
  YV12_BUFFER_CONFIG *dst_frame;
};

// Fill in ctxt. Returns the number of restoration tiles for this plane
static INLINE int init_rest_search_ctxt(
    const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, int partial_frame, int plane,
468
    RestorationInfo *info, RestorationType *type, int64_t *best_tile_cost,
469
    YV12_BUFFER_CONFIG *dst_frame, struct rest_search_ctxt *ctxt) {
470
  AV1_COMMON *const cm = &cpi->common;
471 472 473 474 475 476 477 478 479
  ctxt->src = src;
  ctxt->cpi = cpi;
  ctxt->partial_frame = partial_frame;
  ctxt->info = info;
  ctxt->type = type;
  ctxt->best_tile_cost = best_tile_cost;
  ctxt->plane = plane;
  ctxt->dst_frame = dst_frame;

480
  const YV12_BUFFER_CONFIG *dgd = cm->frame_to_show;
481 482 483 484 485 486 487 488 489
  const int is_uv = plane != AOM_PLANE_Y;
  ctxt->plane_width = src->crop_widths[is_uv];
  ctxt->plane_height = src->crop_heights[is_uv];
  ctxt->src_buffer = src->buffers[plane];
  ctxt->src_stride = src->strides[is_uv];
  ctxt->dgd_buffer = dgd->buffers[plane];
  ctxt->dgd_stride = dgd->strides[is_uv];
  assert(src->crop_widths[is_uv] == dgd->crop_widths[is_uv]);
  assert(src->crop_heights[is_uv] == dgd->crop_heights[is_uv]);
490

491
  return av1_get_rest_ntiles(ctxt->plane_width, ctxt->plane_height,
492 493
                             cm->rst_info[plane].restoration_tilesize,
                             &ctxt->nrtiles_x, &ctxt->nrtiles_y);
494
}
495

496
typedef void (*rtile_visitor_t)(const struct rest_search_ctxt *search_ctxt,
497 498
                                int rtile_idx,
                                const RestorationTileLimits *limits, void *arg);
499 500 501 502 503 504

static void foreach_rtile_in_tile(const struct rest_search_ctxt *ctxt,
                                  int tile_row, int tile_col,
                                  rtile_visitor_t fun, void *arg) {
  const AV1_COMMON *const cm = &ctxt->cpi->common;
  const RestorationInfo *rsi = ctxt->cpi->rst_search;
Dominic Symes's avatar
Dominic Symes committed
505 506 507 508 509 510 511 512 513 514 515 516 517 518 519
  TileInfo tile_info;

  av1_tile_set_row(&tile_info, cm, tile_row);
  av1_tile_set_col(&tile_info, cm, tile_col);

  int tile_col_start = tile_info.mi_col_start * MI_SIZE;
  int tile_col_end = tile_info.mi_col_end * MI_SIZE;
  int tile_row_start = tile_info.mi_row_start * MI_SIZE;
  int tile_row_end = tile_info.mi_row_end * MI_SIZE;
  if (ctxt->plane > 0) {
    tile_col_start = ROUND_POWER_OF_TWO(tile_col_start, cm->subsampling_x);
    tile_col_end = ROUND_POWER_OF_TWO(tile_col_end, cm->subsampling_x);
    tile_row_start = ROUND_POWER_OF_TWO(tile_row_start, cm->subsampling_y);
    tile_row_end = ROUND_POWER_OF_TWO(tile_row_end, cm->subsampling_y);
  }
520

521 522 523 524 525
#if CONFIG_FRAME_SUPERRES
  // If upscaling is enabled, the tile limits need scaling to match the
  // upscaled frame where the restoration tiles live. To do this, scale up the
  // top-left and bottom-right of the tile.
  if (!av1_superres_unscaled(cm)) {
526 527 528 529
    av1_calculate_unscaled_superres_size(&tile_col_start, &tile_row_start,
                                         cm->superres_scale_denominator);
    av1_calculate_unscaled_superres_size(&tile_col_end, &tile_row_end,
                                         cm->superres_scale_denominator);
530 531 532 533 534 535
    // Make sure we don't fall off the bottom-right of the frame.
    tile_col_end = AOMMIN(tile_col_end, ctxt->plane_width);
    tile_row_end = AOMMIN(tile_row_end, ctxt->plane_height);
  }
#endif  // CONFIG_FRAME_SUPERRES

536
  const int rtile_size = rsi->restoration_tilesize;
Dominic Symes's avatar
Dominic Symes committed
537
  const int rtile_col0 = (tile_col_start + rtile_size - 1) / rtile_size;
538
  const int rtile_col1 =
Dominic Symes's avatar
Dominic Symes committed
539 540 541 542
      AOMMIN((tile_col_end + rtile_size - 1) / rtile_size, ctxt->nrtiles_x);
  const int rtile_row0 = (tile_row_start + rtile_size - 1) / rtile_size;
  const int rtile_row1 =
      AOMMIN((tile_row_end + rtile_size - 1) / rtile_size, ctxt->nrtiles_y);
543
  const int ss_y = ctxt->plane > 0 && cm->subsampling_y;
544 545 546 547

  for (int rtile_row = rtile_row0; rtile_row < rtile_row1; ++rtile_row) {
    for (int rtile_col = rtile_col0; rtile_col < rtile_col1; ++rtile_col) {
      const int rtile_idx = rtile_row * ctxt->nrtiles_x + rtile_col;
548
      RestorationTileLimits limits = av1_get_rest_tile_limits(
549 550
          rtile_idx, ctxt->nrtiles_x, ctxt->nrtiles_y, rtile_size,
          ctxt->plane_width, ctxt->plane_height, ss_y);
551
      fun(ctxt, rtile_idx, &limits, arg);
552
    }
553
  }
554 555 556
}

static void search_sgrproj_for_rtile(const struct rest_search_ctxt *ctxt,
557 558 559
                                     int rtile_idx,
                                     const RestorationTileLimits *limits,
                                     void *arg) {
560 561 562 563 564 565 566
  const MACROBLOCK *const x = &ctxt->cpi->td.mb;
  const AV1_COMMON *const cm = &ctxt->cpi->common;
  RestorationInfo *rsi = ctxt->cpi->rst_search;
  SgrprojInfo *sgrproj_info = ctxt->info->sgrproj_info;

  SgrprojInfo *ref_sgrproj_info = (SgrprojInfo *)arg;

567 568 569 570
  int64_t err =
      sse_restoration_tile(ctxt->src, cm->frame_to_show, cm, limits->h_start,
                           limits->h_end - limits->h_start, limits->v_start,
                           limits->v_end - limits->v_start, (1 << ctxt->plane));
571
  // #bits when a tile is not restored
572
  int bits = x->sgrproj_restore_cost[0];
573
  double cost_norestore = RDCOST_DBL(x->rdmult, (bits >> 4), err);
574
  ctxt->best_tile_cost[rtile_idx] = INT64_MAX;
575 576 577

  RestorationInfo *plane_rsi = &rsi[ctxt->plane];
  SgrprojInfo *rtile_sgrproj_info = &plane_rsi->sgrproj_info[rtile_idx];
578 579
  uint8_t *dgd_start =
      ctxt->dgd_buffer + limits->v_start * ctxt->dgd_stride + limits->h_start;
580
  const uint8_t *src_start =
581
      ctxt->src_buffer + limits->v_start * ctxt->src_stride + limits->h_start;
582

583
  search_selfguided_restoration(
584 585
      dgd_start, limits->h_end - limits->h_start,
      limits->v_end - limits->v_start, ctxt->dgd_stride, src_start,
586
      ctxt->src_stride,
587
#if CONFIG_HIGHBITDEPTH
588
      cm->use_highbitdepth, cm->bit_depth,
589
#else
590
      0, 8,
591
#endif  // CONFIG_HIGHBITDEPTH
592 593 594
      rsi[ctxt->plane].procunit_width, rsi[ctxt->plane].procunit_height,
      &rtile_sgrproj_info->ep, rtile_sgrproj_info->xqd,
      cm->rst_internal.tmpbuf);
595 596
  plane_rsi->restoration_type[rtile_idx] = RESTORE_SGRPROJ;
  err = try_restoration_tile(ctxt->src, ctxt->cpi, rsi, (1 << ctxt->plane),
597
                             ctxt->partial_frame, rtile_idx, ctxt->dst_frame);
598 599 600
  bits =
      count_sgrproj_bits(&plane_rsi->sgrproj_info[rtile_idx], ref_sgrproj_info)
      << AV1_PROB_COST_SHIFT;
601
  bits += x->sgrproj_restore_cost[1];
602 603 604 605 606 607 608 609 610 611 612 613 614 615 616
  double cost_sgrproj = RDCOST_DBL(x->rdmult, (bits >> 4), err);
  if (cost_sgrproj >= cost_norestore) {
    ctxt->type[rtile_idx] = RESTORE_NONE;
  } else {
    ctxt->type[rtile_idx] = RESTORE_SGRPROJ;
    *ref_sgrproj_info = sgrproj_info[rtile_idx] =
        plane_rsi->sgrproj_info[rtile_idx];
    ctxt->best_tile_cost[rtile_idx] = err;
  }
  plane_rsi->restoration_type[rtile_idx] = RESTORE_NONE;
}

static double search_sgrproj(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
                             int partial_frame, int plane,
                             RestorationInfo *info, RestorationType *type,
617
                             int64_t *best_tile_cost,
618
                             YV12_BUFFER_CONFIG *dst_frame) {
619
  const MACROBLOCK *const x = &cpi->td.mb;
620 621 622 623 624 625 626 627 628 629 630 631 632 633
  struct rest_search_ctxt ctxt;
  const int nrtiles =
      init_rest_search_ctxt(src, cpi, partial_frame, plane, info, type,
                            best_tile_cost, dst_frame, &ctxt);

  RestorationInfo *plane_rsi = &cpi->rst_search[plane];
  plane_rsi->frame_restoration_type = RESTORE_SGRPROJ;
  for (int rtile_idx = 0; rtile_idx < nrtiles; ++rtile_idx) {
    plane_rsi->restoration_type[rtile_idx] = RESTORE_NONE;
  }

  // Compute best Sgrproj filters for each rtile, one (encoder/decoder)
  // tile at a time.
  const AV1_COMMON *const cm = &cpi->common;
634 635 636
#if CONFIG_HIGHBITDEPTH
  if (cm->use_highbitdepth)
    extend_frame_highbd(CONVERT_TO_SHORTPTR(ctxt.dgd_buffer), ctxt.plane_width,
637 638
                        ctxt.plane_height, ctxt.dgd_stride, SGRPROJ_BORDER_HORZ,
                        SGRPROJ_BORDER_VERT);
639 640 641
  else
#endif
    extend_frame(ctxt.dgd_buffer, ctxt.plane_width, ctxt.plane_height,
642
                 ctxt.dgd_stride, SGRPROJ_BORDER_HORZ, SGRPROJ_BORDER_VERT);
643

644 645 646 647 648 649
  for (int tile_row = 0; tile_row < cm->tile_rows; ++tile_row) {
    for (int tile_col = 0; tile_col < cm->tile_cols; ++tile_col) {
      SgrprojInfo ref_sgrproj_info;
      set_default_sgrproj(&ref_sgrproj_info);
      foreach_rtile_in_tile(&ctxt, tile_row, tile_col, search_sgrproj_for_rtile,
                            &ref_sgrproj_info);
650 651
    }
  }
652

653
  // Cost for Sgrproj filtering
654
  SgrprojInfo ref_sgrproj_info;
655
  set_default_sgrproj(&ref_sgrproj_info);
656 657 658 659 660
  SgrprojInfo *sgrproj_info = info->sgrproj_info;

  int bits = frame_level_restore_bits[plane_rsi->frame_restoration_type]
             << AV1_PROB_COST_SHIFT;
  for (int rtile_idx = 0; rtile_idx < nrtiles; ++rtile_idx) {
661
    bits += x->sgrproj_restore_cost[type[rtile_idx] != RESTORE_NONE];
662 663 664
    plane_rsi->sgrproj_info[rtile_idx] = sgrproj_info[rtile_idx];
    if (type[rtile_idx] == RESTORE_SGRPROJ) {
      bits += count_sgrproj_bits(&plane_rsi->sgrproj_info[rtile_idx],
665 666
                                 &ref_sgrproj_info)
              << AV1_PROB_COST_SHIFT;
667
      ref_sgrproj_info = plane_rsi->sgrproj_info[rtile_idx];
668
    }
669
    plane_rsi->restoration_type[rtile_idx] = type[rtile_idx];
670
  }
671 672
  int64_t err = try_restoration_frame(src, cpi, cpi->rst_search, (1 << plane),
                                      partial_frame, dst_frame);
673
  double cost_sgrproj = RDCOST_DBL(cpi->td.mb.rdmult, (bits >> 4), err);
674 675 676
  return cost_sgrproj;
}

677 678
static double find_average(const uint8_t *src, int h_start, int h_end,
                           int v_start, int v_end, int stride) {
679 680 681
  uint64_t sum = 0;
  double avg = 0;
  int i, j;
682
  aom_clear_system_state();
683 684 685
  for (i = v_start; i < v_end; i++)
    for (j = h_start; j < h_end; j++) sum += src[i * stride + j];
  avg = (double)sum / ((v_end - v_start) * (h_end - h_start));
686 687 688
  return avg;
}

689 690 691 692
static void compute_stats(int wiener_win, const uint8_t *dgd,
                          const uint8_t *src, int h_start, int h_end,
                          int v_start, int v_end, int dgd_stride,
                          int src_stride, double *M, double *H) {
693
  int i, j, k, l;
694
  double Y[WIENER_WIN2];
695 696
  const int wiener_win2 = wiener_win * wiener_win;
  const int wiener_halfwin = (wiener_win >> 1);
697 698
  const double avg =
      find_average(dgd, h_start, h_end, v_start, v_end, dgd_stride);
699

700 701
  memset(M, 0, sizeof(*M) * wiener_win2);
  memset(H, 0, sizeof(*H) * wiener_win2 * wiener_win2);
702 703
  for (i = v_start; i < v_end; i++) {
    for (j = h_start; j < h_end; j++) {
704 705
      const double X = (double)src[i * src_stride + j] - avg;
      int idx = 0;
706 707
      for (k = -wiener_halfwin; k <= wiener_halfwin; k++) {
        for (l = -wiener_halfwin; l <= wiener_halfwin; l++) {
708 709 710 711
          Y[idx] = (double)dgd[(i + l) * dgd_stride + (j + k)] - avg;
          idx++;
        }
      }
712
      assert(idx == wiener_win2);
713
      for (k = 0; k < wiener_win2; ++k) {
714
        M[k] += Y[k] * X;
715 716
        H[k * wiener_win2 + k] += Y[k] * Y[k];
        for (l = k + 1; l < wiener_win2; ++l) {
717 718 719
          // H is a symmetric matrix, so we only need to fill out the upper
          // triangle here. We can copy it down to the lower triangle outside
          // the (i, j) loops.
720
          H[k * wiener_win2 + l] += Y[k] * Y[l];
721 722 723 724
        }
      }
    }
  }
725 726 727
  for (k = 0; k < wiener_win2; ++k) {
    for (l = k + 1; l < wiener_win2; ++l) {
      H[l * wiener_win2 + k] = H[k * wiener_win2 + l];
728 729
    }
  }
730 731
}

732
#if CONFIG_HIGHBITDEPTH
733
static double find_average_highbd(const uint16_t *src, int h_start, int h_end,
734
                                  int v_start, int v_end, int stride) {
735 736 737
  uint64_t sum = 0;
  double avg = 0;
  int i, j;
738
  aom_clear_system_state();
739 740 741
  for (i = v_start; i < v_end; i++)
    for (j = h_start; j < h_end; j++) sum += src[i * stride + j];
  avg = (double)sum / ((v_end - v_start) * (h_end - h_start));
742 743 744
  return avg;
}

745 746 747 748
static void compute_stats_highbd(int wiener_win, const uint8_t *dgd8,
                                 const uint8_t *src8, int h_start, int h_end,
                                 int v_start, int v_end, int dgd_stride,
                                 int src_stride, double *M, double *H) {
749
  int i, j, k, l;
750
  double Y[WIENER_WIN2];
751 752
  const int wiener_win2 = wiener_win * wiener_win;
  const int wiener_halfwin = (wiener_win >> 1);
753 754
  const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  const uint16_t *dgd = CONVERT_TO_SHORTPTR(dgd8);
755 756
  const double avg =
      find_average_highbd(dgd, h_start, h_end, v_start, v_end, dgd_stride);
757

758 759
  memset(M, 0, sizeof(*M) * wiener_win2);
  memset(H, 0, sizeof(*H) * wiener_win2 * wiener_win2);
760 761
  for (i = v_start; i < v_end; i++) {
    for (j = h_start; j < h_end; j++) {
762 763
      const double X = (double)src[i * src_stride + j] - avg;
      int idx = 0;
764 765
      for (k = -wiener_halfwin; k <= wiener_halfwin; k++) {
        for (l = -wiener_halfwin; l <= wiener_halfwin; l++) {
766 767 768 769
          Y[idx] = (double)dgd[(i + l) * dgd_stride + (j + k)] - avg;
          idx++;
        }
      }
770
      assert(idx == wiener_win2);
771
      for (k = 0; k < wiener_win2; ++k) {
772
        M[k] += Y[k] * X;
773 774
        H[k * wiener_win2 + k] += Y[k] * Y[k];
        for (l = k + 1; l < wiener_win2; ++l) {
775 776 777
          // H is a symmetric matrix, so we only need to fill out the upper
          // triangle here. We can copy it down to the lower triangle outside
          // the (i, j) loops.
778
          H[k * wiener_win2 + l] += Y[k] * Y[l];
779 780 781 782
        }
      }
    }
  }
783 784 785
  for (k = 0; k < wiener_win2; ++k) {
    for (l = k + 1; l < wiener_win2; ++l) {
      H[l * wiener_win2 + k] = H[k * wiener_win2 + l];
786 787
    }
  }
788
}
789
#endif  // CONFIG_HIGHBITDEPTH
790

791 792 793
static INLINE int wrap_index(int i, int wiener_win) {
  const int wiener_halfwin1 = (wiener_win >> 1) + 1;
  return (i >= wiener_halfwin1 ? wiener_win - 1 - i : i);
794 795 796
}

// Fix vector b, update vector a
797 798
static void update_a_sep_sym(int wiener_win, double **Mc, double **Hc,
                             double *a, double *b) {
799
  int i, j;
800
  double S[WIENER_WIN];
801
  double A[WIENER_HALFWIN1], B[WIENER_HALFWIN1 * WIENER_HALFWIN1];
802 803
  const int wiener_win2 = wiener_win * wiener_win;
  const int wiener_halfwin1 = (wiener_win >> 1) + 1;
804 805
  memset(A, 0, sizeof(A));
  memset(B, 0, sizeof(B));
806 807 808
  for (i = 0; i < wiener_win; i++) {
    for (j = 0; j < wiener_win; ++j) {
      const int jj = wrap_index(j, wiener_win);
809 810 811
      A[jj] += Mc[i][j] * b[i];
    }
  }
812 813
  for (i = 0; i < wiener_win; i++) {
    for (j = 0; j < wiener_win; j++) {
814
      int k, l;
815 816 817 818 819 820
      for (k = 0; k < wiener_win; ++k)
        for (l = 0; l < wiener_win; ++l) {
          const int kk = wrap_index(k, wiener_win);
          const int ll = wrap_index(l, wiener_win);
          B[ll * wiener_halfwin1 + kk] +=
              Hc[j * wiener_win + i][k * wiener_win2 + l] * b[i] * b[j];
821 822 823
        }
    }
  }
Aamir Anis's avatar
Aamir Anis committed
824
  // Normalization enforcement in the system of equations itself
825
  for (i = 0; i < wiener_halfwin1 - 1; ++i)
826
    A[i] -=
827 828 829 830 831 832 833 834 835 836 837 838 839 840 841
        A[wiener_halfwin1 - 1] * 2 +
        B[i * wiener_halfwin1 + wiener_halfwin1 - 1] -
        2 * B[(wiener_halfwin1 - 1) * wiener_halfwin1 + (wiener_halfwin1 - 1)];
  for (i = 0; i < wiener_halfwin1 - 1; ++i)
    for (j = 0; j < wiener_halfwin1 - 1; ++j)
      B[i * wiener_halfwin1 + j] -=
          2 * (B[i * wiener_halfwin1 + (wiener_halfwin1 - 1)] +
               B[(wiener_halfwin1 - 1) * wiener_halfwin1 + j] -
               2 * B[(wiener_halfwin1 - 1) * wiener_halfwin1 +
                     (wiener_halfwin1 - 1)]);
  if (linsolve(wiener_halfwin1 - 1, B, wiener_halfwin1, A, S)) {
    S[wiener_halfwin1 - 1] = 1.0;
    for (i = wiener_halfwin1; i < wiener_win; ++i) {
      S[i] = S[wiener_win - 1 - i];
      S[wiener_halfwin1 - 1] -= 2 * S[i];
842
    }
843
    memcpy(a, S, wiener_win * sizeof(*a));
844 845 846 847
  }
}

// Fix vector a, update vector b
848 849
static void update_b_sep_sym(int wiener_win, double **Mc, double **Hc,
                             double *a, double *b) {
850
  int i, j;
851
  double S[WIENER_WIN];
852
  double A[WIENER_HALFWIN1], B[WIENER_HALFWIN1 * WIENER_HALFWIN1];
853 854
  const int wiener_win2 = wiener_win * wiener_win;
  const int wiener_halfwin1 = (wiener_win >> 1) + 1;
855 856
  memset(A, 0, sizeof(A));
  memset(B, 0, sizeof(B));
857 858 859
  for (i = 0; i < wiener_win; i++) {
    const int ii = wrap_index(i, wiener_win);
    for (j = 0; j < wiener_win; j++) A[ii] += Mc[i][j] * a[j];