pickrst.c 58.1 KB
Newer Older
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3
 *
4 5 6 7 8 9
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 11 12
 */

#include <assert.h>
13
#include <float.h>
14 15 16
#include <limits.h>
#include <math.h>

Yaowu Xu's avatar
Yaowu Xu committed
17
#include "./aom_scale_rtcd.h"
18

Yaowu Xu's avatar
Yaowu Xu committed
19
#include "aom_dsp/aom_dsp_common.h"
20 21
#include "aom_dsp/binary_codes_writer.h"
#include "aom_dsp/psnr.h"
Yaowu Xu's avatar
Yaowu Xu committed
22
#include "aom_mem/aom_mem.h"
23
#include "aom_ports/mem.h"
24
#include "aom_ports/system_state.h"
25

26 27
#include "av1/common/onyxc_int.h"
#include "av1/common/quant_common.h"
28
#include "av1/common/restoration.h"
29

30
#include "av1/encoder/av1_quantize.h"
31
#include "av1/encoder/encoder.h"
32
#include "av1/encoder/mathutils.h"
33 34
#include "av1/encoder/picklpf.h"
#include "av1/encoder/pickrst.h"
35

36
// When set to RESTORE_WIENER or RESTORE_SGRPROJ only those are allowed.
37
// When set to RESTORE_TYPES we allow switchable.
38
static const RestorationType force_restore_type = RESTORE_TYPES;
39 40

// Number of Wiener iterations
41
#define NUM_WIENER_ITERS 5
42

43
typedef double (*search_restore_type)(const YV12_BUFFER_CONFIG *src,
44 45
                                      AV1_COMP *cpi, int plane,
                                      RestorationInfo *info,
46
                                      RestorationType *rest_level,
47
                                      int64_t *best_tile_cost,
48
                                      YV12_BUFFER_CONFIG *dst_frame);
49

50
const int frame_level_restore_bits[RESTORE_TYPES] = { 2, 2, 2, 2 };
51 52

static int64_t sse_restoration_tile(const YV12_BUFFER_CONFIG *src,
53 54
                                    const YV12_BUFFER_CONFIG *dst,
                                    const AV1_COMMON *cm, int h_start,
55
                                    int width, int v_start, int height,
56 57
                                    int components_pattern) {
  int64_t filt_err = 0;
58 59 60 61
  (void)cm;
  // Y and UV components cannot be mixed
  assert(components_pattern == 1 || components_pattern == 2 ||
         components_pattern == 4 || components_pattern == 6);
62
#if CONFIG_HIGHBITDEPTH
63
  if (cm->use_highbitdepth) {
64 65 66 67 68
    if ((components_pattern >> AOM_PLANE_Y) & 1) {
      filt_err +=
          aom_highbd_get_y_sse_part(src, dst, h_start, width, v_start, height);
    }
    if ((components_pattern >> AOM_PLANE_U) & 1) {
69 70
      filt_err +=
          aom_highbd_get_u_sse_part(src, dst, h_start, width, v_start, height);
71 72
    }
    if ((components_pattern >> AOM_PLANE_V) & 1) {
73 74
      filt_err +=
          aom_highbd_get_v_sse_part(src, dst, h_start, width, v_start, height);
75 76
    }
    return filt_err;
77
  }
78
#endif  // CONFIG_HIGHBITDEPTH
79 80 81 82
  if ((components_pattern >> AOM_PLANE_Y) & 1) {
    filt_err += aom_get_y_sse_part(src, dst, h_start, width, v_start, height);
  }
  if ((components_pattern >> AOM_PLANE_U) & 1) {
83
    filt_err += aom_get_u_sse_part(src, dst, h_start, width, v_start, height);
84 85
  }
  if ((components_pattern >> AOM_PLANE_V) & 1) {
86
    filt_err += aom_get_v_sse_part(src, dst, h_start, width, v_start, height);
87
  }
88 89 90
  return filt_err;
}

91 92
static int64_t sse_restoration_frame(AV1_COMMON *const cm,
                                     const YV12_BUFFER_CONFIG *src,
93 94 95
                                     const YV12_BUFFER_CONFIG *dst,
                                     int components_pattern) {
  int64_t filt_err = 0;
96
#if CONFIG_HIGHBITDEPTH
97 98 99 100 101 102 103 104 105 106 107 108
  if (cm->use_highbitdepth) {
    if ((components_pattern >> AOM_PLANE_Y) & 1) {
      filt_err += aom_highbd_get_y_sse(src, dst);
    }
    if ((components_pattern >> AOM_PLANE_U) & 1) {
      filt_err += aom_highbd_get_u_sse(src, dst);
    }
    if ((components_pattern >> AOM_PLANE_V) & 1) {
      filt_err += aom_highbd_get_v_sse(src, dst);
    }
    return filt_err;
  }
109 110
#else
  (void)cm;
111
#endif  // CONFIG_HIGHBITDEPTH
112 113 114 115 116 117 118 119 120 121 122 123
  if ((components_pattern >> AOM_PLANE_Y) & 1) {
    filt_err = aom_get_y_sse(src, dst);
  }
  if ((components_pattern >> AOM_PLANE_U) & 1) {
    filt_err += aom_get_u_sse(src, dst);
  }
  if ((components_pattern >> AOM_PLANE_V) & 1) {
    filt_err += aom_get_v_sse(src, dst);
  }
  return filt_err;
}

124 125
static int64_t try_restoration_tile(const YV12_BUFFER_CONFIG *src,
                                    AV1_COMP *const cpi, RestorationInfo *rsi,
126
                                    int components_pattern, int tile_idx,
127
                                    YV12_BUFFER_CONFIG *dst_frame) {
128
  AV1_COMMON *const cm = &cpi->common;
129 130 131 132 133

  // Y and UV components cannot be mixed
  assert(components_pattern == 1 || components_pattern == 2 ||
         components_pattern == 4 || components_pattern == 6);

134 135 136 137 138 139 140 141 142
  const int is_uv = components_pattern > 1;
  const int width = src->crop_widths[is_uv];
  const int height = src->crop_heights[is_uv];

  const int rtile_size = cm->rst_info[is_uv].restoration_tilesize;
  const int ss_y = is_uv && cm->subsampling_y;

  int nhtiles, nvtiles;
  av1_get_rest_ntiles(width, height, rtile_size, &nhtiles, &nvtiles);
143

144
  av1_loop_restoration_frame(cm->frame_to_show, cm, rsi, components_pattern,
145
                             dst_frame);
146
  RestorationTileLimits limits = av1_get_rest_tile_limits(
147 148
      tile_idx, nhtiles, nvtiles, rtile_size, width, height, ss_y);
  int64_t filt_err = sse_restoration_tile(
149 150
      src, dst_frame, cm, limits.h_start, limits.h_end - limits.h_start,
      limits.v_start, limits.v_end - limits.v_start, components_pattern);
151 152 153 154 155

  return filt_err;
}

static int64_t try_restoration_frame(const YV12_BUFFER_CONFIG *src,
Yaowu Xu's avatar
Yaowu Xu committed
156
                                     AV1_COMP *const cpi, RestorationInfo *rsi,
157
                                     int components_pattern,
158
                                     YV12_BUFFER_CONFIG *dst_frame) {
Yaowu Xu's avatar
Yaowu Xu committed
159
  AV1_COMMON *const cm = &cpi->common;
160
  int64_t filt_err;
161
  av1_loop_restoration_frame(cm->frame_to_show, cm, rsi, components_pattern,
162
                             dst_frame);
163
  filt_err = sse_restoration_frame(cm, src, dst_frame, components_pattern);
164 165 166
  return filt_err;
}

167 168
static int64_t get_pixel_proj_error(const uint8_t *src8, int width, int height,
                                    int src_stride, const uint8_t *dat8,
169
                                    int dat_stride, int use_highbitdepth,
170 171
                                    int32_t *flt1, int flt1_stride,
                                    int32_t *flt2, int flt2_stride, int *xqd) {
172 173 174 175
  int i, j;
  int64_t err = 0;
  int xq[2];
  decode_xq(xqd, xq);
176
  if (!use_highbitdepth) {
177 178 179 180 181 182 183 184
    const uint8_t *src = src8;
    const uint8_t *dat = dat8;
    for (i = 0; i < height; ++i) {
      for (j = 0; j < width; ++j) {
        const int32_t u =
            (int32_t)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS);
        const int32_t f1 = (int32_t)flt1[i * flt1_stride + j] - u;
        const int32_t f2 = (int32_t)flt2[i * flt2_stride + j] - u;
David Barker's avatar
David Barker committed
185
        const int32_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS);
186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
        const int32_t e =
            ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) -
            src[i * src_stride + j];
        err += e * e;
      }
    }
  } else {
    const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
    const uint16_t *dat = CONVERT_TO_SHORTPTR(dat8);
    for (i = 0; i < height; ++i) {
      for (j = 0; j < width; ++j) {
        const int32_t u =
            (int32_t)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS);
        const int32_t f1 = (int32_t)flt1[i * flt1_stride + j] - u;
        const int32_t f2 = (int32_t)flt2[i * flt2_stride + j] - u;
David Barker's avatar
David Barker committed
201
        const int32_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS);
202 203 204 205 206
        const int32_t e =
            ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) -
            src[i * src_stride + j];
        err += e * e;
      }
207 208 209 210 211
    }
  }
  return err;
}

212 213
#define USE_SGRPROJ_REFINEMENT_SEARCH 1
static int64_t finer_search_pixel_proj_error(
214
    const uint8_t *src8, int width, int height, int src_stride,
215
    const uint8_t *dat8, int dat_stride, int use_highbitdepth, int32_t *flt1,
216
    int flt1_stride, int32_t *flt2, int flt2_stride, int start_step, int *xqd) {
217
  int64_t err = get_pixel_proj_error(src8, width, height, src_stride, dat8,
218 219
                                     dat_stride, use_highbitdepth, flt1,
                                     flt1_stride, flt2, flt2_stride, xqd);
220 221 222 223 224 225 226 227 228 229 230 231
  (void)start_step;
#if USE_SGRPROJ_REFINEMENT_SEARCH
  int64_t err2;
  int tap_min[] = { SGRPROJ_PRJ_MIN0, SGRPROJ_PRJ_MIN1 };
  int tap_max[] = { SGRPROJ_PRJ_MAX0, SGRPROJ_PRJ_MAX1 };
  for (int s = start_step; s >= 1; s >>= 1) {
    for (int p = 0; p < 2; ++p) {
      int skip = 0;
      do {
        if (xqd[p] - s >= tap_min[p]) {
          xqd[p] -= s;
          err2 = get_pixel_proj_error(src8, width, height, src_stride, dat8,
232 233
                                      dat_stride, use_highbitdepth, flt1,
                                      flt1_stride, flt2, flt2_stride, xqd);
234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249
          if (err2 > err) {
            xqd[p] += s;
          } else {
            err = err2;
            skip = 1;
            // At the highest step size continue moving in the same direction
            if (s == start_step) continue;
          }
        }
        break;
      } while (1);
      if (skip) break;
      do {
        if (xqd[p] + s <= tap_max[p]) {
          xqd[p] += s;
          err2 = get_pixel_proj_error(src8, width, height, src_stride, dat8,
250 251
                                      dat_stride, use_highbitdepth, flt1,
                                      flt1_stride, flt2, flt2_stride, xqd);
252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267
          if (err2 > err) {
            xqd[p] -= s;
          } else {
            err = err2;
            // At the highest step size continue moving in the same direction
            if (s == start_step) continue;
          }
        }
        break;
      } while (1);
    }
  }
#endif  // USE_SGRPROJ_REFINEMENT_SEARCH
  return err;
}

268
static void get_proj_subspace(const uint8_t *src8, int width, int height,
269
                              int src_stride, uint8_t *dat8, int dat_stride,
270 271 272
                              int use_highbitdepth, int32_t *flt1,
                              int flt1_stride, int32_t *flt2, int flt2_stride,
                              int *xq) {
273 274 275 276 277 278 279
  int i, j;
  double H[2][2] = { { 0, 0 }, { 0, 0 } };
  double C[2] = { 0, 0 };
  double Det;
  double x[2];
  const int size = width * height;

280 281
  aom_clear_system_state();

282 283 284
  // Default
  xq[0] = 0;
  xq[1] = 0;
285
  if (!use_highbitdepth) {
286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317
    const uint8_t *src = src8;
    const uint8_t *dat = dat8;
    for (i = 0; i < height; ++i) {
      for (j = 0; j < width; ++j) {
        const double u = (double)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS);
        const double s =
            (double)(src[i * src_stride + j] << SGRPROJ_RST_BITS) - u;
        const double f1 = (double)flt1[i * flt1_stride + j] - u;
        const double f2 = (double)flt2[i * flt2_stride + j] - u;
        H[0][0] += f1 * f1;
        H[1][1] += f2 * f2;
        H[0][1] += f1 * f2;
        C[0] += f1 * s;
        C[1] += f2 * s;
      }
    }
  } else {
    const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
    const uint16_t *dat = CONVERT_TO_SHORTPTR(dat8);
    for (i = 0; i < height; ++i) {
      for (j = 0; j < width; ++j) {
        const double u = (double)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS);
        const double s =
            (double)(src[i * src_stride + j] << SGRPROJ_RST_BITS) - u;
        const double f1 = (double)flt1[i * flt1_stride + j] - u;
        const double f2 = (double)flt2[i * flt2_stride + j] - u;
        H[0][0] += f1 * f1;
        H[1][1] += f2 * f2;
        H[0][1] += f1 * f2;
        C[0] += f1 * s;
        C[1] += f2 * s;
      }
318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334
    }
  }
  H[0][0] /= size;
  H[0][1] /= size;
  H[1][1] /= size;
  H[1][0] = H[0][1];
  C[0] /= size;
  C[1] /= size;
  Det = (H[0][0] * H[1][1] - H[0][1] * H[1][0]);
  if (Det < 1e-8) return;  // ill-posed, return default values
  x[0] = (H[1][1] * C[0] - H[0][1] * C[1]) / Det;
  x[1] = (H[0][0] * C[1] - H[1][0] * C[0]) / Det;
  xq[0] = (int)rint(x[0] * (1 << SGRPROJ_PRJ_BITS));
  xq[1] = (int)rint(x[1] * (1 << SGRPROJ_PRJ_BITS));
}

void encode_xq(int *xq, int *xqd) {
335
  xqd[0] = xq[0];
336
  xqd[0] = clamp(xqd[0], SGRPROJ_PRJ_MIN0, SGRPROJ_PRJ_MAX0);
337
  xqd[1] = (1 << SGRPROJ_PRJ_BITS) - xqd[0] - xq[1];
338 339 340 341
  xqd[1] = clamp(xqd[1], SGRPROJ_PRJ_MIN1, SGRPROJ_PRJ_MAX1);
}

static void search_selfguided_restoration(uint8_t *dat8, int width, int height,
342
                                          int dat_stride, const uint8_t *src8,
343
                                          int src_stride, int use_highbitdepth,
344 345
                                          int bit_depth, int pu_width,
                                          int pu_height, int *eps, int *xqd,
346
                                          int32_t *rstbuf) {
347
  int32_t *flt1 = rstbuf;
348
  int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
349
  int ep, bestep = 0;
350 351
  int64_t err, besterr = -1;
  int exqd[2], bestxqd[2] = { 0, 0 };
352 353
  int flt1_stride = ((width + 7) & ~7) + 8;
  int flt2_stride = ((width + 7) & ~7) + 8;
354 355 356 357
  assert(pu_width == (RESTORATION_PROC_UNIT_SIZE >> 1) ||
         pu_width == RESTORATION_PROC_UNIT_SIZE);
  assert(pu_height == (RESTORATION_PROC_UNIT_SIZE >> 1) ||
         pu_height == RESTORATION_PROC_UNIT_SIZE);
358 359 360
#if !CONFIG_HIGHBITDEPTH
  (void)bit_depth;
#endif
361

362 363
  for (ep = 0; ep < SGRPROJ_PARAMS; ep++) {
    int exq[2];
364
#if CONFIG_HIGHBITDEPTH
365
    if (use_highbitdepth) {
366
      uint16_t *dat = CONVERT_TO_SHORTPTR(dat8);
367 368 369 370 371 372 373
      for (int i = 0; i < height; i += pu_height)
        for (int j = 0; j < width; j += pu_width) {
          const int w = AOMMIN(pu_width, width - j);
          const int h = AOMMIN(pu_height, height - i);
          uint16_t *dat_p = dat + i * dat_stride + j;
          int32_t *flt1_p = flt1 + i * flt1_stride + j;
          int32_t *flt2_p = flt2 + i * flt2_stride + j;
374
#if USE_HIGHPASS_IN_SGRPROJ
375 376 377
          av1_highpass_filter_highbd(dat_p, w, h, dat_stride, flt1_p,
                                     flt1_stride, sgr_params[ep].corner,
                                     sgr_params[ep].edge);
378
#else
379
          av1_selfguided_restoration_highbd(
380
              dat_p, w, h, dat_stride, flt1_p, flt1_stride, bit_depth,
381
              sgr_params[ep].r1, sgr_params[ep].e1);
382
#endif  // USE_HIGHPASS_IN_SGRPROJ
383
          av1_selfguided_restoration_highbd(
384
              dat_p, w, h, dat_stride, flt2_p, flt2_stride, bit_depth,
385
              sgr_params[ep].r2, sgr_params[ep].e2);
386
        }
387
    } else {
388
#endif
389 390 391 392 393 394 395
      for (int i = 0; i < height; i += pu_height)
        for (int j = 0; j < width; j += pu_width) {
          const int w = AOMMIN(pu_width, width - j);
          const int h = AOMMIN(pu_height, height - i);
          uint8_t *dat_p = dat8 + i * dat_stride + j;
          int32_t *flt1_p = flt1 + i * flt1_stride + j;
          int32_t *flt2_p = flt2 + i * flt2_stride + j;
396
#if USE_HIGHPASS_IN_SGRPROJ
397 398
          av1_highpass_filter(dat_p, w, h, dat_stride, flt1_p, flt1_stride,
                              sgr_params[ep].corner, sgr_params[ep].edge);
399
#else
400
        av1_selfguided_restoration(dat_p, w, h, dat_stride, flt1_p, flt1_stride,
401
                                   sgr_params[ep].r1, sgr_params[ep].e1);
402
#endif  // USE_HIGHPASS_IN_SGRPROJ
403 404
          av1_selfguided_restoration(dat_p, w, h, dat_stride, flt2_p,
                                     flt2_stride, sgr_params[ep].r2,
405
                                     sgr_params[ep].e2);
406
        }
407
#if CONFIG_HIGHBITDEPTH
408
    }
409
#endif
410
    aom_clear_system_state();
411
    get_proj_subspace(src8, width, height, src_stride, dat8, dat_stride,
412 413
                      use_highbitdepth, flt1, flt1_stride, flt2, flt2_stride,
                      exq);
414
    aom_clear_system_state();
415
    encode_xq(exq, exqd);
416 417 418
    err = finer_search_pixel_proj_error(
        src8, width, height, src_stride, dat8, dat_stride, use_highbitdepth,
        flt1, flt1_stride, flt2, flt2_stride, 2, exqd);
419 420 421 422 423 424 425 426 427 428 429 430
    if (besterr == -1 || err < besterr) {
      bestep = ep;
      besterr = err;
      bestxqd[0] = exqd[0];
      bestxqd[1] = exqd[1];
    }
  }
  *eps = bestep;
  xqd[0] = bestxqd[0];
  xqd[1] = bestxqd[1];
}

431 432 433 434 435 436 437 438 439 440 441 442 443 444
static int count_sgrproj_bits(SgrprojInfo *sgrproj_info,
                              SgrprojInfo *ref_sgrproj_info) {
  int bits = SGRPROJ_PARAMS_BITS;
  bits += aom_count_primitive_refsubexpfin(
      SGRPROJ_PRJ_MAX0 - SGRPROJ_PRJ_MIN0 + 1, SGRPROJ_PRJ_SUBEXP_K,
      ref_sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0,
      sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0);
  bits += aom_count_primitive_refsubexpfin(
      SGRPROJ_PRJ_MAX1 - SGRPROJ_PRJ_MIN1 + 1, SGRPROJ_PRJ_SUBEXP_K,
      ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1,
      sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1);
  return bits;
}

445 446 447 448 449 450 451 452 453
struct rest_search_ctxt {
  const YV12_BUFFER_CONFIG *src;
  AV1_COMP *cpi;
  uint8_t *dgd_buffer;
  const uint8_t *src_buffer;
  int dgd_stride;
  int src_stride;
  RestorationInfo *info;
  RestorationType *type;
454
  int64_t *best_tile_cost;
455 456 457 458 459 460 461 462 463 464
  int plane;
  int plane_width;
  int plane_height;
  int nrtiles_x;
  int nrtiles_y;
  YV12_BUFFER_CONFIG *dst_frame;
};

// Fill in ctxt. Returns the number of restoration tiles for this plane
static INLINE int init_rest_search_ctxt(
465
    const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, int plane,
466
    RestorationInfo *info, RestorationType *type, int64_t *best_tile_cost,
467
    YV12_BUFFER_CONFIG *dst_frame, struct rest_search_ctxt *ctxt) {
468
  AV1_COMMON *const cm = &cpi->common;
469 470 471 472 473 474 475 476
  ctxt->src = src;
  ctxt->cpi = cpi;
  ctxt->info = info;
  ctxt->type = type;
  ctxt->best_tile_cost = best_tile_cost;
  ctxt->plane = plane;
  ctxt->dst_frame = dst_frame;

477
  const YV12_BUFFER_CONFIG *dgd = cm->frame_to_show;
478 479 480 481 482 483 484 485 486
  const int is_uv = plane != AOM_PLANE_Y;
  ctxt->plane_width = src->crop_widths[is_uv];
  ctxt->plane_height = src->crop_heights[is_uv];
  ctxt->src_buffer = src->buffers[plane];
  ctxt->src_stride = src->strides[is_uv];
  ctxt->dgd_buffer = dgd->buffers[plane];
  ctxt->dgd_stride = dgd->strides[is_uv];
  assert(src->crop_widths[is_uv] == dgd->crop_widths[is_uv]);
  assert(src->crop_heights[is_uv] == dgd->crop_heights[is_uv]);
487

488
  return av1_get_rest_ntiles(ctxt->plane_width, ctxt->plane_height,
489 490
                             cm->rst_info[plane].restoration_tilesize,
                             &ctxt->nrtiles_x, &ctxt->nrtiles_y);
491
}
492

493
typedef void (*rtile_visitor_t)(const struct rest_search_ctxt *search_ctxt,
494 495
                                int rtile_idx,
                                const RestorationTileLimits *limits, void *arg);
496 497 498 499 500 501

static void foreach_rtile_in_tile(const struct rest_search_ctxt *ctxt,
                                  int tile_row, int tile_col,
                                  rtile_visitor_t fun, void *arg) {
  const AV1_COMMON *const cm = &ctxt->cpi->common;
  const RestorationInfo *rsi = ctxt->cpi->rst_search;
Dominic Symes's avatar
Dominic Symes committed
502 503 504 505 506 507 508 509 510 511 512 513 514 515 516
  TileInfo tile_info;

  av1_tile_set_row(&tile_info, cm, tile_row);
  av1_tile_set_col(&tile_info, cm, tile_col);

  int tile_col_start = tile_info.mi_col_start * MI_SIZE;
  int tile_col_end = tile_info.mi_col_end * MI_SIZE;
  int tile_row_start = tile_info.mi_row_start * MI_SIZE;
  int tile_row_end = tile_info.mi_row_end * MI_SIZE;
  if (ctxt->plane > 0) {
    tile_col_start = ROUND_POWER_OF_TWO(tile_col_start, cm->subsampling_x);
    tile_col_end = ROUND_POWER_OF_TWO(tile_col_end, cm->subsampling_x);
    tile_row_start = ROUND_POWER_OF_TWO(tile_row_start, cm->subsampling_y);
    tile_row_end = ROUND_POWER_OF_TWO(tile_row_end, cm->subsampling_y);
  }
517

518 519 520 521 522
#if CONFIG_FRAME_SUPERRES
  // If upscaling is enabled, the tile limits need scaling to match the
  // upscaled frame where the restoration tiles live. To do this, scale up the
  // top-left and bottom-right of the tile.
  if (!av1_superres_unscaled(cm)) {
523 524 525 526
    av1_calculate_unscaled_superres_size(&tile_col_start, &tile_row_start,
                                         cm->superres_scale_denominator);
    av1_calculate_unscaled_superres_size(&tile_col_end, &tile_row_end,
                                         cm->superres_scale_denominator);
527 528 529 530 531 532
    // Make sure we don't fall off the bottom-right of the frame.
    tile_col_end = AOMMIN(tile_col_end, ctxt->plane_width);
    tile_row_end = AOMMIN(tile_row_end, ctxt->plane_height);
  }
#endif  // CONFIG_FRAME_SUPERRES

533
  const int rtile_size = rsi->restoration_tilesize;
Dominic Symes's avatar
Dominic Symes committed
534
  const int rtile_col0 = (tile_col_start + rtile_size - 1) / rtile_size;
535
  const int rtile_col1 =
Dominic Symes's avatar
Dominic Symes committed
536 537 538 539
      AOMMIN((tile_col_end + rtile_size - 1) / rtile_size, ctxt->nrtiles_x);
  const int rtile_row0 = (tile_row_start + rtile_size - 1) / rtile_size;
  const int rtile_row1 =
      AOMMIN((tile_row_end + rtile_size - 1) / rtile_size, ctxt->nrtiles_y);
540
  const int ss_y = ctxt->plane > 0 && cm->subsampling_y;
541 542 543 544

  for (int rtile_row = rtile_row0; rtile_row < rtile_row1; ++rtile_row) {
    for (int rtile_col = rtile_col0; rtile_col < rtile_col1; ++rtile_col) {
      const int rtile_idx = rtile_row * ctxt->nrtiles_x + rtile_col;
545
      RestorationTileLimits limits = av1_get_rest_tile_limits(
546 547
          rtile_idx, ctxt->nrtiles_x, ctxt->nrtiles_y, rtile_size,
          ctxt->plane_width, ctxt->plane_height, ss_y);
548
      fun(ctxt, rtile_idx, &limits, arg);
549
    }
550
  }
551 552 553
}

static void search_sgrproj_for_rtile(const struct rest_search_ctxt *ctxt,
554 555 556
                                     int rtile_idx,
                                     const RestorationTileLimits *limits,
                                     void *arg) {
557 558 559 560 561 562 563
  const MACROBLOCK *const x = &ctxt->cpi->td.mb;
  const AV1_COMMON *const cm = &ctxt->cpi->common;
  RestorationInfo *rsi = ctxt->cpi->rst_search;
  SgrprojInfo *sgrproj_info = ctxt->info->sgrproj_info;

  SgrprojInfo *ref_sgrproj_info = (SgrprojInfo *)arg;

564 565 566 567
  int64_t err =
      sse_restoration_tile(ctxt->src, cm->frame_to_show, cm, limits->h_start,
                           limits->h_end - limits->h_start, limits->v_start,
                           limits->v_end - limits->v_start, (1 << ctxt->plane));
568
  // #bits when a tile is not restored
569
  int bits = x->sgrproj_restore_cost[0];
570
  double cost_norestore = RDCOST_DBL(x->rdmult, (bits >> 4), err);
571
  ctxt->best_tile_cost[rtile_idx] = INT64_MAX;
572 573 574

  RestorationInfo *plane_rsi = &rsi[ctxt->plane];
  SgrprojInfo *rtile_sgrproj_info = &plane_rsi->sgrproj_info[rtile_idx];
575 576
  uint8_t *dgd_start =
      ctxt->dgd_buffer + limits->v_start * ctxt->dgd_stride + limits->h_start;
577
  const uint8_t *src_start =
578
      ctxt->src_buffer + limits->v_start * ctxt->src_stride + limits->h_start;
579

580
  search_selfguided_restoration(
581 582
      dgd_start, limits->h_end - limits->h_start,
      limits->v_end - limits->v_start, ctxt->dgd_stride, src_start,
583
      ctxt->src_stride,
584
#if CONFIG_HIGHBITDEPTH
585
      cm->use_highbitdepth, cm->bit_depth,
586
#else
587
      0, 8,
588
#endif  // CONFIG_HIGHBITDEPTH
589
      rsi[ctxt->plane].procunit_width, rsi[ctxt->plane].procunit_height,
590
      &rtile_sgrproj_info->ep, rtile_sgrproj_info->xqd, cm->rst_tmpbuf);
591 592
  plane_rsi->restoration_type[rtile_idx] = RESTORE_SGRPROJ;
  err = try_restoration_tile(ctxt->src, ctxt->cpi, rsi, (1 << ctxt->plane),
593
                             rtile_idx, ctxt->dst_frame);
594 595 596
  bits =
      count_sgrproj_bits(&plane_rsi->sgrproj_info[rtile_idx], ref_sgrproj_info)
      << AV1_PROB_COST_SHIFT;
597
  bits += x->sgrproj_restore_cost[1];
598 599 600 601 602 603 604 605 606 607 608 609 610
  double cost_sgrproj = RDCOST_DBL(x->rdmult, (bits >> 4), err);
  if (cost_sgrproj >= cost_norestore) {
    ctxt->type[rtile_idx] = RESTORE_NONE;
  } else {
    ctxt->type[rtile_idx] = RESTORE_SGRPROJ;
    *ref_sgrproj_info = sgrproj_info[rtile_idx] =
        plane_rsi->sgrproj_info[rtile_idx];
    ctxt->best_tile_cost[rtile_idx] = err;
  }
  plane_rsi->restoration_type[rtile_idx] = RESTORE_NONE;
}

static double search_sgrproj(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
611 612
                             int plane, RestorationInfo *info,
                             RestorationType *type, int64_t *best_tile_cost,
613
                             YV12_BUFFER_CONFIG *dst_frame) {
614
  const MACROBLOCK *const x = &cpi->td.mb;
615
  struct rest_search_ctxt ctxt;
616 617
  const int nrtiles = init_rest_search_ctxt(src, cpi, plane, info, type,
                                            best_tile_cost, dst_frame, &ctxt);
618 619 620 621 622 623 624 625 626 627

  RestorationInfo *plane_rsi = &cpi->rst_search[plane];
  plane_rsi->frame_restoration_type = RESTORE_SGRPROJ;
  for (int rtile_idx = 0; rtile_idx < nrtiles; ++rtile_idx) {
    plane_rsi->restoration_type[rtile_idx] = RESTORE_NONE;
  }

  // Compute best Sgrproj filters for each rtile, one (encoder/decoder)
  // tile at a time.
  const AV1_COMMON *const cm = &cpi->common;
628 629 630
#if CONFIG_HIGHBITDEPTH
  if (cm->use_highbitdepth)
    extend_frame_highbd(CONVERT_TO_SHORTPTR(ctxt.dgd_buffer), ctxt.plane_width,
631 632
                        ctxt.plane_height, ctxt.dgd_stride, SGRPROJ_BORDER_HORZ,
                        SGRPROJ_BORDER_VERT);
633 634 635
  else
#endif
    extend_frame(ctxt.dgd_buffer, ctxt.plane_width, ctxt.plane_height,
636
                 ctxt.dgd_stride, SGRPROJ_BORDER_HORZ, SGRPROJ_BORDER_VERT);
637

638 639 640 641 642 643
  for (int tile_row = 0; tile_row < cm->tile_rows; ++tile_row) {
    for (int tile_col = 0; tile_col < cm->tile_cols; ++tile_col) {
      SgrprojInfo ref_sgrproj_info;
      set_default_sgrproj(&ref_sgrproj_info);
      foreach_rtile_in_tile(&ctxt, tile_row, tile_col, search_sgrproj_for_rtile,
                            &ref_sgrproj_info);
644 645
    }
  }
646

647
  // Cost for Sgrproj filtering
648
  SgrprojInfo ref_sgrproj_info;
649
  set_default_sgrproj(&ref_sgrproj_info);
650 651 652 653 654
  SgrprojInfo *sgrproj_info = info->sgrproj_info;

  int bits = frame_level_restore_bits[plane_rsi->frame_restoration_type]
             << AV1_PROB_COST_SHIFT;
  for (int rtile_idx = 0; rtile_idx < nrtiles; ++rtile_idx) {
655
    bits += x->sgrproj_restore_cost[type[rtile_idx] != RESTORE_NONE];
656 657 658
    plane_rsi->sgrproj_info[rtile_idx] = sgrproj_info[rtile_idx];
    if (type[rtile_idx] == RESTORE_SGRPROJ) {
      bits += count_sgrproj_bits(&plane_rsi->sgrproj_info[rtile_idx],
659 660
                                 &ref_sgrproj_info)
              << AV1_PROB_COST_SHIFT;
661
      ref_sgrproj_info = plane_rsi->sgrproj_info[rtile_idx];
662
    }
663
    plane_rsi->restoration_type[rtile_idx] = type[rtile_idx];
664
  }
665 666
  int64_t err =
      try_restoration_frame(src, cpi, cpi->rst_search, (1 << plane), dst_frame);
667
  double cost_sgrproj = RDCOST_DBL(cpi->td.mb.rdmult, (bits >> 4), err);
668 669 670
  return cost_sgrproj;
}

671 672
static double find_average(const uint8_t *src, int h_start, int h_end,
                           int v_start, int v_end, int stride) {
673 674 675
  uint64_t sum = 0;
  double avg = 0;
  int i, j;
676
  aom_clear_system_state();
677 678 679
  for (i = v_start; i < v_end; i++)
    for (j = h_start; j < h_end; j++) sum += src[i * stride + j];
  avg = (double)sum / ((v_end - v_start) * (h_end - h_start));
680 681 682
  return avg;
}

683 684 685 686
static void compute_stats(int wiener_win, const uint8_t *dgd,
                          const uint8_t *src, int h_start, int h_end,
                          int v_start, int v_end, int dgd_stride,
                          int src_stride, double *M, double *H) {
687
  int i, j, k, l;
688
  double Y[WIENER_WIN2];
689 690
  const int wiener_win2 = wiener_win * wiener_win;
  const int wiener_halfwin = (wiener_win >> 1);
691 692
  const double avg =
      find_average(dgd, h_start, h_end, v_start, v_end, dgd_stride);
693

694 695
  memset(M, 0, sizeof(*M) * wiener_win2);
  memset(H, 0, sizeof(*H) * wiener_win2 * wiener_win2);
696 697
  for (i = v_start; i < v_end; i++) {
    for (j = h_start; j < h_end; j++) {
698 699
      const double X = (double)src[i * src_stride + j] - avg;
      int idx = 0;
700 701
      for (k = -wiener_halfwin; k <= wiener_halfwin; k++) {
        for (l = -wiener_halfwin; l <= wiener_halfwin; l++) {
702 703 704 705
          Y[idx] = (double)dgd[(i + l) * dgd_stride + (j + k)] - avg;
          idx++;
        }
      }
706
      assert(idx == wiener_win2);
707
      for (k = 0; k < wiener_win2; ++k) {
708
        M[k] += Y[k] * X;
709 710
        H[k * wiener_win2 + k] += Y[k] * Y[k];
        for (l = k + 1; l < wiener_win2; ++l) {
711 712 713
          // H is a symmetric matrix, so we only need to fill out the upper
          // triangle here. We can copy it down to the lower triangle outside
          // the (i, j) loops.
714
          H[k * wiener_win2 + l] += Y[k] * Y[l];
715 716 717 718
        }
      }
    }
  }
719 720 721
  for (k = 0; k < wiener_win2; ++k) {
    for (l = k + 1; l < wiener_win2; ++l) {
      H[l * wiener_win2 + k] = H[k * wiener_win2 + l];
722 723
    }
  }
724 725
}

726
#if CONFIG_HIGHBITDEPTH
727
static double find_average_highbd(const uint16_t *src, int h_start, int h_end,
728
                                  int v_start, int v_end, int stride) {
729 730 731
  uint64_t sum = 0;
  double avg = 0;
  int i, j;
732
  aom_clear_system_state();
733 734 735
  for (i = v_start; i < v_end; i++)
    for (j = h_start; j < h_end; j++) sum += src[i * stride + j];
  avg = (double)sum / ((v_end - v_start) * (h_end - h_start));
736 737 738
  return avg;
}

739 740 741 742
static void compute_stats_highbd(int wiener_win, const uint8_t *dgd8,
                                 const uint8_t *src8, int h_start, int h_end,
                                 int v_start, int v_end, int dgd_stride,
                                 int src_stride, double *M, double *H) {
743
  int i, j, k, l;
744
  double Y[WIENER_WIN2];
745 746
  const int wiener_win2 = wiener_win * wiener_win;
  const int wiener_halfwin = (wiener_win >> 1);
747 748
  const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  const uint16_t *dgd = CONVERT_TO_SHORTPTR(dgd8);
749 750
  const double avg =
      find_average_highbd(dgd, h_start, h_end, v_start, v_end, dgd_stride);
751

752 753
  memset(M, 0, sizeof(*M) * wiener_win2);
  memset(H, 0, sizeof(*H) * wiener_win2 * wiener_win2);
754 755
  for (i = v_start; i < v_end; i++) {
    for (j = h_start; j < h_end; j++) {
756 757
      const double X = (double)src[i * src_stride + j] - avg;
      int idx = 0;
758 759
      for (k = -wiener_halfwin; k <= wiener_halfwin; k++) {
        for (l = -wiener_halfwin; l <= wiener_halfwin; l++) {
760 761 762 763
          Y[idx] = (double)dgd[(i + l) * dgd_stride + (j + k)] - avg;
          idx++;
        }
      }
764
      assert(idx == wiener_win2);
765
      for (k = 0; k < wiener_win2; ++k) {
766
        M[k] += Y[k] * X;
767 768
        H[k * wiener_win2 + k] += Y[k] * Y[k];
        for (l = k + 1; l < wiener_win2; ++l) {
769 770 771
          // H is a symmetric matrix, so we only need to fill out the upper
          // triangle here. We can copy it down to the lower triangle outside
          // the (i, j) loops.
772
          H[k * wiener_win2 + l] += Y[k] * Y[l];
773 774 775 776
        }
      }
    }
  }
777 778 779
  for (k = 0; k < wiener_win2; ++k) {
    for (l = k + 1; l < wiener_win2; ++l) {
      H[l * wiener_win2 + k] = H[k * wiener_win2 + l];
780 781
    }
  }
782
}
783
#endif  // CONFIG_HIGHBITDEPTH
784

785 786 787
static INLINE int wrap_index(int i, int wiener_win) {
  const int wiener_halfwin1 = (wiener_win >> 1) + 1;
  return (i >= wiener_halfwin1 ? wiener_win - 1 - i : i);
788 789 790
}

// Fix vector b, update vector a
791 792
static void update_a_sep_sym(int wiener_win, double **Mc, double **Hc,
                             double *a, double *b) {
793
  int i, j;
794
  double S[WIENER_WIN];
795
  double A[WIENER_HALFWIN1], B[WIENER_HALFWIN1 * WIENER_HALFWIN1];
796 797
  const int wiener_win2 = wiener_win * wiener_win;
  const int wiener_halfwin1 = (wiener_win >> 1) + 1;
798 799
  memset(A, 0, sizeof(A));
  memset(B, 0, sizeof(B));
800 801 802
  for (i = 0; i < wiener_win; i++) {
    for (j = 0; j < wiener_win; ++j) {
      const int jj = wrap_index(j, wiener_win);
803 804 805
      A[jj] += Mc[i][j] * b[i];
    }
  }
806 807
  for (i = 0; i < wiener_win; i++) {
    for (j = 0; j < wiener_win; j++) {
808
      int k, l;
809 810 811 812 813 814
      for (k = 0; k < wiener_win; ++k)
        for (l = 0; l < wiener_win; ++l) {
          const int kk = wrap_index(k, wiener_win);
          const int ll = wrap_index(l, wiener_win);
          B[ll * wiener_halfwin1 + kk] +=
              Hc[j * wiener_win + i][k * wiener_win2 + l] * b[i] * b[j];
815 816 817
        }
    }
  }
Aamir Anis's avatar
Aamir Anis committed
818
  // Normalization enforcement in the system of equations itself
819
  for (i = 0; i < wiener_halfwin1 - 1; ++i)
820
    A[i] -=
821 822 823 824 825 826 827 828 829 830 831 832 833 834 835
        A[wiener_halfwin1 - 1] * 2 +
        B[i * wiener_halfwin1 + wiener_halfwin1 - 1] -
        2 * B[(wiener_halfwin1 - 1) * wiener_halfwin1 + (wiener_halfwin1 - 1)];
  for (i = 0; i < wiener_halfwin1 - 1; ++i)
    for (j = 0; j < wiener_halfwin1 - 1; ++j)
      B[i * wiener_halfwin1 + j] -=
          2 * (B[i * wiener_halfwin1 + (wiener_halfwin1 - 1)] +
               B[(wiener_halfwin1 - 1) * wiener_halfwin1 + j] -
               2 * B[(wiener_halfwin1 - 1) * wiener_halfwin1 +
                     (wiener_halfwin1 - 1)]);
  if (linsolve(wiener_halfwin1 - 1, B, wiener_halfwin1, A, S)) {
    S[wiener_halfwin1 - 1] = 1.0;
    for (i = wiener_halfwin1; i < wiener_win; ++i) {
      S[i] = S[wiener_win - 1 - i];
      S[wiener_halfwin1 - 1] -= 2 * S[i];
836
    }
837
    memcpy(a, S, wiener_win * sizeof(*a));
838 839 840 841
  }
}

// Fix vector a, update vector b
842 843
static void update_b_sep_sym(int wiener_win, double **Mc, double **Hc,
                             double *a, double *b) {
844
  int i, j;
845
  double S[WIENER_WIN];
846
  double A[WIENER_HALFWIN1], B[WIENER_HALFWIN1 * WIENER_HALFWIN1];
847 848
  const int wiener_win2 = wiener_win * wiener_win;
  const int wiener_halfwin1 = (wiener_win >> 1) + 1;
849 850
  memset(A, 0, sizeof(A));
  memset(B, 0, sizeof(B));
851 852