pickrst.c 57.3 KB
Newer Older
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3
 *
4 5 6 7 8 9
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 11 12
 */

#include <assert.h>
13
#include <float.h>
14 15 16
#include <limits.h>
#include <math.h>

Yaowu Xu's avatar
Yaowu Xu committed
17
#include "./aom_scale_rtcd.h"
18

Yaowu Xu's avatar
Yaowu Xu committed
19
#include "aom_dsp/aom_dsp_common.h"
20 21
#include "aom_dsp/binary_codes_writer.h"
#include "aom_dsp/psnr.h"
Yaowu Xu's avatar
Yaowu Xu committed
22
#include "aom_mem/aom_mem.h"
23
#include "aom_ports/mem.h"
24
#include "aom_ports/system_state.h"
25

26 27
#include "av1/common/onyxc_int.h"
#include "av1/common/quant_common.h"
28
#include "av1/common/restoration.h"
29

30
#include "av1/encoder/av1_quantize.h"
31
#include "av1/encoder/encoder.h"
32
#include "av1/encoder/mathutils.h"
33 34
#include "av1/encoder/picklpf.h"
#include "av1/encoder/pickrst.h"
35

36
// When set to RESTORE_WIENER or RESTORE_SGRPROJ only those are allowed.
37
// When set to RESTORE_TYPES we allow switchable.
38
static const RestorationType force_restore_type = RESTORE_TYPES;
39 40

// Number of Wiener iterations
41
#define NUM_WIENER_ITERS 5
42

43
typedef double (*search_restore_type)(const YV12_BUFFER_CONFIG *src,
44 45
                                      AV1_COMP *cpi, int plane,
                                      RestorationInfo *info,
46
                                      RestorationType *rest_level,
47
                                      int64_t *best_tile_cost,
48
                                      YV12_BUFFER_CONFIG *dst_frame);
49

50
const int frame_level_restore_bits[RESTORE_TYPES] = { 2, 2, 2, 2 };
51 52

static int64_t sse_restoration_tile(const YV12_BUFFER_CONFIG *src,
53 54
                                    const YV12_BUFFER_CONFIG *dst,
                                    const AV1_COMMON *cm, int h_start,
55
                                    int width, int v_start, int height,
56 57
                                    int components_pattern) {
  int64_t filt_err = 0;
58 59 60 61
  (void)cm;
  // Y and UV components cannot be mixed
  assert(components_pattern == 1 || components_pattern == 2 ||
         components_pattern == 4 || components_pattern == 6);
62
#if CONFIG_HIGHBITDEPTH
63
  if (cm->use_highbitdepth) {
64 65 66 67 68
    if ((components_pattern >> AOM_PLANE_Y) & 1) {
      filt_err +=
          aom_highbd_get_y_sse_part(src, dst, h_start, width, v_start, height);
    }
    if ((components_pattern >> AOM_PLANE_U) & 1) {
69 70
      filt_err +=
          aom_highbd_get_u_sse_part(src, dst, h_start, width, v_start, height);
71 72
    }
    if ((components_pattern >> AOM_PLANE_V) & 1) {
73 74
      filt_err +=
          aom_highbd_get_v_sse_part(src, dst, h_start, width, v_start, height);
75 76
    }
    return filt_err;
77
  }
78
#endif  // CONFIG_HIGHBITDEPTH
79 80 81 82
  if ((components_pattern >> AOM_PLANE_Y) & 1) {
    filt_err += aom_get_y_sse_part(src, dst, h_start, width, v_start, height);
  }
  if ((components_pattern >> AOM_PLANE_U) & 1) {
83
    filt_err += aom_get_u_sse_part(src, dst, h_start, width, v_start, height);
84 85
  }
  if ((components_pattern >> AOM_PLANE_V) & 1) {
86
    filt_err += aom_get_v_sse_part(src, dst, h_start, width, v_start, height);
87
  }
88 89 90
  return filt_err;
}

91 92
static int64_t sse_restoration_frame(AV1_COMMON *const cm,
                                     const YV12_BUFFER_CONFIG *src,
93 94 95
                                     const YV12_BUFFER_CONFIG *dst,
                                     int components_pattern) {
  int64_t filt_err = 0;
96
#if CONFIG_HIGHBITDEPTH
97 98 99 100 101 102 103 104 105 106 107 108
  if (cm->use_highbitdepth) {
    if ((components_pattern >> AOM_PLANE_Y) & 1) {
      filt_err += aom_highbd_get_y_sse(src, dst);
    }
    if ((components_pattern >> AOM_PLANE_U) & 1) {
      filt_err += aom_highbd_get_u_sse(src, dst);
    }
    if ((components_pattern >> AOM_PLANE_V) & 1) {
      filt_err += aom_highbd_get_v_sse(src, dst);
    }
    return filt_err;
  }
109 110
#else
  (void)cm;
111
#endif  // CONFIG_HIGHBITDEPTH
112 113 114 115 116 117 118 119 120 121 122 123
  if ((components_pattern >> AOM_PLANE_Y) & 1) {
    filt_err = aom_get_y_sse(src, dst);
  }
  if ((components_pattern >> AOM_PLANE_U) & 1) {
    filt_err += aom_get_u_sse(src, dst);
  }
  if ((components_pattern >> AOM_PLANE_V) & 1) {
    filt_err += aom_get_v_sse(src, dst);
  }
  return filt_err;
}

124 125
static int64_t try_restoration_tile(const YV12_BUFFER_CONFIG *src,
                                    AV1_COMP *const cpi, RestorationInfo *rsi,
126
                                    int components_pattern, int tile_idx,
127
                                    YV12_BUFFER_CONFIG *dst_frame) {
128
  AV1_COMMON *const cm = &cpi->common;
129 130 131 132 133

  // Y and UV components cannot be mixed
  assert(components_pattern == 1 || components_pattern == 2 ||
         components_pattern == 4 || components_pattern == 6);

134 135 136 137 138 139 140 141 142
  const int is_uv = components_pattern > 1;
  const int width = src->crop_widths[is_uv];
  const int height = src->crop_heights[is_uv];

  const int rtile_size = cm->rst_info[is_uv].restoration_tilesize;
  const int ss_y = is_uv && cm->subsampling_y;

  int nhtiles, nvtiles;
  av1_get_rest_ntiles(width, height, rtile_size, &nhtiles, &nvtiles);
143

144 145
  av1_loop_restoration_filter_frame(cm->frame_to_show, cm, rsi,
                                    components_pattern, dst_frame);
146
  RestorationTileLimits limits = av1_get_rest_tile_limits(
147 148
      tile_idx, nhtiles, nvtiles, rtile_size, width, height, ss_y);
  int64_t filt_err = sse_restoration_tile(
149 150
      src, dst_frame, cm, limits.h_start, limits.h_end - limits.h_start,
      limits.v_start, limits.v_end - limits.v_start, components_pattern);
151 152 153 154 155

  return filt_err;
}

static int64_t try_restoration_frame(const YV12_BUFFER_CONFIG *src,
Yaowu Xu's avatar
Yaowu Xu committed
156
                                     AV1_COMP *const cpi, RestorationInfo *rsi,
157
                                     int components_pattern,
158
                                     YV12_BUFFER_CONFIG *dst_frame) {
Yaowu Xu's avatar
Yaowu Xu committed
159
  AV1_COMMON *const cm = &cpi->common;
160
  int64_t filt_err;
161 162
  av1_loop_restoration_filter_frame(cm->frame_to_show, cm, rsi,
                                    components_pattern, dst_frame);
163
  filt_err = sse_restoration_frame(cm, src, dst_frame, components_pattern);
164 165 166
  return filt_err;
}

167 168
static int64_t get_pixel_proj_error(const uint8_t *src8, int width, int height,
                                    int src_stride, const uint8_t *dat8,
169
                                    int dat_stride, int use_highbitdepth,
170 171
                                    int32_t *flt1, int flt1_stride,
                                    int32_t *flt2, int flt2_stride, int *xqd) {
172 173 174 175
  int i, j;
  int64_t err = 0;
  int xq[2];
  decode_xq(xqd, xq);
176
  if (!use_highbitdepth) {
177 178 179 180 181 182 183 184
    const uint8_t *src = src8;
    const uint8_t *dat = dat8;
    for (i = 0; i < height; ++i) {
      for (j = 0; j < width; ++j) {
        const int32_t u =
            (int32_t)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS);
        const int32_t f1 = (int32_t)flt1[i * flt1_stride + j] - u;
        const int32_t f2 = (int32_t)flt2[i * flt2_stride + j] - u;
David Barker's avatar
David Barker committed
185
        const int32_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS);
186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
        const int32_t e =
            ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) -
            src[i * src_stride + j];
        err += e * e;
      }
    }
  } else {
    const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
    const uint16_t *dat = CONVERT_TO_SHORTPTR(dat8);
    for (i = 0; i < height; ++i) {
      for (j = 0; j < width; ++j) {
        const int32_t u =
            (int32_t)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS);
        const int32_t f1 = (int32_t)flt1[i * flt1_stride + j] - u;
        const int32_t f2 = (int32_t)flt2[i * flt2_stride + j] - u;
David Barker's avatar
David Barker committed
201
        const int32_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS);
202 203 204 205 206
        const int32_t e =
            ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) -
            src[i * src_stride + j];
        err += e * e;
      }
207 208 209 210 211
    }
  }
  return err;
}

212 213
#define USE_SGRPROJ_REFINEMENT_SEARCH 1
static int64_t finer_search_pixel_proj_error(
214
    const uint8_t *src8, int width, int height, int src_stride,
215
    const uint8_t *dat8, int dat_stride, int use_highbitdepth, int32_t *flt1,
216
    int flt1_stride, int32_t *flt2, int flt2_stride, int start_step, int *xqd) {
217
  int64_t err = get_pixel_proj_error(src8, width, height, src_stride, dat8,
218 219
                                     dat_stride, use_highbitdepth, flt1,
                                     flt1_stride, flt2, flt2_stride, xqd);
220 221 222 223 224 225 226 227 228 229 230 231
  (void)start_step;
#if USE_SGRPROJ_REFINEMENT_SEARCH
  int64_t err2;
  int tap_min[] = { SGRPROJ_PRJ_MIN0, SGRPROJ_PRJ_MIN1 };
  int tap_max[] = { SGRPROJ_PRJ_MAX0, SGRPROJ_PRJ_MAX1 };
  for (int s = start_step; s >= 1; s >>= 1) {
    for (int p = 0; p < 2; ++p) {
      int skip = 0;
      do {
        if (xqd[p] - s >= tap_min[p]) {
          xqd[p] -= s;
          err2 = get_pixel_proj_error(src8, width, height, src_stride, dat8,
232 233
                                      dat_stride, use_highbitdepth, flt1,
                                      flt1_stride, flt2, flt2_stride, xqd);
234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249
          if (err2 > err) {
            xqd[p] += s;
          } else {
            err = err2;
            skip = 1;
            // At the highest step size continue moving in the same direction
            if (s == start_step) continue;
          }
        }
        break;
      } while (1);
      if (skip) break;
      do {
        if (xqd[p] + s <= tap_max[p]) {
          xqd[p] += s;
          err2 = get_pixel_proj_error(src8, width, height, src_stride, dat8,
250 251
                                      dat_stride, use_highbitdepth, flt1,
                                      flt1_stride, flt2, flt2_stride, xqd);
252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267
          if (err2 > err) {
            xqd[p] -= s;
          } else {
            err = err2;
            // At the highest step size continue moving in the same direction
            if (s == start_step) continue;
          }
        }
        break;
      } while (1);
    }
  }
#endif  // USE_SGRPROJ_REFINEMENT_SEARCH
  return err;
}

268
static void get_proj_subspace(const uint8_t *src8, int width, int height,
269
                              int src_stride, uint8_t *dat8, int dat_stride,
270 271 272
                              int use_highbitdepth, int32_t *flt1,
                              int flt1_stride, int32_t *flt2, int flt2_stride,
                              int *xq) {
273 274 275 276 277 278 279
  int i, j;
  double H[2][2] = { { 0, 0 }, { 0, 0 } };
  double C[2] = { 0, 0 };
  double Det;
  double x[2];
  const int size = width * height;

280 281
  aom_clear_system_state();

282 283 284
  // Default
  xq[0] = 0;
  xq[1] = 0;
285
  if (!use_highbitdepth) {
286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317
    const uint8_t *src = src8;
    const uint8_t *dat = dat8;
    for (i = 0; i < height; ++i) {
      for (j = 0; j < width; ++j) {
        const double u = (double)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS);
        const double s =
            (double)(src[i * src_stride + j] << SGRPROJ_RST_BITS) - u;
        const double f1 = (double)flt1[i * flt1_stride + j] - u;
        const double f2 = (double)flt2[i * flt2_stride + j] - u;
        H[0][0] += f1 * f1;
        H[1][1] += f2 * f2;
        H[0][1] += f1 * f2;
        C[0] += f1 * s;
        C[1] += f2 * s;
      }
    }
  } else {
    const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
    const uint16_t *dat = CONVERT_TO_SHORTPTR(dat8);
    for (i = 0; i < height; ++i) {
      for (j = 0; j < width; ++j) {
        const double u = (double)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS);
        const double s =
            (double)(src[i * src_stride + j] << SGRPROJ_RST_BITS) - u;
        const double f1 = (double)flt1[i * flt1_stride + j] - u;
        const double f2 = (double)flt2[i * flt2_stride + j] - u;
        H[0][0] += f1 * f1;
        H[1][1] += f2 * f2;
        H[0][1] += f1 * f2;
        C[0] += f1 * s;
        C[1] += f2 * s;
      }
318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334
    }
  }
  H[0][0] /= size;
  H[0][1] /= size;
  H[1][1] /= size;
  H[1][0] = H[0][1];
  C[0] /= size;
  C[1] /= size;
  Det = (H[0][0] * H[1][1] - H[0][1] * H[1][0]);
  if (Det < 1e-8) return;  // ill-posed, return default values
  x[0] = (H[1][1] * C[0] - H[0][1] * C[1]) / Det;
  x[1] = (H[0][0] * C[1] - H[1][0] * C[0]) / Det;
  xq[0] = (int)rint(x[0] * (1 << SGRPROJ_PRJ_BITS));
  xq[1] = (int)rint(x[1] * (1 << SGRPROJ_PRJ_BITS));
}

void encode_xq(int *xq, int *xqd) {
335
  xqd[0] = xq[0];
336
  xqd[0] = clamp(xqd[0], SGRPROJ_PRJ_MIN0, SGRPROJ_PRJ_MAX0);
337
  xqd[1] = (1 << SGRPROJ_PRJ_BITS) - xqd[0] - xq[1];
338 339 340 341
  xqd[1] = clamp(xqd[1], SGRPROJ_PRJ_MIN1, SGRPROJ_PRJ_MAX1);
}

static void search_selfguided_restoration(uint8_t *dat8, int width, int height,
342
                                          int dat_stride, const uint8_t *src8,
343
                                          int src_stride, int use_highbitdepth,
344 345
                                          int bit_depth, int pu_width,
                                          int pu_height, int *eps, int *xqd,
346
                                          int32_t *rstbuf) {
347
  int32_t *flt1 = rstbuf;
348
  int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
349
  int ep, bestep = 0;
350 351
  int64_t err, besterr = -1;
  int exqd[2], bestxqd[2] = { 0, 0 };
352 353
  int flt1_stride = ((width + 7) & ~7) + 8;
  int flt2_stride = ((width + 7) & ~7) + 8;
354 355 356 357
  assert(pu_width == (RESTORATION_PROC_UNIT_SIZE >> 1) ||
         pu_width == RESTORATION_PROC_UNIT_SIZE);
  assert(pu_height == (RESTORATION_PROC_UNIT_SIZE >> 1) ||
         pu_height == RESTORATION_PROC_UNIT_SIZE);
358 359 360
#if !CONFIG_HIGHBITDEPTH
  (void)bit_depth;
#endif
361

362 363
  for (ep = 0; ep < SGRPROJ_PARAMS; ep++) {
    int exq[2];
364
#if CONFIG_HIGHBITDEPTH
365
    if (use_highbitdepth) {
366
      uint16_t *dat = CONVERT_TO_SHORTPTR(dat8);
367 368 369 370 371 372 373
      for (int i = 0; i < height; i += pu_height)
        for (int j = 0; j < width; j += pu_width) {
          const int w = AOMMIN(pu_width, width - j);
          const int h = AOMMIN(pu_height, height - i);
          uint16_t *dat_p = dat + i * dat_stride + j;
          int32_t *flt1_p = flt1 + i * flt1_stride + j;
          int32_t *flt2_p = flt2 + i * flt2_stride + j;
374
#if USE_HIGHPASS_IN_SGRPROJ
375 376 377
          av1_highpass_filter_highbd(dat_p, w, h, dat_stride, flt1_p,
                                     flt1_stride, sgr_params[ep].corner,
                                     sgr_params[ep].edge);
378
#else
379
          av1_selfguided_restoration_highbd(
380
              dat_p, w, h, dat_stride, flt1_p, flt1_stride, bit_depth,
381
              sgr_params[ep].r1, sgr_params[ep].e1);
382
#endif  // USE_HIGHPASS_IN_SGRPROJ
383
          av1_selfguided_restoration_highbd(
384
              dat_p, w, h, dat_stride, flt2_p, flt2_stride, bit_depth,
385
              sgr_params[ep].r2, sgr_params[ep].e2);
386
        }
387
    } else {
388
#endif
389 390 391 392 393 394 395
      for (int i = 0; i < height; i += pu_height)
        for (int j = 0; j < width; j += pu_width) {
          const int w = AOMMIN(pu_width, width - j);
          const int h = AOMMIN(pu_height, height - i);
          uint8_t *dat_p = dat8 + i * dat_stride + j;
          int32_t *flt1_p = flt1 + i * flt1_stride + j;
          int32_t *flt2_p = flt2 + i * flt2_stride + j;
396
#if USE_HIGHPASS_IN_SGRPROJ
397 398
          av1_highpass_filter(dat_p, w, h, dat_stride, flt1_p, flt1_stride,
                              sgr_params[ep].corner, sgr_params[ep].edge);
399
#else
400
        av1_selfguided_restoration(dat_p, w, h, dat_stride, flt1_p, flt1_stride,
401
                                   sgr_params[ep].r1, sgr_params[ep].e1);
402
#endif  // USE_HIGHPASS_IN_SGRPROJ
403 404
          av1_selfguided_restoration(dat_p, w, h, dat_stride, flt2_p,
                                     flt2_stride, sgr_params[ep].r2,
405
                                     sgr_params[ep].e2);
406
        }
407
#if CONFIG_HIGHBITDEPTH
408
    }
409
#endif
410
    aom_clear_system_state();
411
    get_proj_subspace(src8, width, height, src_stride, dat8, dat_stride,
412 413
                      use_highbitdepth, flt1, flt1_stride, flt2, flt2_stride,
                      exq);
414
    aom_clear_system_state();
415
    encode_xq(exq, exqd);
416 417 418
    err = finer_search_pixel_proj_error(
        src8, width, height, src_stride, dat8, dat_stride, use_highbitdepth,
        flt1, flt1_stride, flt2, flt2_stride, 2, exqd);
419 420 421 422 423 424 425 426 427 428 429 430
    if (besterr == -1 || err < besterr) {
      bestep = ep;
      besterr = err;
      bestxqd[0] = exqd[0];
      bestxqd[1] = exqd[1];
    }
  }
  *eps = bestep;
  xqd[0] = bestxqd[0];
  xqd[1] = bestxqd[1];
}

431 432 433 434 435 436 437 438 439 440 441 442 443 444
static int count_sgrproj_bits(SgrprojInfo *sgrproj_info,
                              SgrprojInfo *ref_sgrproj_info) {
  int bits = SGRPROJ_PARAMS_BITS;
  bits += aom_count_primitive_refsubexpfin(
      SGRPROJ_PRJ_MAX0 - SGRPROJ_PRJ_MIN0 + 1, SGRPROJ_PRJ_SUBEXP_K,
      ref_sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0,
      sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0);
  bits += aom_count_primitive_refsubexpfin(
      SGRPROJ_PRJ_MAX1 - SGRPROJ_PRJ_MIN1 + 1, SGRPROJ_PRJ_SUBEXP_K,
      ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1,
      sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1);
  return bits;
}

445 446 447 448 449 450 451 452 453
struct rest_search_ctxt {
  const YV12_BUFFER_CONFIG *src;
  AV1_COMP *cpi;
  uint8_t *dgd_buffer;
  const uint8_t *src_buffer;
  int dgd_stride;
  int src_stride;
  RestorationInfo *info;
  RestorationType *type;
454
  int64_t *best_tile_cost;
455 456 457 458 459 460 461 462 463 464
  int plane;
  int plane_width;
  int plane_height;
  int nrtiles_x;
  int nrtiles_y;
  YV12_BUFFER_CONFIG *dst_frame;
};

// Fill in ctxt. Returns the number of restoration tiles for this plane
static INLINE int init_rest_search_ctxt(
465
    const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, int plane,
466
    RestorationInfo *info, RestorationType *type, int64_t *best_tile_cost,
467
    YV12_BUFFER_CONFIG *dst_frame, struct rest_search_ctxt *ctxt) {
468
  AV1_COMMON *const cm = &cpi->common;
469 470 471 472 473 474 475 476
  ctxt->src = src;
  ctxt->cpi = cpi;
  ctxt->info = info;
  ctxt->type = type;
  ctxt->best_tile_cost = best_tile_cost;
  ctxt->plane = plane;
  ctxt->dst_frame = dst_frame;

477
  const YV12_BUFFER_CONFIG *dgd = cm->frame_to_show;
478 479 480 481 482 483 484 485 486
  const int is_uv = plane != AOM_PLANE_Y;
  ctxt->plane_width = src->crop_widths[is_uv];
  ctxt->plane_height = src->crop_heights[is_uv];
  ctxt->src_buffer = src->buffers[plane];
  ctxt->src_stride = src->strides[is_uv];
  ctxt->dgd_buffer = dgd->buffers[plane];
  ctxt->dgd_stride = dgd->strides[is_uv];
  assert(src->crop_widths[is_uv] == dgd->crop_widths[is_uv]);
  assert(src->crop_heights[is_uv] == dgd->crop_heights[is_uv]);
487

488
  return av1_get_rest_ntiles(ctxt->plane_width, ctxt->plane_height,
489 490
                             cm->rst_info[plane].restoration_tilesize,
                             &ctxt->nrtiles_x, &ctxt->nrtiles_y);
491
}
492

493
typedef void (*rtile_visitor_t)(const struct rest_search_ctxt *search_ctxt,
494 495
                                int rtile_idx,
                                const RestorationTileLimits *limits, void *arg);
496 497 498 499 500 501

static void foreach_rtile_in_tile(const struct rest_search_ctxt *ctxt,
                                  int tile_row, int tile_col,
                                  rtile_visitor_t fun, void *arg) {
  const AV1_COMMON *const cm = &ctxt->cpi->common;
  const RestorationInfo *rsi = ctxt->cpi->rst_search;
Dominic Symes's avatar
Dominic Symes committed
502 503 504 505 506 507 508 509 510 511 512 513 514 515 516
  TileInfo tile_info;

  av1_tile_set_row(&tile_info, cm, tile_row);
  av1_tile_set_col(&tile_info, cm, tile_col);

  int tile_col_start = tile_info.mi_col_start * MI_SIZE;
  int tile_col_end = tile_info.mi_col_end * MI_SIZE;
  int tile_row_start = tile_info.mi_row_start * MI_SIZE;
  int tile_row_end = tile_info.mi_row_end * MI_SIZE;
  if (ctxt->plane > 0) {
    tile_col_start = ROUND_POWER_OF_TWO(tile_col_start, cm->subsampling_x);
    tile_col_end = ROUND_POWER_OF_TWO(tile_col_end, cm->subsampling_x);
    tile_row_start = ROUND_POWER_OF_TWO(tile_row_start, cm->subsampling_y);
    tile_row_end = ROUND_POWER_OF_TWO(tile_row_end, cm->subsampling_y);
  }
517

518 519 520 521 522
#if CONFIG_FRAME_SUPERRES
  // If upscaling is enabled, the tile limits need scaling to match the
  // upscaled frame where the restoration tiles live. To do this, scale up the
  // top-left and bottom-right of the tile.
  if (!av1_superres_unscaled(cm)) {
523 524 525 526
    av1_calculate_unscaled_superres_size(&tile_col_start, &tile_row_start,
                                         cm->superres_scale_denominator);
    av1_calculate_unscaled_superres_size(&tile_col_end, &tile_row_end,
                                         cm->superres_scale_denominator);
527 528 529 530 531 532
    // Make sure we don't fall off the bottom-right of the frame.
    tile_col_end = AOMMIN(tile_col_end, ctxt->plane_width);
    tile_row_end = AOMMIN(tile_row_end, ctxt->plane_height);
  }
#endif  // CONFIG_FRAME_SUPERRES

533
  const int rtile_size = rsi->restoration_tilesize;
Dominic Symes's avatar
Dominic Symes committed
534
  const int rtile_col0 = (tile_col_start + rtile_size - 1) / rtile_size;
535
  const int rtile_col1 =
Dominic Symes's avatar
Dominic Symes committed
536 537 538 539
      AOMMIN((tile_col_end + rtile_size - 1) / rtile_size, ctxt->nrtiles_x);
  const int rtile_row0 = (tile_row_start + rtile_size - 1) / rtile_size;
  const int rtile_row1 =
      AOMMIN((tile_row_end + rtile_size - 1) / rtile_size, ctxt->nrtiles_y);
540
  const int ss_y = ctxt->plane > 0 && cm->subsampling_y;
541 542 543 544

  for (int rtile_row = rtile_row0; rtile_row < rtile_row1; ++rtile_row) {
    for (int rtile_col = rtile_col0; rtile_col < rtile_col1; ++rtile_col) {
      const int rtile_idx = rtile_row * ctxt->nrtiles_x + rtile_col;
545
      RestorationTileLimits limits = av1_get_rest_tile_limits(
546 547
          rtile_idx, ctxt->nrtiles_x, ctxt->nrtiles_y, rtile_size,
          ctxt->plane_width, ctxt->plane_height, ss_y);
548
      fun(ctxt, rtile_idx, &limits, arg);
549
    }
550
  }
551 552 553
}

static void search_sgrproj_for_rtile(const struct rest_search_ctxt *ctxt,
554 555 556
                                     int rtile_idx,
                                     const RestorationTileLimits *limits,
                                     void *arg) {
557 558 559 560 561
  const MACROBLOCK *const x = &ctxt->cpi->td.mb;
  const AV1_COMMON *const cm = &ctxt->cpi->common;
  RestorationInfo *rsi = ctxt->cpi->rst_search;
  SgrprojInfo *ref_sgrproj_info = (SgrprojInfo *)arg;

562 563 564 565
  int64_t err =
      sse_restoration_tile(ctxt->src, cm->frame_to_show, cm, limits->h_start,
                           limits->h_end - limits->h_start, limits->v_start,
                           limits->v_end - limits->v_start, (1 << ctxt->plane));
566
  // #bits when a tile is not restored
567
  int bits = x->sgrproj_restore_cost[0];
568
  double cost_norestore = RDCOST_DBL(x->rdmult, (bits >> 4), err);
569
  ctxt->best_tile_cost[rtile_idx] = INT64_MAX;
570

571 572
  RestorationUnitInfo *plane_rui = &rsi[ctxt->plane].unit_info[rtile_idx];
  SgrprojInfo *rtile_sgrproj_info = &plane_rui->sgrproj_info;
573 574
  uint8_t *dgd_start =
      ctxt->dgd_buffer + limits->v_start * ctxt->dgd_stride + limits->h_start;
575
  const uint8_t *src_start =
576
      ctxt->src_buffer + limits->v_start * ctxt->src_stride + limits->h_start;
577

578
  search_selfguided_restoration(
579 580
      dgd_start, limits->h_end - limits->h_start,
      limits->v_end - limits->v_start, ctxt->dgd_stride, src_start,
581
      ctxt->src_stride,
582
#if CONFIG_HIGHBITDEPTH
583
      cm->use_highbitdepth, cm->bit_depth,
584
#else
585
      0, 8,
586
#endif  // CONFIG_HIGHBITDEPTH
587
      rsi[ctxt->plane].procunit_width, rsi[ctxt->plane].procunit_height,
588
      &rtile_sgrproj_info->ep, rtile_sgrproj_info->xqd, cm->rst_tmpbuf);
589
  plane_rui->restoration_type = RESTORE_SGRPROJ;
590
  err = try_restoration_tile(ctxt->src, ctxt->cpi, rsi, (1 << ctxt->plane),
591
                             rtile_idx, ctxt->dst_frame);
592 593
  bits = count_sgrproj_bits(rtile_sgrproj_info, ref_sgrproj_info)
         << AV1_PROB_COST_SHIFT;
594
  bits += x->sgrproj_restore_cost[1];
595 596 597 598 599
  double cost_sgrproj = RDCOST_DBL(x->rdmult, (bits >> 4), err);
  if (cost_sgrproj >= cost_norestore) {
    ctxt->type[rtile_idx] = RESTORE_NONE;
  } else {
    ctxt->type[rtile_idx] = RESTORE_SGRPROJ;
600 601
    SgrprojInfo *sgrproj_info = &ctxt->info->unit_info[rtile_idx].sgrproj_info;
    *ref_sgrproj_info = *sgrproj_info = plane_rui->sgrproj_info;
602 603
    ctxt->best_tile_cost[rtile_idx] = err;
  }
604
  plane_rui->restoration_type = RESTORE_NONE;
605 606 607
}

static double search_sgrproj(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
608 609
                             int plane, RestorationInfo *info,
                             RestorationType *type, int64_t *best_tile_cost,
610
                             YV12_BUFFER_CONFIG *dst_frame) {
611
  const MACROBLOCK *const x = &cpi->td.mb;
612
  struct rest_search_ctxt ctxt;
613 614
  const int nrtiles = init_rest_search_ctxt(src, cpi, plane, info, type,
                                            best_tile_cost, dst_frame, &ctxt);
615 616 617 618

  RestorationInfo *plane_rsi = &cpi->rst_search[plane];
  plane_rsi->frame_restoration_type = RESTORE_SGRPROJ;
  for (int rtile_idx = 0; rtile_idx < nrtiles; ++rtile_idx) {
619
    plane_rsi->unit_info[rtile_idx].restoration_type = RESTORE_NONE;
620 621 622 623 624
  }

  // Compute best Sgrproj filters for each rtile, one (encoder/decoder)
  // tile at a time.
  const AV1_COMMON *const cm = &cpi->common;
625
#if CONFIG_HIGHBITDEPTH
626 627 628
  const int highbd = cm->use_highbitdepth;
#else
  const int highbd = 0;
629
#endif
630 631 632
  extend_frame(ctxt.dgd_buffer, ctxt.plane_width, ctxt.plane_height,
               ctxt.dgd_stride, SGRPROJ_BORDER_HORZ, SGRPROJ_BORDER_VERT,
               highbd);
633

634 635 636 637 638 639
  for (int tile_row = 0; tile_row < cm->tile_rows; ++tile_row) {
    for (int tile_col = 0; tile_col < cm->tile_cols; ++tile_col) {
      SgrprojInfo ref_sgrproj_info;
      set_default_sgrproj(&ref_sgrproj_info);
      foreach_rtile_in_tile(&ctxt, tile_row, tile_col, search_sgrproj_for_rtile,
                            &ref_sgrproj_info);
640 641
    }
  }
642

643
  // Cost for Sgrproj filtering
644
  SgrprojInfo ref_sgrproj_info;
645
  set_default_sgrproj(&ref_sgrproj_info);
646 647 648 649

  int bits = frame_level_restore_bits[plane_rsi->frame_restoration_type]
             << AV1_PROB_COST_SHIFT;
  for (int rtile_idx = 0; rtile_idx < nrtiles; ++rtile_idx) {
650 651 652
    RestorationUnitInfo *plane_rui = &plane_rsi->unit_info[rtile_idx];
    RestorationUnitInfo *search_rui = &info->unit_info[rtile_idx];

653
    bits += x->sgrproj_restore_cost[type[rtile_idx] != RESTORE_NONE];
654
    plane_rui->sgrproj_info = search_rui->sgrproj_info;
655
    if (type[rtile_idx] == RESTORE_SGRPROJ) {
656
      bits += count_sgrproj_bits(&plane_rui->sgrproj_info, &ref_sgrproj_info)
657
              << AV1_PROB_COST_SHIFT;
658
      ref_sgrproj_info = plane_rui->sgrproj_info;
659
    }
660
    plane_rui->restoration_type = type[rtile_idx];
661
  }
662 663
  int64_t err =
      try_restoration_frame(src, cpi, cpi->rst_search, (1 << plane), dst_frame);
664
  double cost_sgrproj = RDCOST_DBL(cpi->td.mb.rdmult, (bits >> 4), err);
665 666 667
  return cost_sgrproj;
}

668 669
static double find_average(const uint8_t *src, int h_start, int h_end,
                           int v_start, int v_end, int stride) {
670 671 672
  uint64_t sum = 0;
  double avg = 0;
  int i, j;
673
  aom_clear_system_state();
674 675 676
  for (i = v_start; i < v_end; i++)
    for (j = h_start; j < h_end; j++) sum += src[i * stride + j];
  avg = (double)sum / ((v_end - v_start) * (h_end - h_start));
677 678 679
  return avg;
}

680 681 682 683
static void compute_stats(int wiener_win, const uint8_t *dgd,
                          const uint8_t *src, int h_start, int h_end,
                          int v_start, int v_end, int dgd_stride,
                          int src_stride, double *M, double *H) {
684
  int i, j, k, l;
685
  double Y[WIENER_WIN2];
686 687
  const int wiener_win2 = wiener_win * wiener_win;
  const int wiener_halfwin = (wiener_win >> 1);
688 689
  const double avg =
      find_average(dgd, h_start, h_end, v_start, v_end, dgd_stride);
690

691 692
  memset(M, 0, sizeof(*M) * wiener_win2);
  memset(H, 0, sizeof(*H) * wiener_win2 * wiener_win2);
693 694
  for (i = v_start; i < v_end; i++) {
    for (j = h_start; j < h_end; j++) {
695 696
      const double X = (double)src[i * src_stride + j] - avg;
      int idx = 0;
697 698
      for (k = -wiener_halfwin; k <= wiener_halfwin; k++) {
        for (l = -wiener_halfwin; l <= wiener_halfwin; l++) {
699 700 701 702
          Y[idx] = (double)dgd[(i + l) * dgd_stride + (j + k)] - avg;
          idx++;
        }
      }
703
      assert(idx == wiener_win2);
704
      for (k = 0; k < wiener_win2; ++k) {
705
        M[k] += Y[k] * X;
706 707
        H[k * wiener_win2 + k] += Y[k] * Y[k];
        for (l = k + 1; l < wiener_win2; ++l) {
708 709 710
          // H is a symmetric matrix, so we only need to fill out the upper
          // triangle here. We can copy it down to the lower triangle outside
          // the (i, j) loops.
711
          H[k * wiener_win2 + l] += Y[k] * Y[l];
712 713 714 715
        }
      }
    }
  }
716 717 718
  for (k = 0; k < wiener_win2; ++k) {
    for (l = k + 1; l < wiener_win2; ++l) {
      H[l * wiener_win2 + k] = H[k * wiener_win2 + l];
719 720
    }
  }
721 722
}

723
#if CONFIG_HIGHBITDEPTH
724
static double find_average_highbd(const uint16_t *src, int h_start, int h_end,
725
                                  int v_start, int v_end, int stride) {
726 727 728
  uint64_t sum = 0;
  double avg = 0;
  int i, j;
729
  aom_clear_system_state();
730 731 732
  for (i = v_start; i < v_end; i++)
    for (j = h_start; j < h_end; j++) sum += src[i * stride + j];
  avg = (double)sum / ((v_end - v_start) * (h_end - h_start));
733 734 735
  return avg;
}

736 737 738 739
static void compute_stats_highbd(int wiener_win, const uint8_t *dgd8,
                                 const uint8_t *src8, int h_start, int h_end,
                                 int v_start, int v_end, int dgd_stride,
                                 int src_stride, double *M, double *H) {
740
  int i, j, k, l;
741
  double Y[WIENER_WIN2];
742 743
  const int wiener_win2 = wiener_win * wiener_win;
  const int wiener_halfwin = (wiener_win >> 1);
744 745
  const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  const uint16_t *dgd = CONVERT_TO_SHORTPTR(dgd8);
746 747
  const double avg =
      find_average_highbd(dgd, h_start, h_end, v_start, v_end, dgd_stride);
748

749 750
  memset(M, 0, sizeof(*M) * wiener_win2);
  memset(H, 0, sizeof(*H) * wiener_win2 * wiener_win2);
751 752
  for (i = v_start; i < v_end; i++) {
    for (j = h_start; j < h_end; j++) {
753 754
      const double X = (double)src[i * src_stride + j] - avg;
      int idx = 0;
755 756
      for (k = -wiener_halfwin; k <= wiener_halfwin; k++) {
        for (l = -wiener_halfwin; l <= wiener_halfwin; l++) {
757 758 759 760
          Y[idx] = (double)dgd[(i + l) * dgd_stride + (j + k)] - avg;
          idx++;
        }
      }
761
      assert(idx == wiener_win2);
762
      for (k = 0; k < wiener_win2; ++k) {
763
        M[k] += Y[k] * X;
764 765
        H[k * wiener_win2 + k] += Y[k] * Y[k];
        for (l = k + 1; l < wiener_win2; ++l) {
766 767 768
          // H is a symmetric matrix, so we only need to fill out the upper
          // triangle here. We can copy it down to the lower triangle outside
          // the (i, j) loops.
769
          H[k * wiener_win2 + l] += Y[k] * Y[l];
770 771 772 773
        }
      }
    }
  }
774 775 776
  for (k = 0; k < wiener_win2; ++k) {
    for (l = k + 1; l < wiener_win2; ++l) {
      H[l * wiener_win2 + k] = H[k * wiener_win2 + l];
777 778
    }
  }
779
}
780
#endif  // CONFIG_HIGHBITDEPTH
781

782 783 784
static INLINE int wrap_index(int i, int wiener_win) {
  const int wiener_halfwin1 = (wiener_win >> 1) + 1;
  return (i >= wiener_halfwin1 ? wiener_win - 1 - i : i);
785 786 787
}

// Fix vector b, update vector a
788 789
static void update_a_sep_sym(int wiener_win, double **Mc, double **Hc,
                             double *a, double *b) {
790
  int i, j;
791
  double S[WIENER_WIN];
792
  double A[WIENER_HALFWIN1], B[WIENER_HALFWIN1 * WIENER_HALFWIN1];
793 794
  const int wiener_win2 = wiener_win * wiener_win;
  const int wiener_halfwin1 = (wiener_win >> 1) + 1;
795 796
  memset(A, 0, sizeof(A));
  memset(B, 0, sizeof(B));
797 798 799
  for (i = 0; i < wiener_win; i++) {
    for (j = 0; j < wiener_win; ++j) {
      const int jj = wrap_index(j, wiener_win);
800 801 802
      A[jj] += Mc[i][j] * b[i];
    }
  }
803 804
  for (i = 0; i < wiener_win; i++) {
    for (j = 0; j < wiener_win; j++) {
805
      int k, l;
806 807 808 809 810 811
      for (k = 0; k < wiener_win; ++k)
        for (l = 0; l < wiener_win; ++l) {
          const int kk = wrap_index(k, wiener_win);
          const int ll = wrap_index(l, wiener_win);
          B[ll * wiener_halfwin1 + kk] +=
              Hc[j * wiener_win + i][k * wiener_win2 + l] * b[i] * b[j];
812 813 814
        }
    }
  }
Aamir Anis's avatar
Aamir Anis committed
815
  // Normalization enforcement in the system of equations itself
816
  for (i = 0; i < wiener_halfwin1 - 1; ++i)
817
    A[i] -=
818 819 820 821 822 823 824 825 826 827 828 829 830 831 832
        A[wiener_halfwin1 - 1] * 2 +
        B[i * wiener_halfwin1 + wiener_halfwin1 - 1] -
        2 * B[(wiener_halfwin1 - 1) * wiener_halfwin1 + (wiener_halfwin1 - 1)];
  for (i = 0; i < wiener_halfwin1 - 1; ++i)
    for (j = 0; j < wiener_halfwin1 - 1; ++j)
      B[i * wiener_halfwin1 + j] -=
          2 * (B[i * wiener_halfwin1 + (wiener_halfwin1 - 1)] +
               B[(wiener_halfwin1 - 1) * wiener_halfwin1 + j] -
               2 * B[(wiener_halfwin1 - 1) * wiener_halfwin1 +
                     (wiener_halfwin1 - 1)]);
  if (linsolve(wiener_halfwin1 - 1, B, wiener_halfwin1, A, S)) {
    S[wiener_halfwin1 - 1] = 1.0;
    for (i = wiener_halfwin1; i < wiener_win; ++i) {
      S[i] = S[wiener_win - 1 - i];
      S[wiener_halfwin1 - 1] -= 2 * S[i];
833
    }
834
    memcpy(a, S, wiener_win * sizeof(*a));
835 836 837 838
  }
}

// Fix vector a, update vector b
839 840
static void update_b_sep_sym(int wiener_win, double **Mc, double **Hc,
                             double *a, double *b) {
841
  int i, j;
Debargha Mukherjee's avatar