/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <math.h>
#include <stdlib.h>

#ifdef HAVE_CONFIG_H
#include "./config.h"
#endif

#include "./aom_dsp_rtcd.h"
#include "./av1_rtcd.h"
#include "./cdef.h"

/* Generated from gen_filter_tables.c. */
#if !CONFIG_CDEF_SINGLEPASS || CDEF_FULL
Yaowu Xu's avatar
Yaowu Xu committed
25
DECLARE_ALIGNED(16, const int, cdef_directions[8][3]) = {
Steinar Midtskogen's avatar
Steinar Midtskogen committed
26 27 28 29 30 31 32 33
  { -1 * CDEF_BSTRIDE + 1, -2 * CDEF_BSTRIDE + 2, -3 * CDEF_BSTRIDE + 3 },
  { 0 * CDEF_BSTRIDE + 1, -1 * CDEF_BSTRIDE + 2, -1 * CDEF_BSTRIDE + 3 },
  { 0 * CDEF_BSTRIDE + 1, 0 * CDEF_BSTRIDE + 2, 0 * CDEF_BSTRIDE + 3 },
  { 0 * CDEF_BSTRIDE + 1, 1 * CDEF_BSTRIDE + 2, 1 * CDEF_BSTRIDE + 3 },
  { 1 * CDEF_BSTRIDE + 1, 2 * CDEF_BSTRIDE + 2, 3 * CDEF_BSTRIDE + 3 },
  { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 1, 3 * CDEF_BSTRIDE + 1 },
  { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 0, 3 * CDEF_BSTRIDE + 0 },
  { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE - 1, 3 * CDEF_BSTRIDE - 1 }
Yaowu Xu's avatar
Yaowu Xu committed
34
};
35
#else
Yaowu Xu's avatar
Yaowu Xu committed
36
DECLARE_ALIGNED(16, const int, cdef_directions[8][2]) = {
37 38 39 40 41 42 43 44 45 46
  { -1 * CDEF_BSTRIDE + 1, -2 * CDEF_BSTRIDE + 2 },
  { 0 * CDEF_BSTRIDE + 1, -1 * CDEF_BSTRIDE + 2 },
  { 0 * CDEF_BSTRIDE + 1, 0 * CDEF_BSTRIDE + 2 },
  { 0 * CDEF_BSTRIDE + 1, 1 * CDEF_BSTRIDE + 2 },
  { 1 * CDEF_BSTRIDE + 1, 2 * CDEF_BSTRIDE + 2 },
  { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 1 },
  { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 0 },
  { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE - 1 }
};
#endif
Yaowu Xu's avatar
Yaowu Xu committed
47 48 49 50 51 52 53 54

/* Detect direction. 0 means 45-degree up-right, 2 is horizontal, and so on.
   The search minimizes the weighted variance along all the lines in a
   particular direction, i.e. the squared error between the input and a
   "predicted" block where each pixel is replaced by the average along a line
   in a particular direction. Since each direction has the same sum(x^2) term,
   that term is never computed. See Section 2, step 2, of:
   http://jmvalin.ca/notes/intra_paint.pdf

   img         - top-left pixel of the 8x8 block to analyse.
   stride      - distance, in pixels, between consecutive rows of img.
   var         - output: (best cost - cost of the orthogonal direction) >> 10.
   coeff_shift - right shift applied to each pixel before 128 is subtracted.

   Returns the index (0..7) of the direction with the largest cost. Ties, and
   the all-zero case, resolve to the lowest index. */
int cdef_find_dir_c(const uint16_t *img, int stride, int32_t *var,
                    int coeff_shift) {
  int i;
  int32_t cost[8] = { 0 };
  int partial[8][15] = { { 0 } };
  int32_t best_cost = 0;
  int best_dir = 0;
  /* Instead of dividing by n between 2 and 8, we multiply by 3*5*7*8/n.
     The output is then 840 times larger, but we don't care for finding
     the max. */
  static const int div_table[] = { 0, 840, 420, 280, 210, 168, 140, 120, 105 };
  /* Accumulate, for every direction, the sum of the pixels on each line. */
  for (i = 0; i < 8; i++) {
    int j;
    for (j = 0; j < 8; j++) {
      int x;
      /* We subtract 128 here to reduce the maximum range of the squared
         partial sums. */
      x = (img[i * stride + j] >> coeff_shift) - 128;
      partial[0][i + j] += x;
      partial[1][i + j / 2] += x;
      partial[2][i] += x;
      partial[3][3 + i - j / 2] += x;
      partial[4][7 + i - j] += x;
      partial[5][3 - i / 2 + j] += x;
      partial[6][j] += x;
      partial[7][i / 2 + j] += x;
    }
  }
  /* Horizontal (2) and vertical (6): eight lines of eight pixels each. */
  for (i = 0; i < 8; i++) {
    cost[2] += partial[2][i] * partial[2][i];
    cost[6] += partial[6][i] * partial[6][i];
  }
  cost[2] *= div_table[8];
  cost[6] *= div_table[8];
  /* Diagonals (0 and 4): line lengths run 1..7, 8, 7..1. */
  for (i = 0; i < 7; i++) {
    cost[0] += (partial[0][i] * partial[0][i] +
                partial[0][14 - i] * partial[0][14 - i]) *
               div_table[i + 1];
    cost[4] += (partial[4][i] * partial[4][i] +
                partial[4][14 - i] * partial[4][14 - i]) *
               div_table[i + 1];
  }
  cost[0] += partial[0][7] * partial[0][7] * div_table[8];
  cost[4] += partial[4][7] * partial[4][7] * div_table[8];
  /* Odd directions: five full-length lines in the middle (indices 3..7) and
     three pairs of shorter lines of length 2, 4 and 6 at the ends. */
  for (i = 1; i < 8; i += 2) {
    int j;
    for (j = 0; j < 4 + 1; j++) {
      cost[i] += partial[i][3 + j] * partial[i][3 + j];
    }
    cost[i] *= div_table[8];
    for (j = 0; j < 4 - 1; j++) {
      cost[i] += (partial[i][j] * partial[i][j] +
                  partial[i][10 - j] * partial[i][10 - j]) *
                 div_table[2 * j + 2];
    }
  }
  for (i = 0; i < 8; i++) {
    if (cost[i] > best_cost) {
      best_cost = cost[i];
      best_dir = i;
    }
  }
  /* Difference between the optimal variance and the variance along the
     orthogonal direction. Again, the sum(x^2) terms cancel out. */
  *var = best_cost - cost[(best_dir + 4) & 7];
  /* We'd normally divide by 840, but dividing by 1024 is close enough
     for what we're going to do with this. */
  *var >>= 10;
  return best_dir;
}

#if CONFIG_CDEF_SINGLEPASS
/* Tap weights for the primary and secondary filters. cdef_filter_block_c
   selects a row of each table with (pri_strength & 1) and indexes the row by
   the tap number k. */
#if CDEF_FULL
const int cdef_pri_taps[2][3] = { { 3, 2, 1 }, { 2, 2, 2 } };
const int cdef_sec_taps[2][2] = { { 3, 1 }, { 3, 1 } };
#else
const int cdef_pri_taps[2][2] = { { 4, 2 }, { 3, 3 } };
const int cdef_sec_taps[2][2] = { { 2, 1 }, { 2, 1 } };
#endif

/* Smooth in the direction detected.

   Filters one block (8x8 when bsize == BLOCK_8X8, otherwise 4x4). For every
   pixel x the primary taps are read along cdef_directions[dir] and the
   secondary taps along directions (dir + 2) & 7 and (dir + 6) & 7, each offset
   being used with both signs. Every tap contributes
   weight * constrain(tap - x, strength, damping) to a sum, and the output is
   x plus that sum scaled down by 16 with rounding.

   dst8, dst16  - the result is written to dst8 when it is non-NULL, otherwise
                  to dst16; both use row stride dstride.
   in           - input pixels with row stride CDEF_BSTRIDE. Taps are read
                  outside the block, so the caller must provide a border
                  (NOTE(review): required border width is not visible here).
   pri_strength, sec_strength, pri_damping, sec_damping
                - forwarded to constrain(); (pri_strength & 1) also selects
                  the row of both tap tables.
   max          - without CDEF_CAP: upper clamp bound (the lower bound is 0).
                  With CDEF_CAP the argument is unused and the output is
                  clamped to the min/max of x and the taps read, where taps
                  equal to CDEF_VERY_LARGE are left out of the maximum. */
#if CDEF_CAP
void cdef_filter_block_c(uint8_t *dst8, uint16_t *dst16, int dstride,
                         const uint16_t *in, int pri_strength, int sec_strength,
                         int dir, int pri_damping, int sec_damping, int bsize,
                         AOM_UNUSED int max_unused)
#else
void cdef_filter_block_c(uint8_t *dst8, uint16_t *dst16, int dstride,
                         const uint16_t *in, int pri_strength, int sec_strength,
                         int dir, int pri_damping, int sec_damping, int bsize,
                         int max)
#endif
{
  int i, j, k;
  const int s = CDEF_BSTRIDE;
  const int *pri_taps = cdef_pri_taps[pri_strength & 1];
  const int *sec_taps = cdef_sec_taps[pri_strength & 1];
  /* Block width and height in pixels: 8 for BLOCK_8X8, 4 otherwise. */
  const int bdim = 4 << (bsize == BLOCK_8X8);
  for (i = 0; i < bdim; i++) {
    for (j = 0; j < bdim; j++) {
      int16_t sum = 0;
      int16_t y;
      int16_t x = in[i * s + j];
#if CDEF_CAP
      int max = x;
      int min = x;
#endif
#if CDEF_FULL
      for (k = 0; k < 3; k++)
#else
      for (k = 0; k < 2; k++)
#endif
      {
        int16_t p0 = in[i * s + j + cdef_directions[dir][k]];
        int16_t p1 = in[i * s + j - cdef_directions[dir][k]];
        sum += pri_taps[k] * constrain(p0 - x, pri_strength, pri_damping);
        sum += pri_taps[k] * constrain(p1 - x, pri_strength, pri_damping);
#if CDEF_CAP
        if (p0 != CDEF_VERY_LARGE) max = AOMMAX(p0, max);
        if (p1 != CDEF_VERY_LARGE) max = AOMMAX(p1, max);
        min = AOMMIN(p0, min);
        min = AOMMIN(p1, min);
#endif
#if CDEF_FULL
        /* The secondary filter only has two taps. */
        if (k == 2) continue;
#endif
        int16_t s0 = in[i * s + j + cdef_directions[(dir + 2) & 7][k]];
        int16_t s1 = in[i * s + j - cdef_directions[(dir + 2) & 7][k]];
        int16_t s2 = in[i * s + j + cdef_directions[(dir + 6) & 7][k]];
        int16_t s3 = in[i * s + j - cdef_directions[(dir + 6) & 7][k]];
#if CDEF_CAP
        if (s0 != CDEF_VERY_LARGE) max = AOMMAX(s0, max);
        if (s1 != CDEF_VERY_LARGE) max = AOMMAX(s1, max);
        if (s2 != CDEF_VERY_LARGE) max = AOMMAX(s2, max);
        if (s3 != CDEF_VERY_LARGE) max = AOMMAX(s3, max);
        min = AOMMIN(s0, min);
        min = AOMMIN(s1, min);
        min = AOMMIN(s2, min);
        min = AOMMIN(s3, min);
#endif
        sum += sec_taps[k] * constrain(s0 - x, sec_strength, sec_damping);
        sum += sec_taps[k] * constrain(s1 - x, sec_strength, sec_damping);
        sum += sec_taps[k] * constrain(s2 - x, sec_strength, sec_damping);
        sum += sec_taps[k] * constrain(s3 - x, sec_strength, sec_damping);
      }
      /* The "- (sum < 0)" term makes the rounding symmetric about zero. */
#if CDEF_CAP
      y = clamp((int16_t)x + ((8 + sum - (sum < 0)) >> 4), min, max);
#else
      y = clamp((int16_t)x + ((8 + sum - (sum < 0)) >> 4), 0, max);
#endif
      if (dst8)
        dst8[i * dstride + j] = (uint8_t)y;
      else
        dst16[i * dstride + j] = (uint16_t)y;
    }
  }
}

#else

Yaowu Xu's avatar
Yaowu Xu committed
214
/* Smooth in the direction detected. */
Steinar Midtskogen's avatar
Steinar Midtskogen committed
215 216
void cdef_direction_8x8_c(uint16_t *y, int ystride, const uint16_t *in,
                          int threshold, int dir, int damping) {
Yaowu Xu's avatar
Yaowu Xu committed
217 218 219
  int i;
  int j;
  int k;
220
  static const int taps[3] = { 3, 2, 1 };
221 222
  for (i = 0; i < 8; i++) {
    for (j = 0; j < 8; j++) {
Yaowu Xu's avatar
Yaowu Xu committed
223 224 225
      int16_t sum;
      int16_t xx;
      int16_t yy;
Steinar Midtskogen's avatar
Steinar Midtskogen committed
226
      xx = in[i * CDEF_BSTRIDE + j];
clang-format's avatar
clang-format committed
227
      sum = 0;
Yaowu Xu's avatar
Yaowu Xu committed
228 229 230
      for (k = 0; k < 3; k++) {
        int16_t p0;
        int16_t p1;
Steinar Midtskogen's avatar
Steinar Midtskogen committed
231 232
        p0 = in[i * CDEF_BSTRIDE + j + cdef_directions[dir][k]] - xx;
        p1 = in[i * CDEF_BSTRIDE + j - cdef_directions[dir][k]] - xx;
233 234
        sum += taps[k] * constrain(p0, threshold, damping);
        sum += taps[k] * constrain(p1, threshold, damping);
Yaowu Xu's avatar
Yaowu Xu committed
235
      }
236 237
      sum = (sum + 8) >> 4;
      yy = xx + sum;
clang-format's avatar
clang-format committed
238
      y[i * ystride + j] = yy;
Yaowu Xu's avatar
Yaowu Xu committed
239 240 241 242
    }
  }
}

243
/* Smooth in the direction detected. */
Steinar Midtskogen's avatar
Steinar Midtskogen committed
244 245
void cdef_direction_4x4_c(uint16_t *y, int ystride, const uint16_t *in,
                          int threshold, int dir, int damping) {
246 247 248 249 250 251 252 253 254
  int i;
  int j;
  int k;
  static const int taps[2] = { 4, 1 };
  for (i = 0; i < 4; i++) {
    for (j = 0; j < 4; j++) {
      int16_t sum;
      int16_t xx;
      int16_t yy;
Steinar Midtskogen's avatar
Steinar Midtskogen committed
255
      xx = in[i * CDEF_BSTRIDE + j];
256 257 258 259
      sum = 0;
      for (k = 0; k < 2; k++) {
        int16_t p0;
        int16_t p1;
Steinar Midtskogen's avatar
Steinar Midtskogen committed
260 261
        p0 = in[i * CDEF_BSTRIDE + j + cdef_directions[dir][k]] - xx;
        p1 = in[i * CDEF_BSTRIDE + j - cdef_directions[dir][k]] - xx;
262 263
        sum += taps[k] * constrain(p0, threshold, damping);
        sum += taps[k] * constrain(p1, threshold, damping);
264
      }
265 266
      sum = (sum + 8) >> 4;
      yy = xx + sum;
267 268 269
      y[i * ystride + j] = yy;
    }
  }
Yaowu Xu's avatar
Yaowu Xu committed
270
}
271
#endif
Yaowu Xu's avatar
Yaowu Xu committed
272

Steinar Midtskogen's avatar
Steinar Midtskogen committed
273 274 275 276 277 278 279
/* Compute the primary filter strength for an 8x8 block based on the
   directional variance difference. A high variance difference means
   that we have a highly directional pattern (e.g. a high contrast
   edge), so we can apply more deringing. A low variance means that we
   either have a low contrast edge, or a non-directional texture, so
   we want to be careful not to blur. */
static INLINE int adjust_strength(int strength, int32_t var) {
280
  const int i = var >> 6 ? AOMMIN(get_msb(var >> 6), 12) : 0;
Steinar Midtskogen's avatar
Steinar Midtskogen committed
281 282
  /* We use the variance of 8x8 blocks to adjust the strength. */
  return var ? (strength * (4 + i) + 8) >> 4 : 0;
Yaowu Xu's avatar
Yaowu Xu committed
283 284
}

285
#if !CONFIG_CDEF_SINGLEPASS
/* Copies an 8x8 block of 16-bit samples from src (row stride sstride) to dst
   (row stride dstride). Strides are in samples. */
void copy_8x8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src,
                               int sstride) {
  int i, j;
  for (i = 0; i < 8; i++) {
    for (j = 0; j < 8; j++) {
      dst[i * dstride + j] = src[i * sstride + j];
    }
  }
}

/* Copies a 4x4 block of 16-bit samples from src (row stride sstride) to dst
   (row stride dstride). Strides are in samples. */
void copy_4x4_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src,
                               int sstride) {
  int i, j;
  for (i = 0; i < 4; i++) {
    for (j = 0; j < 4; j++) {
      dst[i * dstride + j] = src[i * sstride + j];
    }
  }
}

/* Scatters cdef_count packed blocks from src into the 16-bit image dst.

   Block bi is stored contiguously in src at offset bi * (block width *
   block height) and is written to dst at the block coordinates
   (dlist[bi].by, dlist[bi].bx), scaled by the block dimensions given by
   bsize. 4x8 and 8x4 blocks are copied as two 4x4 halves.

   dst, dstride - destination image and its row stride in samples.
   src          - packed source blocks (read only).
   bsize        - one of BLOCK_8X8, BLOCK_4X8, BLOCK_8X4 or BLOCK_4X4. */
static void copy_block_16bit_to_16bit(uint16_t *dst, int dstride,
                                      const uint16_t *src, cdef_list *dlist,
                                      int cdef_count, int bsize) {
  int bi, bx, by;

  if (bsize == BLOCK_8X8) {
    for (bi = 0; bi < cdef_count; bi++) {
      by = dlist[bi].by;
      bx = dlist[bi].bx;
      copy_8x8_16bit_to_16bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
                              &src[bi << (3 + 3)], 8);
    }
  } else if (bsize == BLOCK_4X8) {
    for (bi = 0; bi < cdef_count; bi++) {
      by = dlist[bi].by;
      bx = dlist[bi].bx;
      /* Top half, then bottom half (4 rows of 4 samples further on). */
      copy_4x4_16bit_to_16bit(&dst[(by << 3) * dstride + (bx << 2)], dstride,
                              &src[bi << (3 + 2)], 4);
      copy_4x4_16bit_to_16bit(&dst[((by << 3) + 4) * dstride + (bx << 2)],
                              dstride, &src[(bi << (3 + 2)) + 4 * 4], 4);
    }
  } else if (bsize == BLOCK_8X4) {
    for (bi = 0; bi < cdef_count; bi++) {
      by = dlist[bi].by;
      bx = dlist[bi].bx;
      /* Left half, then right half (4 samples further along each row). */
      copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 3)], dstride,
                              &src[bi << (2 + 3)], 8);
      copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 3) + 4],
                              dstride, &src[(bi << (2 + 3)) + 4], 8);
    }
  } else {
    assert(bsize == BLOCK_4X4);
    for (bi = 0; bi < cdef_count; bi++) {
      by = dlist[bi].by;
      bx = dlist[bi].bx;
      copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
                              &src[bi << (2 + 2)], 4);
    }
  }
}

/* Copies an 8x8 block from the 16-bit buffer src (row stride sstride) to the
   8-bit buffer dst (row stride dstride). Each sample is converted with a
   plain (uint8_t) cast, so only its low 8 bits are kept. */
void copy_8x8_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src,
                              int sstride) {
  int i, j;
  for (i = 0; i < 8; i++) {
    for (j = 0; j < 8; j++) {
      dst[i * dstride + j] = (uint8_t)src[i * sstride + j];
    }
  }
}

/* Copies a 4x4 block from the 16-bit buffer src (row stride sstride) to the
   8-bit buffer dst (row stride dstride). Each sample is converted with a
   plain (uint8_t) cast, so only its low 8 bits are kept. */
void copy_4x4_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src,
                              int sstride) {
  int i, j;
  for (i = 0; i < 4; i++) {
    for (j = 0; j < 4; j++) {
      dst[i * dstride + j] = (uint8_t)src[i * sstride + j];
    }
  }
}

/* Scatters cdef_count packed 16-bit blocks from src into the 8-bit image dst.

   Block bi is stored contiguously in src at offset bi * (block width *
   block height) and is written to dst at the block coordinates
   (dlist[bi].by, dlist[bi].bx), scaled by the block dimensions given by
   bsize. 4x8 and 8x4 blocks are copied as two 4x4 halves.

   dst, dstride - destination image and its row stride in samples.
   src          - packed source blocks (read only).
   bsize        - one of BLOCK_8X8, BLOCK_4X8, BLOCK_8X4 or BLOCK_4X4. */
static void copy_block_16bit_to_8bit(uint8_t *dst, int dstride,
                                     const uint16_t *src, cdef_list *dlist,
                                     int cdef_count, int bsize) {
  int bi, bx, by;
  if (bsize == BLOCK_8X8) {
    for (bi = 0; bi < cdef_count; bi++) {
      by = dlist[bi].by;
      bx = dlist[bi].bx;
      copy_8x8_16bit_to_8bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
                             &src[bi << (3 + 3)], 8);
    }
  } else if (bsize == BLOCK_4X8) {
    for (bi = 0; bi < cdef_count; bi++) {
      by = dlist[bi].by;
      bx = dlist[bi].bx;
      /* Top half, then bottom half (4 rows of 4 samples further on). */
      copy_4x4_16bit_to_8bit(&dst[(by << 3) * dstride + (bx << 2)], dstride,
                             &src[bi << (3 + 2)], 4);
      copy_4x4_16bit_to_8bit(&dst[((by << 3) + 4) * dstride + (bx << 2)],
                             dstride, &src[(bi << (3 + 2)) + 4 * 4], 4);
    }
  } else if (bsize == BLOCK_8X4) {
    for (bi = 0; bi < cdef_count; bi++) {
      by = dlist[bi].by;
      bx = dlist[bi].bx;
      /* Left half, then right half (4 samples further along each row). */
      copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 3)], dstride,
                             &src[bi << (2 + 3)], 8);
      copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 3) + 4], dstride,
                             &src[(bi << (2 + 3)) + 4], 8);
    }
  } else {
    assert(bsize == BLOCK_4X4);
    for (bi = 0; bi < cdef_count; bi++) {
      by = dlist[bi].by;
      bx = dlist[bi].bx;
      /* log2(height) + log2(width), written like the other branches. */
      copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
                             &src[bi << (2 + 2)], 4);
    }
  }
}

/* Returns the low bit of level, except that level 1 yields 0. */
int get_filter_skip(int level) {
  if (level == 1) {
    return 0;
  }
  return level & 1;
}

Steinar Midtskogen's avatar
Steinar Midtskogen committed
403 404 405 406 407 408
void cdef_filter_fb(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in,
                    int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],
                    int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli,
                    cdef_list *dlist, int cdef_count, int level,
                    int sec_strength, int sec_damping, int pri_damping,
                    int coeff_shift, int skip_dering, int hbd) {
409 410 411 412 413 414 415 416 417
#else

void cdef_filter_fb(uint8_t *dst8, uint16_t *dst16, int dstride, uint16_t *in,
                    int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],
                    int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli,
                    cdef_list *dlist, int cdef_count, int level,
                    int sec_strength, int pri_damping, int sec_damping,
                    int coeff_shift) {
#endif
418
  int bi;
Yaowu Xu's avatar
Yaowu Xu committed
419 420
  int bx;
  int by;
421
  int bsize, bsizex, bsizey;
422

423 424 425
#if CONFIG_CDEF_SINGLEPASS
  int pri_strength = (level >> 1) << coeff_shift;
  int filter_skip = level & 1;
426
  sec_strength <<= coeff_shift;
427 428 429 430 431
  if (!pri_strength && !sec_strength && filter_skip) {
    pri_strength = 19 << coeff_shift;
    sec_strength = 7 << coeff_shift;
  }
#else
432
  int threshold = (level >> 1) << coeff_shift;
433 434
  int filter_skip = get_filter_skip(level);
  if (level == 1) threshold = 31 << coeff_shift;
435

Steinar Midtskogen's avatar
Steinar Midtskogen committed
436 437
  cdef_direction_func cdef_direction[] = { cdef_direction_4x4,
                                           cdef_direction_8x8 };
438
#endif
Steinar Midtskogen's avatar
Steinar Midtskogen committed
439 440
  sec_damping += coeff_shift - (pli != AOM_PLANE_Y);
  pri_damping += coeff_shift - (pli != AOM_PLANE_Y);
441 442 443 444
  bsize =
      ydec ? (xdec ? BLOCK_4X4 : BLOCK_8X4) : (xdec ? BLOCK_4X8 : BLOCK_8X8);
  bsizex = 3 - xdec;
  bsizey = 3 - ydec;
445 446 447 448 449 450 451 452 453 454 455 456 457 458 459
#if CONFIG_CDEF_SINGLEPASS
  if (dirinit && pri_strength == 0 && sec_strength == 0)
#else
  if (!skip_dering)
#endif
  {
#if CONFIG_CDEF_SINGLEPASS
    // If we're here, both primary and secondary strengths are 0, and
    // we still haven't written anything to y[] yet, so we just copy
    // the input to y[]. This is necessary only for av1_cdef_search()
    // and only av1_cdef_search() sets dirinit.
    for (bi = 0; bi < cdef_count; bi++) {
      by = dlist[bi].by;
      bx = dlist[bi].bx;
#else
460 461
    if (pli == 0) {
      if (!dirinit || !*dirinit) {
Steinar Midtskogen's avatar
Steinar Midtskogen committed
462
        for (bi = 0; bi < cdef_count; bi++) {
463 464
          by = dlist[bi].by;
          bx = dlist[bi].bx;
Steinar Midtskogen's avatar
Steinar Midtskogen committed
465 466
          dir[by][bx] = cdef_find_dir(&in[8 * by * CDEF_BSTRIDE + 8 * bx],
                                      CDEF_BSTRIDE, &var[by][bx], coeff_shift);
467 468 469
        }
        if (dirinit) *dirinit = 1;
      }
470 471 472 473 474 475
    }
    // Only run dering for non-zero threshold (which is always the case for
    // 4:2:2 or 4:4:0). If we don't dering, we still need to eventually write
    // something out in y[] later.
    if (threshold != 0) {
      assert(bsize == BLOCK_8X8 || bsize == BLOCK_4X4);
Steinar Midtskogen's avatar
Steinar Midtskogen committed
476
      for (bi = 0; bi < cdef_count; bi++) {
477
        int t = !filter_skip && dlist[bi].skip ? 0 : threshold;
478 479
        by = dlist[bi].by;
        bx = dlist[bi].bx;
Steinar Midtskogen's avatar
Steinar Midtskogen committed
480
        (cdef_direction[bsize == BLOCK_8X8])(
481
            &y[bi << (bsizex + bsizey)], 1 << bsizex,
Steinar Midtskogen's avatar
Steinar Midtskogen committed
482 483 484
            &in[(by * CDEF_BSTRIDE << bsizey) + (bx << bsizex)],
            pli ? t : adjust_strength(t, var[by][bx]), dir[by][bx],
            pri_damping);
485
      }
Yaowu Xu's avatar
Yaowu Xu committed
486 487
    }
  }
488

Steinar Midtskogen's avatar
Steinar Midtskogen committed
489
  if (sec_strength) {
490
    if (threshold && !skip_dering)
Steinar Midtskogen's avatar
Steinar Midtskogen committed
491 492
      copy_block_16bit_to_16bit(in, CDEF_BSTRIDE, y, dlist, cdef_count, bsize);
    for (bi = 0; bi < cdef_count; bi++) {
493 494
      by = dlist[bi].by;
      bx = dlist[bi].bx;
495 496
      int py = by << bsizey;
      int px = bx << bsizex;
497

498
      if (!filter_skip && dlist[bi].skip) continue;
499 500 501 502
      if (!dst || hbd) {
        // 16 bit destination if high bitdepth or 8 bit destination not given
        (!threshold || (dir[by][bx] < 4 && dir[by][bx]) ? aom_clpf_block_hbd
                                                        : aom_clpf_hblock_hbd)(
503 504
            dst ? (uint16_t *)dst + py * dstride + px
                : &y[bi << (bsizex + bsizey)],
Steinar Midtskogen's avatar
Steinar Midtskogen committed
505 506 507
            in + py * CDEF_BSTRIDE + px, dst && hbd ? dstride : 1 << bsizex,
            CDEF_BSTRIDE, 1 << bsizex, 1 << bsizey, sec_strength << coeff_shift,
            sec_damping);
508 509 510 511
      } else {
        // Do clpf and write the result to an 8 bit destination
        (!threshold || (dir[by][bx] < 4 && dir[by][bx]) ? aom_clpf_block
                                                        : aom_clpf_hblock)(
Steinar Midtskogen's avatar
Steinar Midtskogen committed
512 513 514
            dst + py * dstride + px, in + py * CDEF_BSTRIDE + px, dstride,
            CDEF_BSTRIDE, 1 << bsizex, 1 << bsizey, sec_strength << coeff_shift,
            sec_damping);
515
      }
516
    }
517
  } else if (threshold != 0) {
518
    // No clpf, so copy instead
519
    if (hbd) {
Steinar Midtskogen's avatar
Steinar Midtskogen committed
520 521
      copy_block_16bit_to_16bit((uint16_t *)dst, dstride, y, dlist, cdef_count,
                                bsize);
522
    } else {
Steinar Midtskogen's avatar
Steinar Midtskogen committed
523
      copy_block_16bit_to_8bit(dst, dstride, y, dlist, cdef_count, bsize);
524
    }
525 526 527 528
  } else if (dirinit) {
    // If we're here, both dering and clpf are off, and we still haven't written
    // anything to y[] yet, so we just copy the input to y[]. This is necessary
    // only for av1_cdef_search() and only av1_cdef_search() sets dirinit.
Steinar Midtskogen's avatar
Steinar Midtskogen committed
529
    for (bi = 0; bi < cdef_count; bi++) {
530 531
      by = dlist[bi].by;
      bx = dlist[bi].bx;
532
#endif
533 534 535 536
      int iy, ix;
      // TODO(stemidts/jmvalin): SIMD optimisations
      for (iy = 0; iy < 1 << bsizey; iy++)
        for (ix = 0; ix < 1 << bsizex; ix++)
537 538 539
#if CONFIG_CDEF_SINGLEPASS
          dst16[(bi << (bsizex + bsizey)) + (iy << bsizex) + ix] =
#else
540
          y[(bi << (bsizex + bsizey)) + (iy << bsizex) + ix] =
541
#endif
Steinar Midtskogen's avatar
Steinar Midtskogen committed
542
              in[((by << bsizey) + iy) * CDEF_BSTRIDE + (bx << bsizex) + ix];
543
    }
544 545 546
#if CONFIG_CDEF_SINGLEPASS
    return;
#endif
547
  }
548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575

#if CONFIG_CDEF_SINGLEPASS
  if (pli == 0) {
    if (!dirinit || !*dirinit) {
      for (bi = 0; bi < cdef_count; bi++) {
        by = dlist[bi].by;
        bx = dlist[bi].bx;
        dir[by][bx] = cdef_find_dir(&in[8 * by * CDEF_BSTRIDE + 8 * bx],
                                    CDEF_BSTRIDE, &var[by][bx], coeff_shift);
      }
      if (dirinit) *dirinit = 1;
    }
  }

  assert(bsize == BLOCK_8X8 || bsize == BLOCK_4X4);
  for (bi = 0; bi < cdef_count; bi++) {
    int t = !filter_skip && dlist[bi].skip ? 0 : pri_strength;
    int s = !filter_skip && dlist[bi].skip ? 0 : sec_strength;
    by = dlist[bi].by;
    bx = dlist[bi].bx;
    if (dst8)
      cdef_filter_block(
          &dst8[(by << bsizey) * dstride + (bx << bsizex)], NULL, dstride,
          &in[(by * CDEF_BSTRIDE << bsizey) + (bx << bsizex)],
          (pli ? t : adjust_strength(t, var[by][bx])), s, t ? dir[by][bx] : 0,
          pri_damping, sec_damping, bsize, (256 << coeff_shift) - 1);
    else
      cdef_filter_block(
clang-format's avatar
clang-format committed
576 577 578
          NULL,
          &dst16[dirinit ? bi << (bsizex + bsizey)
                         : (by << bsizey) * dstride + (bx << bsizex)],
579 580 581 582 583 584
          dirinit ? 1 << bsizex : dstride,
          &in[(by * CDEF_BSTRIDE << bsizey) + (bx << bsizex)],
          (pli ? t : adjust_strength(t, var[by][bx])), s, t ? dir[by][bx] : 0,
          pri_damping, sec_damping, bsize, (256 << coeff_shift) - 1);
  }
#endif
Yaowu Xu's avatar
Yaowu Xu committed
585
}