/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <math.h>
#include <stdlib.h>

Yaowu Xu's avatar
Yaowu Xu committed
15
#ifdef HAVE_CONFIG_H
16
#include "./config.h"
Yaowu Xu's avatar
Yaowu Xu committed
17 18
#endif

19
#include "./aom_dsp_rtcd.h"
20
#include "./av1_rtcd.h"
21
#include "./cdef.h"
Yaowu Xu's avatar
Yaowu Xu committed
22 23

/* Generated from gen_filter_tables.c. */
24
#if !CONFIG_CDEF_SINGLEPASS || CDEF_FULL
Steinar Midtskogen's avatar
Steinar Midtskogen committed
25 26 27 28 29 30 31 32 33
const int cdef_directions[8][3] = {
  { -1 * CDEF_BSTRIDE + 1, -2 * CDEF_BSTRIDE + 2, -3 * CDEF_BSTRIDE + 3 },
  { 0 * CDEF_BSTRIDE + 1, -1 * CDEF_BSTRIDE + 2, -1 * CDEF_BSTRIDE + 3 },
  { 0 * CDEF_BSTRIDE + 1, 0 * CDEF_BSTRIDE + 2, 0 * CDEF_BSTRIDE + 3 },
  { 0 * CDEF_BSTRIDE + 1, 1 * CDEF_BSTRIDE + 2, 1 * CDEF_BSTRIDE + 3 },
  { 1 * CDEF_BSTRIDE + 1, 2 * CDEF_BSTRIDE + 2, 3 * CDEF_BSTRIDE + 3 },
  { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 1, 3 * CDEF_BSTRIDE + 1 },
  { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 0, 3 * CDEF_BSTRIDE + 0 },
  { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE - 1, 3 * CDEF_BSTRIDE - 1 }
Yaowu Xu's avatar
Yaowu Xu committed
34
};
35 36 37 38 39 40 41 42 43 44 45 46
#else
const int cdef_directions[8][2] = {
  { -1 * CDEF_BSTRIDE + 1, -2 * CDEF_BSTRIDE + 2 },
  { 0 * CDEF_BSTRIDE + 1, -1 * CDEF_BSTRIDE + 2 },
  { 0 * CDEF_BSTRIDE + 1, 0 * CDEF_BSTRIDE + 2 },
  { 0 * CDEF_BSTRIDE + 1, 1 * CDEF_BSTRIDE + 2 },
  { 1 * CDEF_BSTRIDE + 1, 2 * CDEF_BSTRIDE + 2 },
  { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 1 },
  { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 0 },
  { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE - 1 }
};
#endif
Yaowu Xu's avatar
Yaowu Xu committed
47 48 49 50 51 52 53 54

/* Detect direction. 0 means 45-degree up-right, 2 is horizontal, and so on.
   The search minimizes the weighted variance along all the lines in a
   particular direction, i.e. the squared error between the input and a
   "predicted" block where each pixel is replaced by the average along a line
   in a particular direction. Since each direction has the same sum(x^2) term,
   that term is never computed. See Section 2, step 2, of:
   http://jmvalin.ca/notes/intra_paint.pdf

   img:         top-left sample of the 8x8 block to analyse.
   stride:      distance, in samples, between two rows of img.
   var:         output; receives (cost of the best direction minus cost of
                the direction 4 steps away from it) >> 10.
   coeff_shift: right shift applied to every sample before it is used.

   Returns the index (0..7) of the direction with the largest cost. Ties go
   to the lowest index, and 0 is returned when no cost is positive. */
int cdef_find_dir_c(const uint16_t *img, int stride, int32_t *var,
                    int coeff_shift) {
  int i;
  int32_t cost[8] = { 0 };
  /* partial[d][l] is the sum of the samples lying on line l of direction d. */
  int partial[8][15] = { { 0 } };
  int32_t best_cost = 0;
  int best_dir = 0;
  /* Instead of dividing by n between 2 and 8, we multiply by 3*5*7*8/n.
     The output is then 840 times larger, but we don't care for finding
     the max. */
  static const int div_table[] = { 0, 840, 420, 280, 210, 168, 140, 120, 105 };
  for (i = 0; i < 8; i++) {
    int j;
    for (j = 0; j < 8; j++) {
      int x;
      /* We subtract 128 here to reduce the maximum range of the squared
         partial sums. */
      x = (img[i * stride + j] >> coeff_shift) - 128;
      partial[0][i + j] += x;
      partial[1][i + j / 2] += x;
      partial[2][i] += x;
      partial[3][3 + i - j / 2] += x;
      partial[4][7 + i - j] += x;
      partial[5][3 - i / 2 + j] += x;
      partial[6][j] += x;
      partial[7][i / 2 + j] += x;
    }
  }
  /* Directions 2 and 6 (rows and columns): eight lines of eight samples. */
  for (i = 0; i < 8; i++) {
    cost[2] += partial[2][i] * partial[2][i];
    cost[6] += partial[6][i] * partial[6][i];
  }
  cost[2] *= div_table[8];
  cost[6] *= div_table[8];
  /* Directions 0 and 4 (diagonals): lines i and 14 - i are weighted with
     div_table[i + 1]; the middle line (index 7) with div_table[8]. */
  for (i = 0; i < 7; i++) {
    cost[0] += (partial[0][i] * partial[0][i] +
                partial[0][14 - i] * partial[0][14 - i]) *
               div_table[i + 1];
    cost[4] += (partial[4][i] * partial[4][i] +
                partial[4][14 - i] * partial[4][14 - i]) *
               div_table[i + 1];
  }
  cost[0] += partial[0][7] * partial[0][7] * div_table[8];
  cost[4] += partial[4][7] * partial[4][7] * div_table[8];
  /* Odd directions: lines 3..7 are weighted with div_table[8]; the outer
     lines j and 10 - j with div_table[2 * j + 2]. */
  for (i = 1; i < 8; i += 2) {
    int j;
    for (j = 0; j < 4 + 1; j++) {
      cost[i] += partial[i][3 + j] * partial[i][3 + j];
    }
    cost[i] *= div_table[8];
    for (j = 0; j < 4 - 1; j++) {
      cost[i] += (partial[i][j] * partial[i][j] +
                  partial[i][10 - j] * partial[i][10 - j]) *
                 div_table[2 * j + 2];
    }
  }
  /* Strict comparison: the first direction reaching the maximum wins. */
  for (i = 0; i < 8; i++) {
    if (cost[i] > best_cost) {
      best_cost = cost[i];
      best_dir = i;
    }
  }
  /* Difference between the optimal variance and the variance along the
     orthogonal direction. Again, the sum(x^2) terms cancel out. */
  *var = best_cost - cost[(best_dir + 4) & 7];
  /* We'd normally divide by 840, but dividing by 1024 is close enough
     for what we're going to do with this. */
  *var >>= 10;
  return best_dir;
}

126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213
#if CONFIG_CDEF_SINGLEPASS
/* Filter tap weights. cdef_filter_block_c() selects the row of both tables
   with (pri_strength & 1); the column is the tap index along the direction. */
#if CDEF_FULL
/* Three primary taps and two secondary taps per row. */
const int cdef_pri_taps[2][3] = { { 3, 2, 1 }, { 2, 2, 2 } };
const int cdef_sec_taps[2][2] = { { 3, 1 }, { 3, 1 } };
#else
/* Two primary taps and two secondary taps per row. */
const int cdef_pri_taps[2][2] = { { 4, 2 }, { 3, 3 } };
const int cdef_sec_taps[2][2] = { { 2, 1 }, { 2, 1 } };
#endif

/* Smooth in the direction detected. */
#if CDEF_CAP
void cdef_filter_block_c(uint8_t *dst8, uint16_t *dst16, int dstride,
                         const uint16_t *in, int pri_strength, int sec_strength,
                         int dir, int pri_damping, int sec_damping, int bsize,
                         UNUSED int max_unused)
#else
void cdef_filter_block_c(uint8_t *dst8, uint16_t *dst16, int dstride,
                         const uint16_t *in, int pri_strength, int sec_strength,
                         int dir, int pri_damping, int sec_damping, int bsize,
                         int max)
#endif
{
  int i, j, k;
  const int s = CDEF_BSTRIDE;
  const int *pri_taps = cdef_pri_taps[pri_strength & 1];
  const int *sec_taps = cdef_sec_taps[pri_strength & 1];
  for (i = 0; i < 4 << (bsize == BLOCK_8X8); i++) {
    for (j = 0; j < 4 << (bsize == BLOCK_8X8); j++) {
      int16_t sum = 0;
      int16_t y;
      int16_t x = in[i * s + j];
#if CDEF_CAP
      int max = x;
      int min = x;
#endif
#if CDEF_FULL
      for (k = 0; k < 3; k++)
#else
      for (k = 0; k < 2; k++)
#endif
      {
        int16_t p0 = in[i * s + j + cdef_directions[dir][k]];
        int16_t p1 = in[i * s + j - cdef_directions[dir][k]];
        sum += pri_taps[k] * constrain(p0 - x, pri_strength, pri_damping);
        sum += pri_taps[k] * constrain(p1 - x, pri_strength, pri_damping);
#if CDEF_CAP
        if (p0 != CDEF_VERY_LARGE) max = AOMMAX(p0, max);
        if (p1 != CDEF_VERY_LARGE) max = AOMMAX(p1, max);
        min = AOMMIN(p0, min);
        min = AOMMIN(p1, min);
#endif
#if CDEF_FULL
        if (k == 2) continue;
#endif
        int16_t s0 = in[i * s + j + cdef_directions[(dir + 2) & 7][k]];
        int16_t s1 = in[i * s + j - cdef_directions[(dir + 2) & 7][k]];
        int16_t s2 = in[i * s + j + cdef_directions[(dir + 6) & 7][k]];
        int16_t s3 = in[i * s + j - cdef_directions[(dir + 6) & 7][k]];
#if CDEF_CAP
        if (s0 != CDEF_VERY_LARGE) max = AOMMAX(s0, max);
        if (s1 != CDEF_VERY_LARGE) max = AOMMAX(s1, max);
        if (s2 != CDEF_VERY_LARGE) max = AOMMAX(s2, max);
        if (s3 != CDEF_VERY_LARGE) max = AOMMAX(s3, max);
        min = AOMMIN(s0, min);
        min = AOMMIN(s1, min);
        min = AOMMIN(s2, min);
        min = AOMMIN(s3, min);
#endif
        sum += sec_taps[k] * constrain(s0 - x, sec_strength, sec_damping);
        sum += sec_taps[k] * constrain(s1 - x, sec_strength, sec_damping);
        sum += sec_taps[k] * constrain(s2 - x, sec_strength, sec_damping);
        sum += sec_taps[k] * constrain(s3 - x, sec_strength, sec_damping);
      }
#if CDEF_CAP
      y = clamp((int16_t)x + ((8 + sum - (sum < 0)) >> 4), min, max);
#else
      y = clamp((int16_t)x + ((8 + sum - (sum < 0)) >> 4), 0, max);
#endif
      if (dst8)
        dst8[i * dstride + j] = (uint8_t)y;
      else
        dst16[i * dstride + j] = (uint16_t)y;
    }
  }
}

#else

Yaowu Xu's avatar
Yaowu Xu committed
214
/* Smooth in the direction detected. */
Steinar Midtskogen's avatar
Steinar Midtskogen committed
215 216
void cdef_direction_8x8_c(uint16_t *y, int ystride, const uint16_t *in,
                          int threshold, int dir, int damping) {
Yaowu Xu's avatar
Yaowu Xu committed
217 218 219
  int i;
  int j;
  int k;
220
  static const int taps[3] = { 3, 2, 1 };
221 222
  for (i = 0; i < 8; i++) {
    for (j = 0; j < 8; j++) {
Yaowu Xu's avatar
Yaowu Xu committed
223 224 225
      int16_t sum;
      int16_t xx;
      int16_t yy;
Steinar Midtskogen's avatar
Steinar Midtskogen committed
226
      xx = in[i * CDEF_BSTRIDE + j];
clang-format's avatar
clang-format committed
227
      sum = 0;
Yaowu Xu's avatar
Yaowu Xu committed
228 229 230
      for (k = 0; k < 3; k++) {
        int16_t p0;
        int16_t p1;
Steinar Midtskogen's avatar
Steinar Midtskogen committed
231 232
        p0 = in[i * CDEF_BSTRIDE + j + cdef_directions[dir][k]] - xx;
        p1 = in[i * CDEF_BSTRIDE + j - cdef_directions[dir][k]] - xx;
233 234
        sum += taps[k] * constrain(p0, threshold, damping);
        sum += taps[k] * constrain(p1, threshold, damping);
Yaowu Xu's avatar
Yaowu Xu committed
235
      }
236 237
      sum = (sum + 8) >> 4;
      yy = xx + sum;
clang-format's avatar
clang-format committed
238
      y[i * ystride + j] = yy;
Yaowu Xu's avatar
Yaowu Xu committed
239 240 241 242
    }
  }
}

243
/* Smooth in the direction detected. */
Steinar Midtskogen's avatar
Steinar Midtskogen committed
244 245
void cdef_direction_4x4_c(uint16_t *y, int ystride, const uint16_t *in,
                          int threshold, int dir, int damping) {
246 247 248 249 250 251 252 253 254
  int i;
  int j;
  int k;
  static const int taps[2] = { 4, 1 };
  for (i = 0; i < 4; i++) {
    for (j = 0; j < 4; j++) {
      int16_t sum;
      int16_t xx;
      int16_t yy;
Steinar Midtskogen's avatar
Steinar Midtskogen committed
255
      xx = in[i * CDEF_BSTRIDE + j];
256 257 258 259
      sum = 0;
      for (k = 0; k < 2; k++) {
        int16_t p0;
        int16_t p1;
Steinar Midtskogen's avatar
Steinar Midtskogen committed
260 261
        p0 = in[i * CDEF_BSTRIDE + j + cdef_directions[dir][k]] - xx;
        p1 = in[i * CDEF_BSTRIDE + j - cdef_directions[dir][k]] - xx;
262 263
        sum += taps[k] * constrain(p0, threshold, damping);
        sum += taps[k] * constrain(p1, threshold, damping);
264
      }
265 266
      sum = (sum + 8) >> 4;
      yy = xx + sum;
267 268 269
      y[i * ystride + j] = yy;
    }
  }
Yaowu Xu's avatar
Yaowu Xu committed
270
}
271
#endif
Yaowu Xu's avatar
Yaowu Xu committed
272

Steinar Midtskogen's avatar
Steinar Midtskogen committed
273 274 275 276 277 278 279
/* Compute the primary filter strength for an 8x8 block based on the
   directional variance difference. A high variance difference means
   that we have a highly directional pattern (e.g. a high contrast
   edge), so we can apply more deringing. A low variance means that we
   either have a low contrast edge, or a non-directional texture, so
   we want to be careful not to blur.

   Returns 0 when var is 0. Otherwise returns
   (strength * (4 + i) + 8) >> 4, where i is 0 when (var >> 6) is 0 and
   min(get_msb(var >> 6), 12) otherwise, so the result lies between
   strength * 4 / 16 and strength * 16 / 16 (rounded).
   NOTE(review): get_msb() is defined elsewhere; presumably it returns the
   index of the highest set bit -- confirm. */
static INLINE int adjust_strength(int strength, int32_t var) {
  const int i = var >> 6 ? AOMMIN(get_msb(var >> 6), 12) : 0;
  /* We use the variance of 8x8 blocks to adjust the strength. */
  return var ? (strength * (4 + i) + 8) >> 4 : 0;
}

285
#if !CONFIG_CDEF_SINGLEPASS
286 287
/* Copy an 8x8 block of 16-bit samples from src (row stride sstride) to dst
   (row stride dstride). Strides are counted in samples. */
void copy_8x8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src,
                               int sstride) {
  int i, j;
  for (i = 0; i < 8; i++)
    for (j = 0; j < 8; j++) dst[i * dstride + j] = src[i * sstride + j];
}

293 294
/* Copy a 4x4 block of 16-bit samples from src (row stride sstride) to dst
   (row stride dstride). Strides are counted in samples. */
void copy_4x4_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src,
                               int sstride) {
  int i, j;
  for (i = 0; i < 4; i++)
    for (j = 0; j < 4; j++) dst[i * dstride + j] = src[i * sstride + j];
}

Steinar Midtskogen's avatar
Steinar Midtskogen committed
300 301 302
/* Scatter cdef_count filtered blocks from the packed buffer src into the
   16-bit image dst (row stride dstride).

   Block bi is stored contiguously in src, with a row stride equal to its
   own width, and is written to dst at block position
   (dlist[bi].by, dlist[bi].bx). bsize selects the block geometry:
   BLOCK_8X8, BLOCK_4X8 (4 wide, 8 high), BLOCK_8X4 (8 wide, 4 high) or
   BLOCK_4X4. The two non-square sizes are copied as a pair of 4x4 blocks.

   src is only read, so it is const-qualified like the 8-bit variant. */
static void copy_block_16bit_to_16bit(uint16_t *dst, int dstride,
                                      const uint16_t *src, cdef_list *dlist,
                                      int cdef_count, int bsize) {
  int bi, bx, by;

  if (bsize == BLOCK_8X8) {
    for (bi = 0; bi < cdef_count; bi++) {
      by = dlist[bi].by;
      bx = dlist[bi].bx;
      copy_8x8_16bit_to_16bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
                              &src[bi << (3 + 3)], 8);
    }
  } else if (bsize == BLOCK_4X8) {
    for (bi = 0; bi < cdef_count; bi++) {
      by = dlist[bi].by;
      bx = dlist[bi].bx;
      /* Upper 4x4 half, then the lower half four rows further down. */
      copy_4x4_16bit_to_16bit(&dst[(by << 3) * dstride + (bx << 2)], dstride,
                              &src[bi << (3 + 2)], 4);
      copy_4x4_16bit_to_16bit(&dst[((by << 3) + 4) * dstride + (bx << 2)],
                              dstride, &src[(bi << (3 + 2)) + 4 * 4], 4);
    }
  } else if (bsize == BLOCK_8X4) {
    for (bi = 0; bi < cdef_count; bi++) {
      by = dlist[bi].by;
      bx = dlist[bi].bx;
      /* Left 4x4 half, then the right half four columns further right. */
      copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 3)], dstride,
                              &src[bi << (2 + 3)], 8);
      copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 3) + 4],
                              dstride, &src[(bi << (2 + 3)) + 4], 8);
    }
  } else {
    assert(bsize == BLOCK_4X4);
    for (bi = 0; bi < cdef_count; bi++) {
      by = dlist[bi].by;
      bx = dlist[bi].bx;
      copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
                              &src[bi << (2 + 2)], 4);
    }
  }
}

341 342
/* Copy an 8x8 block from the 16-bit buffer src (row stride sstride) to the
   8-bit buffer dst (row stride dstride). Every sample is converted with a
   plain (uint8_t) cast, i.e. only its low 8 bits are kept. */
void copy_8x8_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src,
                              int sstride) {
  int i, j;
  for (i = 0; i < 8; i++)
    for (j = 0; j < 8; j++)
      dst[i * dstride + j] = (uint8_t)src[i * sstride + j];
}

349 350
/* Copy a 4x4 block from the 16-bit buffer src (row stride sstride) to the
   8-bit buffer dst (row stride dstride). Every sample is converted with a
   plain (uint8_t) cast, i.e. only its low 8 bits are kept. */
void copy_4x4_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src,
                              int sstride) {
  int i, j;
  for (i = 0; i < 4; i++)
    for (j = 0; j < 4; j++)
      dst[i * dstride + j] = (uint8_t)src[i * sstride + j];
}

Steinar Midtskogen's avatar
Steinar Midtskogen committed
357 358 359
/* Scatter cdef_count filtered blocks from the packed 16-bit buffer src into
   the 8-bit image dst (row stride dstride).

   Block bi is stored contiguously in src, with a row stride equal to its
   own width, and is written to dst at block position
   (dlist[bi].by, dlist[bi].bx). bsize selects the block geometry:
   BLOCK_8X8, BLOCK_4X8 (4 wide, 8 high), BLOCK_8X4 (8 wide, 4 high) or
   BLOCK_4X4. The two non-square sizes are copied as a pair of 4x4 blocks. */
static void copy_block_16bit_to_8bit(uint8_t *dst, int dstride,
                                     const uint16_t *src, cdef_list *dlist,
                                     int cdef_count, int bsize) {
  int bi, bx, by;
  if (bsize == BLOCK_8X8) {
    for (bi = 0; bi < cdef_count; bi++) {
      by = dlist[bi].by;
      bx = dlist[bi].bx;
      copy_8x8_16bit_to_8bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
                             &src[bi << (3 + 3)], 8);
    }
  } else if (bsize == BLOCK_4X8) {
    for (bi = 0; bi < cdef_count; bi++) {
      by = dlist[bi].by;
      bx = dlist[bi].bx;
      /* Upper 4x4 half, then the lower half four rows further down. */
      copy_4x4_16bit_to_8bit(&dst[(by << 3) * dstride + (bx << 2)], dstride,
                             &src[bi << (3 + 2)], 4);
      copy_4x4_16bit_to_8bit(&dst[((by << 3) + 4) * dstride + (bx << 2)],
                             dstride, &src[(bi << (3 + 2)) + 4 * 4], 4);
    }
  } else if (bsize == BLOCK_8X4) {
    for (bi = 0; bi < cdef_count; bi++) {
      by = dlist[bi].by;
      bx = dlist[bi].bx;
      /* Left 4x4 half, then the right half four columns further right. */
      copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 3)], dstride,
                             &src[bi << (2 + 3)], 8);
      copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 3) + 4], dstride,
                             &src[(bi << (2 + 3)) + 4], 8);
    }
  } else {
    assert(bsize == BLOCK_4X4);
    for (bi = 0; bi < cdef_count; bi++) {
      by = dlist[bi].by;
      bx = dlist[bi].bx;
      /* Shift written as log2(height) + log2(width), like the other
         branches. */
      copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
                             &src[bi << (2 + 2)], 4);
    }
  }
}

397 398 399 400 401 402
/* Returns the lowest bit of level, except that level 1 yields 0. */
int get_filter_skip(int level) {
  return (level == 1) ? 0 : (level & 1);
}

Steinar Midtskogen's avatar
Steinar Midtskogen committed
403 404 405 406 407 408
/* Filter the cdef_count blocks listed in dlist for plane pli.

   The preprocessor selects one of two signatures and bodies.

   Common to both builds:
     - `in` is read with row stride CDEF_BSTRIDE.
     - xdec / ydec select the block size: width is 1 << (3 - xdec) and
       height is 1 << (3 - ydec).
     - Both damping values are increased by coeff_shift and reduced by one
       for planes other than AOM_PLANE_Y.
     - For plane 0, the direction and variance of every listed block are
       computed with cdef_find_dir() into dir[][] and var[][], unless
       dirinit is non-NULL and *dirinit is already set; *dirinit is set
       afterwards when dirinit is non-NULL.
     - A block whose dlist entry has `skip` set is filtered only when
       filter_skip is non-zero.

   Without CONFIG_CDEF_SINGLEPASS (two passes):
     - threshold is (level >> 1) << coeff_shift, or 31 << coeff_shift when
       level == 1.
     - Unless skip_dering is set, and when threshold != 0, the directional
       filter writes each block into y[], packed back to back.
     - When sec_strength != 0, y[] is first copied back into in[] (only if
       the directional pass ran), then the clpf filter writes either to dst
       or, when dst is NULL, to y[]. With hbd set, dst is treated as a
       uint16_t buffer.
     - Otherwise, when threshold != 0, y[] is copied to dst.
     - Otherwise, when dirinit is non-NULL, in[] is copied to y[] unchanged.

   With CONFIG_CDEF_SINGLEPASS:
     - pri_strength is (level >> 1) << coeff_shift and filter_skip is
       level & 1; when both strengths are 0 and filter_skip is set, they
       become 19 << coeff_shift and 7 << coeff_shift.
     - When dirinit is non-NULL and both strengths are 0, in[] is copied to
       dst16 (packed) and the function returns.
     - Otherwise each block goes through cdef_filter_block(), writing to
       dst8 when it is non-NULL and to dst16 otherwise; dst16 is addressed
       as a packed buffer when dirinit is non-NULL and as an image with
       stride dstride when it is NULL. */
void cdef_filter_fb(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in,
                    int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],
                    int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli,
                    cdef_list *dlist, int cdef_count, int level,
                    int sec_strength, int sec_damping, int pri_damping,
                    int coeff_shift, int skip_dering, int hbd) {
#else

void cdef_filter_fb(uint8_t *dst8, uint16_t *dst16, int dstride, uint16_t *in,
                    int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],
                    int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli,
                    cdef_list *dlist, int cdef_count, int level,
                    int sec_strength, int pri_damping, int sec_damping,
                    int coeff_shift) {
#endif
  int bi;
  int bx;
  int by;
  int bsize, bsizex, bsizey;

#if CONFIG_CDEF_SINGLEPASS
  int pri_strength = (level >> 1) << coeff_shift;
  int filter_skip = level & 1;
  if (!pri_strength && !sec_strength && filter_skip) {
    pri_strength = 19 << coeff_shift;
    sec_strength = 7 << coeff_shift;
  }
#else
  int threshold = (level >> 1) << coeff_shift;
  int filter_skip = get_filter_skip(level);
  if (level == 1) threshold = 31 << coeff_shift;

  /* Indexed by (bsize == BLOCK_8X8). */
  cdef_direction_func cdef_direction[] = { cdef_direction_4x4,
                                           cdef_direction_8x8 };
#endif
  sec_damping += coeff_shift - (pli != AOM_PLANE_Y);
  pri_damping += coeff_shift - (pli != AOM_PLANE_Y);
  bsize =
      ydec ? (xdec ? BLOCK_4X4 : BLOCK_8X4) : (xdec ? BLOCK_4X8 : BLOCK_8X8);
  /* log2 of the block width and height. */
  bsizex = 3 - xdec;
  bsizey = 3 - ydec;
#if CONFIG_CDEF_SINGLEPASS
  if (dirinit && pri_strength == 0 && sec_strength == 0)
#else
  if (!skip_dering)
#endif
  {
#if CONFIG_CDEF_SINGLEPASS
    // If we're here, both primary and secondary strengths are 0, and
    // we still haven't written anything to y[] yet, so we just copy
    // the input to y[]. This is necessary only for av1_cdef_search()
    // and only av1_cdef_search() sets dirinit.
    for (bi = 0; bi < cdef_count; bi++) {
      by = dlist[bi].by;
      bx = dlist[bi].bx;
#else
    if (pli == 0) {
      if (!dirinit || !*dirinit) {
        for (bi = 0; bi < cdef_count; bi++) {
          by = dlist[bi].by;
          bx = dlist[bi].bx;
          dir[by][bx] = cdef_find_dir(&in[8 * by * CDEF_BSTRIDE + 8 * bx],
                                      CDEF_BSTRIDE, &var[by][bx], coeff_shift);
        }
        if (dirinit) *dirinit = 1;
      }
    }
    // Only run dering for non-zero threshold (which is always the case for
    // 4:2:2 or 4:4:0). If we don't dering, we still need to eventually write
    // something out in y[] later.
    if (threshold != 0) {
      assert(bsize == BLOCK_8X8 || bsize == BLOCK_4X4);
      for (bi = 0; bi < cdef_count; bi++) {
        int t = !filter_skip && dlist[bi].skip ? 0 : threshold;
        by = dlist[bi].by;
        bx = dlist[bi].bx;
        (cdef_direction[bsize == BLOCK_8X8])(
            &y[bi << (bsizex + bsizey)], 1 << bsizex,
            &in[(by * CDEF_BSTRIDE << bsizey) + (bx << bsizex)],
            pli ? t : adjust_strength(t, var[by][bx]), dir[by][bx],
            pri_damping);
      }
    }
  }

  if (sec_strength) {
    if (threshold && !skip_dering)
      copy_block_16bit_to_16bit(in, CDEF_BSTRIDE, y, dlist, cdef_count, bsize);
    for (bi = 0; bi < cdef_count; bi++) {
      by = dlist[bi].by;
      bx = dlist[bi].bx;
      int py = by << bsizey;
      int px = bx << bsizex;

      if (!filter_skip && dlist[bi].skip) continue;
      if (!dst || hbd) {
        // 16 bit destination if high bitdepth or 8 bit destination not given
        (!threshold || (dir[by][bx] < 4 && dir[by][bx]) ? aom_clpf_block_hbd
                                                        : aom_clpf_hblock_hbd)(
            dst ? (uint16_t *)dst + py * dstride + px
                : &y[bi << (bsizex + bsizey)],
            in + py * CDEF_BSTRIDE + px, dst && hbd ? dstride : 1 << bsizex,
            CDEF_BSTRIDE, 1 << bsizex, 1 << bsizey, sec_strength << coeff_shift,
            sec_damping);
      } else {
        // Do clpf and write the result to an 8 bit destination
        (!threshold || (dir[by][bx] < 4 && dir[by][bx]) ? aom_clpf_block
                                                        : aom_clpf_hblock)(
            dst + py * dstride + px, in + py * CDEF_BSTRIDE + px, dstride,
            CDEF_BSTRIDE, 1 << bsizex, 1 << bsizey, sec_strength << coeff_shift,
            sec_damping);
      }
    }
  } else if (threshold != 0) {
    // No clpf, so copy instead
    if (hbd) {
      copy_block_16bit_to_16bit((uint16_t *)dst, dstride, y, dlist, cdef_count,
                                bsize);
    } else {
      copy_block_16bit_to_8bit(dst, dstride, y, dlist, cdef_count, bsize);
    }
  } else if (dirinit) {
    // If we're here, both dering and clpf are off, and we still haven't written
    // anything to y[] yet, so we just copy the input to y[]. This is necessary
    // only for av1_cdef_search() and only av1_cdef_search() sets dirinit.
    for (bi = 0; bi < cdef_count; bi++) {
      by = dlist[bi].by;
      bx = dlist[bi].bx;
#endif
      int iy, ix;
      // TODO(stemidts/jmvalin): SIMD optimisations
      for (iy = 0; iy < 1 << bsizey; iy++)
        for (ix = 0; ix < 1 << bsizex; ix++)
#if CONFIG_CDEF_SINGLEPASS
          dst16[(bi << (bsizex + bsizey)) + (iy << bsizex) + ix] =
#else
          y[(bi << (bsizex + bsizey)) + (iy << bsizex) + ix] =
#endif
              in[((by << bsizey) + iy) * CDEF_BSTRIDE + (bx << bsizex) + ix];
    }
#if CONFIG_CDEF_SINGLEPASS
    return;
#endif
  }

#if CONFIG_CDEF_SINGLEPASS
  if (pli == 0) {
    if (!dirinit || !*dirinit) {
      for (bi = 0; bi < cdef_count; bi++) {
        by = dlist[bi].by;
        bx = dlist[bi].bx;
        dir[by][bx] = cdef_find_dir(&in[8 * by * CDEF_BSTRIDE + 8 * bx],
                                    CDEF_BSTRIDE, &var[by][bx], coeff_shift);
      }
      if (dirinit) *dirinit = 1;
    }
  }

  assert(bsize == BLOCK_8X8 || bsize == BLOCK_4X4);
  for (bi = 0; bi < cdef_count; bi++) {
    /* Skipped blocks get zero strengths unless filter_skip is set. */
    int t = !filter_skip && dlist[bi].skip ? 0 : pri_strength;
    int s = !filter_skip && dlist[bi].skip ? 0 : sec_strength;
    by = dlist[bi].by;
    bx = dlist[bi].bx;
    if (dst8)
      cdef_filter_block(
          &dst8[(by << bsizey) * dstride + (bx << bsizex)], NULL, dstride,
          &in[(by * CDEF_BSTRIDE << bsizey) + (bx << bsizex)],
          (pli ? t : adjust_strength(t, var[by][bx])), s, t ? dir[by][bx] : 0,
          pri_damping, sec_damping, bsize, (256 << coeff_shift) - 1);
    else
      cdef_filter_block(
          NULL, &dst16[dirinit ? bi << (bsizex + bsizey)
                               : (by << bsizey) * dstride + (bx << bsizex)],
          dirinit ? 1 << bsizex : dstride,
          &in[(by * CDEF_BSTRIDE << bsizey) + (bx << bsizex)],
          (pli ? t : adjust_strength(t, var[by][bx])), s, t ? dir[by][bx] : 0,
          pri_damping, sec_damping, bsize, (256 << coeff_shift) - 1);
  }
#endif
}