cdef_block.c 11.6 KB
Newer Older
Yaowu Xu's avatar
Yaowu Xu committed
1 2 3 4 5 6 7 8 9 10
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
11 12 13 14

#include <math.h>
#include <stdlib.h>

Yaowu Xu's avatar
Yaowu Xu committed
15
#ifdef HAVE_CONFIG_H
16
#include "./config.h"
Yaowu Xu's avatar
Yaowu Xu committed
17 18
#endif

19
#include "./aom_dsp_rtcd.h"
20
#include "./av1_rtcd.h"
21
#include "./cdef.h"
Yaowu Xu's avatar
Yaowu Xu committed
22 23

/* Generated from gen_filter_tables.c. */
24
#if CDEF_FULL
Yaowu Xu's avatar
Yaowu Xu committed
25
DECLARE_ALIGNED(16, const int, cdef_directions[8][3]) = {
Steinar Midtskogen's avatar
Steinar Midtskogen committed
26 27 28 29 30 31 32 33
  { -1 * CDEF_BSTRIDE + 1, -2 * CDEF_BSTRIDE + 2, -3 * CDEF_BSTRIDE + 3 },
  { 0 * CDEF_BSTRIDE + 1, -1 * CDEF_BSTRIDE + 2, -1 * CDEF_BSTRIDE + 3 },
  { 0 * CDEF_BSTRIDE + 1, 0 * CDEF_BSTRIDE + 2, 0 * CDEF_BSTRIDE + 3 },
  { 0 * CDEF_BSTRIDE + 1, 1 * CDEF_BSTRIDE + 2, 1 * CDEF_BSTRIDE + 3 },
  { 1 * CDEF_BSTRIDE + 1, 2 * CDEF_BSTRIDE + 2, 3 * CDEF_BSTRIDE + 3 },
  { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 1, 3 * CDEF_BSTRIDE + 1 },
  { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 0, 3 * CDEF_BSTRIDE + 0 },
  { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE - 1, 3 * CDEF_BSTRIDE - 1 }
Yaowu Xu's avatar
Yaowu Xu committed
34
};
35
#else
Yaowu Xu's avatar
Yaowu Xu committed
36
DECLARE_ALIGNED(16, const int, cdef_directions[8][2]) = {
37 38 39 40 41 42 43 44 45 46
  { -1 * CDEF_BSTRIDE + 1, -2 * CDEF_BSTRIDE + 2 },
  { 0 * CDEF_BSTRIDE + 1, -1 * CDEF_BSTRIDE + 2 },
  { 0 * CDEF_BSTRIDE + 1, 0 * CDEF_BSTRIDE + 2 },
  { 0 * CDEF_BSTRIDE + 1, 1 * CDEF_BSTRIDE + 2 },
  { 1 * CDEF_BSTRIDE + 1, 2 * CDEF_BSTRIDE + 2 },
  { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 1 },
  { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 0 },
  { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE - 1 }
};
#endif
Yaowu Xu's avatar
Yaowu Xu committed
47 48 49 50 51 52 53 54

/* Detect direction. 0 means 45-degree up-right, 2 is horizontal, and so on.
   The search minimizes the weighted variance along all the lines in a
   particular direction, i.e. the squared error between the input and a
   "predicted" block where each pixel is replaced by the average along a line
   in a particular direction. Since each direction has the same sum(x^2) term,
   that term is never computed. See Section 2, step 2, of:
   http://jmvalin.ca/notes/intra_paint.pdf

   img          top-left sample of the 8x8 block to analyse
   stride       distance, in samples, between vertically adjacent rows of img
   var          output: (best cost - cost of the orthogonal direction) >> 10
   coeff_shift  right shift applied to every sample before 128 is subtracted
                (presumably bit depth - 8; confirm against callers)

   Returns the index (0..7) of the direction with the highest cost. The
   comparison is strict, so ties go to the lowest index and 0 is returned
   when no cost is positive. */
int cdef_find_dir_c(const uint16_t *img, int stride, int32_t *var,
                    int coeff_shift) {
  int i;
  int32_t cost[8] = { 0 };
  /* partial[d][k] accumulates the samples lying on line k of direction d. */
  int partial[8][15] = { { 0 } };
  int32_t best_cost = 0;
  int best_dir = 0;
  /* Instead of dividing by n between 2 and 8, we multiply by 3*5*7*8/n.
     The output is then 840 times larger, but we don't care for finding
     the max. */
  static const int div_table[] = { 0, 840, 420, 280, 210, 168, 140, 120, 105 };
  for (i = 0; i < 8; i++) {
    int j;
    for (j = 0; j < 8; j++) {
      int x;
      /* We subtract 128 here to reduce the maximum range of the squared
         partial sums. */
      x = (img[i * stride + j] >> coeff_shift) - 128;
      partial[0][i + j] += x;
      partial[1][i + j / 2] += x;
      partial[2][i] += x;
      partial[3][3 + i - j / 2] += x;
      partial[4][7 + i - j] += x;
      partial[5][3 - i / 2 + j] += x;
      partial[6][j] += x;
      partial[7][i / 2 + j] += x;
    }
  }
  /* Directions 2 and 6 (rows and columns): eight lines of eight samples. */
  for (i = 0; i < 8; i++) {
    cost[2] += partial[2][i] * partial[2][i];
    cost[6] += partial[6][i] * partial[6][i];
  }
  cost[2] *= div_table[8];
  cost[6] *= div_table[8];
  /* Directions 0 and 4 (diagonals): lines i and 14 - i both hold i + 1
     samples; the centre line (index 7) holds eight. */
  for (i = 0; i < 7; i++) {
    cost[0] += (partial[0][i] * partial[0][i] +
                partial[0][14 - i] * partial[0][14 - i]) *
               div_table[i + 1];
    cost[4] += (partial[4][i] * partial[4][i] +
                partial[4][14 - i] * partial[4][14 - i]) *
               div_table[i + 1];
  }
  cost[0] += partial[0][7] * partial[0][7] * div_table[8];
  cost[4] += partial[4][7] * partial[4][7] * div_table[8];
  /* Odd directions: lines 3..7 hold eight samples each; lines j and 10 - j
     (j = 0..2) hold 2 * j + 2 samples each. */
  for (i = 1; i < 8; i += 2) {
    int j;
    for (j = 0; j < 4 + 1; j++) {
      cost[i] += partial[i][3 + j] * partial[i][3 + j];
    }
    cost[i] *= div_table[8];
    for (j = 0; j < 4 - 1; j++) {
      cost[i] += (partial[i][j] * partial[i][j] +
                  partial[i][10 - j] * partial[i][10 - j]) *
                 div_table[2 * j + 2];
    }
  }
  for (i = 0; i < 8; i++) {
    if (cost[i] > best_cost) {
      best_cost = cost[i];
      best_dir = i;
    }
  }
  /* Difference between the optimal variance and the variance along the
     orthogonal direction. Again, the sum(x^2) terms cancel out. */
  *var = best_cost - cost[(best_dir + 4) & 7];
  /* We'd normally divide by 840, but dividing by 1024 is close enough
     for what we're going to do with this. */
  *var >>= 10;
  return best_dir;
}

126 127 128 129 130 131 132 133 134 135 136 137 138
/* Filter tap weights. The row is picked by the low bit of the primary
   strength after it has been shifted down by coeff_shift; the column is the
   tap number k used to index cdef_directions. */
#if CDEF_FULL
const int cdef_pri_taps[2][3] = {
  { 3, 2, 1 }, /* low bit clear */
  { 2, 2, 2 }  /* low bit set */
};
const int cdef_sec_taps[2][2] = {
  { 3, 1 },
  { 3, 1 }
};
#else
const int cdef_pri_taps[2][2] = {
  { 4, 2 }, /* low bit clear */
  { 3, 3 }  /* low bit set */
};
const int cdef_sec_taps[2][2] = {
  { 2, 1 },
  { 2, 1 }
};
#endif

/* Smooth in the direction detected. */
#if CDEF_CAP
void cdef_filter_block_c(uint8_t *dst8, uint16_t *dst16, int dstride,
                         const uint16_t *in, int pri_strength, int sec_strength,
                         int dir, int pri_damping, int sec_damping, int bsize,
139
                         AOM_UNUSED int max_unused, int coeff_shift)
140 141 142 143
#else
void cdef_filter_block_c(uint8_t *dst8, uint16_t *dst16, int dstride,
                         const uint16_t *in, int pri_strength, int sec_strength,
                         int dir, int pri_damping, int sec_damping, int bsize,
144
                         int max, int coeff_shift)
145 146 147 148
#endif
{
  int i, j, k;
  const int s = CDEF_BSTRIDE;
149 150
  const int *pri_taps = cdef_pri_taps[(pri_strength >> coeff_shift) & 1];
  const int *sec_taps = cdef_sec_taps[(pri_strength >> coeff_shift) & 1];
151 152
  for (i = 0; i < 4 << (bsize == BLOCK_8X8 || bsize == BLOCK_4X8); i++) {
    for (j = 0; j < 4 << (bsize == BLOCK_8X8 || bsize == BLOCK_8X4); j++) {
153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210
      int16_t sum = 0;
      int16_t y;
      int16_t x = in[i * s + j];
#if CDEF_CAP
      int max = x;
      int min = x;
#endif
#if CDEF_FULL
      for (k = 0; k < 3; k++)
#else
      for (k = 0; k < 2; k++)
#endif
      {
        int16_t p0 = in[i * s + j + cdef_directions[dir][k]];
        int16_t p1 = in[i * s + j - cdef_directions[dir][k]];
        sum += pri_taps[k] * constrain(p0 - x, pri_strength, pri_damping);
        sum += pri_taps[k] * constrain(p1 - x, pri_strength, pri_damping);
#if CDEF_CAP
        if (p0 != CDEF_VERY_LARGE) max = AOMMAX(p0, max);
        if (p1 != CDEF_VERY_LARGE) max = AOMMAX(p1, max);
        min = AOMMIN(p0, min);
        min = AOMMIN(p1, min);
#endif
#if CDEF_FULL
        if (k == 2) continue;
#endif
        int16_t s0 = in[i * s + j + cdef_directions[(dir + 2) & 7][k]];
        int16_t s1 = in[i * s + j - cdef_directions[(dir + 2) & 7][k]];
        int16_t s2 = in[i * s + j + cdef_directions[(dir + 6) & 7][k]];
        int16_t s3 = in[i * s + j - cdef_directions[(dir + 6) & 7][k]];
#if CDEF_CAP
        if (s0 != CDEF_VERY_LARGE) max = AOMMAX(s0, max);
        if (s1 != CDEF_VERY_LARGE) max = AOMMAX(s1, max);
        if (s2 != CDEF_VERY_LARGE) max = AOMMAX(s2, max);
        if (s3 != CDEF_VERY_LARGE) max = AOMMAX(s3, max);
        min = AOMMIN(s0, min);
        min = AOMMIN(s1, min);
        min = AOMMIN(s2, min);
        min = AOMMIN(s3, min);
#endif
        sum += sec_taps[k] * constrain(s0 - x, sec_strength, sec_damping);
        sum += sec_taps[k] * constrain(s1 - x, sec_strength, sec_damping);
        sum += sec_taps[k] * constrain(s2 - x, sec_strength, sec_damping);
        sum += sec_taps[k] * constrain(s3 - x, sec_strength, sec_damping);
      }
#if CDEF_CAP
      y = clamp((int16_t)x + ((8 + sum - (sum < 0)) >> 4), min, max);
#else
      y = clamp((int16_t)x + ((8 + sum - (sum < 0)) >> 4), 0, max);
#endif
      if (dst8)
        dst8[i * dstride + j] = (uint8_t)y;
      else
        dst16[i * dstride + j] = (uint16_t)y;
    }
  }
}

Steinar Midtskogen's avatar
Steinar Midtskogen committed
211 212 213 214 215 216 217
/* Compute the primary filter strength for an 8x8 block based on the
   directional variance difference. A high variance difference means
   that we have a highly directional pattern (e.g. a high contrast
   edge), so we can apply more deringing. A low variance means that we
   either have a low contrast edge, or a non-directional texture, so
   we want to be careful not to blur.

   strength  nominal primary strength
   var       directional variance difference of the block

   Returns 0 when var is 0; otherwise strength scaled by (4 + i) / 16 with
   rounding, where i is get_msb(var >> 6) capped at 12, or 0 when var >> 6
   is 0. */
static INLINE int adjust_strength(int strength, int32_t var) {
  const int i = var >> 6 ? AOMMIN(get_msb(var >> 6), 12) : 0;
  /* We use the variance of 8x8 blocks to adjust the strength. */
  return var ? (strength * (4 + i) + 8) >> 4 : 0;
}

223 224 225 226 227 228
/* Run the CDEF filter over every block listed in dlist for one plane.

   dst8, dst16   output; dst8 is used when non-NULL, otherwise dst16
   dstride       output row pitch, used for the frame layout (see dirinit)
   in            input samples with row pitch CDEF_BSTRIDE
   xdec, ydec    horizontal / vertical subsampling of this plane; a block is
                 (8 >> xdec) samples wide and (8 >> ydec) samples high
   dir, var      per-block direction and variance; filled in here when
                 pli == 0 and directions have not been computed yet
   dirinit       optional flag. When non-NULL it is set to 1 once directions
                 have been computed, so later calls skip the search, and the
                 16-bit output is packed: block bi is stored contiguously at
                 dst16 + (bi << (bsizex + bsizey)) with a row pitch of one
                 block width. When NULL, blocks are written at their
                 (by, bx) position using dstride.
   pli           plane index; the strength is adapted by variance only for
                 plane 0
   dlist         list of blocks to process (by, bx, skip)
   cdef_count    number of entries in dlist
   level         primary strength before scaling by coeff_shift
   sec_strength  secondary strength before scaling by coeff_shift
   pri_damping, sec_damping     base damping; coeff_shift is added and 1 is
                 subtracted for planes other than AOM_PLANE_Y
   coeff_shift   left shift applied to both strengths and to the output
                 maximum */
void cdef_filter_fb(uint8_t *dst8, uint16_t *dst16, int dstride, uint16_t *in,
                    int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],
                    int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli,
                    cdef_list *dlist, int cdef_count, int level,
                    int sec_strength, int pri_damping, int sec_damping,
                    int coeff_shift) {
  int bi;
  int bx;
  int by;
  int bsize, bsizex, bsizey;

  int pri_strength = level << coeff_shift;
  sec_strength <<= coeff_shift;
  sec_damping += coeff_shift - (pli != AOM_PLANE_Y);
  pri_damping += coeff_shift - (pli != AOM_PLANE_Y);
  bsize =
      ydec ? (xdec ? BLOCK_4X4 : BLOCK_8X4) : (xdec ? BLOCK_4X8 : BLOCK_8X8);
  /* log2 of the block width and height in this plane. */
  bsizex = 3 - xdec;
  bsizey = 3 - ydec;
  if (dirinit && pri_strength == 0 && sec_strength == 0) {
    // If we're here, both primary and secondary strengths are 0, and
    // we still haven't written anything to y[] yet, so we just copy
    // the input to y[]. This is necessary only for av1_cdef_search()
    // and only av1_cdef_search() sets dirinit.
    for (bi = 0; bi < cdef_count; bi++) {
      by = dlist[bi].by;
      bx = dlist[bi].bx;
      int iy, ix;
      // TODO(stemidts/jmvalin): SIMD optimisations
      for (iy = 0; iy < 1 << bsizey; iy++)
        for (ix = 0; ix < 1 << bsizex; ix++)
          dst16[(bi << (bsizex + bsizey)) + (iy << bsizex) + ix] =
              in[((by << bsizey) + iy) * CDEF_BSTRIDE + (bx << bsizex) + ix];
    }
    return;
  }

  if (pli == 0) {
    /* Search directions on plane 0 unless a previous call already did. */
    if (!dirinit || !*dirinit) {
      for (bi = 0; bi < cdef_count; bi++) {
        by = dlist[bi].by;
        bx = dlist[bi].bx;
        dir[by][bx] = cdef_find_dir(&in[8 * by * CDEF_BSTRIDE + 8 * bx],
                                    CDEF_BSTRIDE, &var[by][bx], coeff_shift);
      }
      if (dirinit) *dirinit = 1;
    }
  }
  if (pli == 1 && xdec != ydec) {
    /* Subsampling differs between the two axes: remap the stored directions
       in place through a lookup table (conv422 when xdec is set, conv440
       otherwise). */
    for (bi = 0; bi < cdef_count; bi++) {
      static const int conv422[8] = { 7, 0, 2, 4, 5, 6, 6, 6 };
      static const int conv440[8] = { 1, 2, 2, 2, 3, 4, 6, 0 };
      by = dlist[bi].by;
      bx = dlist[bi].bx;
      dir[by][bx] = (xdec ? conv422 : conv440)[dir[by][bx]];
    }
  }

  for (bi = 0; bi < cdef_count; bi++) {
    /* Blocks flagged as skip are filtered with both strengths at zero. */
    const int pri = dlist[bi].skip ? 0 : pri_strength;
    const int sec = dlist[bi].skip ? 0 : sec_strength;
    by = dlist[bi].by;
    bx = dlist[bi].bx;

    /* Arguments shared by the 8-bit and 16-bit output paths. */
    const uint16_t *const src =
        &in[(by * CDEF_BSTRIDE << bsizey) + (bx << bsizex)];
    const int blk_pri = pli ? pri : adjust_strength(pri, var[by][bx]);
    const int blk_dir = pri ? dir[by][bx] : 0;
    const int max_value = (256 << coeff_shift) - 1;

    if (dst8) {
      cdef_filter_block(&dst8[(by << bsizey) * dstride + (bx << bsizex)], NULL,
                        dstride, src, blk_pri, sec, blk_dir, pri_damping,
                        sec_damping, bsize, max_value, coeff_shift);
    } else {
      /* Packed per-block layout when dirinit is set, frame layout otherwise. */
      uint16_t *const out =
          &dst16[dirinit ? bi << (bsizex + bsizey)
                         : (by << bsizey) * dstride + (bx << bsizex)];
      const int out_stride = dirinit ? 1 << bsizex : dstride;
      cdef_filter_block(NULL, out, out_stride, src, blk_pri, sec, blk_dir,
                        pri_damping, sec_damping, bsize, max_value,
                        coeff_shift);
    }
  }
}