od_dering.c 13.4 KB
Newer Older
Yaowu Xu's avatar
Yaowu Xu committed
1
2
3
4
5
6
7
8
9
10
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
Yaowu Xu's avatar
Yaowu Xu committed
11
#ifdef HAVE_CONFIG_H
clang-format's avatar
clang-format committed
12
#include "config.h"
Yaowu Xu's avatar
Yaowu Xu committed
13
14
#endif

15
16
// clang-format off

Yaowu Xu's avatar
Yaowu Xu committed
17
18
19
#include <stdlib.h>
#include <math.h>
#include "dering.h"
20
#include "./av1_rtcd.h"
Yaowu Xu's avatar
Yaowu Xu committed
21
22
23

/* Generated from gen_filter_tables.c. */
const int OD_DIRECTION_OFFSETS_TABLE[8][3] = {
clang-format's avatar
clang-format committed
24
25
26
27
28
29
30
31
32
33
  { -1 * OD_FILT_BSTRIDE + 1, -2 * OD_FILT_BSTRIDE + 2,
    -3 * OD_FILT_BSTRIDE + 3 },
  { 0 * OD_FILT_BSTRIDE + 1, -1 * OD_FILT_BSTRIDE + 2,
    -1 * OD_FILT_BSTRIDE + 3 },
  { 0 * OD_FILT_BSTRIDE + 1, 0 * OD_FILT_BSTRIDE + 2, 0 * OD_FILT_BSTRIDE + 3 },
  { 0 * OD_FILT_BSTRIDE + 1, 1 * OD_FILT_BSTRIDE + 2, 1 * OD_FILT_BSTRIDE + 3 },
  { 1 * OD_FILT_BSTRIDE + 1, 2 * OD_FILT_BSTRIDE + 2, 3 * OD_FILT_BSTRIDE + 3 },
  { 1 * OD_FILT_BSTRIDE + 0, 2 * OD_FILT_BSTRIDE + 1, 3 * OD_FILT_BSTRIDE + 1 },
  { 1 * OD_FILT_BSTRIDE + 0, 2 * OD_FILT_BSTRIDE + 0, 3 * OD_FILT_BSTRIDE + 0 },
  { 1 * OD_FILT_BSTRIDE + 0, 2 * OD_FILT_BSTRIDE - 1, 3 * OD_FILT_BSTRIDE - 1 },
Yaowu Xu's avatar
Yaowu Xu committed
34
35
36
37
38
39
40
41
42
};

/* Detect direction. 0 means 45-degree up-right, 2 is horizontal, and so on.
   The search minimizes the weighted variance along all the lines in a
   particular direction, i.e. the squared error between the input and a
   "predicted" block where each pixel is replaced by the average along a line
   in a particular direction. Since each direction have the same sum(x^2) term,
   that term is never computed. See Section 2, step 2, of:
   http://jmvalin.ca/notes/intra_paint.pdf */
43
44
int od_dir_find8_c(const od_dering_in *img, int stride, int32_t *var,
                   int coeff_shift) {
Yaowu Xu's avatar
Yaowu Xu committed
45
  int i;
clang-format's avatar
clang-format committed
46
47
  int32_t cost[8] = { 0 };
  int partial[8][15] = { { 0 } };
Yaowu Xu's avatar
Yaowu Xu committed
48
49
50
51
52
  int32_t best_cost = 0;
  int best_dir = 0;
  /* Instead of dividing by n between 2 and 8, we multiply by 3*5*7*8/n.
     The output is then 840 times larger, but we don't care for finding
     the max. */
clang-format's avatar
clang-format committed
53
  static const int div_table[] = { 0, 840, 420, 280, 210, 168, 140, 120, 105 };
Yaowu Xu's avatar
Yaowu Xu committed
54
55
56
57
58
59
  for (i = 0; i < 8; i++) {
    int j;
    for (j = 0; j < 8; j++) {
      int x;
      /* We subtract 128 here to reduce the maximum range of the squared
         partial sums. */
clang-format's avatar
clang-format committed
60
      x = (img[i * stride + j] >> coeff_shift) - 128;
Yaowu Xu's avatar
Yaowu Xu committed
61
      partial[0][i + j] += x;
clang-format's avatar
clang-format committed
62
      partial[1][i + j / 2] += x;
Yaowu Xu's avatar
Yaowu Xu committed
63
      partial[2][i] += x;
clang-format's avatar
clang-format committed
64
      partial[3][3 + i - j / 2] += x;
Yaowu Xu's avatar
Yaowu Xu committed
65
      partial[4][7 + i - j] += x;
clang-format's avatar
clang-format committed
66
      partial[5][3 - i / 2 + j] += x;
Yaowu Xu's avatar
Yaowu Xu committed
67
      partial[6][j] += x;
clang-format's avatar
clang-format committed
68
      partial[7][i / 2 + j] += x;
Yaowu Xu's avatar
Yaowu Xu committed
69
70
71
    }
  }
  for (i = 0; i < 8; i++) {
clang-format's avatar
clang-format committed
72
73
    cost[2] += partial[2][i] * partial[2][i];
    cost[6] += partial[6][i] * partial[6][i];
Yaowu Xu's avatar
Yaowu Xu committed
74
75
76
77
  }
  cost[2] *= div_table[8];
  cost[6] *= div_table[8];
  for (i = 0; i < 7; i++) {
clang-format's avatar
clang-format committed
78
79
80
81
82
83
    cost[0] += (partial[0][i] * partial[0][i] +
                partial[0][14 - i] * partial[0][14 - i]) *
               div_table[i + 1];
    cost[4] += (partial[4][i] * partial[4][i] +
                partial[4][14 - i] * partial[4][14 - i]) *
               div_table[i + 1];
Yaowu Xu's avatar
Yaowu Xu committed
84
  }
clang-format's avatar
clang-format committed
85
86
  cost[0] += partial[0][7] * partial[0][7] * div_table[8];
  cost[4] += partial[4][7] * partial[4][7] * div_table[8];
Yaowu Xu's avatar
Yaowu Xu committed
87
88
89
  for (i = 1; i < 8; i += 2) {
    int j;
    for (j = 0; j < 4 + 1; j++) {
clang-format's avatar
clang-format committed
90
      cost[i] += partial[i][3 + j] * partial[i][3 + j];
Yaowu Xu's avatar
Yaowu Xu committed
91
92
93
    }
    cost[i] *= div_table[8];
    for (j = 0; j < 4 - 1; j++) {
clang-format's avatar
clang-format committed
94
95
96
      cost[i] += (partial[i][j] * partial[i][j] +
                  partial[i][10 - j] * partial[i][10 - j]) *
                 div_table[2 * j + 2];
Yaowu Xu's avatar
Yaowu Xu committed
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
    }
  }
  for (i = 0; i < 8; i++) {
    if (cost[i] > best_cost) {
      best_cost = cost[i];
      best_dir = i;
    }
  }
  /* Difference between the optimal variance and the variance along the
     orthogonal direction. Again, the sum(x^2) terms cancel out. */
  *var = best_cost - cost[(best_dir + 4) & 7];
  /* We'd normally divide by 840, but dividing by 1024 is close enough
     for what we're going to do with this. */
  *var >>= 10;
  return best_dir;
}

/* Value od_dering() pre-fills its input buffer with, so that positions which
   never receive a source pixel still hold something. Presumably chosen so
   that its distance to any real sample is never below a filter threshold,
   which makes the filters ignore such positions -- confirm against the
   largest threshold in use. */
#define OD_DERING_VERY_LARGE (30000)
/* Number of samples in a square buffer whose side is OD_BSIZE_MAX plus an
   OD_FILT_BORDER margin on both ends. */
#define OD_DERING_INBUF_SIZE \
  ((OD_BSIZE_MAX + 2 * OD_FILT_BORDER) * (OD_BSIZE_MAX + 2 * OD_FILT_BORDER))
Yaowu Xu's avatar
Yaowu Xu committed
117
118

/* Smooth in the direction detected. */
119
120
int od_filter_dering_direction_8x8_c(int16_t *y, int ystride, const int16_t *in,
                                     int threshold, int dir) {
Yaowu Xu's avatar
Yaowu Xu committed
121
122
123
  int i;
  int j;
  int k;
124
  static const int taps[3] = { 3, 2, 1 };
125
  int total_abs = 0;
126
127
  for (i = 0; i < 8; i++) {
    for (j = 0; j < 8; j++) {
Yaowu Xu's avatar
Yaowu Xu committed
128
129
130
      int16_t sum;
      int16_t xx;
      int16_t yy;
clang-format's avatar
clang-format committed
131
132
      xx = in[i * OD_FILT_BSTRIDE + j];
      sum = 0;
Yaowu Xu's avatar
Yaowu Xu committed
133
134
135
      for (k = 0; k < 3; k++) {
        int16_t p0;
        int16_t p1;
clang-format's avatar
clang-format committed
136
137
138
139
140
141
        p0 = in[i * OD_FILT_BSTRIDE + j + OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
             xx;
        p1 = in[i * OD_FILT_BSTRIDE + j - OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
             xx;
        if (abs(p0) < threshold) sum += taps[k] * p0;
        if (abs(p1) < threshold) sum += taps[k] * p1;
Yaowu Xu's avatar
Yaowu Xu committed
142
      }
143
144
145
      sum = (sum + 8) >> 4;
      total_abs += abs(sum);
      yy = xx + sum;
clang-format's avatar
clang-format committed
146
      y[i * ystride + j] = yy;
Yaowu Xu's avatar
Yaowu Xu committed
147
148
    }
  }
149
  return (total_abs + 8) >> 4;
Yaowu Xu's avatar
Yaowu Xu committed
150
151
}

152
/* Smooth in the direction detected. */
153
154
int od_filter_dering_direction_4x4_c(int16_t *y, int ystride, const int16_t *in,
                                     int threshold, int dir) {
155
156
157
158
  int i;
  int j;
  int k;
  static const int taps[2] = { 4, 1 };
159
  int total_abs = 0;
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
  for (i = 0; i < 4; i++) {
    for (j = 0; j < 4; j++) {
      int16_t sum;
      int16_t xx;
      int16_t yy;
      xx = in[i * OD_FILT_BSTRIDE + j];
      sum = 0;
      for (k = 0; k < 2; k++) {
        int16_t p0;
        int16_t p1;
        p0 = in[i * OD_FILT_BSTRIDE + j + OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
             xx;
        p1 = in[i * OD_FILT_BSTRIDE + j - OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
             xx;
        if (abs(p0) < threshold) sum += taps[k] * p0;
        if (abs(p1) < threshold) sum += taps[k] * p1;
      }
177
178
179
      sum = (sum + 8) >> 4;
      total_abs += abs(sum);
      yy = xx + sum;
180
181
182
      y[i * ystride + j] = yy;
    }
  }
183
  return (total_abs + 2) >> 2;
Yaowu Xu's avatar
Yaowu Xu committed
184
185
186
}

/* Smooth in the direction orthogonal to what was detected. */
187
void od_filter_dering_orthogonal_8x8_c(int16_t *y, int ystride,
188
189
                                       const int16_t *in, int threshold,
                                       int dir) {
Yaowu Xu's avatar
Yaowu Xu committed
190
191
192
  int i;
  int j;
  int offset;
clang-format's avatar
clang-format committed
193
194
195
196
  if (dir > 0 && dir < 4)
    offset = OD_FILT_BSTRIDE;
  else
    offset = 1;
197
198
  for (i = 0; i < 8; i++) {
    for (j = 0; j < 8; j++) {
Yaowu Xu's avatar
Yaowu Xu committed
199
200
201
      int16_t yy;
      int16_t sum;
      int16_t p;
clang-format's avatar
clang-format committed
202
      yy = in[i * OD_FILT_BSTRIDE + j];
Yaowu Xu's avatar
Yaowu Xu committed
203
      sum = 0;
clang-format's avatar
clang-format committed
204
      p = in[i * OD_FILT_BSTRIDE + j + offset] - yy;
205
      if (abs(p) < threshold) sum += p;
clang-format's avatar
clang-format committed
206
      p = in[i * OD_FILT_BSTRIDE + j - offset] - yy;
207
      if (abs(p) < threshold) sum += p;
clang-format's avatar
clang-format committed
208
      p = in[i * OD_FILT_BSTRIDE + j + 2 * offset] - yy;
209
      if (abs(p) < threshold) sum += p;
clang-format's avatar
clang-format committed
210
      p = in[i * OD_FILT_BSTRIDE + j - 2 * offset] - yy;
211
      if (abs(p) < threshold) sum += p;
clang-format's avatar
clang-format committed
212
      y[i * ystride + j] = yy + ((3 * sum + 8) >> 4);
Yaowu Xu's avatar
Yaowu Xu committed
213
214
215
216
    }
  }
}

217
/* Smooth in the direction orthogonal to what was detected. */
Yaowu Xu's avatar
Yaowu Xu committed
218
void od_filter_dering_orthogonal_4x4_c(int16_t *y, int ystride,
219
220
                                       const int16_t *in, int threshold,
                                       int dir) {
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
  int i;
  int j;
  int offset;
  if (dir > 0 && dir < 4)
    offset = OD_FILT_BSTRIDE;
  else
    offset = 1;
  for (i = 0; i < 4; i++) {
    for (j = 0; j < 4; j++) {
      int16_t yy;
      int16_t sum;
      int16_t p;
      yy = in[i * OD_FILT_BSTRIDE + j];
      sum = 0;
      p = in[i * OD_FILT_BSTRIDE + j + offset] - yy;
236
      if (abs(p) < threshold) sum += p;
237
      p = in[i * OD_FILT_BSTRIDE + j - offset] - yy;
238
      if (abs(p) < threshold) sum += p;
239
240
241
      y[i * ystride + j] = yy + ((5 * sum + 8) >> 4);
    }
  }
Yaowu Xu's avatar
Yaowu Xu committed
242
243
244
245
246
247
}

/* This table approximates x^0.16 with the index being log2(x). It is clamped
   to [.5, 3]. The table is computed as:
   round(256*min(3, max(.5, 1.08*(sqrt(2)*2.^([0:17]+8)/256/256).^.16))) */
static const int16_t OD_THRESH_TABLE_Q8[18] = {
  /* Q8 multipliers (256 stands for 1.0), from 128 (0.5) up to 768 (3.0). */
  /* indices  0..5  */ 128, 134, 150, 168, 188, 210,
  /* indices  6..11 */ 234, 262, 292, 327, 365, 408,
  /* indices 12..17 */ 455, 509, 569, 635, 710, 768,
};

252
/* Compute deringing filter threshold for an 8x8 block based on the
Yaowu Xu's avatar
Yaowu Xu committed
253
254
255
256
257
   directional variance difference. A high variance difference means that we
   have a highly directional pattern (e.g. a high contrast edge), so we can
   apply more deringing. A low variance means that we either have a low
   contrast edge, or a non-directional texture, so we want to be careful not
   to blur. */
258
259
260
261
262
static INLINE int od_adjust_thresh(int threshold, int32_t var) {
  int v1;
  /* We use the variance of 8x8 blocks to adjust the threshold. */
  v1 = OD_MINI(32767, var >> 6);
  return (threshold * OD_THRESH_TABLE_Q8[OD_ILOG(v1)] + 128) >> 8;
Yaowu Xu's avatar
Yaowu Xu committed
263
264
}

265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
/* Copy an 8x8 block of 16-bit samples from src (row pitch sstride) to dst
   (row pitch dstride), row by row. */
static INLINE void copy_8x8_16bit(int16_t *dst, int dstride, int16_t *src, int sstride) {
  int row;
  for (row = 0; row < 8; row++) {
    int16_t *to = dst + row * dstride;
    const int16_t *from = src + row * sstride;
    int col;
    for (col = 0; col < 8; col++) {
      to[col] = from[col];
    }
  }
}

/* Copy a 4x4 block of 16-bit samples from src (row pitch sstride) to dst
   (row pitch dstride), row by row. */
static INLINE void copy_4x4_16bit(int16_t *dst, int dstride, int16_t *src, int sstride) {
  int row;
  for (row = 0; row < 4; row++) {
    int16_t *to = dst + row * dstride;
    const int16_t *from = src + row * sstride;
    int col;
    for (col = 0; col < 4; col++) {
      to[col] = from[col];
    }
  }
}

/* Copy the listed blocks from src to dst.
   bskip holds dering_count (row, column) block coordinates. Blocks are 8x8
   when bsize is 3 and 4x4 for any other bsize; a block at (by, bx) starts at
   sample row by * size, sample column bx * size in both buffers.
   TODO: Optimize this function for SSE. */
void copy_blocks_16bit(int16_t *dst, int dstride, int16_t *src, int sstride,
    unsigned char (*bskip)[2], int dering_count, int bsize)
{
  const int is_8x8 = (bsize == 3);
  const int shift = is_8x8 ? 3 : 2;
  int n;
  for (n = 0; n < dering_count; n++) {
    const int by = bskip[n][0];
    const int bx = bskip[n][1];
    int16_t *to = &dst[(by << shift) * dstride + (bx << shift)];
    int16_t *from = &src[(by << shift) * sstride + (bx << shift)];
    if (is_8x8) {
      copy_8x8_16bit(to, dstride, from, sstride);
    } else {
      copy_4x4_16bit(to, dstride, from, sstride);
    }
  }
}

303
304
void od_dering(int16_t *y, int ystride, const od_dering_in *x, int xstride,
               int nhb, int nvb, int sbx, int sby, int nhsb, int nvsb, int xdec,
305
               int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int pli,
306
               unsigned char (*bskip)[2], int dering_count, int threshold,
307
               int coeff_shift) {
Yaowu Xu's avatar
Yaowu Xu committed
308
309
  int i;
  int j;
310
  int bi;
Yaowu Xu's avatar
Yaowu Xu committed
311
312
313
314
  int bx;
  int by;
  int16_t inbuf[OD_DERING_INBUF_SIZE];
  int16_t *in;
315
  int bsize;
316
  int filter2_thresh[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS];
317
  od_filter_dering_direction_func filter_dering_direction[OD_DERINGSIZES] = {
318
    od_filter_dering_direction_4x4, od_filter_dering_direction_8x8
319
320
  };
  od_filter_dering_orthogonal_func filter_dering_orthogonal[OD_DERINGSIZES] = {
321
    od_filter_dering_orthogonal_4x4, od_filter_dering_orthogonal_8x8
322
  };
323
  bsize = 3 - xdec;
clang-format's avatar
clang-format committed
324
  in = inbuf + OD_FILT_BORDER * OD_FILT_BSTRIDE + OD_FILT_BORDER;
Yaowu Xu's avatar
Yaowu Xu committed
325
326
327
328
  /* We avoid filtering the pixels for which some of the pixels to average
     are outside the frame. We could change the filter instead, but it would
     add special cases for any future vectorization. */
  for (i = 0; i < OD_DERING_INBUF_SIZE; i++) inbuf[i] = OD_DERING_VERY_LARGE;
clang-format's avatar
clang-format committed
329
  for (i = -OD_FILT_BORDER * (sby != 0);
330
       i < (nvb << bsize) + OD_FILT_BORDER * (sby != nvsb - 1); i++) {
clang-format's avatar
clang-format committed
331
    for (j = -OD_FILT_BORDER * (sbx != 0);
332
         j < (nhb << bsize) + OD_FILT_BORDER * (sbx != nhsb - 1); j++) {
clang-format's avatar
clang-format committed
333
      in[i * OD_FILT_BSTRIDE + j] = x[i * xstride + j];
Yaowu Xu's avatar
Yaowu Xu committed
334
335
336
    }
  }
  if (pli == 0) {
337
    for (bi = 0; bi < dering_count; bi++) {
338
      int32_t var;
339
340
341
      by = bskip[bi][0];
      bx = bskip[bi][1];
      dir[by][bx] = od_dir_find8(&x[8 * by * xstride + 8 * bx], xstride,
342
                                 &var, coeff_shift);
343
344
345
346
347
348
349
350
351
352
353
      /* Deringing orthogonal to the direction uses a tighter threshold
         because we want to be conservative. We've presumably already
         achieved some deringing, so the amount of change is expected
         to be low. Also, since we might be filtering across an edge, we
         want to make sure not to blur it. That being said, we might want
         to be a little bit more aggressive on pure horizontal/vertical
         since the ringing there tends to be directional, so it doesn't
         get removed by the directional filtering. */
      filter2_thresh[by][bx] = (filter_dering_direction[bsize - OD_LOG_BSIZE0])(
          &y[(by * ystride << bsize) + (bx << bsize)], ystride,
          &in[(by * OD_FILT_BSTRIDE << bsize) + (bx << bsize)],
354
          od_adjust_thresh(threshold, var), dir[by][bx]);
Yaowu Xu's avatar
Yaowu Xu committed
355
    }
clang-format's avatar
clang-format committed
356
  } else {
357
358
359
360
361
362
363
    for (bi = 0; bi < dering_count; bi++) {
      by = bskip[bi][0];
      bx = bskip[bi][1];
      filter2_thresh[by][bx] = (filter_dering_direction[bsize - OD_LOG_BSIZE0])(
          &y[(by * ystride << bsize) + (bx << bsize)], ystride,
          &in[(by * OD_FILT_BSTRIDE << bsize) + (bx << bsize)], threshold,
          dir[by][bx]);
Yaowu Xu's avatar
Yaowu Xu committed
364
365
    }
  }
366
367
  copy_blocks_16bit(in, OD_FILT_BSTRIDE, y, ystride, bskip, dering_count,
      bsize);
368
369
370
371
372
373
374
375
  for (bi = 0; bi < dering_count; bi++) {
    by = bskip[bi][0];
    bx = bskip[bi][1];
    if (filter2_thresh[by][bx] == 0) continue;
    (filter_dering_orthogonal[bsize - OD_LOG_BSIZE0])(
        &y[(by * ystride << bsize) + (bx << bsize)], ystride,
        &in[(by * OD_FILT_BSTRIDE << bsize) + (bx << bsize)], filter2_thresh[by][bx],
        dir[by][bx]);
Yaowu Xu's avatar
Yaowu Xu committed
376
377
  }
}