/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <assert.h>
#include <string.h>

#include "./av1_rtcd.h"
#include "av1/common/convolve.h"
#include "av1/common/filter.h"
#include "aom_dsp/aom_dsp_common.h"
#include "aom_ports/mem.h"

// Largest block dimensions accepted by the convolve entry points in this
// file (checked with assert). They also size the intermediate buffer used
// when both a horizontal and a vertical filter pass are required.
#define MAX_BLOCK_WIDTH (MAX_SB_SIZE)
#define MAX_BLOCK_HEIGHT (MAX_SB_SIZE)
// Upper bound asserted on x_step_q4 / y_step_q4. A step of 16 together with
// a zero sub-pixel offset is treated as "no filtering" in that direction.
#define MAX_STEP (32)
// Upper bound asserted on the tap count of an interpolation filter.
#define MAX_FILTER_TAP (12)

void av1_convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst,
                          int dst_stride, int w, int h,
                          const InterpFilterParams filter_params,
                          const int subpel_x_q4, int x_step_q4, int avg) {
30
  int x, y;
31
  int filter_size = filter_params.taps;
32
33
34
35
36
  src -= filter_size / 2 - 1;
  for (y = 0; y < h; ++y) {
    int x_q4 = subpel_x_q4;
    for (x = 0; x < w; ++x) {
      const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
Yaowu Xu's avatar
Yaowu Xu committed
37
      const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
clang-format's avatar
clang-format committed
38
          filter_params, x_q4 & SUBPEL_MASK);
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
      int k, sum = 0;
      for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k];
      if (avg) {
        dst[x] = ROUND_POWER_OF_TWO(
            dst[x] + clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
      } else {
        dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
      }
      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

void av1_convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst,
                         int dst_stride, int w, int h,
                         const InterpFilterParams filter_params,
                         const int subpel_y_q4, int y_step_q4, int avg) {
58
  int x, y;
59
  int filter_size = filter_params.taps;
60
61
62
63
64
65
  src -= src_stride * (filter_size / 2 - 1);

  for (x = 0; x < w; ++x) {
    int y_q4 = subpel_y_q4;
    for (y = 0; y < h; ++y) {
      const uint8_t *const src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
Yaowu Xu's avatar
Yaowu Xu committed
66
      const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
clang-format's avatar
clang-format committed
67
          filter_params, y_q4 & SUBPEL_MASK);
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
      int k, sum = 0;
      for (k = 0; k < filter_size; ++k)
        sum += src_y[k * src_stride] * y_filter[k];
      if (avg) {
        dst[y * dst_stride] = ROUND_POWER_OF_TWO(
            dst[y * dst_stride] +
                clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)),
            1);
      } else {
        dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
      }
      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}

// Transfers a w x h block of 8-bit pixels from src to dst without filtering.
// With avg == 0 every row is copied verbatim; otherwise each source pixel is
// averaged, with rounding, into the pixel already stored in dst.
static void convolve_copy(const uint8_t *src, int src_stride, uint8_t *dst,
                          int dst_stride, int w, int h, int avg) {
  int row;

  if (avg != 0) {
    for (row = 0; row < h; ++row, src += src_stride, dst += dst_stride) {
      int col;
      for (col = 0; col < w; ++col) {
        const int pair_sum = dst[col] + src[col];
        dst[col] = clip_pixel(ROUND_POWER_OF_TWO(pair_sum, 1));
      }
    }
    return;
  }

  // Plain copy, one row at a time because the strides may differ.
  for (row = 0; row < h; ++row, src += src_stride, dst += dst_stride) {
    memcpy(dst, src, w);
  }
}

void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
                  int dst_stride, int w, int h,
109
#if CONFIG_DUAL_FILTER
James Zern's avatar
James Zern committed
110
                  const InterpFilter *interp_filter,
111
#else
James Zern's avatar
James Zern committed
112
                  const InterpFilter interp_filter,
113
#endif
Yaowu Xu's avatar
Yaowu Xu committed
114
115
                  const int subpel_x_q4, int x_step_q4, const int subpel_y_q4,
                  int y_step_q4, int ref_idx) {
Angie Chiang's avatar
Angie Chiang committed
116
117
  int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0;
  int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0;
118
119
120
121
122
123

  assert(w <= MAX_BLOCK_WIDTH);
  assert(h <= MAX_BLOCK_HEIGHT);
  assert(y_step_q4 <= MAX_STEP);
  assert(x_step_q4 <= MAX_STEP);

Angie Chiang's avatar
Angie Chiang committed
124
  if (ignore_horiz && ignore_vert) {
125
    convolve_copy(src, src_stride, dst, dst_stride, w, h, ref_idx);
Angie Chiang's avatar
Angie Chiang committed
126
  } else if (ignore_vert) {
127
128
#if CONFIG_DUAL_FILTER
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
129
        av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
130
131
#else
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
132
        av1_get_interp_filter_params(interp_filter);
133
134
#endif
    assert(filter_params.taps <= MAX_FILTER_TAP);
Yaowu Xu's avatar
Yaowu Xu committed
135
136
    av1_convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params,
                       subpel_x_q4, x_step_q4, ref_idx);
Angie Chiang's avatar
Angie Chiang committed
137
  } else if (ignore_horiz) {
138
139
#if CONFIG_DUAL_FILTER
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
140
        av1_get_interp_filter_params(interp_filter[2 * ref_idx]);
141
142
#else
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
143
        av1_get_interp_filter_params(interp_filter);
144
145
#endif
    assert(filter_params.taps <= MAX_FILTER_TAP);
Yaowu Xu's avatar
Yaowu Xu committed
146
147
    av1_convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params,
                      subpel_y_q4, y_step_q4, ref_idx);
Angie Chiang's avatar
Angie Chiang committed
148
149
150
151
  } else {
    // temp's size is set to (maximum possible intermediate_height) *
    // MAX_BLOCK_WIDTH
    uint8_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
152
                  MAX_FILTER_TAP) *
Angie Chiang's avatar
Angie Chiang committed
153
154
                 MAX_BLOCK_WIDTH];
    int temp_stride = MAX_BLOCK_WIDTH;
155
#if CONFIG_DUAL_FILTER
156
    InterpFilterParams filter_params_x =
Yaowu Xu's avatar
Yaowu Xu committed
157
        av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
158
    InterpFilterParams filter_params_y =
Yaowu Xu's avatar
Yaowu Xu committed
159
        av1_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
160
161
162
163
164
165
    InterpFilterParams filter_params = filter_params_x;

    // The filter size implies the required number of reference pixels for
    // the second stage filtering. It is possible that the two directions
    // require different filter sizes.
    int filter_size = filter_params_y.taps;
166
167
#else
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
168
        av1_get_interp_filter_params(interp_filter);
169
    int filter_size = filter_params.taps;
170
#endif
Angie Chiang's avatar
Angie Chiang committed
171
172
173
    int intermediate_height =
        (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;

174
175
    assert(filter_params.taps <= MAX_FILTER_TAP);

Yaowu Xu's avatar
Yaowu Xu committed
176
177
178
    av1_convolve_horiz(src - src_stride * (filter_size / 2 - 1), src_stride,
                       temp, temp_stride, w, intermediate_height, filter_params,
                       subpel_x_q4, x_step_q4, 0);
179
180

#if CONFIG_DUAL_FILTER
181
    filter_params = filter_params_y;
182
#else
Yaowu Xu's avatar
Yaowu Xu committed
183
    filter_params = av1_get_interp_filter_params(interp_filter);
184
185
186
187
#endif
    filter_size = filter_params.taps;
    assert(filter_params.taps <= MAX_FILTER_TAP);

Yaowu Xu's avatar
Yaowu Xu committed
188
189
190
    av1_convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride,
                      dst, dst_stride, w, h, filter_params, subpel_y_q4,
                      y_step_q4, ref_idx);
Angie Chiang's avatar
Angie Chiang committed
191
  }
192
193
}

#if CONFIG_AOM_HIGHBITDEPTH
void av1_highbd_convolve_horiz_c(const uint16_t *src, int src_stride,
                                 uint16_t *dst, int dst_stride, int w, int h,
                                 const InterpFilterParams filter_params,
                                 const int subpel_x_q4, int x_step_q4, int avg,
                                 int bd) {
200
  int x, y;
201
  int filter_size = filter_params.taps;
202
203
204
205
206
  src -= filter_size / 2 - 1;
  for (y = 0; y < h; ++y) {
    int x_q4 = subpel_x_q4;
    for (x = 0; x < w; ++x) {
      const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
Yaowu Xu's avatar
Yaowu Xu committed
207
      const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
clang-format's avatar
clang-format committed
208
          filter_params, x_q4 & SUBPEL_MASK);
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
      int k, sum = 0;
      for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k];
      if (avg)
        dst[x] = ROUND_POWER_OF_TWO(
            dst[x] +
                clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
            1);
      else
        dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

void av1_highbd_convolve_vert_c(const uint16_t *src, int src_stride,
                                uint16_t *dst, int dst_stride, int w, int h,
                                const InterpFilterParams filter_params,
                                const int subpel_y_q4, int y_step_q4, int avg,
                                int bd) {
230
  int x, y;
231
  int filter_size = filter_params.taps;
232
233
234
235
236
237
  src -= src_stride * (filter_size / 2 - 1);

  for (x = 0; x < w; ++x) {
    int y_q4 = subpel_y_q4;
    for (y = 0; y < h; ++y) {
      const uint16_t *const src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
Yaowu Xu's avatar
Yaowu Xu committed
238
      const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
clang-format's avatar
clang-format committed
239
          filter_params, y_q4 & SUBPEL_MASK);
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
      int k, sum = 0;
      for (k = 0; k < filter_size; ++k)
        sum += src_y[k * src_stride] * y_filter[k];
      if (avg) {
        dst[y * dst_stride] = ROUND_POWER_OF_TWO(
            dst[y * dst_stride] +
                clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
            1);
      } else {
        dst[y * dst_stride] =
            clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
      }
      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}

// Transfers a w x h block of 16-bit samples from src to dst without
// filtering. With avg == 0 every row is copied verbatim; otherwise each
// source sample is averaged, with rounding, into the sample already stored
// in dst and the result is clamped for bit depth bd.
static void highbd_convolve_copy(const uint16_t *src, int src_stride,
                                 uint16_t *dst, int dst_stride, int w, int h,
                                 int avg, int bd) {
  int row;

  if (avg != 0) {
    for (row = 0; row < h; ++row, src += src_stride, dst += dst_stride) {
      int col;
      for (col = 0; col < w; ++col) {
        const int pair_sum = dst[col] + src[col];
        dst[col] = clip_pixel_highbd(ROUND_POWER_OF_TWO(pair_sum, 1), bd);
      }
    }
    return;
  }

  // Plain copy, one row at a time because the strides may differ.
  for (row = 0; row < h; ++row, src += src_stride, dst += dst_stride) {
    memcpy(dst, src, w * sizeof(*src));
  }
}

void av1_highbd_convolve(const uint8_t *src8, int src_stride, uint8_t *dst8,
                         int dst_stride, int w, int h,
283
#if CONFIG_DUAL_FILTER
James Zern's avatar
James Zern committed
284
                         const InterpFilter *interp_filter,
285
#else
James Zern's avatar
James Zern committed
286
                         const InterpFilter interp_filter,
287
#endif
Yaowu Xu's avatar
Yaowu Xu committed
288
289
290
                         const int subpel_x_q4, int x_step_q4,
                         const int subpel_y_q4, int y_step_q4, int ref_idx,
                         int bd) {
Angie Chiang's avatar
Angie Chiang committed
291
292
293
294
  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0;
  int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0;
295
296
297
298
299
300

  assert(w <= MAX_BLOCK_WIDTH);
  assert(h <= MAX_BLOCK_HEIGHT);
  assert(y_step_q4 <= MAX_STEP);
  assert(x_step_q4 <= MAX_STEP);

Angie Chiang's avatar
Angie Chiang committed
301
  if (ignore_horiz && ignore_vert) {
302
    highbd_convolve_copy(src, src_stride, dst, dst_stride, w, h, ref_idx, bd);
Angie Chiang's avatar
Angie Chiang committed
303
  } else if (ignore_vert) {
304
305
#if CONFIG_DUAL_FILTER
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
306
        av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
307
308
#else
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
309
        av1_get_interp_filter_params(interp_filter);
310
#endif
Yaowu Xu's avatar
Yaowu Xu committed
311
312
313
    av1_highbd_convolve_horiz(src, src_stride, dst, dst_stride, w, h,
                              filter_params, subpel_x_q4, x_step_q4, ref_idx,
                              bd);
Angie Chiang's avatar
Angie Chiang committed
314
  } else if (ignore_horiz) {
315
316
#if CONFIG_DUAL_FILTER
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
317
        av1_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
318
319
#else
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
320
        av1_get_interp_filter_params(interp_filter);
321
#endif
Yaowu Xu's avatar
Yaowu Xu committed
322
323
324
    av1_highbd_convolve_vert(src, src_stride, dst, dst_stride, w, h,
                             filter_params, subpel_y_q4, y_step_q4, ref_idx,
                             bd);
Angie Chiang's avatar
Angie Chiang committed
325
326
327
328
329
330
331
332
  } else {
    // temp's size is set to (maximum possible intermediate_height) *
    // MAX_BLOCK_WIDTH
    uint16_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
                   MAX_FILTER_TAP) *
                  MAX_BLOCK_WIDTH];
    int temp_stride = MAX_BLOCK_WIDTH;

333
#if CONFIG_DUAL_FILTER
334
    InterpFilterParams filter_params_x =
Yaowu Xu's avatar
Yaowu Xu committed
335
        av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
336
    InterpFilterParams filter_params_y =
Yaowu Xu's avatar
Yaowu Xu committed
337
        av1_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
338
339
    InterpFilterParams filter_params = filter_params_x;
    int filter_size = filter_params_y.taps;
340
341
#else
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
342
        av1_get_interp_filter_params(interp_filter);
343
    int filter_size = filter_params.taps;
344
#endif
345

Angie Chiang's avatar
Angie Chiang committed
346
347
348
    int intermediate_height =
        (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;

Yaowu Xu's avatar
Yaowu Xu committed
349
    av1_highbd_convolve_horiz(
clang-format's avatar
clang-format committed
350
351
        src - src_stride * (filter_size / 2 - 1), src_stride, temp, temp_stride,
        w, intermediate_height, filter_params, subpel_x_q4, x_step_q4, 0, bd);
352
353

#if CONFIG_DUAL_FILTER
354
    filter_params = filter_params_y;
355
356
357
358
#endif
    filter_size = filter_params.taps;
    assert(filter_params.taps <= MAX_FILTER_TAP);

Yaowu Xu's avatar
Yaowu Xu committed
359
360
361
    av1_highbd_convolve_vert(temp + temp_stride * (filter_size / 2 - 1),
                             temp_stride, dst, dst_stride, w, h, filter_params,
                             subpel_y_q4, y_step_q4, ref_idx, bd);
Angie Chiang's avatar
Angie Chiang committed
362
  }
363
}
#endif  // CONFIG_AOM_HIGHBITDEPTH