vp10_convolve.c 12.5 KB
Newer Older
1
#include <assert.h>
Angie Chiang's avatar
Angie Chiang committed
2
#include <string.h>
3

4
#include "./vp10_rtcd.h"
5
6
7
8
#include "vp10/common/filter.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_ports/mem.h"

9
10
#define MAX_BLOCK_WIDTH (MAX_SB_SIZE)
#define MAX_BLOCK_HEIGHT (MAX_SB_SIZE)
11
12
13
#define MAX_STEP (32)
#define MAX_FILTER_TAP (12)

14
void vp10_convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst,
15
16
17
18
                           int dst_stride, int w, int h,
                           const InterpFilterParams filter_params,
                           const int subpel_x_q4, int x_step_q4, int avg) {
  int x, y;
19
  int filter_size = filter_params.taps;
20
21
22
23
24
25
  src -= filter_size / 2 - 1;
  for (y = 0; y < h; ++y) {
    int x_q4 = subpel_x_q4;
    for (x = 0; x < w; ++x) {
      const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
      const int16_t *x_filter =
26
27
          vp10_get_interp_filter_subpel_kernel(
              filter_params, x_q4 & SUBPEL_MASK);
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
      int k, sum = 0;
      for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k];
      if (avg) {
        dst[x] = ROUND_POWER_OF_TWO(
            dst[x] + clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
      } else {
        dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
      }
      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

43
void vp10_convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst,
44
45
46
47
                          int dst_stride, int w, int h,
                          const InterpFilterParams filter_params,
                          const int subpel_y_q4, int y_step_q4, int avg) {
  int x, y;
48
  int filter_size = filter_params.taps;
49
50
51
52
53
54
55
  src -= src_stride * (filter_size / 2 - 1);

  for (x = 0; x < w; ++x) {
    int y_q4 = subpel_y_q4;
    for (y = 0; y < h; ++y) {
      const uint8_t *const src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
      const int16_t *y_filter =
56
57
          vp10_get_interp_filter_subpel_kernel(
              filter_params, y_q4 & SUBPEL_MASK);
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
      int k, sum = 0;
      for (k = 0; k < filter_size; ++k)
        sum += src_y[k * src_stride] * y_filter[k];
      if (avg) {
        dst[y * dst_stride] = ROUND_POWER_OF_TWO(
            dst[y * dst_stride] +
                clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)),
            1);
      } else {
        dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
      }
      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}

Angie Chiang's avatar
Angie Chiang committed
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
// Copies a w x h block of 8-bit pixels from src to dst without filtering.
// With avg == 0 every row is copied verbatim; otherwise every destination
// pixel becomes the rounded average of its current value and the source
// pixel.
static void convolve_copy(const uint8_t *src, int src_stride, uint8_t *dst,
                          int dst_stride, int w, int h, int avg) {
  int row;

  for (row = 0; row < h; ++row) {
    if (avg) {
      int col;
      for (col = 0; col < w; ++col) {
        dst[col] = clip_pixel(ROUND_POWER_OF_TWO(dst[col] + src[col], 1));
      }
    } else {
      memcpy(dst, src, w);
    }
    src += src_stride;
    dst += dst_stride;
  }
}

97
98
void vp10_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
                   int dst_stride, int w, int h,
99
100
101
102
103
#if CONFIG_DUAL_FILTER
                   const INTERP_FILTER *interp_filter,
#else
                   const INTERP_FILTER interp_filter,
#endif
104
                   const int subpel_x_q4, int x_step_q4, const int subpel_y_q4,
105
                   int y_step_q4, int ref_idx) {
Angie Chiang's avatar
Angie Chiang committed
106
107
  int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0;
  int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0;
108
109
110
111
112
113

  assert(w <= MAX_BLOCK_WIDTH);
  assert(h <= MAX_BLOCK_HEIGHT);
  assert(y_step_q4 <= MAX_STEP);
  assert(x_step_q4 <= MAX_STEP);

Angie Chiang's avatar
Angie Chiang committed
114
  if (ignore_horiz && ignore_vert) {
115
    convolve_copy(src, src_stride, dst, dst_stride, w, h, ref_idx);
Angie Chiang's avatar
Angie Chiang committed
116
  } else if (ignore_vert) {
117
118
119
120
121
122
123
124
#if CONFIG_DUAL_FILTER
    InterpFilterParams filter_params =
        vp10_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
#else
    InterpFilterParams filter_params =
        vp10_get_interp_filter_params(interp_filter);
#endif
    assert(filter_params.taps <= MAX_FILTER_TAP);
125
126
    vp10_convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params,
                        subpel_x_q4, x_step_q4, ref_idx);
Angie Chiang's avatar
Angie Chiang committed
127
  } else if (ignore_horiz) {
128
129
130
131
132
133
134
135
#if CONFIG_DUAL_FILTER
    InterpFilterParams filter_params =
        vp10_get_interp_filter_params(interp_filter[2 * ref_idx]);
#else
    InterpFilterParams filter_params =
        vp10_get_interp_filter_params(interp_filter);
#endif
    assert(filter_params.taps <= MAX_FILTER_TAP);
136
137
    vp10_convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params,
                       subpel_y_q4, y_step_q4, ref_idx);
Angie Chiang's avatar
Angie Chiang committed
138
139
140
141
  } else {
    // temp's size is set to (maximum possible intermediate_height) *
    // MAX_BLOCK_WIDTH
    uint8_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
142
                  MAX_FILTER_TAP) *
Angie Chiang's avatar
Angie Chiang committed
143
144
                 MAX_BLOCK_WIDTH];
    int temp_stride = MAX_BLOCK_WIDTH;
145
#if CONFIG_DUAL_FILTER
146
    InterpFilterParams filter_params_x =
147
        vp10_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
148
149
150
151
152
153
154
155
    InterpFilterParams filter_params_y =
        vp10_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
    InterpFilterParams filter_params = filter_params_x;

    // The filter size implies the required number of reference pixels for
    // the second stage filtering. It is possible that the two directions
    // require different filter sizes.
    int filter_size = filter_params_y.taps;
156
157
158
159
#else
    InterpFilterParams filter_params =
        vp10_get_interp_filter_params(interp_filter);
    int filter_size = filter_params.taps;
160
#endif
Angie Chiang's avatar
Angie Chiang committed
161
162
163
    int intermediate_height =
        (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;

164
165
    assert(filter_params.taps <= MAX_FILTER_TAP);

166
    vp10_convolve_horiz(src - src_stride * (filter_size / 2 - 1), src_stride,
167
                        temp, temp_stride, w, intermediate_height,
168
                        filter_params, subpel_x_q4, x_step_q4, 0);
169
170

#if CONFIG_DUAL_FILTER
171
    filter_params = filter_params_y;
172
173
174
175
176
177
#else
    filter_params = vp10_get_interp_filter_params(interp_filter);
#endif
    filter_size = filter_params.taps;
    assert(filter_params.taps <= MAX_FILTER_TAP);

178
    vp10_convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride,
179
180
                       dst, dst_stride, w, h, filter_params,
                       subpel_y_q4, y_step_q4, ref_idx);
Angie Chiang's avatar
Angie Chiang committed
181
  }
182
183
184
}

#if CONFIG_VP9_HIGHBITDEPTH
185
void vp10_highbd_convolve_horiz_c(const uint16_t *src, int src_stride,
186
187
188
189
190
                                  uint16_t *dst, int dst_stride, int w, int h,
                                  const InterpFilterParams filter_params,
                                  const int subpel_x_q4, int x_step_q4, int avg,
                                  int bd) {
  int x, y;
191
  int filter_size = filter_params.taps;
192
193
194
195
196
197
  src -= filter_size / 2 - 1;
  for (y = 0; y < h; ++y) {
    int x_q4 = subpel_x_q4;
    for (x = 0; x < w; ++x) {
      const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
      const int16_t *x_filter =
198
199
          vp10_get_interp_filter_subpel_kernel(
              filter_params, x_q4 & SUBPEL_MASK);
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
      int k, sum = 0;
      for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k];
      if (avg)
        dst[x] = ROUND_POWER_OF_TWO(
            dst[x] +
                clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
            1);
      else
        dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

216
void vp10_highbd_convolve_vert_c(const uint16_t *src, int src_stride,
217
218
219
220
221
                                 uint16_t *dst, int dst_stride, int w, int h,
                                 const InterpFilterParams filter_params,
                                 const int subpel_y_q4, int y_step_q4, int avg,
                                 int bd) {
  int x, y;
222
  int filter_size = filter_params.taps;
223
224
225
226
227
228
229
  src -= src_stride * (filter_size / 2 - 1);

  for (x = 0; x < w; ++x) {
    int y_q4 = subpel_y_q4;
    for (y = 0; y < h; ++y) {
      const uint16_t *const src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
      const int16_t *y_filter =
230
231
          vp10_get_interp_filter_subpel_kernel(
              filter_params, y_q4 & SUBPEL_MASK);
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
      int k, sum = 0;
      for (k = 0; k < filter_size; ++k)
        sum += src_y[k * src_stride] * y_filter[k];
      if (avg) {
        dst[y * dst_stride] = ROUND_POWER_OF_TWO(
            dst[y * dst_stride] +
                clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
            1);
      } else {
        dst[y * dst_stride] =
            clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
      }
      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}

Angie Chiang's avatar
Angie Chiang committed
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
// Copies a w x h block of 16-bit pixels from src to dst without filtering.
// With avg == 0 every row is copied verbatim; otherwise every destination
// pixel becomes the rounded average of its current value and the source
// pixel, clamped with clip_pixel_highbd() for bit depth bd.
static void highbd_convolve_copy(const uint16_t *src, int src_stride,
                                 uint16_t *dst, int dst_stride, int w, int h,
                                 int avg, int bd) {
  int row;

  for (row = 0; row < h; ++row) {
    if (avg) {
      int col;
      for (col = 0; col < w; ++col) {
        dst[col] =
            clip_pixel_highbd(ROUND_POWER_OF_TWO(dst[col] + src[col], 1), bd);
      }
    } else {
      memcpy(dst, src, w * sizeof(*src));
    }
    src += src_stride;
    dst += dst_stride;
  }
}

273
274
void vp10_highbd_convolve(const uint8_t *src8, int src_stride, uint8_t *dst8,
                          int dst_stride, int w, int h,
275
276
277
278
279
#if CONFIG_DUAL_FILTER
                          const INTERP_FILTER *interp_filter,
#else
                          const INTERP_FILTER interp_filter,
#endif
280
                          const int subpel_x_q4, int x_step_q4,
281
                          const int subpel_y_q4, int y_step_q4, int ref_idx,
282
                          int bd) {
Angie Chiang's avatar
Angie Chiang committed
283
284
285
286
  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0;
  int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0;
287
288
289
290
291
292

  assert(w <= MAX_BLOCK_WIDTH);
  assert(h <= MAX_BLOCK_HEIGHT);
  assert(y_step_q4 <= MAX_STEP);
  assert(x_step_q4 <= MAX_STEP);

Angie Chiang's avatar
Angie Chiang committed
293
  if (ignore_horiz && ignore_vert) {
294
    highbd_convolve_copy(src, src_stride, dst, dst_stride, w, h, ref_idx, bd);
Angie Chiang's avatar
Angie Chiang committed
295
  } else if (ignore_vert) {
296
297
298
299
300
301
302
#if CONFIG_DUAL_FILTER
    InterpFilterParams filter_params =
        vp10_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
#else
    InterpFilterParams filter_params =
        vp10_get_interp_filter_params(interp_filter);
#endif
303
304
305
    vp10_highbd_convolve_horiz(src, src_stride, dst, dst_stride, w, h,
                               filter_params, subpel_x_q4, x_step_q4, ref_idx,
                               bd);
Angie Chiang's avatar
Angie Chiang committed
306
  } else if (ignore_horiz) {
307
308
309
310
311
312
313
#if CONFIG_DUAL_FILTER
    InterpFilterParams filter_params =
        vp10_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
#else
    InterpFilterParams filter_params =
        vp10_get_interp_filter_params(interp_filter);
#endif
314
315
316
    vp10_highbd_convolve_vert(src, src_stride, dst, dst_stride, w, h,
                              filter_params, subpel_y_q4, y_step_q4, ref_idx,
                              bd);
Angie Chiang's avatar
Angie Chiang committed
317
318
319
320
321
322
323
324
  } else {
    // temp's size is set to (maximum possible intermediate_height) *
    // MAX_BLOCK_WIDTH
    uint16_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
                   MAX_FILTER_TAP) *
                  MAX_BLOCK_WIDTH];
    int temp_stride = MAX_BLOCK_WIDTH;

325
#if CONFIG_DUAL_FILTER
326
    InterpFilterParams filter_params_x =
327
        vp10_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
328
329
330
331
    InterpFilterParams filter_params_y =
        vp10_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
    InterpFilterParams filter_params = filter_params_x;
    int filter_size = filter_params_y.taps;
332
333
334
335
#else
    InterpFilterParams filter_params =
        vp10_get_interp_filter_params(interp_filter);
    int filter_size = filter_params.taps;
336
#endif
337

Angie Chiang's avatar
Angie Chiang committed
338
339
340
    int intermediate_height =
        (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;

341
342
343
344
    vp10_highbd_convolve_horiz(src - src_stride * (filter_size / 2 - 1),
                               src_stride, temp, temp_stride, w,
                               intermediate_height, filter_params, subpel_x_q4,
                               x_step_q4, 0, bd);
345
346

#if CONFIG_DUAL_FILTER
347
    filter_params = filter_params_y;
348
349
350
351
#endif
    filter_size = filter_params.taps;
    assert(filter_params.taps <= MAX_FILTER_TAP);

352
353
354
    vp10_highbd_convolve_vert(temp + temp_stride * (filter_size / 2 - 1),
                              temp_stride, dst, dst_stride, w, h, filter_params,
                              subpel_y_q4, y_step_q4, ref_idx, bd);
Angie Chiang's avatar
Angie Chiang committed
355
  }
356
357
}
#endif  // CONFIG_VP9_HIGHBITDEPTH