convolve.c 79.1 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

12
#include <assert.h>
Angie Chiang's avatar
Angie Chiang committed
13
#include <string.h>
14

15
#include "./aom_dsp_rtcd.h"
Yaowu Xu's avatar
Yaowu Xu committed
16
#include "./av1_rtcd.h"
17
#include "av1/common/blockd.h"
18
#include "av1/common/convolve.h"
19
#include "av1/common/filter.h"
Angie Chiang's avatar
Angie Chiang committed
20
#include "av1/common/onyxc_int.h"
Yaowu Xu's avatar
Yaowu Xu committed
21
#include "aom_dsp/aom_dsp_common.h"
22
#include "aom_ports/mem.h"
23

24
25
#define MAX_BLOCK_WIDTH (MAX_SB_SIZE)
#define MAX_BLOCK_HEIGHT (MAX_SB_SIZE)
26
27
#define MAX_STEP (32)

28
#if CONFIG_FRAME_SUPERRES
29
30
31
32
33
34
35
36
37

#define UPSCALE_PROC_UNIT 64  // Source step (roughly)
#define UPSCALE_PROC_UNIT_SCALE (UPSCALE_PROC_UNIT / SCALE_NUMERATOR)

void av1_convolve_horiz_rs_c(const uint8_t *src, int src_stride, uint8_t *dst,
                             int dst_stride, int w, int h,
                             const int16_t *x_filters, int interp_taps,
                             const int x0_qn, const int x_step_qn) {
  src -= interp_taps / 2 - 1;
38
  for (int y = 0; y < h; ++y) {
39
    int x_qn = x0_qn;
40
    for (int x = 0; x < w; ++x) {
41
42
43
44
45
      const uint8_t *const src_x = &src[x_qn >> RS_SCALE_SUBPEL_BITS];
      const int x_filter_idx =
          (x_qn & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS;
      assert(x_filter_idx <= RS_SUBPEL_MASK);
      const int16_t *const x_filter = &x_filters[x_filter_idx * interp_taps];
46
47
      int sum = 0;
      for (int k = 0; k < interp_taps; ++k) sum += src_x[k] * x_filter[k];
48
49
50
51
52
53
54
55
56
57
58
59
60
61
      dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
      x_qn += x_step_qn;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

#if CONFIG_HIGHBITDEPTH
void av1_highbd_convolve_horiz_rs_c(const uint16_t *src, int src_stride,
                                    uint16_t *dst, int dst_stride, int w, int h,
                                    const int16_t *x_filters, int interp_taps,
                                    int x0_qn, int x_step_qn, int bd) {
  src -= interp_taps / 2 - 1;
62
  for (int y = 0; y < h; ++y) {
63
    int x_qn = x0_qn;
64
    for (int x = 0; x < w; ++x) {
65
66
67
68
69
      const uint16_t *const src_x = &src[x_qn >> RS_SCALE_SUBPEL_BITS];
      const int x_filter_idx =
          (x_qn & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS;
      assert(x_filter_idx <= RS_SUBPEL_MASK);
      const int16_t *const x_filter = &x_filters[x_filter_idx * interp_taps];
70
71
      int sum = 0;
      for (int k = 0; k < interp_taps; ++k) sum += src_x[k] * x_filter[k];
72
73
74
75
76
77
78
79
      dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
      x_qn += x_step_qn;
    }
    src += src_stride;
    dst += dst_stride;
  }
}
#endif  // CONFIG_HIGHBITDEPTH
80
#endif  // CONFIG_FRAME_SUPERRES
81

Yaowu Xu's avatar
Yaowu Xu committed
82
83
84
// Horizontal sub-pixel convolution, C reference version.
//
// For every output pixel the position counter (starting at subpel_x_q4 and
// stepping by x_step_q4) is split into an integer sample offset
// (>> SUBPEL_BITS) and a kernel index (& SUBPEL_MASK). The filtered value is
// rounded by FILTER_BITS and clipped; when conv_params->do_average is set it
// is averaged (with rounding) into the pixel already in dst, otherwise it
// overwrites dst.
void av1_convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst,
                          int dst_stride, int w, int h,
                          const InterpFilterParams filter_params,
                          const int subpel_x_q4, int x_step_q4,
                          ConvolveParams *conv_params) {
  const int taps = filter_params.taps;
  assert(conv_params->round == CONVOLVE_OPT_ROUND);

  // Move back by (taps / 2 - 1) samples so kernel tap 0 reads window[0].
  const uint8_t *src_row = src - (taps / 2 - 1);
  uint8_t *dst_row = dst;

  for (int row = 0; row < h; ++row) {
    int pos_q4 = subpel_x_q4;
    for (int col = 0; col < w; ++col, pos_q4 += x_step_q4) {
      const uint8_t *const window = src_row + (pos_q4 >> SUBPEL_BITS);
      const int16_t *kernel = av1_get_interp_filter_subpel_kernel(
          filter_params, pos_q4 & SUBPEL_MASK);

      int acc = 0;
      for (int tap = 0; tap < taps; ++tap) acc += window[tap] * kernel[tap];

      const int pixel = clip_pixel(ROUND_POWER_OF_TWO(acc, FILTER_BITS));
      dst_row[col] = conv_params->do_average
                         ? ROUND_POWER_OF_TWO(dst_row[col] + pixel, 1)
                         : pixel;
    }
    src_row += src_stride;
    dst_row += dst_stride;
  }
}

Fergus Simpson's avatar
Fergus Simpson committed
112
113
114
115
116
117
// Horizontal convolution with a scaled position step.
//
// The position counter starts at subpel_x_qn and steps by x_step_qn. Its bits
// above SCALE_SUBPEL_BITS give the source sample offset; its masked low bits,
// shifted down by SCALE_EXTRA_BITS, give the kernel index (asserted to be
// below SUBPEL_SHIFTS). Output is rounded by FILTER_BITS, clipped, and either
// stored or averaged into dst depending on conv_params->do_average.
void av1_convolve_horiz_scale(const uint8_t *src, int src_stride, uint8_t *dst,
                              int dst_stride, int w, int h,
                              const InterpFilterParams filter_params,
                              const int subpel_x_qn, int x_step_qn,
                              ConvolveParams *conv_params) {
  const int taps = filter_params.taps;
  assert(conv_params->round == CONVOLVE_OPT_ROUND);

  // Move back by (taps / 2 - 1) samples so kernel tap 0 reads window[0].
  const uint8_t *src_row = src - (taps / 2 - 1);
  uint8_t *dst_row = dst;

  for (int row = 0; row < h; ++row) {
    int pos_qn = subpel_x_qn;
    for (int col = 0; col < w; ++col, pos_qn += x_step_qn) {
      const uint8_t *const window = src_row + (pos_qn >> SCALE_SUBPEL_BITS);
      const int phase = (pos_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
      assert(phase < SUBPEL_SHIFTS);
      const int16_t *kernel =
          av1_get_interp_filter_subpel_kernel(filter_params, phase);

      int acc = 0;
      for (int tap = 0; tap < taps; ++tap) acc += window[tap] * kernel[tap];

      const int pixel = clip_pixel(ROUND_POWER_OF_TWO(acc, FILTER_BITS));
      dst_row[col] = conv_params->do_average
                         ? ROUND_POWER_OF_TWO(dst_row[col] + pixel, 1)
                         : pixel;
    }
    src_row += src_stride;
    dst_row += dst_stride;
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
144
145
146
// Vertical sub-pixel convolution, C reference version.
//
// Works column by column. Within a column the position counter starts at
// subpel_y_q4 and steps by y_step_q4 per output row; (>> SUBPEL_BITS) gives
// the source row offset and (& SUBPEL_MASK) the kernel index. The filtered
// value is rounded by FILTER_BITS and clipped, then stored, or averaged into
// dst when conv_params->do_average is set.
void av1_convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst,
                         int dst_stride, int w, int h,
                         const InterpFilterParams filter_params,
                         const int subpel_y_q4, int y_step_q4,
                         ConvolveParams *conv_params) {
  const int taps = filter_params.taps;
  assert(conv_params->round == CONVOLVE_OPT_ROUND);

  // Move up by (taps / 2 - 1) rows so kernel tap 0 reads the first window row.
  const uint8_t *src_col = src - src_stride * (taps / 2 - 1);
  uint8_t *dst_col = dst;

  for (int col = 0; col < w; ++col) {
    int pos_q4 = subpel_y_q4;
    for (int row = 0; row < h; ++row, pos_q4 += y_step_q4) {
      const uint8_t *const window =
          src_col + (pos_q4 >> SUBPEL_BITS) * src_stride;
      const int16_t *kernel = av1_get_interp_filter_subpel_kernel(
          filter_params, pos_q4 & SUBPEL_MASK);

      int acc = 0;
      for (int tap = 0; tap < taps; ++tap) {
        acc += window[tap * src_stride] * kernel[tap];
      }

      const int pixel = clip_pixel(ROUND_POWER_OF_TWO(acc, FILTER_BITS));
      uint8_t *const out = dst_col + row * dst_stride;
      *out = conv_params->do_average ? ROUND_POWER_OF_TWO(*out + pixel, 1)
                                     : pixel;
    }
    ++src_col;
    ++dst_col;
  }
}

175
176
177
178
179
// Vertical convolution with a scaled position step.
//
// Works column by column. The position counter starts at subpel_y_qn and
// steps by y_step_qn per output row. Its bits above SCALE_SUBPEL_BITS give
// the source row offset; its masked low bits, shifted down by
// SCALE_EXTRA_BITS, give the kernel index (asserted to be below
// SUBPEL_SHIFTS). Output is rounded by FILTER_BITS, clipped, and either
// stored or averaged into dst depending on conv_params->do_average.
static void av1_convolve_vert_scale(const uint8_t *src, int src_stride,
                                    uint8_t *dst, int dst_stride, int w, int h,
                                    const InterpFilterParams filter_params,
                                    const int subpel_y_qn, int y_step_qn,
                                    ConvolveParams *conv_params) {
  const int taps = filter_params.taps;
  assert(conv_params->round == CONVOLVE_OPT_ROUND);

  // Move up by (taps / 2 - 1) rows so kernel tap 0 reads the first window row.
  const uint8_t *src_col = src - src_stride * (taps / 2 - 1);
  uint8_t *dst_col = dst;

  for (int col = 0; col < w; ++col) {
    int pos_qn = subpel_y_qn;
    for (int row = 0; row < h; ++row, pos_qn += y_step_qn) {
      const uint8_t *const window =
          src_col + (pos_qn >> SCALE_SUBPEL_BITS) * src_stride;
      const int phase = (pos_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
      assert(phase < SUBPEL_SHIFTS);
      const int16_t *kernel =
          av1_get_interp_filter_subpel_kernel(filter_params, phase);

      int acc = 0;
      for (int tap = 0; tap < taps; ++tap) {
        acc += window[tap * src_stride] * kernel[tap];
      }

      const int pixel = clip_pixel(ROUND_POWER_OF_TWO(acc, FILTER_BITS));
      uint8_t *const out = dst_col + row * dst_stride;
      *out = conv_params->do_average ? ROUND_POWER_OF_TWO(*out + pixel, 1)
                                     : pixel;
    }
    ++src_col;
    ++dst_col;
  }
}

Angie Chiang's avatar
Angie Chiang committed
209
// Copies a w x h block from src to dst without filtering.
//
// When conv_params->do_average is zero each row is copied with memcpy().
// Otherwise every dst pixel becomes the rounded average of the existing dst
// pixel and the matching src pixel.
static void convolve_copy(const uint8_t *src, int src_stride, uint8_t *dst,
                          int dst_stride, int w, int h,
                          ConvolveParams *conv_params) {
  assert(conv_params->round == CONVOLVE_OPT_ROUND);
  const int averaging = conv_params->do_average != 0;

  for (int row = 0; row < h; ++row) {
    if (averaging) {
      for (int col = 0; col < w; ++col) {
        dst[col] = clip_pixel(ROUND_POWER_OF_TWO(dst[col] + src[col], 1));
      }
    } else {
      memcpy(dst, src, w);
    }
    src += src_stride;
    dst += dst_stride;
  }
}

230
231
232
233
234
235
// Chooses the horizontal convolution routine for the given filter.
//
// Filters whose tap count differs from SUBPEL_TAPS go to
// av1_convolve_horiz(). Otherwise the kernel for subpel_x_q4 is looked up and
// handed to aom_convolve8_horiz() or, when conv_params->do_average is set,
// to aom_convolve8_avg_horiz(). The second filter/step pair of those calls
// is passed as NULL / -1.
static void av1_convolve_horiz_facade(const uint8_t *src, int src_stride,
                                      uint8_t *dst, int dst_stride, int w,
                                      int h,
                                      const InterpFilterParams filter_params,
                                      const int subpel_x_q4, int x_step_q4,
                                      ConvolveParams *conv_params) {
  assert(conv_params->round == CONVOLVE_OPT_ROUND);

  if (filter_params.taps != SUBPEL_TAPS) {
    av1_convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params,
                       subpel_x_q4, x_step_q4, conv_params);
    return;
  }

  const int16_t *const kernel =
      av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);
  if (conv_params->do_average) {
    aom_convolve8_avg_horiz(src, src_stride, dst, dst_stride, kernel,
                            x_step_q4, NULL, -1, w, h);
  } else {
    aom_convolve8_horiz(src, src_stride, dst, dst_stride, kernel, x_step_q4,
                        NULL, -1, w, h);
  }
}

252
253
254
255
256
257
// Chooses the horizontal convolution routine, always using the *_c variants.
//
// Filters whose tap count differs from SUBPEL_TAPS go to
// av1_convolve_horiz_c(). Otherwise the kernel for subpel_x_q4 is looked up
// and handed to aom_convolve8_horiz_c() or, when conv_params->do_average is
// set, to aom_convolve8_avg_horiz_c(). The second filter/step pair of those
// calls is passed as NULL / -1.
static void av1_convolve_horiz_facade_c(const uint8_t *src, int src_stride,
                                        uint8_t *dst, int dst_stride, int w,
                                        int h,
                                        const InterpFilterParams filter_params,
                                        const int subpel_x_q4, int x_step_q4,
                                        ConvolveParams *conv_params) {
  assert(conv_params->round == CONVOLVE_OPT_ROUND);

  if (filter_params.taps != SUBPEL_TAPS) {
    av1_convolve_horiz_c(src, src_stride, dst, dst_stride, w, h, filter_params,
                         subpel_x_q4, x_step_q4, conv_params);
    return;
  }

  const int16_t *const kernel =
      av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);
  if (conv_params->do_average) {
    aom_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, kernel,
                              x_step_q4, NULL, -1, w, h);
  } else {
    aom_convolve8_horiz_c(src, src_stride, dst, dst_stride, kernel, x_step_q4,
                          NULL, -1, w, h);
  }
}

Fergus Simpson's avatar
Fergus Simpson committed
274
275
276
277
278
// Chooses the scaled horizontal convolution routine for the given filter.
//
// Filters whose tap count differs from SUBPEL_TAPS go to
// av1_convolve_horiz_scale(). Otherwise the kernel for
// (subpel_x_qn >> SCALE_EXTRA_BITS) is looked up and handed to
// aom_convolve8_horiz_scale() or, when conv_params->do_average is set, to
// aom_convolve8_avg_horiz_scale(). The second filter/position/step triple of
// those calls is passed as NULL / 0 / -1.
void av1_convolve_horiz_facade_scale(const uint8_t *src, int src_stride,
                                     uint8_t *dst, int dst_stride, int w, int h,
                                     const InterpFilterParams filter_params,
                                     const int subpel_x_qn, int x_step_qn,
                                     ConvolveParams *conv_params) {
  assert(conv_params->round == CONVOLVE_OPT_ROUND);

  if (filter_params.taps != SUBPEL_TAPS) {
    av1_convolve_horiz_scale(src, src_stride, dst, dst_stride, w, h,
                             filter_params, subpel_x_qn, x_step_qn,
                             conv_params);
    return;
  }

  const int16_t *const kernel = av1_get_interp_filter_subpel_kernel(
      filter_params, subpel_x_qn >> SCALE_EXTRA_BITS);
  if (conv_params->do_average) {
    aom_convolve8_avg_horiz_scale(src, src_stride, dst, dst_stride, kernel,
                                  subpel_x_qn, x_step_qn, NULL, 0, -1, w, h);
  } else {
    aom_convolve8_horiz_scale(src, src_stride, dst, dst_stride, kernel,
                              subpel_x_qn, x_step_qn, NULL, 0, -1, w, h);
  }
}

296
297
298
299
300
// Chooses the vertical convolution routine for the given filter.
//
// Filters whose tap count differs from SUBPEL_TAPS go to av1_convolve_vert().
// Otherwise the kernel for subpel_y_q4 is looked up and handed to
// aom_convolve8_vert() or, when conv_params->do_average is set, to
// aom_convolve8_avg_vert(). The first filter/step pair of those calls is
// passed as NULL / -1.
static void av1_convolve_vert_facade(const uint8_t *src, int src_stride,
                                     uint8_t *dst, int dst_stride, int w, int h,
                                     const InterpFilterParams filter_params,
                                     const int subpel_y_q4, int y_step_q4,
                                     ConvolveParams *conv_params) {
  assert(conv_params->round == CONVOLVE_OPT_ROUND);

  if (filter_params.taps != SUBPEL_TAPS) {
    av1_convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params,
                      subpel_y_q4, y_step_q4, conv_params);
    return;
  }

  const int16_t *const kernel =
      av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);
  if (conv_params->do_average) {
    aom_convolve8_avg_vert(src, src_stride, dst, dst_stride, NULL, -1, kernel,
                           y_step_q4, w, h);
  } else {
    aom_convolve8_vert(src, src_stride, dst, dst_stride, NULL, -1, kernel,
                       y_step_q4, w, h);
  }
}

318
319
320
321
322
323
// Chooses the vertical convolution routine, always using the *_c variants.
//
// Filters whose tap count differs from SUBPEL_TAPS go to
// av1_convolve_vert_c(). Otherwise the kernel for subpel_y_q4 is looked up
// and handed to aom_convolve8_vert_c() or, when conv_params->do_average is
// set, to aom_convolve8_avg_vert_c(). The first filter/step pair of those
// calls is passed as NULL / -1.
static void av1_convolve_vert_facade_c(const uint8_t *src, int src_stride,
                                       uint8_t *dst, int dst_stride, int w,
                                       int h,
                                       const InterpFilterParams filter_params,
                                       const int subpel_y_q4, int y_step_q4,
                                       ConvolveParams *conv_params) {
  assert(conv_params->round == CONVOLVE_OPT_ROUND);

  if (filter_params.taps != SUBPEL_TAPS) {
    av1_convolve_vert_c(src, src_stride, dst, dst_stride, w, h, filter_params,
                        subpel_y_q4, y_step_q4, conv_params);
    return;
  }

  const int16_t *const kernel =
      av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);
  if (conv_params->do_average) {
    aom_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, NULL, -1,
                             kernel, y_step_q4, w, h);
  } else {
    aom_convolve8_vert_c(src, src_stride, dst, dst_stride, NULL, -1, kernel,
                         y_step_q4, w, h);
  }
}

Fergus Simpson's avatar
Fergus Simpson committed
341
342
343
344
345
// Chooses the scaled vertical convolution routine for the given filter.
//
// Filters whose tap count differs from SUBPEL_TAPS go to
// av1_convolve_vert_scale(). Otherwise the kernel for
// (subpel_y_qn >> SCALE_EXTRA_BITS) is looked up and handed to
// aom_convolve8_vert_scale() or, when conv_params->do_average is set, to
// aom_convolve8_avg_vert_scale(). The first filter/position/step triple of
// those calls is passed as NULL / 0 / -1.
void av1_convolve_vert_facade_scale(const uint8_t *src, int src_stride,
                                    uint8_t *dst, int dst_stride, int w, int h,
                                    const InterpFilterParams filter_params,
                                    const int subpel_y_qn, int y_step_qn,
                                    ConvolveParams *conv_params) {
  assert(conv_params->round == CONVOLVE_OPT_ROUND);

  if (filter_params.taps != SUBPEL_TAPS) {
    av1_convolve_vert_scale(src, src_stride, dst, dst_stride, w, h,
                            filter_params, subpel_y_qn, y_step_qn, conv_params);
    return;
  }

  const int16_t *const kernel = av1_get_interp_filter_subpel_kernel(
      filter_params, subpel_y_qn >> SCALE_EXTRA_BITS);
  if (conv_params->do_average) {
    aom_convolve8_avg_vert_scale(src, src_stride, dst, dst_stride, NULL, 0, -1,
                                 kernel, subpel_y_qn, y_step_qn, w, h);
  } else {
    aom_convolve8_vert_scale(src, src_stride, dst, dst_stride, NULL, 0, -1,
                             kernel, subpel_y_qn, y_step_qn, w, h);
  }
}

Angie Chiang's avatar
Angie Chiang committed
363
#if CONFIG_CONVOLVE_ROUND
364
365
// Converts a w x h block of 32-bit intermediate values to 8-bit pixels:
// each value is rounded by `bits` with ROUND_POWER_OF_TWO and then clipped
// with clip_pixel().
void av1_convolve_rounding_c(const int32_t *src, int src_stride, uint8_t *dst,
                             int dst_stride, int w, int h, int bits) {
  for (int row = 0; row < h; ++row) {
    const int32_t *const in = src + row * src_stride;
    uint8_t *const out = dst + row * dst_stride;
    for (int col = 0; col < w; ++col) {
      out[col] = clip_pixel(ROUND_POWER_OF_TWO(in[col], bits));
    }
  }
}
Angie Chiang's avatar
Angie Chiang committed
373

374
#if CONFIG_COMPOUND_ROUND
David Barker's avatar
David Barker committed
375
376
377
378
379
380
// Separable 2-D convolution (variant built when CONFIG_COMPOUND_ROUND is on).
//
// Pass 1 filters h + taps_y - 1 source rows horizontally, rounds each sum by
// conv_params->round_0, clips it to a pixel and stores it in an on-stack
// 8-bit intermediate block whose stride is w.
// Pass 2 filters that block vertically, rounds by conv_params->round_1, and
// either stores the result in dst or adds it to dst when
// conv_params->do_average is set.
void av1_convolve_2d_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
                       int dst_stride, int w, int h,
                       InterpFilterParams *filter_params_x,
                       InterpFilterParams *filter_params_y,
                       const int subpel_x_q4, const int subpel_y_q4,
                       ConvolveParams *conv_params) {
  uint8_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
  const int taps_x = filter_params_x->taps;
  const int taps_y = filter_params_y->taps;
  const int im_rows = h + taps_y - 1;
  const int im_stride = w;
  const int fo_vert = taps_y / 2 - 1;
  const int fo_horiz = taps_x / 2 - 1;

  // Pass 1: horizontal filter, starting fo_vert rows above the block.
  const uint8_t *const src_top = src - fo_vert * src_stride;
  const int16_t *const kernel_x = av1_get_interp_filter_subpel_kernel(
      *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
  for (int row = 0; row < im_rows; ++row) {
    uint8_t *const im_row = im_block + row * im_stride;
    for (int col = 0; col < w; ++col) {
      int32_t acc = 0;
      for (int tap = 0; tap < taps_x; ++tap) {
        acc += kernel_x[tap] * src_top[row * src_stride + col - fo_horiz + tap];
      }
      im_row[col] = clip_pixel(ROUND_POWER_OF_TWO(acc, conv_params->round_0));
    }
  }

  // Pass 2: vertical filter. Output row `row` reads intermediate rows
  // row .. row + taps_y - 1.
  const int16_t *const kernel_y = av1_get_interp_filter_subpel_kernel(
      *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
  for (int row = 0; row < h; ++row) {
    CONV_BUF_TYPE *const out_row = dst + row * dst_stride;
    for (int col = 0; col < w; ++col) {
      CONV_BUF_TYPE acc = 0;
      for (int tap = 0; tap < taps_y; ++tap) {
        acc += kernel_y[tap] * im_block[(row + tap) * im_stride + col];
      }
      const CONV_BUF_TYPE res =
          ROUND_POWER_OF_TWO(acc, conv_params->round_1);
      if (conv_params->do_average) {
        out_row[col] += res;
      } else {
        out_row[col] = res;
      }
    }
  }
}

421
#if CONFIG_JNT_COMP
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
// Separable 2-D convolution with optional weighted compound output.
//
// Pass 1 filters h + taps_y - 1 source rows horizontally, rounds by
// conv_params->round_0, clips to a pixel and stores into an on-stack 8-bit
// intermediate block whose stride is w.
// Pass 2 filters that block vertically and rounds by conv_params->round_1.
// The result `res` is written as follows:
//   - use_jnt_comp_avg set, do_average == 0: dst = res * fwd_offset
//   - use_jnt_comp_avg set, do_average != 0: dst += res * bck_offset, then
//     dst is rounded by DIST_PRECISION_BITS - 1
//   - use_jnt_comp_avg clear: dst += res when do_average is set, else
//     dst = res
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride,
                           CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
                           InterpFilterParams *filter_params_x,
                           InterpFilterParams *filter_params_y,
                           const int subpel_x_q4, const int subpel_y_q4,
                           ConvolveParams *conv_params) {
  uint8_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
  const int taps_x = filter_params_x->taps;
  const int taps_y = filter_params_y->taps;
  const int im_rows = h + taps_y - 1;
  const int im_stride = w;
  const int fo_vert = taps_y / 2 - 1;
  const int fo_horiz = taps_x / 2 - 1;

  // Pass 1: horizontal filter, starting fo_vert rows above the block.
  const uint8_t *const src_top = src - fo_vert * src_stride;
  const int16_t *const kernel_x = av1_get_interp_filter_subpel_kernel(
      *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
  for (int row = 0; row < im_rows; ++row) {
    uint8_t *const im_row = im_block + row * im_stride;
    for (int col = 0; col < w; ++col) {
      int32_t acc = 0;
      for (int tap = 0; tap < taps_x; ++tap) {
        acc += kernel_x[tap] * src_top[row * src_stride + col - fo_horiz + tap];
      }
      im_row[col] = clip_pixel(ROUND_POWER_OF_TWO(acc, conv_params->round_0));
    }
  }

  // Pass 2: vertical filter. Output row `row` reads intermediate rows
  // row .. row + taps_y - 1.
  const int16_t *const kernel_y = av1_get_interp_filter_subpel_kernel(
      *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
  for (int row = 0; row < h; ++row) {
    for (int col = 0; col < w; ++col) {
      CONV_BUF_TYPE acc = 0;
      for (int tap = 0; tap < taps_y; ++tap) {
        acc += kernel_y[tap] * im_block[(row + tap) * im_stride + col];
      }
      const CONV_BUF_TYPE res =
          ROUND_POWER_OF_TWO(acc, conv_params->round_1);
      CONV_BUF_TYPE *const out = &dst[row * dst_stride + col];

      if (!conv_params->use_jnt_comp_avg) {
        if (conv_params->do_average) {
          *out += res;
        } else {
          *out = res;
        }
      } else if (conv_params->do_average == 0) {
        *out = res * conv_params->fwd_offset;
      } else {
        *out += res * conv_params->bck_offset;
        *out = ROUND_POWER_OF_TWO(*out, DIST_PRECISION_BITS - 1);
      }
    }
  }
}
478
#endif  // CONFIG_JNT_COMP
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495

// Separable 2-D convolution with independent horizontal and vertical position
// steps.
//
// Pass 1 walks each intermediate row with a position counter that starts at
// subpel_x_qn and steps by x_step_qn; the kernel is re-selected per output
// column. Results are rounded by conv_params->round_0, clipped to a pixel and
// stored in an on-stack 8-bit intermediate block whose stride is w.
// Pass 2 walks each column of that block with a counter that starts at
// subpel_y_qn and steps by y_step_qn, rounds by conv_params->round_1, and
// writes to dst. With CONFIG_JNT_COMP the write honours
// conv_params->use_jnt_comp_avg (fwd_offset / bck_offset weighting);
// otherwise it stores, or accumulates when conv_params->do_average is set.
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride,
                             CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
                             InterpFilterParams *filter_params_x,
                             InterpFilterParams *filter_params_y,
                             const int subpel_x_qn, const int x_step_qn,
                             const int subpel_y_qn, const int y_step_qn,
                             ConvolveParams *conv_params) {
  uint8_t im_block[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE];
  const int taps_x = filter_params_x->taps;
  const int taps_y = filter_params_y->taps;
  // Integer row offset of the last output row, plus the kernel length.
  const int im_rows =
      (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) + taps_y;
  const int im_stride = w;
  const int fo_vert = taps_y / 2 - 1;
  const int fo_horiz = taps_x / 2 - 1;

  // Pass 1: horizontal filter, starting fo_vert rows above the block.
  const uint8_t *src_row = src - fo_vert * src_stride;
  for (int row = 0; row < im_rows; ++row) {
    int pos_qn = subpel_x_qn;
    for (int col = 0; col < w; ++col, pos_qn += x_step_qn) {
      const uint8_t *const centre = &src_row[(pos_qn >> SCALE_SUBPEL_BITS)];
      const int phase = (pos_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
      assert(phase < SUBPEL_SHIFTS);
      const int16_t *kernel =
          av1_get_interp_filter_subpel_kernel(*filter_params_x, phase);
      int acc = 0;
      for (int tap = 0; tap < taps_x; ++tap) {
        acc += kernel[tap] * centre[tap - fo_horiz];
      }
      im_block[row * im_stride + col] =
          clip_pixel(ROUND_POWER_OF_TWO(acc, conv_params->round_0));
    }
    src_row += src_stride;
  }

  // Pass 2: vertical filter, one column of the intermediate block at a time.
  const uint8_t *im_col = im_block + fo_vert * im_stride;
  for (int col = 0; col < w; ++col) {
    int pos_qn = subpel_y_qn;
    for (int row = 0; row < h; ++row, pos_qn += y_step_qn) {
      const uint8_t *const centre =
          &im_col[(pos_qn >> SCALE_SUBPEL_BITS) * im_stride];
      const int phase = (pos_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
      assert(phase < SUBPEL_SHIFTS);
      const int16_t *kernel =
          av1_get_interp_filter_subpel_kernel(*filter_params_y, phase);
      CONV_BUF_TYPE acc = 0;
      for (int tap = 0; tap < taps_y; ++tap) {
        acc += kernel[tap] * centre[(tap - fo_vert) * im_stride];
      }
      const CONV_BUF_TYPE res =
          ROUND_POWER_OF_TWO(acc, conv_params->round_1);
      CONV_BUF_TYPE *const out = &dst[row * dst_stride + col];
#if CONFIG_JNT_COMP
      if (!conv_params->use_jnt_comp_avg) {
        if (conv_params->do_average) {
          *out += res;
        } else {
          *out = res;
        }
      } else if (conv_params->do_average == 0) {
        *out = res * conv_params->fwd_offset;
      } else {
        *out += res * conv_params->bck_offset;
        *out = ROUND_POWER_OF_TWO(*out, DIST_PRECISION_BITS - 1);
      }
#else
      if (conv_params->do_average) {
        *out += res;
      } else {
        *out = res;
      }
#endif  // CONFIG_JNT_COMP
    }
    im_col++;
  }
}

Angie Chiang's avatar
Angie Chiang committed
556
#else
557

558
559
560
561
562
563
/* When convolve-round is enabled and compound-round is disabled, we use a
   high-precision convolve filter.
   Note: For details relevant to hardware implementations, including the
   required bit widths for the various intermediate values, see the comments
   above av1_warp_affine_c.
*/
David Barker's avatar
David Barker committed
564
565
566
567
568
569
// Separable 2-D convolution keeping a 32-bit intermediate block (variant
// built when CONFIG_COMPOUND_ROUND is off).
//
// Pass 1 filters h + taps_y - 1 source rows horizontally. Each sum starts
// from the offset 1 << (bd + FILTER_BITS - 1) (bd is fixed at 8 here), is
// asserted to lie in [0, 1 << (bd + FILTER_BITS + 1)), and is rounded by
// conv_params->round_0 into the intermediate block (stride w).
// Pass 2 filters that block vertically. Each sum starts from
// 1 << offset_bits, is asserted to lie in [0, 1 << (offset_bits + 2)), is
// rounded by conv_params->round_1, and then has the rounded offsets
// subtracted again. The result is stored in dst, or added to dst when
// conv_params->do_average is set.
void av1_convolve_2d_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
                       int dst_stride, int w, int h,
                       InterpFilterParams *filter_params_x,
                       InterpFilterParams *filter_params_y,
                       const int subpel_x_q4, const int subpel_y_q4,
                       ConvolveParams *conv_params) {
  int32_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
  const int taps_x = filter_params_x->taps;
  const int taps_y = filter_params_y->taps;
  const int im_rows = h + taps_y - 1;
  const int im_stride = w;
  const int fo_vert = taps_y / 2 - 1;
  const int fo_horiz = taps_x / 2 - 1;
  const int bd = 8;

  // Pass 1: horizontal filter, starting fo_vert rows above the block.
  const uint8_t *const src_top = src - fo_vert * src_stride;
  const int16_t *const kernel_x = av1_get_interp_filter_subpel_kernel(
      *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
  for (int row = 0; row < im_rows; ++row) {
    int32_t *const im_row = im_block + row * im_stride;
    for (int col = 0; col < w; ++col) {
      int32_t acc = (1 << (bd + FILTER_BITS - 1));
      for (int tap = 0; tap < taps_x; ++tap) {
        acc += kernel_x[tap] * src_top[row * src_stride + col - fo_horiz + tap];
      }
      assert(0 <= acc && acc < (1 << (bd + FILTER_BITS + 1)));
      im_row[col] = ROUND_POWER_OF_TWO(acc, conv_params->round_0);
    }
  }

  // Pass 2: vertical filter. Output row `row` reads intermediate rows
  // row .. row + taps_y - 1.
  const int16_t *const kernel_y = av1_get_interp_filter_subpel_kernel(
      *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
  const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
  for (int row = 0; row < h; ++row) {
    CONV_BUF_TYPE *const out_row = dst + row * dst_stride;
    for (int col = 0; col < w; ++col) {
      CONV_BUF_TYPE acc = 1 << offset_bits;
      for (int tap = 0; tap < taps_y; ++tap) {
        acc += kernel_y[tap] * im_block[(row + tap) * im_stride + col];
      }
      assert(0 <= acc && acc < (1 << (offset_bits + 2)));
      // Remove the two offsets that were added before each filtering pass.
      const CONV_BUF_TYPE res =
          ROUND_POWER_OF_TWO(acc, conv_params->round_1) -
          ((1 << (offset_bits - conv_params->round_1)) +
           (1 << (offset_bits - conv_params->round_1 - 1)));
      if (conv_params->do_average) {
        out_row[col] += res;
      } else {
        out_row[col] = res;
      }
    }
  }
}

Yunqing Wang's avatar
Yunqing Wang committed
616
617
618
619
620
621
622
623
624
625
626
627
628
629
// One-dimensional convolution using filter_params_y / subpel_y_q4.
//
// Each output value is the sum of taps_y source samples taken down a column
// (stepping by src_stride), multiplied by
// 1 << (FILTER_BITS - round_0 - round_1). The result is stored in dst, or
// added to dst when conv_params->do_average is set. filter_params_x and
// subpel_x_q4 are unused.
// NOTE(review): the function is named *_x but filters along the vertical
// direction; confirm against the callers before renaming anything.
void av1_convolve_x_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
                      int dst_stride, int w, int h,
                      InterpFilterParams *filter_params_x,
                      InterpFilterParams *filter_params_y,
                      const int subpel_x_q4, const int subpel_y_q4,
                      ConvolveParams *conv_params) {
  (void)filter_params_x;
  (void)subpel_x_q4;

  const int taps_y = filter_params_y->taps;
  const int fo_vert = taps_y / 2 - 1;
  const int bits = FILTER_BITS - conv_params->round_0 - conv_params->round_1;
  const int16_t *const kernel = av1_get_interp_filter_subpel_kernel(
      *filter_params_y, subpel_y_q4 & SUBPEL_MASK);

  for (int row = 0; row < h; ++row) {
    CONV_BUF_TYPE *const out_row = dst + row * dst_stride;
    for (int col = 0; col < w; ++col) {
      CONV_BUF_TYPE acc = 0;
      for (int tap = 0; tap < taps_y; ++tap) {
        acc += kernel[tap] * src[(row - fo_vert + tap) * src_stride + col];
      }
      acc *= (1 << bits);
      if (conv_params->do_average) {
        out_row[col] += acc;
      } else {
        out_row[col] = acc;
      }
    }
  }
}

// Horizontal-only convolution into a CONV_BUF_TYPE buffer.
//
// Naming note: this "_y" variant ignores filter_params_y / subpel_y_q4 and
// filters along rows with the x kernel (av1_convolve_2d_facade selects
// av1_convolve_y when subpel_y_q4 == 0). Each filtered sum is rounded by
// conv_params->round_0 and then multiplied by 2^(FILTER_BITS - round_1); the
// result is stored to dst, or added to the value already in dst when
// conv_params->do_average is set.
//
// src must be readable (taps / 2 - 1) samples to the left of the block and
// the remaining taps to the right.
void av1_convolve_y_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
                      int dst_stride, int w, int h,
                      InterpFilterParams *filter_params_x,
                      InterpFilterParams *filter_params_y,
                      const int subpel_x_q4, const int subpel_y_q4,
                      ConvolveParams *conv_params) {
  (void)filter_params_y;
  (void)subpel_y_q4;

  const int num_taps = filter_params_x->taps;
  const int col_offset = num_taps / 2 - 1;
  const int shift = FILTER_BITS - conv_params->round_1;
  const int16_t *const kernel = av1_get_interp_filter_subpel_kernel(
      *filter_params_x, subpel_x_q4 & SUBPEL_MASK);

  for (int row = 0; row < h; ++row) {
    for (int col = 0; col < w; ++col) {
      // Accumulate the kernel window running along the current row.
      CONV_BUF_TYPE acc = 0;
      for (int tap = 0; tap < num_taps; ++tap) {
        acc += kernel[tap] * src[row * src_stride + col - col_offset + tap];
      }
      acc = (1 << shift) * ROUND_POWER_OF_TWO(acc, conv_params->round_0);

      const int out_idx = row * dst_stride + col;
      if (conv_params->do_average) {
        dst[out_idx] += acc;
      } else {
        dst[out_idx] = acc;
      }
    }
  }
}

674
675
676
677
678
679
680
681
// Unfiltered copy into a CONV_BUF_TYPE buffer.
//
// No kernel is applied; each source pixel is multiplied by
// 2^(2 * FILTER_BITS - round_1 - round_0) and stored to dst, or added to the
// value already in dst when conv_params->do_average is set. The filter and
// sub-pixel arguments exist only to match the other convolve kernels'
// signature and are ignored.
void av1_convolve_2d_copy_c(const uint8_t *src, int src_stride,
                            CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
                            InterpFilterParams *filter_params_x,
                            InterpFilterParams *filter_params_y,
                            const int subpel_x_q4, const int subpel_y_q4,
                            ConvolveParams *conv_params) {
  (void)filter_params_x;
  (void)filter_params_y;
  (void)subpel_x_q4;
  (void)subpel_y_q4;

  const int shift =
      FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;

  for (int row = 0; row < h; ++row) {
    for (int col = 0; col < w; ++col) {
      const CONV_BUF_TYPE scaled_px =
          (1 << shift) * src[row * src_stride + col];
      const int out_idx = row * dst_stride + col;
      if (conv_params->do_average) {
        dst[out_idx] += scaled_px;
      } else {
        dst[out_idx] = scaled_px;
      }
    }
  }
}

699
#if CONFIG_JNT_COMP
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
// Separable 2-D convolution (8-bit source) with optional weighted compound
// averaging.
//
// Pass 1 filters h + taps_y - 1 source rows horizontally into im_block,
// adding a 2^(bd + FILTER_BITS - 1) offset so the sums stay non-negative,
// and rounds by conv_params->round_0. Pass 2 filters im_block vertically
// with its own 2^offset_bits offset, rounds by conv_params->round_1 and then
// subtracts the accumulated offsets again.
//
// Output rule, per sample:
//   use_jnt_comp_avg && do_average:
//       dst = ROUND_POWER_OF_TWO(dst + res * bck_offset,
//                                DIST_PRECISION_BITS - 1)
//   use_jnt_comp_avg && !do_average:  dst = res * fwd_offset
//   otherwise:                        dst += res (do_average) or dst = res
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride,
                           CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
                           InterpFilterParams *filter_params_x,
                           InterpFilterParams *filter_params_y,
                           const int subpel_x_q4, const int subpel_y_q4,
                           ConvolveParams *conv_params) {
  int32_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
  const int bd = 8;
  const int taps_x = filter_params_x->taps;
  const int taps_y = filter_params_y->taps;
  const int fo_horiz = taps_x / 2 - 1;
  const int fo_vert = taps_y / 2 - 1;
  const int im_h = h + taps_y - 1;
  const int im_stride = w;

  // Pass 1: horizontal filter. Start fo_vert rows above the block so the
  // vertical pass has the context rows it needs.
  const uint8_t *const src_horiz = src - fo_vert * src_stride;
  const int16_t *const x_filter = av1_get_interp_filter_subpel_kernel(
      *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
  for (int row = 0; row < im_h; ++row) {
    for (int col = 0; col < w; ++col) {
      int32_t sum = (1 << (bd + FILTER_BITS - 1));
      for (int tap = 0; tap < taps_x; ++tap) {
        sum += x_filter[tap] *
               src_horiz[row * src_stride + col - fo_horiz + tap];
      }
      assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1)));
      im_block[row * im_stride + col] =
          ROUND_POWER_OF_TWO(sum, conv_params->round_0);
    }
  }

  // Pass 2: vertical filter over the intermediate block.
  int32_t *const src_vert = im_block + fo_vert * im_stride;
  const int16_t *const y_filter = av1_get_interp_filter_subpel_kernel(
      *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
  const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
  for (int row = 0; row < h; ++row) {
    for (int col = 0; col < w; ++col) {
      CONV_BUF_TYPE sum = 1 << offset_bits;
      for (int tap = 0; tap < taps_y; ++tap) {
        sum += y_filter[tap] *
               src_vert[(row - fo_vert + tap) * im_stride + col];
      }
      assert(0 <= sum && sum < (1 << (offset_bits + 2)));

      // Remove the offsets introduced by both passes.
      const CONV_BUF_TYPE res =
          ROUND_POWER_OF_TWO(sum, conv_params->round_1) -
          ((1 << (offset_bits - conv_params->round_1)) +
           (1 << (offset_bits - conv_params->round_1 - 1)));

      CONV_BUF_TYPE *const out = &dst[row * dst_stride + col];
      if (!conv_params->use_jnt_comp_avg) {
        if (conv_params->do_average) {
          *out += res;
        } else {
          *out = res;
        }
      } else if (conv_params->do_average) {
        // Second prediction: add the weighted value, then normalise.
        *out += res * conv_params->bck_offset;
        *out = ROUND_POWER_OF_TWO(*out, DIST_PRECISION_BITS - 1);
      } else {
        // First prediction: store the weighted value for a later average.
        *out = res * conv_params->fwd_offset;
      }
    }
  }
}
762
#endif  // CONFIG_JNT_COMP
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780

// Separable 2-D convolution (8-bit source) with per-sample position stepping.
//
// The source position advances by x_step_qn / y_step_qn (in units of
// 1 / 2^SCALE_SUBPEL_BITS samples) per output sample, starting from
// subpel_x_qn / subpel_y_qn. For every output sample the integer part of the
// position selects the source sample and the fractional part selects the
// kernel phase.
//
// Pass 1 filters im_h source rows horizontally into im_block (offset by
// 2^(bd + FILTER_BITS - 1), rounded by round_0). Pass 2 walks each column,
// filters vertically (offset by 2^offset_bits, rounded by round_1), removes
// the offsets and writes/accumulates into dst following the same rules as the
// unscaled kernels (including the weighted average when CONFIG_JNT_COMP is
// enabled and conv_params->use_jnt_comp_avg is set).
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride,
                             CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
                             InterpFilterParams *filter_params_x,
                             InterpFilterParams *filter_params_y,
                             const int subpel_x_qn, const int x_step_qn,
                             const int subpel_y_qn, const int y_step_qn,
                             ConvolveParams *conv_params) {
  int32_t im_block[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE];
  const int bd = 8;
  const int taps_x = filter_params_x->taps;
  const int taps_y = filter_params_y->taps;
  const int fo_horiz = taps_x / 2 - 1;
  const int fo_vert = taps_y / 2 - 1;
  // Rows needed: integer position of the last output row plus the kernel.
  const int im_h =
      (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) + taps_y;
  const int im_stride = w;

  // Pass 1: horizontal filter.
  const uint8_t *const src_horiz = src - fo_vert * src_stride;
  for (int row = 0; row < im_h; ++row) {
    const uint8_t *const src_row = src_horiz + row * src_stride;
    int x_qn = subpel_x_qn;
    for (int col = 0; col < w; ++col, x_qn += x_step_qn) {
      const uint8_t *const src_x = &src_row[x_qn >> SCALE_SUBPEL_BITS];
      const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
      assert(x_filter_idx < SUBPEL_SHIFTS);
      const int16_t *const x_filter =
          av1_get_interp_filter_subpel_kernel(*filter_params_x, x_filter_idx);

      int32_t sum = (1 << (bd + FILTER_BITS - 1));
      for (int tap = 0; tap < taps_x; ++tap) {
        sum += x_filter[tap] * src_x[tap - fo_horiz];
      }
      assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1)));
      im_block[row * im_stride + col] =
          ROUND_POWER_OF_TWO(sum, conv_params->round_0);
    }
  }

  // Pass 2: vertical filter, processed one output column at a time.
  const int32_t *const src_vert = im_block + fo_vert * im_stride;
  const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
  for (int col = 0; col < w; ++col) {
    int y_qn = subpel_y_qn;
    for (int row = 0; row < h; ++row, y_qn += y_step_qn) {
      const int32_t *const src_y =
          &src_vert[(y_qn >> SCALE_SUBPEL_BITS) * im_stride + col];
      const int y_filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
      assert(y_filter_idx < SUBPEL_SHIFTS);
      const int16_t *const y_filter =
          av1_get_interp_filter_subpel_kernel(*filter_params_y, y_filter_idx);

      CONV_BUF_TYPE sum = 1 << offset_bits;
      for (int tap = 0; tap < taps_y; ++tap) {
        sum += y_filter[tap] * src_y[(tap - fo_vert) * im_stride];
      }
      assert(0 <= sum && sum < (1 << (offset_bits + 2)));

      // Remove the offsets introduced by both passes.
      const CONV_BUF_TYPE res =
          ROUND_POWER_OF_TWO(sum, conv_params->round_1) -
          ((1 << (offset_bits - conv_params->round_1)) +
           (1 << (offset_bits - conv_params->round_1 - 1)));

      CONV_BUF_TYPE *const out = &dst[row * dst_stride + col];
#if CONFIG_JNT_COMP
      if (conv_params->use_jnt_comp_avg) {
        if (conv_params->do_average) {
          *out += res * conv_params->bck_offset;
          *out = ROUND_POWER_OF_TWO(*out, DIST_PRECISION_BITS - 1);
        } else {
          *out = res * conv_params->fwd_offset;
        }
        continue;
      }
#endif  // CONFIG_JNT_COMP
      if (conv_params->do_average) {
        *out += res;
      } else {
        *out = res;
      }
    }
  }
}
#endif  // CONFIG_COMPOUND_ROUND
Angie Chiang's avatar
Angie Chiang committed
846
847
848

void av1_convolve_2d_facade(const uint8_t *src, int src_stride, uint8_t *dst,
                            int dst_stride, int w, int h,
849
850
851
                            InterpFilters interp_filters, const int subpel_x_q4,
                            int x_step_q4, const int subpel_y_q4, int y_step_q4,
                            int scaled, ConvolveParams *conv_params) {
Angie Chiang's avatar
Angie Chiang committed
852
853
854
855
  (void)x_step_q4;
  (void)y_step_q4;
  (void)dst;
  (void)dst_stride;
856
857

  InterpFilterParams filter_params_x, filter_params_y;
Zhijie Yang's avatar
Zhijie Yang committed
858
859
860
861
#if CONFIG_SHORT_FILTER
  av1_get_convolve_filter_params(interp_filters, 1, &filter_params_x,
                                 &filter_params_y, w, h);
#else
862
863
  av1_get_convolve_filter_params(interp_filters, 1, &filter_params_x,
                                 &filter_params_y);
Zhijie Yang's avatar
Zhijie Yang committed
864
#endif
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880

  if (filter_params_y.taps < filter_params_x.taps) {
    uint8_t tr_src[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) *
                   (MAX_SB_SIZE + MAX_FILTER_TAP - 1)];
    int tr_src_stride = MAX_SB_SIZE + MAX_FILTER_TAP - 1;
    CONV_BUF_TYPE tr_dst[MAX_SB_SIZE * MAX_SB_SIZE];
    int tr_dst_stride = MAX_SB_SIZE;
    int fo_vert = filter_params_y.taps / 2 - 1;
    int fo_horiz = filter_params_x.taps / 2 - 1;

    transpose_uint8(tr_src, tr_src_stride,
                    src - fo_vert * src_stride - fo_horiz, src_stride,
                    w + filter_params_x.taps - 1, h + filter_params_y.taps - 1);
    transpose_int32(tr_dst, tr_dst_stride, conv_params->dst,
                    conv_params->dst_stride, w, h);

881
882
883
// horizontal and vertical parameters are swapped because of the transpose
#if CONFIG_JNT_COMP
    if (scaled)
884
885
886
887
      av1_convolve_2d_scale(tr_src + fo_horiz * tr_src_stride + fo_vert,
                            tr_src_stride, tr_dst, tr_dst_stride, h, w,
                            &filter_params_y, &filter_params_x, subpel_y_q4,
                            y_step_q4, subpel_x_q4, x_step_q4, conv_params);
888
    else
889
890
891
892
      av1_jnt_convolve_2d(tr_src + fo_horiz * tr_src_stride + fo_vert,
                          tr_src_stride, tr_dst, tr_dst_stride, h, w,
                          &filter_params_y, &filter_params_x, subpel_y_q4,
                          subpel_x_q4, conv_params);
893
#else
894
895
896
897
898
899
900
901
902
903
    if (scaled)
      av1_convolve_2d_scale(tr_src + fo_horiz * tr_src_stride + fo_vert,
                            tr_src_stride, tr_dst, tr_dst_stride, h, w,
                            &filter_params_y, &filter_params_x, subpel_y_q4,
                            y_step_q4, subpel_x_q4, x_step_q4, conv_params);
    else
      av1_convolve_2d(tr_src + fo_horiz * tr_src_stride + fo_vert,
                      tr_src_stride, tr_dst, tr_dst_stride, h, w,
                      &filter_params_y, &filter_params_x, subpel_y_q4,
                      subpel_x_q4, conv_params);
904
#endif  // CONFIG_JNT_COMP
905
906
907
    transpose_int32(conv_params->dst, conv_params->dst_stride, tr_dst,
                    tr_dst_stride, h, w);
  } else {
908
909
#if CONFIG_JNT_COMP
    if (scaled)
910
911
912
913
      av1_convolve_2d_scale(src, src_stride, conv_params->dst,
                            conv_params->dst_stride, w, h, &filter_params_x,
                            &filter_params_y, subpel_x_q4, x_step_q4,
                            subpel_y_q4, y_step_q4, conv_params);
914
    else
915
916
917
918
      av1_jnt_convolve_2d(src, src_stride, conv_params->dst,
                          conv_params->dst_stride, w, h, &filter_params_x,
                          &filter_params_y, subpel_x_q4, subpel_y_q4,
                          conv_params);
919
#else
920
    if (scaled) {
921
922
923
924
      av1_convolve_2d_scale(src, src_stride, conv_params->dst,
                            conv_params->dst_stride, w, h, &filter_params_x,
                            &filter_params_y, subpel_x_q4, x_step_q4,
                            subpel_y_q4, y_step_q4, conv_params);
925
926
    } else {
#if CONFIG_COMPOUND_ROUND
927
928
929
      av1_convolve_2d(src, src_stride, conv_params->dst,
                      conv_params->dst_stride, w, h, &filter_params_x,
                      &filter_params_y, subpel_x_q4, subpel_y_q4, conv_params);
930
#else
Yunqing Wang's avatar
Yunqing Wang committed
931
932
      // Special case convolve functions should produce the same result as
      // av1_convolve_2d.
933
934
935
936
937
      if (subpel_x_q4 == 0 && subpel_y_q4 == 0) {
        av1_convolve_2d_copy(src, src_stride, conv_params->dst,
                             conv_params->dst_stride, w, h, &filter_params_x,
                             &filter_params_y, subpel_x_q4, subpel_y_q4,
                             conv_params);
Yunqing Wang's avatar
Yunqing Wang committed
938
939
940
941
942
943
944
945
      } else if (subpel_x_q4 == 0) {
        av1_convolve_x(src, src_stride, conv_params->dst,
                       conv_params->dst_stride, w, h, &filter_params_x,
                       &filter_params_y, subpel_x_q4, subpel_y_q4, conv_params);
      } else if (subpel_y_q4 == 0) {
        av1_convolve_y(src, src_stride, conv_params->dst,
                       conv_params->dst_stride, w, h, &filter_params_x,
                       &filter_params_y, subpel_x_q4, subpel_y_q4, conv_params);
946
947
948
949
950
951
952
953
954
      } else {
        // subpel_x_q4 != 0 && subpel_y_q4 != 0
        av1_convolve_2d(src, src_stride, conv_params->dst,
                        conv_params->dst_stride, w, h, &filter_params_x,
                        &filter_params_y, subpel_x_q4, subpel_y_q4,
                        conv_params);
      }
#endif  // CONFIG_COMPOUND_ROUND
    }
955
#endif  // CONFIG_JNT_COMP
956
  }
Angie Chiang's avatar
Angie Chiang committed
957
958
}

959
#if CONFIG_HIGHBITDEPTH
960
961
962
// Converts a block of 32-bit convolution results to high-bit-depth pixels:
// every sample is rounded by `bits` (ROUND_POWER_OF_TWO) and clamped with
// clip_pixel_highbd() for bit depth `bd`. dst8 is turned into the uint16_t
// destination pointer via CONVERT_TO_SHORTPTR().
void av1_highbd_convolve_rounding_c(const int32_t *src, int src_stride,
                                    uint8_t *dst8, int dst_stride, int w, int h,
                                    int bits, int bd) {
  uint16_t *const out = CONVERT_TO_SHORTPTR(dst8);
  for (int row = 0; row < h; ++row) {
    for (int col = 0; col < w; ++col) {
      const int32_t rounded =
          ROUND_POWER_OF_TWO(src[row * src_stride + col], bits);
      out[row * dst_stride + col] = clip_pixel_highbd(rounded, bd);
    }
  }
}

972
#if CONFIG_COMPOUND_ROUND
David Barker's avatar
David Barker committed
973
974
975
976
977
978
// Separable 2-D convolution for 16-bit sources (CONFIG_COMPOUND_ROUND
// variant).
//
// Pass 1 filters h + taps_y - 1 source rows horizontally, rounds by
// conv_params->round_0 and clamps each value with clip_pixel_highbd() for
// bit depth `bd` before storing it in a uint16_t intermediate block. Pass 2
// filters that block vertically, rounds by conv_params->round_1, and stores
// the result to dst (or adds it to dst when conv_params->do_average is set).
void av1_highbd_convolve_2d_c(const uint16_t *src, int src_stride,
                              CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
                              InterpFilterParams *filter_params_x,
                              InterpFilterParams *filter_params_y,
                              const int subpel_x_q4, const int subpel_y_q4,
                              ConvolveParams *conv_params, int bd) {
  uint16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
  const int taps_x = filter_params_x->taps;
  const int taps_y = filter_params_y->taps;
  const int fo_horiz = taps_x / 2 - 1;
  const int fo_vert = taps_y / 2 - 1;
  const int im_h = h + taps_y - 1;
  const int im_stride = w;

  // Pass 1: horizontal filter, starting fo_vert rows above the block.
  const uint16_t *const src_horiz = src - fo_vert * src_stride;
  const int16_t *const x_filter = av1_get_interp_filter_subpel_kernel(
      *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
  for (int row = 0; row < im_h; ++row) {
    for (int col = 0; col < w; ++col) {
      int32_t sum = 0;
      for (int tap = 0; tap < taps_x; ++tap) {
        sum += x_filter[tap] *
               src_horiz[row * src_stride + col - fo_horiz + tap];
      }
      im_block[row * im_stride + col] =
          clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, conv_params->round_0), bd);
    }
  }

  // Pass 2: vertical filter over the intermediate block.
  const uint16_t *const src_vert = im_block + fo_vert * im_stride;
  const int16_t *const y_filter = av1_get_interp_filter_subpel_kernel(
      *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
  for (int row = 0; row < h; ++row) {
    for (int col = 0; col < w; ++col) {
      CONV_BUF_TYPE sum = 0;
      for (int tap = 0; tap < taps_y; ++tap) {
        sum += y_filter[tap] *
               src_vert[(row - fo_vert + tap) * im_stride + col];
      }
      const CONV_BUF_TYPE res =
          ROUND_POWER_OF_TWO(sum, conv_params->round_1);

      const int out_idx = row * dst_stride + col;
      if (conv_params->do_average) {
        dst[out_idx] += res;
      } else {
        dst[out_idx] = res;
      }
    }
  }
}
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035

// Separable 2-D convolution for 16-bit sources with per-sample position
// stepping (CONFIG_COMPOUND_ROUND variant).
//
// The source position advances by x_step_qn / y_step_qn (in units of
// 1 / 2^SCALE_SUBPEL_BITS samples) per output sample, starting from
// subpel_x_qn / subpel_y_qn; the integer part of the position selects the
// source sample and the fractional part selects the kernel phase.
//
// Pass 1 filters im_h source rows horizontally, rounds by
// conv_params->round_0 and clamps to the valid range for bit depth `bd`.
// Pass 2 walks each output column, filters vertically, rounds by
// conv_params->round_1 and stores the result to dst (or adds it to dst when
// conv_params->do_average is set).
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride,
                                    CONV_BUF_TYPE *dst, int dst_stride, int w,
                                    int h, InterpFilterParams *filter_params_x,
                                    InterpFilterParams *filter_params_y,
                                    const int subpel_x_qn, const int x_step_qn,
                                    const int subpel_y_qn, const int y_step_qn,
                                    ConvolveParams *conv_params, int bd) {
  uint16_t im_block[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE];
  // Rows needed: integer position of the last output row plus the kernel.
  int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
             filter_params_y->taps;
  int im_stride = w;
  const int fo_vert = filter_params_y->taps / 2 - 1;
  const int fo_horiz = filter_params_x->taps / 2 - 1;

  // horizontal filter
  const uint16_t *src_horiz = src - fo_vert * src_stride;
  for (int y = 0; y < im_h; ++y) {
    int x_qn = subpel_x_qn;
    for (int x = 0; x < w; ++x, x_qn += x_step_qn) {
      const uint16_t *const src_x = &src_horiz[(x_qn >> SCALE_SUBPEL_BITS)];
      const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
      assert(x_filter_idx < SUBPEL_SHIFTS);
      const int16_t *x_filter =
          av1_get_interp_filter_subpel_kernel(*filter_params_x, x_filter_idx);
      int sum = 0;
      for (int k = 0; k < filter_params_x->taps; ++k) {
        sum += x_filter[k] * src_x[k - fo_horiz];
      }
      // Clamp to the high-bit-depth range, matching the unscaled
      // av1_highbd_convolve_2d_c; the 8-bit clip_pixel() would saturate
      // samples of deeper content at the 8-bit maximum.
      im_block[y * im_stride + x] =
          clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, conv_params->round_0), bd);
    }
    src_horiz += src_stride;
  }

  // vertical filter
  uint16_t *src_vert = im_block + fo_vert * im_stride;
  for (int x = 0; x < w; ++x) {
    int y_qn = subpel_y_qn;
    for (int y = 0; y < h; ++y, y_qn += y_step_qn) {
      const uint16_t *const src_y =
          &src_vert[(y_qn >> SCALE_SUBPEL_BITS) * im_stride];
      const int y_filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
      assert(y_filter_idx < SUBPEL_SHIFTS);
      const int16_t *y_filter =
          av1_get_interp_filter_subpel_kernel(*filter_params_y, y_filter_idx);
      CONV_BUF_TYPE sum = 0;
      for (int k = 0; k < filter_params_y->taps; ++k) {
        sum += y_filter[k] * src_y[(k - fo_vert) * im_stride];
      }
      CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1);
      if (conv_params->do_average)
        dst[y * dst_stride + x] += res;
      else
        dst[y * dst_stride + x] = res;
    }
    // Advance to the next intermediate column.
    src_vert++;
  }
}

1078
#else
1079

David Barker's avatar
David Barker committed
1080
1081
1082
1083
1084
1085
void av1_highbd_convolve_2d_c(const uint16_t *src, int src_stride,
                              CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
                              InterpFilterParams *filter_params_x,
                              InterpFilterParams *filter_params_y,
                              const int subpel_x_q4, const int subpel_y_q4,
                              ConvolveParams *conv_params, int bd) {
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
  int32_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
  int im_h = h + filter_params_y->taps - 1;
  int im_stride = w;
  const int fo_vert = filter_params_y->taps / 2 - 1;
  const int fo_horiz = filter_params_x->taps / 2 - 1;

  // horizontal filter
  const uint16_t *src_horiz = src - fo_vert * src_stride;
  const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
      *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
1096
1097
  for (int y = 0; y < im_h; ++y) {
    for (int x = 0; x < w; ++x) {
1098
      int32_t sum = (1 << (bd + FILTER_BITS - 1));
1099
      for (int k = 0; k < filter_params_x->taps; ++k) {
1100
1101
1102
        sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k];
      }
      assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1)));
1103
1104
      (void)bd;
      im_block[y * im_stride + x] =
1105
          ROUND_POWER_OF_TWO(sum, conv_params->round_0);
1106
1107
1108
1109
    }
  }

  // vertical filter
1110
1111
  int32_t *src_vert = im_block + fo_vert * im_stride;
  const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
1112
1113
  const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
      *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
1114
1115
  for (int y = 0; y < h; ++y) {
    for (int x = 0; x < w; ++x) {
1116
      CONV_BUF_TYPE sum = 1 << offset_bits;
1117
      for (int k = 0; k < filter_params_y->taps; ++k) {
1118
1119
        sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];
      }
1120
1121
1122
1123
      assert(0 <= sum && sum < (1 << (offset_bits + 2)));
      CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1) -
                          ((1 << (offset_bits - conv_params->round_1)) +
                           (1 << (offset_bits - conv_params->round_1 - 1)));
1124
1125
1126
1127
      if (conv_params->do_average)
        dst[y * dst_stride + x] += res;
      else
        dst[y * dst_stride + x] = res;
1128
1129
1130
    }