/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <assert.h>
#include <string.h>

#include "./aom_dsp_rtcd.h"
#include "./av1_rtcd.h"
#include "av1/common/convolve.h"
#include "av1/common/filter.h"
#include "aom_dsp/aom_dsp_common.h"
#include "aom_ports/mem.h"

// Upper bounds checked with assert() by the convolve entry points in this
// file: block dimensions, the per-pixel subpel step, and the filter length.
#define MAX_BLOCK_WIDTH (MAX_SB_SIZE)
#define MAX_BLOCK_HEIGHT (MAX_SB_SIZE)
#define MAX_STEP (32)
#define MAX_FILTER_TAP (12)

// Reference C implementation of the horizontal convolution of a w x h block
// of 8-bit pixels.
//
// The source position of each output pixel starts at subpel_x_q4 and advances
// by x_step_q4 per pixel. Its integer part (x_q4 >> SUBPEL_BITS) selects the
// first source sample and its fractional part (x_q4 & SUBPEL_MASK) selects the
// kernel returned by av1_get_interp_filter_subpel_kernel().
//
// Where the result goes depends on conv_params->round:
//  - CONVOLVE_OPT_ROUND: the sum is rounded by FILTER_BITS, clipped with
//    clip_pixel() and stored in dst; when conv_params->ref is non-zero it is
//    averaged with the pixel already in dst.
//  - otherwise: the unrounded sum is stored in conv_params->dst (indexed with
//    conv_params->dst_stride); when conv_params->ref is non-zero it is
//    averaged with the value already stored there. dst is not written.
void av1_convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst,
                          int dst_stride, int w, int h,
                          const InterpFilterParams filter_params,
                          const int subpel_x_q4, int x_step_q4,
                          ConvolveParams *conv_params) {
  const int filter_size = filter_params.taps;
  int x, y;

  // Step back so the kernel taps straddle the output position.
  src -= filter_size / 2 - 1;
  for (y = 0; y < h; ++y) {
    int x_q4 = subpel_x_q4;
    for (x = 0; x < w; ++x) {
      const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
      const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
          filter_params, x_q4 & SUBPEL_MASK);
      int k, sum = 0;
      for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k];

      if (conv_params->round == CONVOLVE_OPT_ROUND) {
        sum = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
        if (conv_params->ref) {
          dst[x] = ROUND_POWER_OF_TWO(dst[x] + sum, 1);
        } else {
          dst[x] = sum;
        }
      } else {
        const int idx = y * conv_params->dst_stride + x;
        int tmp = sum;
        // Only read the stored value when it is actually averaged in; for
        // ref == 0 the previous content is irrelevant and may be unset.
        if (conv_params->ref) {
          const int prev = conv_params->dst[idx];
          tmp = ROUND_POWER_OF_TWO(prev + sum, 1);
        }
        conv_params->dst[idx] = clamp(tmp, INT32_MIN, INT32_MAX);
      }

      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

// Reference C implementation of the vertical convolution of a w x h block of
// 8-bit pixels. The block is processed one column at a time.
//
// Within a column the source position starts at subpel_y_q4 and advances by
// y_step_q4 per output row. Its integer part (y_q4 >> SUBPEL_BITS) selects the
// first source row and its fractional part (y_q4 & SUBPEL_MASK) selects the
// kernel returned by av1_get_interp_filter_subpel_kernel().
//
// Where the result goes depends on conv_params->round:
//  - CONVOLVE_OPT_ROUND: the sum is rounded by FILTER_BITS, clipped with
//    clip_pixel() and stored in dst; when conv_params->ref is non-zero it is
//    averaged with the pixel already in dst.
//  - otherwise: the unrounded sum is stored in conv_params->dst (indexed with
//    conv_params->dst_stride); when conv_params->ref is non-zero it is
//    averaged with the value already stored there. dst is not written.
void av1_convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst,
                         int dst_stride, int w, int h,
                         const InterpFilterParams filter_params,
                         const int subpel_y_q4, int y_step_q4,
                         ConvolveParams *conv_params) {
  const int filter_size = filter_params.taps;
  int x, y;

  // Step back so the kernel taps straddle the output row.
  src -= src_stride * (filter_size / 2 - 1);

  for (x = 0; x < w; ++x) {
    int y_q4 = subpel_y_q4;
    for (y = 0; y < h; ++y) {
      const uint8_t *const src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
      const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
          filter_params, y_q4 & SUBPEL_MASK);
      int k, sum = 0;
      for (k = 0; k < filter_size; ++k)
        sum += src_y[k * src_stride] * y_filter[k];

      if (conv_params->round == CONVOLVE_OPT_ROUND) {
        sum = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
        if (conv_params->ref) {
          dst[y * dst_stride] =
              ROUND_POWER_OF_TWO(dst[y * dst_stride] + sum, 1);
        } else {
          dst[y * dst_stride] = sum;
        }
      } else {
        // src and dst advance by one column per outer iteration, but
        // conv_params->dst is never advanced, so it is indexed with x.
        const int idx = y * conv_params->dst_stride + x;
        int tmp = sum;
        // Only read the stored value when it is actually averaged in; for
        // ref == 0 the previous content is irrelevant and may be unset.
        if (conv_params->ref) {
          const int prev = conv_params->dst[idx];
          tmp = ROUND_POWER_OF_TWO(prev + sum, 1);
        }
        conv_params->dst[idx] = clamp(tmp, INT32_MIN, INT32_MAX);
      }

      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}

// Handles a w x h block that needs no filtering: the source pixels are used
// as they are.
//
//  - CONVOLVE_OPT_ROUND, ref == 0: each row is copied to dst with memcpy().
//  - CONVOLVE_OPT_ROUND, ref != 0: dst becomes the rounded average of dst and
//    src.
//  - other round modes: the source pixel is scaled up by FILTER_BITS and
//    either stored in conv_params->dst (ref == 0) or averaged with the value
//    already stored there (ref != 0). dst is not written in these modes.
static void convolve_copy(const uint8_t *src, int src_stride, uint8_t *dst,
                          int dst_stride, int w, int h,
                          ConvolveParams *conv_params) {
  // Both mode tests are loop invariant, so evaluate them once.
  const int rounded = conv_params->round == CONVOLVE_OPT_ROUND;
  const int average = conv_params->ref != 0;
  int r, c;

  for (r = 0; r < h; ++r) {
    if (rounded && !average) {
      memcpy(dst, src, w);
    } else {
      for (c = 0; c < w; ++c) {
        if (rounded) {
          dst[c] = clip_pixel(ROUND_POWER_OF_TWO(dst[c] + src[c], 1));
        } else {
          const int idx = r * conv_params->dst_stride + c;
          const int32_t scaled = ((int32_t)src[c]) << FILTER_BITS;
          if (average) {
            const int prev = conv_params->dst[idx];
            const int tmp = ROUND_POWER_OF_TWO(prev + scaled, 1);
            conv_params->dst[idx] = clamp(tmp, INT32_MIN, INT32_MAX);
          } else {
            conv_params->dst[idx] = scaled;
          }
        }
      }
    }
    src += src_stride;
    dst += dst_stride;
  }
}

// Dispatches a horizontal convolution to one of three implementations:
//  - round mode other than CONVOLVE_OPT_ROUND: the C reference
//    av1_convolve_horiz_c().
//  - CONVOLVE_OPT_ROUND with a filter that is not SUBPEL_TAPS long:
//    av1_convolve_horiz().
//  - CONVOLVE_OPT_ROUND with a SUBPEL_TAPS filter: aom_convolve8_horiz(), or
//    aom_convolve8_avg_horiz() when conv_params->ref is non-zero.
void av1_convolve_horiz_facade(const uint8_t *src, int src_stride, uint8_t *dst,
                               int dst_stride, int w, int h,
                               const InterpFilterParams filter_params,
                               const int subpel_x_q4, int x_step_q4,
                               ConvolveParams *conv_params) {
  if (conv_params->round != CONVOLVE_OPT_ROUND) {
    // TODO(angiebird) need SIMD implementation here
    av1_convolve_horiz_c(src, src_stride, dst, dst_stride, w, h,
                         filter_params, subpel_x_q4, x_step_q4, conv_params);
  } else if (filter_params.taps != SUBPEL_TAPS) {
    av1_convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params,
                       subpel_x_q4, x_step_q4, conv_params);
  } else {
    const int16_t *filter_x =
        av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);
    // No vertical kernel is supplied to the horizontal-only routines.
    if (conv_params->ref == 0) {
      aom_convolve8_horiz(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
                          NULL, -1, w, h);
    } else {
      aom_convolve8_avg_horiz(src, src_stride, dst, dst_stride, filter_x,
                              x_step_q4, NULL, -1, w, h);
    }
  }
}

// Dispatches a vertical convolution to one of three implementations:
//  - round mode other than CONVOLVE_OPT_ROUND: the C reference
//    av1_convolve_vert_c().
//  - CONVOLVE_OPT_ROUND with a filter that is not SUBPEL_TAPS long:
//    av1_convolve_vert().
//  - CONVOLVE_OPT_ROUND with a SUBPEL_TAPS filter: aom_convolve8_vert(), or
//    aom_convolve8_avg_vert() when conv_params->ref is non-zero.
void av1_convolve_vert_facade(const uint8_t *src, int src_stride, uint8_t *dst,
                              int dst_stride, int w, int h,
                              const InterpFilterParams filter_params,
                              const int subpel_y_q4, int y_step_q4,
                              ConvolveParams *conv_params) {
  if (conv_params->round != CONVOLVE_OPT_ROUND) {
    // TODO(angiebird) need SIMD implementation here
    av1_convolve_vert_c(src, src_stride, dst, dst_stride, w, h, filter_params,
                        subpel_y_q4, y_step_q4, conv_params);
  } else if (filter_params.taps != SUBPEL_TAPS) {
    av1_convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params,
                      subpel_y_q4, y_step_q4, conv_params);
  } else {
    const int16_t *filter_y =
        av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);
    // No horizontal kernel is supplied to the vertical-only routines.
    if (conv_params->ref == 0) {
      aom_convolve8_vert(src, src_stride, dst, dst_stride, NULL, -1, filter_y,
                         y_step_q4, w, h);
    } else {
      aom_convolve8_avg_vert(src, src_stride, dst, dst_stride, NULL, -1,
                             filter_y, y_step_q4, w, h);
    }
  }
}

#if CONFIG_CONVOLVE_ROUND
// Converts a w x h block of 32-bit values into 8-bit pixels: each value is
// rounded by FILTER_BITS and then clipped with clip_pixel().
void av1_convolve_rounding(const int32_t *src, int src_stride, uint8_t *dst,
                           int dst_stride, int w, int h) {
  int row;
  for (row = 0; row < h; ++row) {
    const int src_off = row * src_stride;
    const int dst_off = row * dst_stride;
    int col;
    for (col = 0; col < w; ++col) {
      const int32_t rounded =
          ROUND_POWER_OF_TWO(src[src_off + col], FILTER_BITS);
      dst[dst_off + col] = clip_pixel(rounded);
    }
  }
}
#endif  // CONFIG_CONVOLVE_ROUND
void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
                  int dst_stride, int w, int h,
215
#if CONFIG_DUAL_FILTER
James Zern's avatar
James Zern committed
216
                  const InterpFilter *interp_filter,
217
#else
James Zern's avatar
James Zern committed
218
                  const InterpFilter interp_filter,
219
#endif
Yaowu Xu's avatar
Yaowu Xu committed
220
                  const int subpel_x_q4, int x_step_q4, const int subpel_y_q4,
221
                  int y_step_q4, ConvolveParams *conv_params) {
Angie Chiang's avatar
Angie Chiang committed
222 223
  int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0;
  int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0;
Angie Chiang's avatar
Angie Chiang committed
224 225 226 227 228 229 230 231 232 233
#if CONFIG_DUAL_FILTER
  InterpFilterParams filter_params_x =
      av1_get_interp_filter_params(interp_filter[1 + 2 * conv_params->ref]);
  InterpFilterParams filter_params_y =
      av1_get_interp_filter_params(interp_filter[0 + 2 * conv_params->ref]);
  InterpFilterParams filter_params;
#else
  InterpFilterParams filter_params =
      av1_get_interp_filter_params(interp_filter);
#endif
234 235 236 237 238 239

  assert(w <= MAX_BLOCK_WIDTH);
  assert(h <= MAX_BLOCK_HEIGHT);
  assert(y_step_q4 <= MAX_STEP);
  assert(x_step_q4 <= MAX_STEP);

Angie Chiang's avatar
Angie Chiang committed
240
  if (ignore_horiz && ignore_vert) {
241
    convolve_copy(src, src_stride, dst, dst_stride, w, h, conv_params);
Angie Chiang's avatar
Angie Chiang committed
242
  } else if (ignore_vert) {
243
#if CONFIG_DUAL_FILTER
Angie Chiang's avatar
Angie Chiang committed
244
    filter_params = filter_params_x;
245 246
#endif
    assert(filter_params.taps <= MAX_FILTER_TAP);
247
    av1_convolve_horiz_facade(src, src_stride, dst, dst_stride, w, h,
248 249
                              filter_params, subpel_x_q4, x_step_q4,
                              conv_params);
Angie Chiang's avatar
Angie Chiang committed
250
  } else if (ignore_horiz) {
251
#if CONFIG_DUAL_FILTER
Angie Chiang's avatar
Angie Chiang committed
252
    filter_params = filter_params_y;
253 254
#endif
    assert(filter_params.taps <= MAX_FILTER_TAP);
255
    av1_convolve_vert_facade(src, src_stride, dst, dst_stride, w, h,
256 257
                             filter_params, subpel_y_q4, y_step_q4,
                             conv_params);
Angie Chiang's avatar
Angie Chiang committed
258
  } else {
259 260 261 262 263 264
    // temp's size is set to a 256 aligned value to facilitate SIMD
    // implementation. The value is greater than (maximum possible intermediate
    // height or width) * MAX_SB_SIZE
    DECLARE_ALIGNED(16, uint8_t,
                    temp[((MAX_SB_SIZE * 2 + 16) + 16) * MAX_SB_SIZE]);
    int max_intermediate_size = ((MAX_SB_SIZE * 2 + 16) + 16);
265
    int filter_size;
266
#if CONFIG_DUAL_FILTER
267 268
    if (interp_filter[0 + 2 * conv_params->ref] == MULTITAP_SHARP &&
        interp_filter[1 + 2 * conv_params->ref] == MULTITAP_SHARP) {
269 270 271 272
      // Avoid two directions both using 12-tap filter.
      // This will reduce hardware implementation cost.
      filter_params_y = av1_get_interp_filter_params(EIGHTTAP_SHARP);
    }
273

274 275 276 277
    // we do filter with fewer taps first to reduce hardware implementation
    // complexity
    if (filter_params_y.taps < filter_params_x.taps) {
      int intermediate_width;
278
      int temp_stride = max_intermediate_size;
279 280
      ConvolveParams temp_conv_params;
      temp_conv_params.ref = 0;
281
      temp_conv_params.round = CONVOLVE_OPT_ROUND;
282 283 284 285
      filter_params = filter_params_y;
      filter_size = filter_params_x.taps;
      intermediate_width =
          (((w - 1) * x_step_q4 + subpel_x_q4) >> SUBPEL_BITS) + filter_size;
286
      assert(intermediate_width <= max_intermediate_size);
Angie Chiang's avatar
Angie Chiang committed
287

288
      assert(filter_params.taps <= MAX_FILTER_TAP);
289

290 291
      av1_convolve_vert_facade(src - (filter_size / 2 - 1), src_stride, temp,
                               temp_stride, intermediate_width, h,
292 293
                               filter_params, subpel_y_q4, y_step_q4,
                               &temp_conv_params);
294

295 296
      filter_params = filter_params_x;
      assert(filter_params.taps <= MAX_FILTER_TAP);
297

298 299
      av1_convolve_horiz_facade(temp + (filter_size / 2 - 1), temp_stride, dst,
                                dst_stride, w, h, filter_params, subpel_x_q4,
300
                                x_step_q4, conv_params);
301
    } else
302
#endif  // CONFIG_DUAL_FILTER
303 304
    {
      int intermediate_height;
305
      int temp_stride = MAX_SB_SIZE;
306 307
      ConvolveParams temp_conv_params;
      temp_conv_params.ref = 0;
308
      temp_conv_params.round = CONVOLVE_OPT_ROUND;
309 310 311 312 313 314 315 316
#if CONFIG_DUAL_FILTER
      filter_params = filter_params_x;
      filter_size = filter_params_y.taps;
#else
      filter_size = filter_params.taps;
#endif
      intermediate_height =
          (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
317 318
      assert(intermediate_height <= max_intermediate_size);
      (void)max_intermediate_size;
319 320 321

      assert(filter_params.taps <= MAX_FILTER_TAP);

322 323 324
      av1_convolve_horiz_facade(src - src_stride * (filter_size / 2 - 1),
                                src_stride, temp, temp_stride, w,
                                intermediate_height, filter_params, subpel_x_q4,
325
                                x_step_q4, &temp_conv_params);
326 327 328 329 330 331

#if CONFIG_DUAL_FILTER
      filter_params = filter_params_y;
#endif
      assert(filter_params.taps <= MAX_FILTER_TAP);

332 333 334
      av1_convolve_vert_facade(
          temp + temp_stride * (filter_size / 2 - 1), temp_stride, dst,
          dst_stride, w, h, filter_params, subpel_y_q4, y_step_q4, conv_params);
335
    }
Angie Chiang's avatar
Angie Chiang committed
336
  }
337 338
}

// C version of the convolve initialisation hook. It has nothing to set up;
// the function exists as a placeholder for SIMD initialization.
void av1_convolve_init_c(void) {
}

#if CONFIG_AOM_HIGHBITDEPTH
void av1_highbd_convolve_horiz_c(const uint16_t *src, int src_stride,
                                 uint16_t *dst, int dst_stride, int w, int h,
                                 const InterpFilterParams filter_params,
                                 const int subpel_x_q4, int x_step_q4, int avg,
                                 int bd) {
350
  int x, y;
351
  int filter_size = filter_params.taps;
352 353 354 355 356
  src -= filter_size / 2 - 1;
  for (y = 0; y < h; ++y) {
    int x_q4 = subpel_x_q4;
    for (x = 0; x < w; ++x) {
      const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
Yaowu Xu's avatar
Yaowu Xu committed
357
      const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
clang-format's avatar
clang-format committed
358
          filter_params, x_q4 & SUBPEL_MASK);
359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374
      int k, sum = 0;
      for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k];
      if (avg)
        dst[x] = ROUND_POWER_OF_TWO(
            dst[x] +
                clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
            1);
      else
        dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

void av1_highbd_convolve_vert_c(const uint16_t *src, int src_stride,
                                uint16_t *dst, int dst_stride, int w, int h,
                                const InterpFilterParams filter_params,
                                const int subpel_y_q4, int y_step_q4, int avg,
                                int bd) {
380
  int x, y;
381
  int filter_size = filter_params.taps;
382 383 384 385 386 387
  src -= src_stride * (filter_size / 2 - 1);

  for (x = 0; x < w; ++x) {
    int y_q4 = subpel_y_q4;
    for (y = 0; y < h; ++y) {
      const uint16_t *const src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
Yaowu Xu's avatar
Yaowu Xu committed
388
      const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
clang-format's avatar
clang-format committed
389
          filter_params, y_q4 & SUBPEL_MASK);
390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408
      int k, sum = 0;
      for (k = 0; k < filter_size; ++k)
        sum += src_y[k * src_stride] * y_filter[k];
      if (avg) {
        dst[y * dst_stride] = ROUND_POWER_OF_TWO(
            dst[y * dst_stride] +
                clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
            1);
      } else {
        dst[y * dst_stride] =
            clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
      }
      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}

// Handles a w x h block of 16-bit samples that needs no filtering.
// With avg == 0 every row is copied from src to dst. Otherwise each dst
// sample becomes the rounded average of itself and the matching src sample,
// clipped with clip_pixel_highbd(..., bd).
static void highbd_convolve_copy(const uint16_t *src, int src_stride,
                                 uint16_t *dst, int dst_stride, int w, int h,
                                 int avg, int bd) {
  int row;

  if (avg != 0) {
    for (row = 0; row < h; ++row) {
      int col;
      for (col = 0; col < w; ++col) {
        dst[col] =
            clip_pixel_highbd(ROUND_POWER_OF_TWO(dst[col] + src[col], 1), bd);
      }
      src += src_stride;
      dst += dst_stride;
    }
    return;
  }

  for (row = 0; row < h; ++row) {
    memcpy(dst, src, w * sizeof(*src));
    src += src_stride;
    dst += dst_stride;
  }
}

// Dispatches a high-bitdepth horizontal convolution.
// A SUBPEL_TAPS-long filter goes to aom_highbd_convolve8_horiz(), or to
// aom_highbd_convolve8_avg_horiz() when avg is non-zero; both take the
// original src8/dst8 pointers. Any other filter length goes to
// av1_highbd_convolve_horiz() with the pointers converted through
// CONVERT_TO_SHORTPTR().
void av1_highbd_convolve_horiz_facade(const uint8_t *src8, int src_stride,
                                      uint8_t *dst8, int dst_stride, int w,
                                      int h,
                                      const InterpFilterParams filter_params,
                                      const int subpel_x_q4, int x_step_q4,
                                      int avg, int bd) {
  uint16_t *const src16 = CONVERT_TO_SHORTPTR(src8);
  uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst8);

  if (filter_params.taps != SUBPEL_TAPS) {
    av1_highbd_convolve_horiz(src16, src_stride, dst16, dst_stride, w, h,
                              filter_params, subpel_x_q4, x_step_q4, avg, bd);
  } else {
    const int16_t *const kernel_x =
        av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);
    if (avg != 0) {
      aom_highbd_convolve8_avg_horiz(src8, src_stride, dst8, dst_stride,
                                     kernel_x, x_step_q4, NULL, -1, w, h, bd);
    } else {
      aom_highbd_convolve8_horiz(src8, src_stride, dst8, dst_stride, kernel_x,
                                 x_step_q4, NULL, -1, w, h, bd);
    }
  }
}

void av1_highbd_convolve_vert_facade(const uint8_t *src8, int src_stride,
                                     uint8_t *dst8, int dst_stride, int w,
                                     int h,
                                     const InterpFilterParams filter_params,
                                     const int subpel_y_q4, int y_step_q4,
                                     int avg, int bd) {
  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
462

463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478
  if (filter_params.taps == SUBPEL_TAPS) {
    const int16_t *filter_y =
        av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);
    if (avg == 0) {
      aom_highbd_convolve8_vert(src8, src_stride, dst8, dst_stride, NULL, -1,
                                filter_y, y_step_q4, w, h, bd);
    } else {
      aom_highbd_convolve8_avg_vert(src8, src_stride, dst8, dst_stride, NULL,
                                    -1, filter_y, y_step_q4, w, h, bd);
    }
  } else {
    av1_highbd_convolve_vert(src, src_stride, dst, dst_stride, w, h,
                             filter_params, subpel_y_q4, y_step_q4, avg, bd);
  }
}

void av1_highbd_convolve(const uint8_t *src8, int src_stride, uint8_t *dst8,
                         int dst_stride, int w, int h,
481
#if CONFIG_DUAL_FILTER
James Zern's avatar
James Zern committed
482
                         const InterpFilter *interp_filter,
483
#else
James Zern's avatar
James Zern committed
484
                         const InterpFilter interp_filter,
485
#endif
Yaowu Xu's avatar
Yaowu Xu committed
486 487 488
                         const int subpel_x_q4, int x_step_q4,
                         const int subpel_y_q4, int y_step_q4, int ref_idx,
                         int bd) {
Angie Chiang's avatar
Angie Chiang committed
489 490 491 492
  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0;
  int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0;
493 494 495 496 497 498

  assert(w <= MAX_BLOCK_WIDTH);
  assert(h <= MAX_BLOCK_HEIGHT);
  assert(y_step_q4 <= MAX_STEP);
  assert(x_step_q4 <= MAX_STEP);

Angie Chiang's avatar
Angie Chiang committed
499
  if (ignore_horiz && ignore_vert) {
500
    highbd_convolve_copy(src, src_stride, dst, dst_stride, w, h, ref_idx, bd);
Angie Chiang's avatar
Angie Chiang committed
501
  } else if (ignore_vert) {
502 503
#if CONFIG_DUAL_FILTER
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
504
        av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
505 506
#else
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
507
        av1_get_interp_filter_params(interp_filter);
508
#endif
509 510 511
    av1_highbd_convolve_horiz_facade(src8, src_stride, dst8, dst_stride, w, h,
                                     filter_params, subpel_x_q4, x_step_q4,
                                     ref_idx, bd);
Angie Chiang's avatar
Angie Chiang committed
512
  } else if (ignore_horiz) {
513 514
#if CONFIG_DUAL_FILTER
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
515
        av1_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
516 517
#else
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
518
        av1_get_interp_filter_params(interp_filter);
519
#endif
520 521 522
    av1_highbd_convolve_vert_facade(src8, src_stride, dst8, dst_stride, w, h,
                                    filter_params, subpel_y_q4, y_step_q4,
                                    ref_idx, bd);
Angie Chiang's avatar
Angie Chiang committed
523
  } else {
524 525 526 527 528
    // temp's size is set to a 256 aligned value to facilitate SIMD
    // implementation. The value is greater than (maximum possible intermediate
    // height or width) * MAX_SB_SIZE
    DECLARE_ALIGNED(16, uint16_t,
                    temp[((MAX_SB_SIZE * 2 + 16) + 16) * MAX_SB_SIZE]);
Yaowu Xu's avatar
Yaowu Xu committed
529
    uint8_t *temp8 = CONVERT_TO_BYTEPTR(temp);
530 531 532
    int max_intermediate_size = ((MAX_SB_SIZE * 2 + 16) + 16);
    int filter_size;
    InterpFilterParams filter_params;
533
#if CONFIG_DUAL_FILTER
Yaowu Xu's avatar
Yaowu Xu committed
534 535 536 537
    InterpFilterParams filter_params_x =
        av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
    InterpFilterParams filter_params_y =
        av1_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
538 539 540 541 542 543
    if (interp_filter[0 + 2 * ref_idx] == MULTITAP_SHARP &&
        interp_filter[1 + 2 * ref_idx] == MULTITAP_SHARP) {
      // Avoid two directions both using 12-tap filter.
      // This will reduce hardware implementation cost.
      filter_params_y = av1_get_interp_filter_params(EIGHTTAP_SHARP);
    }
544
#endif
545

546 547 548 549 550 551 552 553 554 555 556
#if CONFIG_DUAL_FILTER
    if (filter_params_y.taps < filter_params_x.taps) {
      int intermediate_width;
      int temp_stride = max_intermediate_size;
      filter_params = filter_params_y;
      filter_size = filter_params_x.taps;
      intermediate_width =
          (((w - 1) * x_step_q4 + subpel_x_q4) >> SUBPEL_BITS) + filter_size;
      assert(intermediate_width <= max_intermediate_size);

      assert(filter_params.taps <= MAX_FILTER_TAP);
Angie Chiang's avatar
Angie Chiang committed
557

558 559 560
      av1_highbd_convolve_vert_facade(
          src8 - (filter_size / 2 - 1), src_stride, temp8, temp_stride,
          intermediate_width, h, filter_params, subpel_y_q4, y_step_q4, 0, bd);
561

562 563 564 565 566 567 568 569 570 571 572
      filter_params = filter_params_x;
      assert(filter_params.taps <= MAX_FILTER_TAP);

      av1_highbd_convolve_horiz_facade(
          temp8 + (filter_size / 2 - 1), temp_stride, dst8, dst_stride, w, h,
          filter_params, subpel_x_q4, x_step_q4, ref_idx, bd);
    } else
#endif  // CONFIG_DUAL_FILTER
    {
      int intermediate_height;
      int temp_stride = MAX_SB_SIZE;
573
#if CONFIG_DUAL_FILTER
574 575 576 577 578
      filter_params = filter_params_x;
      filter_size = filter_params_y.taps;
#else
      filter_params = av1_get_interp_filter_params(interp_filter);
      filter_size = filter_params.taps;
579
#endif
580 581 582 583 584 585 586 587 588 589 590 591 592 593 594
      intermediate_height =
          (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
      assert(intermediate_height <= max_intermediate_size);
      (void)max_intermediate_size;

      av1_highbd_convolve_horiz_facade(
          src8 - src_stride * (filter_size / 2 - 1), src_stride, temp8,
          temp_stride, w, intermediate_height, filter_params, subpel_x_q4,
          x_step_q4, 0, bd);

#if CONFIG_DUAL_FILTER
      filter_params = filter_params_y;
#endif
      filter_size = filter_params.taps;
      assert(filter_params.taps <= MAX_FILTER_TAP);
595

596 597 598 599
      av1_highbd_convolve_vert_facade(
          temp8 + temp_stride * (filter_size / 2 - 1), temp_stride, dst8,
          dst_stride, w, h, filter_params, subpel_y_q4, y_step_q4, ref_idx, bd);
    }
Angie Chiang's avatar
Angie Chiang committed
600
  }
601
}
#endif  // CONFIG_AOM_HIGHBITDEPTH