convolve.c 22.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

12
#include <assert.h>
Angie Chiang's avatar
Angie Chiang committed
13
#include <string.h>
14

15
#include "./aom_dsp_rtcd.h"
Yaowu Xu's avatar
Yaowu Xu committed
16
#include "./av1_rtcd.h"
17
#include "av1/common/convolve.h"
18
#include "av1/common/filter.h"
Yaowu Xu's avatar
Yaowu Xu committed
19
#include "aom_dsp/aom_dsp_common.h"
20
#include "aom_ports/mem.h"
21

22 23
// Largest block width and height accepted by av1_convolve() and
// av1_highbd_convolve(); both entry points assert w and h against these.
#define MAX_BLOCK_WIDTH (MAX_SB_SIZE)
#define MAX_BLOCK_HEIGHT (MAX_SB_SIZE)
24 25 26
// Upper bound asserted on x_step_q4 / y_step_q4 by the top-level convolve
// entry points in this file.
#define MAX_STEP (32)
// Upper bound asserted on InterpFilterParams.taps before filtering.
#define MAX_FILTER_TAP (12)

Yaowu Xu's avatar
Yaowu Xu committed
27 28 29
// C implementation of the horizontal interpolation filter for 8-bit pixels.
//
// For every one of the h rows, w outputs are produced. The sampling position
// starts at subpel_x_q4 and grows by x_step_q4 per output; its upper bits
// (position >> SUBPEL_BITS) pick the first source pixel and its lower bits
// (position & SUBPEL_MASK) pick the kernel, which has filter_params.taps
// coefficients.
//
// Where the result goes depends on conv_params:
//  - round == CONVOLVE_OPT_ROUND: the sum is rounded by FILTER_BITS, clipped
//    with clip_pixel() and stored in dst. If conv_params->ref is non-zero the
//    value is averaged (with rounding) with what dst already holds.
//  - otherwise: the unrounded sum is stored in conv_params->dst (indexed with
//    conv_params->dst_stride), averaged with the existing entry when
//    conv_params->ref is non-zero; dst itself is not written.
void av1_convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst,
                          int dst_stride, int w, int h,
                          const InterpFilterParams filter_params,
                          const int subpel_x_q4, int x_step_q4,
                          ConvolveParams *conv_params) {
  const int taps = filter_params.taps;
  int row;

  // The kernel window begins (taps / 2 - 1) pixels to the left of the
  // sample position.
  src -= taps / 2 - 1;

  for (row = 0; row < h; ++row, src += src_stride, dst += dst_stride) {
    int pos_q4 = subpel_x_q4;
    int col;

    for (col = 0; col < w; ++col, pos_q4 += x_step_q4) {
      const uint8_t *const window = &src[pos_q4 >> SUBPEL_BITS];
      const int16_t *const kernel = av1_get_interp_filter_subpel_kernel(
          filter_params, pos_q4 & SUBPEL_MASK);
      int sum = 0;
      int tap;

      for (tap = 0; tap < taps; ++tap) sum += window[tap] * kernel[tap];

      if (conv_params->round == CONVOLVE_OPT_ROUND) {
        // Final pixel output.
        const int pixel = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
        dst[col] =
            conv_params->ref ? ROUND_POWER_OF_TWO(dst[col] + pixel, 1) : pixel;
      } else {
        // Unrounded output kept in the conv_params buffer.
        const int idx = row * conv_params->dst_stride + col;
        if (conv_params->ref) {
          const int prev = conv_params->dst[idx];
          conv_params->dst[idx] = ROUND_POWER_OF_TWO(prev + sum, 1);
        } else {
          conv_params->dst[idx] = sum;
        }
      }
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
66 67 68
// C implementation of the vertical interpolation filter for 8-bit pixels.
//
// The block is processed column by column. Within a column the sampling
// position starts at subpel_y_q4 and grows by y_step_q4 per output row; its
// upper bits (position >> SUBPEL_BITS) pick the first source row and its
// lower bits (position & SUBPEL_MASK) pick the kernel, which has
// filter_params.taps coefficients applied down the column.
//
// Where the result goes depends on conv_params:
//  - round == CONVOLVE_OPT_ROUND: the sum is rounded by FILTER_BITS, clipped
//    with clip_pixel() and stored in dst. If conv_params->ref is non-zero the
//    value is averaged (with rounding) with what dst already holds.
//  - otherwise: the unrounded sum is stored in conv_params->dst (indexed with
//    conv_params->dst_stride), averaged with the existing entry when
//    conv_params->ref is non-zero; dst itself is not written.
void av1_convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst,
                         int dst_stride, int w, int h,
                         const InterpFilterParams filter_params,
                         const int subpel_y_q4, int y_step_q4,
                         ConvolveParams *conv_params) {
  const int taps = filter_params.taps;
  int col;

  // The kernel window begins (taps / 2 - 1) rows above the sample position.
  src -= src_stride * (taps / 2 - 1);

  for (col = 0; col < w; ++col, ++src, ++dst) {
    int pos_q4 = subpel_y_q4;
    int row;

    for (row = 0; row < h; ++row, pos_q4 += y_step_q4) {
      const uint8_t *const window =
          &src[(pos_q4 >> SUBPEL_BITS) * src_stride];
      const int16_t *const kernel = av1_get_interp_filter_subpel_kernel(
          filter_params, pos_q4 & SUBPEL_MASK);
      int sum = 0;
      int tap;

      for (tap = 0; tap < taps; ++tap)
        sum += window[tap * src_stride] * kernel[tap];

      if (conv_params->round == CONVOLVE_OPT_ROUND) {
        // Final pixel output.
        const int pixel = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
        uint8_t *const out = &dst[row * dst_stride];
        *out = conv_params->ref ? ROUND_POWER_OF_TWO(*out + pixel, 1) : pixel;
      } else {
        // Unrounded output kept in the conv_params buffer.
        const int idx = row * conv_params->dst_stride + col;
        if (conv_params->ref) {
          const int prev = conv_params->dst[idx];
          conv_params->dst[idx] = ROUND_POWER_OF_TWO(prev + sum, 1);
        } else {
          conv_params->dst[idx] = sum;
        }
      }
    }
  }
}

Angie Chiang's avatar
Angie Chiang committed
108
// Handles the case where neither direction needs filtering.
//
// With round == CONVOLVE_OPT_ROUND the w x h source block is written to dst:
// copied verbatim when conv_params->ref is zero, otherwise averaged (with
// rounding) with the pixels dst already holds.
// With any other rounding mode the source pixels are scaled up by
// FILTER_BITS and written to conv_params->dst instead, either directly
// (ref == 0) or averaged with the existing entry (ref != 0).
static void convolve_copy(const uint8_t *src, int src_stride, uint8_t *dst,
                          int dst_stride, int w, int h,
                          ConvolveParams *conv_params) {
  const int averaging = conv_params->ref != 0;
  const int pixel_output = conv_params->round == CONVOLVE_OPT_ROUND;
  int row;

  for (row = 0; row < h; ++row, src += src_stride, dst += dst_stride) {
    int col;

    if (pixel_output && !averaging) {
      // Plain copy of one row.
      memcpy(dst, src, w);
      continue;
    }

    for (col = 0; col < w; ++col) {
      if (pixel_output) {
        // Averaging into the pixel destination.
        dst[col] = clip_pixel(ROUND_POWER_OF_TWO(dst[col] + src[col], 1));
      } else {
        const int idx = row * conv_params->dst_stride + col;
        const int scaled = ((uint16_t)src[col]) << FILTER_BITS;
        if (averaging) {
          const int prev = conv_params->dst[idx];
          conv_params->dst[idx] = ROUND_POWER_OF_TWO(prev + scaled, 1);
        } else {
          conv_params->dst[idx] = scaled;
        }
      }
    }
  }
}

143 144 145
// Dispatches a horizontal filtering request to one of three implementations:
//  - conv_params->round != CONVOLVE_OPT_ROUND: the plain C routine
//    av1_convolve_horiz_c();
//  - CONVOLVE_OPT_ROUND with a SUBPEL_TAPS-tap filter: aom_convolve8_horiz(),
//    or aom_convolve8_avg_horiz() when conv_params->ref is non-zero;
//  - CONVOLVE_OPT_ROUND with any other tap count: av1_convolve_horiz().
void av1_convolve_horiz_facade(const uint8_t *src, int src_stride, uint8_t *dst,
                               int dst_stride, int w, int h,
                               const InterpFilterParams filter_params,
                               const int subpel_x_q4, int x_step_q4,
                               ConvolveParams *conv_params) {
  const int16_t *filter_x;

  if (conv_params->round != CONVOLVE_OPT_ROUND) {
    // TODO(angiebird) need SIMD implementation here
    av1_convolve_horiz_c(src, src_stride, dst, dst_stride, w, h, filter_params,
                         subpel_x_q4, x_step_q4, conv_params);
    return;
  }

  if (filter_params.taps != SUBPEL_TAPS) {
    av1_convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params,
                       subpel_x_q4, x_step_q4, conv_params);
    return;
  }

  // SUBPEL_TAPS-tap filter: hand the kernel for the starting sub-pixel
  // position to the aom_dsp routines. No vertical kernel is supplied.
  filter_x = av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);
  if (conv_params->ref != 0) {
    aom_convolve8_avg_horiz(src, src_stride, dst, dst_stride, filter_x,
                            x_step_q4, NULL, -1, w, h);
  } else {
    aom_convolve8_horiz(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
                        NULL, -1, w, h);
  }
}

// Dispatches a vertical filtering request to one of three implementations:
//  - conv_params->round != CONVOLVE_OPT_ROUND: the plain C routine
//    av1_convolve_vert_c();
//  - CONVOLVE_OPT_ROUND with a SUBPEL_TAPS-tap filter: aom_convolve8_vert(),
//    or aom_convolve8_avg_vert() when conv_params->ref is non-zero;
//  - CONVOLVE_OPT_ROUND with any other tap count: av1_convolve_vert().
void av1_convolve_vert_facade(const uint8_t *src, int src_stride, uint8_t *dst,
                              int dst_stride, int w, int h,
                              const InterpFilterParams filter_params,
                              const int subpel_y_q4, int y_step_q4,
                              ConvolveParams *conv_params) {
  const int16_t *filter_y;

  if (conv_params->round != CONVOLVE_OPT_ROUND) {
    // TODO(angiebird) need SIMD implementation here
    av1_convolve_vert_c(src, src_stride, dst, dst_stride, w, h, filter_params,
                        subpel_y_q4, y_step_q4, conv_params);
    return;
  }

  if (filter_params.taps != SUBPEL_TAPS) {
    av1_convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params,
                      subpel_y_q4, y_step_q4, conv_params);
    return;
  }

  // SUBPEL_TAPS-tap filter: hand the kernel for the starting sub-pixel
  // position to the aom_dsp routines. No horizontal kernel is supplied.
  filter_y = av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);
  if (conv_params->ref != 0) {
    aom_convolve8_avg_vert(src, src_stride, dst, dst_stride, NULL, -1,
                           filter_y, y_step_q4, w, h);
  } else {
    aom_convolve8_vert(src, src_stride, dst, dst_stride, NULL, -1, filter_y,
                       y_step_q4, w, h);
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
198 199
void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
                  int dst_stride, int w, int h,
200
#if CONFIG_DUAL_FILTER
James Zern's avatar
James Zern committed
201
                  const InterpFilter *interp_filter,
202
#else
James Zern's avatar
James Zern committed
203
                  const InterpFilter interp_filter,
204
#endif
Yaowu Xu's avatar
Yaowu Xu committed
205
                  const int subpel_x_q4, int x_step_q4, const int subpel_y_q4,
206
                  int y_step_q4, ConvolveParams *conv_params) {
Angie Chiang's avatar
Angie Chiang committed
207 208
  int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0;
  int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0;
209 210 211 212 213 214

  assert(w <= MAX_BLOCK_WIDTH);
  assert(h <= MAX_BLOCK_HEIGHT);
  assert(y_step_q4 <= MAX_STEP);
  assert(x_step_q4 <= MAX_STEP);

Angie Chiang's avatar
Angie Chiang committed
215
  if (ignore_horiz && ignore_vert) {
216
    convolve_copy(src, src_stride, dst, dst_stride, w, h, conv_params);
Angie Chiang's avatar
Angie Chiang committed
217
  } else if (ignore_vert) {
218 219
#if CONFIG_DUAL_FILTER
    InterpFilterParams filter_params =
220
        av1_get_interp_filter_params(interp_filter[1 + 2 * conv_params->ref]);
221 222
#else
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
223
        av1_get_interp_filter_params(interp_filter);
224 225
#endif
    assert(filter_params.taps <= MAX_FILTER_TAP);
226
    av1_convolve_horiz_facade(src, src_stride, dst, dst_stride, w, h,
227 228
                              filter_params, subpel_x_q4, x_step_q4,
                              conv_params);
Angie Chiang's avatar
Angie Chiang committed
229
  } else if (ignore_horiz) {
230 231
#if CONFIG_DUAL_FILTER
    InterpFilterParams filter_params =
232
        av1_get_interp_filter_params(interp_filter[2 * conv_params->ref]);
233 234
#else
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
235
        av1_get_interp_filter_params(interp_filter);
236 237
#endif
    assert(filter_params.taps <= MAX_FILTER_TAP);
238
    av1_convolve_vert_facade(src, src_stride, dst, dst_stride, w, h,
239 240
                             filter_params, subpel_y_q4, y_step_q4,
                             conv_params);
Angie Chiang's avatar
Angie Chiang committed
241
  } else {
242 243 244 245 246 247
    // temp's size is set to a 256 aligned value to facilitate SIMD
    // implementation. The value is greater than (maximum possible intermediate
    // height or width) * MAX_SB_SIZE
    DECLARE_ALIGNED(16, uint8_t,
                    temp[((MAX_SB_SIZE * 2 + 16) + 16) * MAX_SB_SIZE]);
    int max_intermediate_size = ((MAX_SB_SIZE * 2 + 16) + 16);
248 249
    int filter_size;
    InterpFilterParams filter_params;
250
#if CONFIG_DUAL_FILTER
251
    InterpFilterParams filter_params_x =
252
        av1_get_interp_filter_params(interp_filter[1 + 2 * conv_params->ref]);
253
    InterpFilterParams filter_params_y =
254 255 256
        av1_get_interp_filter_params(interp_filter[0 + 2 * conv_params->ref]);
    if (interp_filter[0 + 2 * conv_params->ref] == MULTITAP_SHARP &&
        interp_filter[1 + 2 * conv_params->ref] == MULTITAP_SHARP) {
257 258 259 260
      // Avoid two directions both using 12-tap filter.
      // This will reduce hardware implementation cost.
      filter_params_y = av1_get_interp_filter_params(EIGHTTAP_SHARP);
    }
261

262 263 264 265
    // we do filter with fewer taps first to reduce hardware implementation
    // complexity
    if (filter_params_y.taps < filter_params_x.taps) {
      int intermediate_width;
266
      int temp_stride = max_intermediate_size;
267 268
      ConvolveParams temp_conv_params;
      temp_conv_params.ref = 0;
269
      temp_conv_params.round = CONVOLVE_OPT_ROUND;
270 271 272 273
      filter_params = filter_params_y;
      filter_size = filter_params_x.taps;
      intermediate_width =
          (((w - 1) * x_step_q4 + subpel_x_q4) >> SUBPEL_BITS) + filter_size;
274
      assert(intermediate_width <= max_intermediate_size);
Angie Chiang's avatar
Angie Chiang committed
275

276
      assert(filter_params.taps <= MAX_FILTER_TAP);
277

278 279
      av1_convolve_vert_facade(src - (filter_size / 2 - 1), src_stride, temp,
                               temp_stride, intermediate_width, h,
280 281
                               filter_params, subpel_y_q4, y_step_q4,
                               &temp_conv_params);
282

283 284
      filter_params = filter_params_x;
      assert(filter_params.taps <= MAX_FILTER_TAP);
285

286 287
      av1_convolve_horiz_facade(temp + (filter_size / 2 - 1), temp_stride, dst,
                                dst_stride, w, h, filter_params, subpel_x_q4,
288
                                x_step_q4, conv_params);
289
    } else
290
#endif  // CONFIG_DUAL_FILTER
291 292
    {
      int intermediate_height;
293
      int temp_stride = MAX_SB_SIZE;
294 295
      ConvolveParams temp_conv_params;
      temp_conv_params.ref = 0;
296
      temp_conv_params.round = CONVOLVE_OPT_ROUND;
297 298 299 300 301 302 303 304 305
#if CONFIG_DUAL_FILTER
      filter_params = filter_params_x;
      filter_size = filter_params_y.taps;
#else
      filter_params = av1_get_interp_filter_params(interp_filter);
      filter_size = filter_params.taps;
#endif
      intermediate_height =
          (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
306 307
      assert(intermediate_height <= max_intermediate_size);
      (void)max_intermediate_size;
308 309 310

      assert(filter_params.taps <= MAX_FILTER_TAP);

311 312 313
      av1_convolve_horiz_facade(src - src_stride * (filter_size / 2 - 1),
                                src_stride, temp, temp_stride, w,
                                intermediate_height, filter_params, subpel_x_q4,
314
                                x_step_q4, &temp_conv_params);
315 316 317 318 319 320

#if CONFIG_DUAL_FILTER
      filter_params = filter_params_y;
#endif
      assert(filter_params.taps <= MAX_FILTER_TAP);

321 322 323
      av1_convolve_vert_facade(
          temp + temp_stride * (filter_size / 2 - 1), temp_stride, dst,
          dst_stride, w, h, filter_params, subpel_y_q4, y_step_q4, conv_params);
324
    }
Angie Chiang's avatar
Angie Chiang committed
325
  }
326 327
}

Angie Chiang's avatar
Angie Chiang committed
328 329 330 331 332
// Initialization hook for the convolve module. The C version has nothing to
// set up; it exists as a placeholder for SIMD initialization.
void av1_convolve_init_c(void) {}

Yaowu Xu's avatar
Yaowu Xu committed
333 334 335 336 337 338
#if CONFIG_AOM_HIGHBITDEPTH
void av1_highbd_convolve_horiz_c(const uint16_t *src, int src_stride,
                                 uint16_t *dst, int dst_stride, int w, int h,
                                 const InterpFilterParams filter_params,
                                 const int subpel_x_q4, int x_step_q4, int avg,
                                 int bd) {
339
  int x, y;
340
  int filter_size = filter_params.taps;
341 342 343 344 345
  src -= filter_size / 2 - 1;
  for (y = 0; y < h; ++y) {
    int x_q4 = subpel_x_q4;
    for (x = 0; x < w; ++x) {
      const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
Yaowu Xu's avatar
Yaowu Xu committed
346
      const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
clang-format's avatar
clang-format committed
347
          filter_params, x_q4 & SUBPEL_MASK);
348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363
      int k, sum = 0;
      for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k];
      if (avg)
        dst[x] = ROUND_POWER_OF_TWO(
            dst[x] +
                clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
            1);
      else
        dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
364 365 366 367 368
void av1_highbd_convolve_vert_c(const uint16_t *src, int src_stride,
                                uint16_t *dst, int dst_stride, int w, int h,
                                const InterpFilterParams filter_params,
                                const int subpel_y_q4, int y_step_q4, int avg,
                                int bd) {
369
  int x, y;
370
  int filter_size = filter_params.taps;
371 372 373 374 375 376
  src -= src_stride * (filter_size / 2 - 1);

  for (x = 0; x < w; ++x) {
    int y_q4 = subpel_y_q4;
    for (y = 0; y < h; ++y) {
      const uint16_t *const src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
Yaowu Xu's avatar
Yaowu Xu committed
377
      const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
clang-format's avatar
clang-format committed
378
          filter_params, y_q4 & SUBPEL_MASK);
379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397
      int k, sum = 0;
      for (k = 0; k < filter_size; ++k)
        sum += src_y[k * src_stride] * y_filter[k];
      if (avg) {
        dst[y * dst_stride] = ROUND_POWER_OF_TWO(
            dst[y * dst_stride] +
                clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
            1);
      } else {
        dst[y * dst_stride] =
            clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
      }
      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}

Angie Chiang's avatar
Angie Chiang committed
398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419
// High bit depth counterpart of the unfiltered path: moves a w x h block of
// 16-bit samples from src to dst. With avg == 0 each row is copied verbatim;
// otherwise each sample is averaged (with rounding) with the sample already
// in dst and clipped with clip_pixel_highbd() for bit depth bd.
static void highbd_convolve_copy(const uint16_t *src, int src_stride,
                                 uint16_t *dst, int dst_stride, int w, int h,
                                 int avg, int bd) {
  int row;

  for (row = 0; row < h; ++row, src += src_stride, dst += dst_stride) {
    if (avg == 0) {
      memcpy(dst, src, w * sizeof(*src));
    } else {
      int col;
      for (col = 0; col < w; ++col) {
        const int mean = ROUND_POWER_OF_TWO(dst[col] + src[col], 1);
        dst[col] = clip_pixel_highbd(mean, bd);
      }
    }
  }
}

420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450
// Dispatches a high bit depth horizontal filtering request.
//
// src8 / dst8 are byte-typed handles that CONVERT_TO_SHORTPTR() turns into
// uint16_t pointers. Filters with SUBPEL_TAPS taps go to the
// aom_highbd_convolve8_horiz() family, which takes the byte-typed handles
// (the _avg_ variant is used when avg is non-zero). Every other tap count
// goes to av1_highbd_convolve_horiz() with the converted pointers.
void av1_highbd_convolve_horiz_facade(const uint8_t *src8, int src_stride,
                                      uint8_t *dst8, int dst_stride, int w,
                                      int h,
                                      const InterpFilterParams filter_params,
                                      const int subpel_x_q4, int x_step_q4,
                                      int avg, int bd) {
  const int16_t *filter_x;

  if (filter_params.taps != SUBPEL_TAPS) {
    uint16_t *src = CONVERT_TO_SHORTPTR(src8);
    uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
    av1_highbd_convolve_horiz(src, src_stride, dst, dst_stride, w, h,
                              filter_params, subpel_x_q4, x_step_q4, avg, bd);
    return;
  }

  // Kernel for the starting sub-pixel position; no vertical kernel is given.
  filter_x = av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);
  if (avg != 0) {
    aom_highbd_convolve8_avg_horiz(src8, src_stride, dst8, dst_stride,
                                   filter_x, x_step_q4, NULL, -1, w, h, bd);
  } else {
    aom_highbd_convolve8_horiz(src8, src_stride, dst8, dst_stride, filter_x,
                               x_step_q4, NULL, -1, w, h, bd);
  }
}

void av1_highbd_convolve_vert_facade(const uint8_t *src8, int src_stride,
                                     uint8_t *dst8, int dst_stride, int w,
                                     int h,
                                     const InterpFilterParams filter_params,
                                     const int subpel_y_q4, int y_step_q4,
                                     int avg, int bd) {
  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
451

452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467
  if (filter_params.taps == SUBPEL_TAPS) {
    const int16_t *filter_y =
        av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);
    if (avg == 0) {
      aom_highbd_convolve8_vert(src8, src_stride, dst8, dst_stride, NULL, -1,
                                filter_y, y_step_q4, w, h, bd);
    } else {
      aom_highbd_convolve8_avg_vert(src8, src_stride, dst8, dst_stride, NULL,
                                    -1, filter_y, y_step_q4, w, h, bd);
    }
  } else {
    av1_highbd_convolve_vert(src, src_stride, dst, dst_stride, w, h,
                             filter_params, subpel_y_q4, y_step_q4, avg, bd);
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
468 469
void av1_highbd_convolve(const uint8_t *src8, int src_stride, uint8_t *dst8,
                         int dst_stride, int w, int h,
470
#if CONFIG_DUAL_FILTER
James Zern's avatar
James Zern committed
471
                         const InterpFilter *interp_filter,
472
#else
James Zern's avatar
James Zern committed
473
                         const InterpFilter interp_filter,
474
#endif
Yaowu Xu's avatar
Yaowu Xu committed
475 476 477
                         const int subpel_x_q4, int x_step_q4,
                         const int subpel_y_q4, int y_step_q4, int ref_idx,
                         int bd) {
Angie Chiang's avatar
Angie Chiang committed
478 479 480 481
  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0;
  int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0;
482 483 484 485 486 487

  assert(w <= MAX_BLOCK_WIDTH);
  assert(h <= MAX_BLOCK_HEIGHT);
  assert(y_step_q4 <= MAX_STEP);
  assert(x_step_q4 <= MAX_STEP);

Angie Chiang's avatar
Angie Chiang committed
488
  if (ignore_horiz && ignore_vert) {
489
    highbd_convolve_copy(src, src_stride, dst, dst_stride, w, h, ref_idx, bd);
Angie Chiang's avatar
Angie Chiang committed
490
  } else if (ignore_vert) {
491 492
#if CONFIG_DUAL_FILTER
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
493
        av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
494 495
#else
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
496
        av1_get_interp_filter_params(interp_filter);
497
#endif
498 499 500
    av1_highbd_convolve_horiz_facade(src8, src_stride, dst8, dst_stride, w, h,
                                     filter_params, subpel_x_q4, x_step_q4,
                                     ref_idx, bd);
Angie Chiang's avatar
Angie Chiang committed
501
  } else if (ignore_horiz) {
502 503
#if CONFIG_DUAL_FILTER
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
504
        av1_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
505 506
#else
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
507
        av1_get_interp_filter_params(interp_filter);
508
#endif
509 510 511
    av1_highbd_convolve_vert_facade(src8, src_stride, dst8, dst_stride, w, h,
                                    filter_params, subpel_y_q4, y_step_q4,
                                    ref_idx, bd);
Angie Chiang's avatar
Angie Chiang committed
512
  } else {
513 514 515 516 517
    // temp's size is set to a 256 aligned value to facilitate SIMD
    // implementation. The value is greater than (maximum possible intermediate
    // height or width) * MAX_SB_SIZE
    DECLARE_ALIGNED(16, uint16_t,
                    temp[((MAX_SB_SIZE * 2 + 16) + 16) * MAX_SB_SIZE]);
Yaowu Xu's avatar
Yaowu Xu committed
518
    uint8_t *temp8 = CONVERT_TO_BYTEPTR(temp);
519 520 521
    int max_intermediate_size = ((MAX_SB_SIZE * 2 + 16) + 16);
    int filter_size;
    InterpFilterParams filter_params;
522
#if CONFIG_DUAL_FILTER
Yaowu Xu's avatar
Yaowu Xu committed
523 524 525 526
    InterpFilterParams filter_params_x =
        av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
    InterpFilterParams filter_params_y =
        av1_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
527 528 529 530 531 532
    if (interp_filter[0 + 2 * ref_idx] == MULTITAP_SHARP &&
        interp_filter[1 + 2 * ref_idx] == MULTITAP_SHARP) {
      // Avoid two directions both using 12-tap filter.
      // This will reduce hardware implementation cost.
      filter_params_y = av1_get_interp_filter_params(EIGHTTAP_SHARP);
    }
533
#endif
534

535 536 537 538 539 540 541 542 543 544 545
#if CONFIG_DUAL_FILTER
    if (filter_params_y.taps < filter_params_x.taps) {
      int intermediate_width;
      int temp_stride = max_intermediate_size;
      filter_params = filter_params_y;
      filter_size = filter_params_x.taps;
      intermediate_width =
          (((w - 1) * x_step_q4 + subpel_x_q4) >> SUBPEL_BITS) + filter_size;
      assert(intermediate_width <= max_intermediate_size);

      assert(filter_params.taps <= MAX_FILTER_TAP);
Angie Chiang's avatar
Angie Chiang committed
546

547 548 549
      av1_highbd_convolve_vert_facade(
          src8 - (filter_size / 2 - 1), src_stride, temp8, temp_stride,
          intermediate_width, h, filter_params, subpel_y_q4, y_step_q4, 0, bd);
550

551 552 553 554 555 556 557 558 559 560 561
      filter_params = filter_params_x;
      assert(filter_params.taps <= MAX_FILTER_TAP);

      av1_highbd_convolve_horiz_facade(
          temp8 + (filter_size / 2 - 1), temp_stride, dst8, dst_stride, w, h,
          filter_params, subpel_x_q4, x_step_q4, ref_idx, bd);
    } else
#endif  // CONFIG_DUAL_FILTER
    {
      int intermediate_height;
      int temp_stride = MAX_SB_SIZE;
562
#if CONFIG_DUAL_FILTER
563 564 565 566 567
      filter_params = filter_params_x;
      filter_size = filter_params_y.taps;
#else
      filter_params = av1_get_interp_filter_params(interp_filter);
      filter_size = filter_params.taps;
568
#endif
569 570 571 572 573 574 575 576 577 578 579 580 581 582 583
      intermediate_height =
          (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
      assert(intermediate_height <= max_intermediate_size);
      (void)max_intermediate_size;

      av1_highbd_convolve_horiz_facade(
          src8 - src_stride * (filter_size / 2 - 1), src_stride, temp8,
          temp_stride, w, intermediate_height, filter_params, subpel_x_q4,
          x_step_q4, 0, bd);

#if CONFIG_DUAL_FILTER
      filter_params = filter_params_y;
#endif
      filter_size = filter_params.taps;
      assert(filter_params.taps <= MAX_FILTER_TAP);
584

585 586 587 588
      av1_highbd_convolve_vert_facade(
          temp8 + temp_stride * (filter_size / 2 - 1), temp_stride, dst8,
          dst_stride, w, h, filter_params, subpel_y_q4, y_step_q4, ref_idx, bd);
    }
Angie Chiang's avatar
Angie Chiang committed
589
  }
590
}
Yaowu Xu's avatar
Yaowu Xu committed
591
#endif  // CONFIG_AOM_HIGHBITDEPTH