/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <assert.h>
#include <string.h>

#include "./aom_dsp_rtcd.h"
#include "./av1_rtcd.h"
#include "av1/common/convolve.h"
#include "av1/common/filter.h"
#include "aom_dsp/aom_dsp_common.h"
#include "aom_ports/mem.h"

#define MAX_BLOCK_WIDTH (MAX_SB_SIZE)
#define MAX_BLOCK_HEIGHT (MAX_SB_SIZE)
#define MAX_STEP (32)
#define MAX_FILTER_TAP (12)

// Reference C horizontal sub-pixel convolution for 8-bit pixels.
//
// Produces a w x h block in dst. For every output pixel the horizontal
// position x_q4 starts at subpel_x_q4 and advances by x_step_q4; its integer
// part (x_q4 >> SUBPEL_BITS) selects the first source sample and its
// fractional part (x_q4 & SUBPEL_MASK) selects the kernel returned by
// av1_get_interp_filter_subpel_kernel().
//
// conv_params->round: when non-zero the filter sum is rounded by FILTER_BITS
//   and passed through clip_pixel(); otherwise the raw sum is kept.
// conv_params->ref:   when non-zero the result is combined with the value
//   already in dst as ROUND_POWER_OF_TWO(dst + sum, 1); otherwise it
//   overwrites dst.
//
// NOTE(review): dst is uint8_t, so when round is zero the unrounded sum is
// narrowed on store. Confirm callers only use round == 0 where that is
// intended.
void av1_convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst,
                          int dst_stride, int w, int h,
                          const InterpFilterParams filter_params,
                          const int subpel_x_q4, int x_step_q4,
                          ConvolveParams *conv_params) {
  int x, y;
  const int filter_size = filter_params.taps;

  // Step back (taps / 2 - 1) samples so that src_x[0] is the first tap.
  src -= filter_size / 2 - 1;

  for (y = 0; y < h; ++y) {
    int x_q4 = subpel_x_q4;
    for (x = 0; x < w; ++x) {
      const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
      const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
          filter_params, x_q4 & SUBPEL_MASK);
      int k, sum = 0;
      for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k];

      if (conv_params->round) {
        sum = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
      }

      if (conv_params->ref) {
        dst[x] = ROUND_POWER_OF_TWO(dst[x] + sum, 1);
      } else {
        dst[x] = sum;
      }

      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

// Reference C vertical sub-pixel convolution for 8-bit pixels.
//
// Produces a w x h block in dst, one column at a time. For every output pixel
// the vertical position y_q4 starts at subpel_y_q4 and advances by y_step_q4;
// its integer part (y_q4 >> SUBPEL_BITS) selects the first source row and its
// fractional part (y_q4 & SUBPEL_MASK) selects the kernel returned by
// av1_get_interp_filter_subpel_kernel().
//
// conv_params->round: when non-zero the filter sum is rounded by FILTER_BITS
//   and passed through clip_pixel(); otherwise the raw sum is kept.
// conv_params->ref:   when non-zero the result is combined with the value
//   already in dst as ROUND_POWER_OF_TWO(dst + sum, 1); otherwise it
//   overwrites dst.
//
// NOTE(review): dst is uint8_t, so when round is zero the unrounded sum is
// narrowed on store. Confirm callers only use round == 0 where that is
// intended.
void av1_convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst,
                         int dst_stride, int w, int h,
                         const InterpFilterParams filter_params,
                         const int subpel_y_q4, int y_step_q4,
                         ConvolveParams *conv_params) {
  int x, y;
  const int filter_size = filter_params.taps;

  // Step back (taps / 2 - 1) rows so that src_y[0] is the first tap.
  src -= src_stride * (filter_size / 2 - 1);

  for (x = 0; x < w; ++x) {
    int y_q4 = subpel_y_q4;
    for (y = 0; y < h; ++y) {
      const uint8_t *const src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
      const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
          filter_params, y_q4 & SUBPEL_MASK);
      int k, sum = 0;
      for (k = 0; k < filter_size; ++k)
        sum += src_y[k * src_stride] * y_filter[k];

      if (conv_params->round) {
        sum = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
      }

      if (conv_params->ref) {
        dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] + sum, 1);
      } else {
        dst[y * dst_stride] = sum;
      }

      y_q4 += y_step_q4;
    }
    // Advance to the next column.
    ++src;
    ++dst;
  }
}

// Unfiltered w x h block transfer from src to dst; av1_convolve() uses it
// when neither direction needs filtering.
//
// conv_params->ref == 0: each row is copied with memcpy(); when
//   conv_params->round is also zero every copied value is then shifted left
//   by FILTER_BITS in place.
// conv_params->ref != 0: each dst value is replaced by
//   clip_pixel(ROUND_POWER_OF_TWO(dst + s, 1)) where s is the source pixel
//   (round != 0) or the source pixel shifted left by FILTER_BITS (round == 0).
//
// NOTE(review): dst is uint8_t, so the shifted value in the ref == 0,
// round == 0 case is narrowed on store. Confirm this is intended.
static void convolve_copy(const uint8_t *src, int src_stride, uint8_t *dst,
                          int dst_stride, int w, int h,
                          ConvolveParams *conv_params) {
  int r, c;

  if (conv_params->ref == 0) {
    for (r = 0; r < h; ++r) {
      memcpy(dst, src, w);
      if (conv_params->round == 0) {
        for (c = 0; c < w; ++c) dst[c] = dst[c] << FILTER_BITS;
      }
      src += src_stride;
      dst += dst_stride;
    }
  } else {
    for (r = 0; r < h; ++r) {
      if (conv_params->round) {
        for (c = 0; c < w; ++c)
          dst[c] = clip_pixel(ROUND_POWER_OF_TWO(dst[c] + src[c], 1));
      } else {
        for (c = 0; c < w; ++c)
          dst[c] = clip_pixel(
              ROUND_POWER_OF_TWO(dst[c] + (src[c] << FILTER_BITS), 1));
      }
      src += src_stride;
      dst += dst_stride;
    }
  }
}

// Dispatches a horizontal convolution to the most specific implementation.
//
// When the filter has SUBPEL_TAPS taps and conv_params->round == 1, the
// aom_convolve8 routines are used: aom_convolve8_horiz() if
// conv_params->ref == 0, aom_convolve8_avg_horiz() otherwise. The vertical
// filter arguments of those routines are passed as NULL / -1.
// Every other combination goes through av1_convolve_horiz().
void av1_convolve_horiz_facade(const uint8_t *src, int src_stride, uint8_t *dst,
                               int dst_stride, int w, int h,
                               const InterpFilterParams filter_params,
                               const int subpel_x_q4, int x_step_q4,
                               ConvolveParams *conv_params) {
  if (filter_params.taps == SUBPEL_TAPS && conv_params->round == 1) {
    const int16_t *filter_x =
        av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);
    if (conv_params->ref == 0) {
      aom_convolve8_horiz(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
                          NULL, -1, w, h);
    } else {
      aom_convolve8_avg_horiz(src, src_stride, dst, dst_stride, filter_x,
                              x_step_q4, NULL, -1, w, h);
    }
  } else {
    av1_convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params,
                       subpel_x_q4, x_step_q4, conv_params);
  }
}

// Dispatches a vertical convolution to the most specific implementation.
//
// When the filter has SUBPEL_TAPS taps and conv_params->round == 1, the
// aom_convolve8 routines are used: aom_convolve8_vert() if
// conv_params->ref == 0, aom_convolve8_avg_vert() otherwise. The horizontal
// filter arguments of those routines are passed as NULL / -1.
// Every other combination goes through av1_convolve_vert().
void av1_convolve_vert_facade(const uint8_t *src, int src_stride, uint8_t *dst,
                              int dst_stride, int w, int h,
                              const InterpFilterParams filter_params,
                              const int subpel_y_q4, int y_step_q4,
                              ConvolveParams *conv_params) {
  if (filter_params.taps == SUBPEL_TAPS && conv_params->round == 1) {
    const int16_t *filter_y =
        av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);
    if (conv_params->ref == 0) {
      aom_convolve8_vert(src, src_stride, dst, dst_stride, NULL, -1, filter_y,
                         y_step_q4, w, h);
    } else {
      aom_convolve8_avg_vert(src, src_stride, dst, dst_stride, NULL, -1,
                             filter_y, y_step_q4, w, h);
    }
  } else {
    av1_convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params,
                      subpel_y_q4, y_step_q4, conv_params);
  }
}

void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
                  int dst_stride, int w, int h,
164
#if CONFIG_DUAL_FILTER
James Zern's avatar
James Zern committed
165
                  const InterpFilter *interp_filter,
166
#else
James Zern's avatar
James Zern committed
167
                  const InterpFilter interp_filter,
168
#endif
Yaowu Xu's avatar
Yaowu Xu committed
169
                  const int subpel_x_q4, int x_step_q4, const int subpel_y_q4,
170
                  int y_step_q4, ConvolveParams *conv_params) {
Angie Chiang's avatar
Angie Chiang committed
171 172
  int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0;
  int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0;
173 174 175 176 177 178

  assert(w <= MAX_BLOCK_WIDTH);
  assert(h <= MAX_BLOCK_HEIGHT);
  assert(y_step_q4 <= MAX_STEP);
  assert(x_step_q4 <= MAX_STEP);

Angie Chiang's avatar
Angie Chiang committed
179
  if (ignore_horiz && ignore_vert) {
180
    convolve_copy(src, src_stride, dst, dst_stride, w, h, conv_params);
Angie Chiang's avatar
Angie Chiang committed
181
  } else if (ignore_vert) {
182 183
#if CONFIG_DUAL_FILTER
    InterpFilterParams filter_params =
184
        av1_get_interp_filter_params(interp_filter[1 + 2 * conv_params->ref]);
185 186
#else
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
187
        av1_get_interp_filter_params(interp_filter);
188 189
#endif
    assert(filter_params.taps <= MAX_FILTER_TAP);
190
    av1_convolve_horiz_facade(src, src_stride, dst, dst_stride, w, h,
191 192
                              filter_params, subpel_x_q4, x_step_q4,
                              conv_params);
Angie Chiang's avatar
Angie Chiang committed
193
  } else if (ignore_horiz) {
194 195
#if CONFIG_DUAL_FILTER
    InterpFilterParams filter_params =
196
        av1_get_interp_filter_params(interp_filter[2 * conv_params->ref]);
197 198
#else
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
199
        av1_get_interp_filter_params(interp_filter);
200 201
#endif
    assert(filter_params.taps <= MAX_FILTER_TAP);
202
    av1_convolve_vert_facade(src, src_stride, dst, dst_stride, w, h,
203 204
                             filter_params, subpel_y_q4, y_step_q4,
                             conv_params);
Angie Chiang's avatar
Angie Chiang committed
205
  } else {
206 207 208 209 210 211
    // temp's size is set to a 256 aligned value to facilitate SIMD
    // implementation. The value is greater than (maximum possible intermediate
    // height or width) * MAX_SB_SIZE
    DECLARE_ALIGNED(16, uint8_t,
                    temp[((MAX_SB_SIZE * 2 + 16) + 16) * MAX_SB_SIZE]);
    int max_intermediate_size = ((MAX_SB_SIZE * 2 + 16) + 16);
212 213
    int filter_size;
    InterpFilterParams filter_params;
214
#if CONFIG_DUAL_FILTER
215
    InterpFilterParams filter_params_x =
216
        av1_get_interp_filter_params(interp_filter[1 + 2 * conv_params->ref]);
217
    InterpFilterParams filter_params_y =
218 219 220
        av1_get_interp_filter_params(interp_filter[0 + 2 * conv_params->ref]);
    if (interp_filter[0 + 2 * conv_params->ref] == MULTITAP_SHARP &&
        interp_filter[1 + 2 * conv_params->ref] == MULTITAP_SHARP) {
221 222 223 224
      // Avoid two directions both using 12-tap filter.
      // This will reduce hardware implementation cost.
      filter_params_y = av1_get_interp_filter_params(EIGHTTAP_SHARP);
    }
225

226 227 228 229
    // we do filter with fewer taps first to reduce hardware implementation
    // complexity
    if (filter_params_y.taps < filter_params_x.taps) {
      int intermediate_width;
230
      int temp_stride = max_intermediate_size;
231 232 233
      ConvolveParams temp_conv_params;
      temp_conv_params.ref = 0;
      temp_conv_params.round = 1;
234 235 236 237
      filter_params = filter_params_y;
      filter_size = filter_params_x.taps;
      intermediate_width =
          (((w - 1) * x_step_q4 + subpel_x_q4) >> SUBPEL_BITS) + filter_size;
238
      assert(intermediate_width <= max_intermediate_size);
Angie Chiang's avatar
Angie Chiang committed
239

240
      assert(filter_params.taps <= MAX_FILTER_TAP);
241

242 243
      av1_convolve_vert_facade(src - (filter_size / 2 - 1), src_stride, temp,
                               temp_stride, intermediate_width, h,
244 245
                               filter_params, subpel_y_q4, y_step_q4,
                               &temp_conv_params);
246

247 248
      filter_params = filter_params_x;
      assert(filter_params.taps <= MAX_FILTER_TAP);
249

250 251
      av1_convolve_horiz_facade(temp + (filter_size / 2 - 1), temp_stride, dst,
                                dst_stride, w, h, filter_params, subpel_x_q4,
252
                                x_step_q4, conv_params);
253
    } else
254
#endif  // CONFIG_DUAL_FILTER
255 256
    {
      int intermediate_height;
257
      int temp_stride = MAX_SB_SIZE;
258 259 260
      ConvolveParams temp_conv_params;
      temp_conv_params.ref = 0;
      temp_conv_params.round = 1;
261 262 263 264 265 266 267 268 269
#if CONFIG_DUAL_FILTER
      filter_params = filter_params_x;
      filter_size = filter_params_y.taps;
#else
      filter_params = av1_get_interp_filter_params(interp_filter);
      filter_size = filter_params.taps;
#endif
      intermediate_height =
          (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
270 271
      assert(intermediate_height <= max_intermediate_size);
      (void)max_intermediate_size;
272 273 274

      assert(filter_params.taps <= MAX_FILTER_TAP);

275 276 277
      av1_convolve_horiz_facade(src - src_stride * (filter_size / 2 - 1),
                                src_stride, temp, temp_stride, w,
                                intermediate_height, filter_params, subpel_x_q4,
278
                                x_step_q4, &temp_conv_params);
279 280 281 282 283 284

#if CONFIG_DUAL_FILTER
      filter_params = filter_params_y;
#endif
      assert(filter_params.taps <= MAX_FILTER_TAP);

285 286 287
      av1_convolve_vert_facade(
          temp + temp_stride * (filter_size / 2 - 1), temp_stride, dst,
          dst_stride, w, h, filter_params, subpel_y_q4, y_step_q4, conv_params);
288
    }
Angie Chiang's avatar
Angie Chiang committed
289
  }
290 291
}

// C fallback of the convolve initialization hook. It is a placeholder for
// SIMD initialization, so the C version has nothing to do.
void av1_convolve_init_c(void) {}

#if CONFIG_AOM_HIGHBITDEPTH
void av1_highbd_convolve_horiz_c(const uint16_t *src, int src_stride,
                                 uint16_t *dst, int dst_stride, int w, int h,
                                 const InterpFilterParams filter_params,
                                 const int subpel_x_q4, int x_step_q4, int avg,
                                 int bd) {
303
  int x, y;
304
  int filter_size = filter_params.taps;
305 306 307 308 309
  src -= filter_size / 2 - 1;
  for (y = 0; y < h; ++y) {
    int x_q4 = subpel_x_q4;
    for (x = 0; x < w; ++x) {
      const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
Yaowu Xu's avatar
Yaowu Xu committed
310
      const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
clang-format's avatar
clang-format committed
311
          filter_params, x_q4 & SUBPEL_MASK);
312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327
      int k, sum = 0;
      for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k];
      if (avg)
        dst[x] = ROUND_POWER_OF_TWO(
            dst[x] +
                clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
            1);
      else
        dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

void av1_highbd_convolve_vert_c(const uint16_t *src, int src_stride,
                                uint16_t *dst, int dst_stride, int w, int h,
                                const InterpFilterParams filter_params,
                                const int subpel_y_q4, int y_step_q4, int avg,
                                int bd) {
333
  int x, y;
334
  int filter_size = filter_params.taps;
335 336 337 338 339 340
  src -= src_stride * (filter_size / 2 - 1);

  for (x = 0; x < w; ++x) {
    int y_q4 = subpel_y_q4;
    for (y = 0; y < h; ++y) {
      const uint16_t *const src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
Yaowu Xu's avatar
Yaowu Xu committed
341
      const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
clang-format's avatar
clang-format committed
342
          filter_params, y_q4 & SUBPEL_MASK);
343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361
      int k, sum = 0;
      for (k = 0; k < filter_size; ++k)
        sum += src_y[k * src_stride] * y_filter[k];
      if (avg) {
        dst[y * dst_stride] = ROUND_POWER_OF_TWO(
            dst[y * dst_stride] +
                clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
            1);
      } else {
        dst[y * dst_stride] =
            clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
      }
      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}

// Unfiltered w x h block transfer for high-bit-depth pixels.
//
// avg == 0: every row of w samples is copied from src to dst with memcpy().
// avg != 0: every dst sample becomes
//   clip_pixel_highbd(ROUND_POWER_OF_TWO(dst + src, 1), bd).
static void highbd_convolve_copy(const uint16_t *src, int src_stride,
                                 uint16_t *dst, int dst_stride, int w, int h,
                                 int avg, int bd) {
  int row, col;

  if (avg != 0) {
    // Blend the source block into what dst already holds.
    for (row = 0; row < h; ++row) {
      for (col = 0; col < w; ++col) {
        dst[col] =
            clip_pixel_highbd(ROUND_POWER_OF_TWO(dst[col] + src[col], 1), bd);
      }
      src += src_stride;
      dst += dst_stride;
    }
    return;
  }

  // Plain copy, one row at a time because the strides may differ.
  for (row = 0; row < h; ++row) {
    memcpy(dst, src, w * sizeof(*src));
    src += src_stride;
    dst += dst_stride;
  }
}

// Dispatches a high-bit-depth horizontal convolution.
//
// Filters with SUBPEL_TAPS taps go to aom_highbd_convolve8_horiz() (avg == 0)
// or aom_highbd_convolve8_avg_horiz() (avg != 0), which take the 8-bit-typed
// pointers directly. Any other tap count is handled by
// av1_highbd_convolve_horiz() on the pointers obtained from
// CONVERT_TO_SHORTPTR().
void av1_highbd_convolve_horiz_facade(const uint8_t *src8, int src_stride,
                                      uint8_t *dst8, int dst_stride, int w,
                                      int h,
                                      const InterpFilterParams filter_params,
                                      const int subpel_x_q4, int x_step_q4,
                                      int avg, int bd) {
  const int16_t *kernel_x;

  if (filter_params.taps != SUBPEL_TAPS) {
    uint16_t *src16 = CONVERT_TO_SHORTPTR(src8);
    uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst8);
    av1_highbd_convolve_horiz(src16, src_stride, dst16, dst_stride, w, h,
                              filter_params, subpel_x_q4, x_step_q4, avg, bd);
    return;
  }

  kernel_x = av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);
  if (avg != 0) {
    aom_highbd_convolve8_avg_horiz(src8, src_stride, dst8, dst_stride, kernel_x,
                                   x_step_q4, NULL, -1, w, h, bd);
  } else {
    aom_highbd_convolve8_horiz(src8, src_stride, dst8, dst_stride, kernel_x,
                               x_step_q4, NULL, -1, w, h, bd);
  }
}

void av1_highbd_convolve_vert_facade(const uint8_t *src8, int src_stride,
                                     uint8_t *dst8, int dst_stride, int w,
                                     int h,
                                     const InterpFilterParams filter_params,
                                     const int subpel_y_q4, int y_step_q4,
                                     int avg, int bd) {
  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
415

416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431
  if (filter_params.taps == SUBPEL_TAPS) {
    const int16_t *filter_y =
        av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);
    if (avg == 0) {
      aom_highbd_convolve8_vert(src8, src_stride, dst8, dst_stride, NULL, -1,
                                filter_y, y_step_q4, w, h, bd);
    } else {
      aom_highbd_convolve8_avg_vert(src8, src_stride, dst8, dst_stride, NULL,
                                    -1, filter_y, y_step_q4, w, h, bd);
    }
  } else {
    av1_highbd_convolve_vert(src, src_stride, dst, dst_stride, w, h,
                             filter_params, subpel_y_q4, y_step_q4, avg, bd);
  }
}

void av1_highbd_convolve(const uint8_t *src8, int src_stride, uint8_t *dst8,
                         int dst_stride, int w, int h,
434
#if CONFIG_DUAL_FILTER
James Zern's avatar
James Zern committed
435
                         const InterpFilter *interp_filter,
436
#else
James Zern's avatar
James Zern committed
437
                         const InterpFilter interp_filter,
438
#endif
Yaowu Xu's avatar
Yaowu Xu committed
439 440 441
                         const int subpel_x_q4, int x_step_q4,
                         const int subpel_y_q4, int y_step_q4, int ref_idx,
                         int bd) {
Angie Chiang's avatar
Angie Chiang committed
442 443 444 445
  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0;
  int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0;
446 447 448 449 450 451

  assert(w <= MAX_BLOCK_WIDTH);
  assert(h <= MAX_BLOCK_HEIGHT);
  assert(y_step_q4 <= MAX_STEP);
  assert(x_step_q4 <= MAX_STEP);

Angie Chiang's avatar
Angie Chiang committed
452
  if (ignore_horiz && ignore_vert) {
453
    highbd_convolve_copy(src, src_stride, dst, dst_stride, w, h, ref_idx, bd);
Angie Chiang's avatar
Angie Chiang committed
454
  } else if (ignore_vert) {
455 456
#if CONFIG_DUAL_FILTER
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
457
        av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
458 459
#else
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
460
        av1_get_interp_filter_params(interp_filter);
461
#endif
462 463 464
    av1_highbd_convolve_horiz_facade(src8, src_stride, dst8, dst_stride, w, h,
                                     filter_params, subpel_x_q4, x_step_q4,
                                     ref_idx, bd);
Angie Chiang's avatar
Angie Chiang committed
465
  } else if (ignore_horiz) {
466 467
#if CONFIG_DUAL_FILTER
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
468
        av1_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
469 470
#else
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
471
        av1_get_interp_filter_params(interp_filter);
472
#endif
473 474 475
    av1_highbd_convolve_vert_facade(src8, src_stride, dst8, dst_stride, w, h,
                                    filter_params, subpel_y_q4, y_step_q4,
                                    ref_idx, bd);
Angie Chiang's avatar
Angie Chiang committed
476
  } else {
477 478 479 480 481
    // temp's size is set to a 256 aligned value to facilitate SIMD
    // implementation. The value is greater than (maximum possible intermediate
    // height or width) * MAX_SB_SIZE
    DECLARE_ALIGNED(16, uint16_t,
                    temp[((MAX_SB_SIZE * 2 + 16) + 16) * MAX_SB_SIZE]);
Yaowu Xu's avatar
Yaowu Xu committed
482
    uint8_t *temp8 = CONVERT_TO_BYTEPTR(temp);
483 484 485
    int max_intermediate_size = ((MAX_SB_SIZE * 2 + 16) + 16);
    int filter_size;
    InterpFilterParams filter_params;
486
#if CONFIG_DUAL_FILTER
Yaowu Xu's avatar
Yaowu Xu committed
487 488 489 490
    InterpFilterParams filter_params_x =
        av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
    InterpFilterParams filter_params_y =
        av1_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
491 492 493 494 495 496
    if (interp_filter[0 + 2 * ref_idx] == MULTITAP_SHARP &&
        interp_filter[1 + 2 * ref_idx] == MULTITAP_SHARP) {
      // Avoid two directions both using 12-tap filter.
      // This will reduce hardware implementation cost.
      filter_params_y = av1_get_interp_filter_params(EIGHTTAP_SHARP);
    }
497
#endif
498

499 500 501 502 503 504 505 506 507 508 509
#if CONFIG_DUAL_FILTER
    if (filter_params_y.taps < filter_params_x.taps) {
      int intermediate_width;
      int temp_stride = max_intermediate_size;
      filter_params = filter_params_y;
      filter_size = filter_params_x.taps;
      intermediate_width =
          (((w - 1) * x_step_q4 + subpel_x_q4) >> SUBPEL_BITS) + filter_size;
      assert(intermediate_width <= max_intermediate_size);

      assert(filter_params.taps <= MAX_FILTER_TAP);
Angie Chiang's avatar
Angie Chiang committed
510

511 512 513
      av1_highbd_convolve_vert_facade(
          src8 - (filter_size / 2 - 1), src_stride, temp8, temp_stride,
          intermediate_width, h, filter_params, subpel_y_q4, y_step_q4, 0, bd);
514

515 516 517 518 519 520 521 522 523 524 525
      filter_params = filter_params_x;
      assert(filter_params.taps <= MAX_FILTER_TAP);

      av1_highbd_convolve_horiz_facade(
          temp8 + (filter_size / 2 - 1), temp_stride, dst8, dst_stride, w, h,
          filter_params, subpel_x_q4, x_step_q4, ref_idx, bd);
    } else
#endif  // CONFIG_DUAL_FILTER
    {
      int intermediate_height;
      int temp_stride = MAX_SB_SIZE;
526
#if CONFIG_DUAL_FILTER
527 528 529 530 531
      filter_params = filter_params_x;
      filter_size = filter_params_y.taps;
#else
      filter_params = av1_get_interp_filter_params(interp_filter);
      filter_size = filter_params.taps;
532
#endif
533 534 535 536 537 538 539 540 541 542 543 544 545 546 547
      intermediate_height =
          (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
      assert(intermediate_height <= max_intermediate_size);
      (void)max_intermediate_size;

      av1_highbd_convolve_horiz_facade(
          src8 - src_stride * (filter_size / 2 - 1), src_stride, temp8,
          temp_stride, w, intermediate_height, filter_params, subpel_x_q4,
          x_step_q4, 0, bd);

#if CONFIG_DUAL_FILTER
      filter_params = filter_params_y;
#endif
      filter_size = filter_params.taps;
      assert(filter_params.taps <= MAX_FILTER_TAP);
548

549 550 551 552
      av1_highbd_convolve_vert_facade(
          temp8 + temp_stride * (filter_size / 2 - 1), temp_stride, dst8,
          dst_stride, w, h, filter_params, subpel_y_q4, y_step_q4, ref_idx, bd);
    }
Angie Chiang's avatar
Angie Chiang committed
553
  }
554
}
#endif  // CONFIG_AOM_HIGHBITDEPTH