/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <assert.h>
#include <string.h>

#include "./aom_dsp_rtcd.h"
#include "./av1_rtcd.h"
#include "av1/common/convolve.h"
#include "av1/common/filter.h"
#include "aom_dsp/aom_dsp_common.h"
#include "aom_ports/mem.h"

// Limits checked with assert() by the convolve entry points in this file.
// Largest block width / height accepted (w and h arguments).
#define MAX_BLOCK_WIDTH (MAX_SB_SIZE)
#define MAX_BLOCK_HEIGHT (MAX_SB_SIZE)
// Largest x_step_q4 / y_step_q4 accepted.
#define MAX_STEP (32)
// Largest number of taps an interpolation filter may have.
#define MAX_FILTER_TAP (12)

// Reference C implementation of the horizontal (row-wise) sub-pixel filter.
//
// Writes a w x h block to dst. On every row the source position x_q4 starts
// at subpel_x_q4 and advances by x_step_q4 per output sample. Its integer
// part (x_q4 >> SUBPEL_BITS) selects the first source sample and its
// fractional part (x_q4 & SUBPEL_MASK) selects the kernel returned by
// av1_get_interp_filter_subpel_kernel(). Each output is the sum of
// filter_params.taps products of a source sample and a kernel coefficient.
//
// src is moved back by (taps / 2 - 1) samples before filtering, so reads
// begin that many samples before the position passed in.
//
// conv_params->round == CONVOLVE_OPT_ROUND: the sum is rounded by
//   FILTER_BITS and clipped with clip_pixel(); otherwise the raw sum is kept.
// conv_params->ref != 0: the value is merged with the existing dst sample as
//   ROUND_POWER_OF_TWO(dst + value, 1); otherwise dst is overwritten.
void av1_convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst,
                          int dst_stride, int w, int h,
                          const InterpFilterParams filter_params,
                          const int subpel_x_q4, int x_step_q4,
                          ConvolveParams *conv_params) {
  int x, y;
  int filter_size = filter_params.taps;
  src -= filter_size / 2 - 1;
  for (y = 0; y < h; ++y) {
    // The sub-pixel position restarts at the left edge of every row.
    int x_q4 = subpel_x_q4;
    for (x = 0; x < w; ++x) {
      const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
      const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
          filter_params, x_q4 & SUBPEL_MASK);
      int k, sum = 0;
      for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k];

      if (conv_params->round == CONVOLVE_OPT_ROUND)
        sum = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));

      // NOTE(review): without CONVOLVE_OPT_ROUND the unscaled sum is stored
      // into a uint8_t and is narrowed implicitly -- confirm this is intended.
      if (conv_params->ref)
        dst[x] = ROUND_POWER_OF_TWO(dst[x] + sum, 1);
      else
        dst[x] = sum;

      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

// Reference C implementation of the vertical (column-wise) sub-pixel filter.
//
// Writes a w x h block to dst, one column at a time. In every column the
// source position y_q4 starts at subpel_y_q4 and advances by y_step_q4 per
// output sample. Its integer part (y_q4 >> SUBPEL_BITS) selects the first
// source row and its fractional part (y_q4 & SUBPEL_MASK) selects the kernel
// returned by av1_get_interp_filter_subpel_kernel(). Each output is the sum
// of filter_params.taps products of a source sample (stepping by src_stride)
// and a kernel coefficient.
//
// src is moved back by (taps / 2 - 1) rows before filtering, so reads begin
// that many rows above the position passed in.
//
// conv_params->round == CONVOLVE_OPT_ROUND: the sum is rounded by
//   FILTER_BITS and clipped with clip_pixel(); otherwise the raw sum is kept.
// conv_params->ref != 0: the value is merged with the existing dst sample as
//   ROUND_POWER_OF_TWO(dst + value, 1); otherwise dst is overwritten.
void av1_convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst,
                         int dst_stride, int w, int h,
                         const InterpFilterParams filter_params,
                         const int subpel_y_q4, int y_step_q4,
                         ConvolveParams *conv_params) {
  int x, y;
  int filter_size = filter_params.taps;
  src -= src_stride * (filter_size / 2 - 1);

  for (x = 0; x < w; ++x) {
    // The sub-pixel position restarts at the top of every column.
    int y_q4 = subpel_y_q4;
    for (y = 0; y < h; ++y) {
      const uint8_t *const src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
      const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
          filter_params, y_q4 & SUBPEL_MASK);
      int k, sum = 0;
      for (k = 0; k < filter_size; ++k)
        sum += src_y[k * src_stride] * y_filter[k];

      if (conv_params->round == CONVOLVE_OPT_ROUND)
        sum = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));

      // NOTE(review): without CONVOLVE_OPT_ROUND the unscaled sum is stored
      // into a uint8_t and is narrowed implicitly -- confirm this is intended.
      if (conv_params->ref)
        dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] + sum, 1);
      else
        dst[y * dst_stride] = sum;

      y_q4 += y_step_q4;
    }
    // Advance to the next column.
    ++src;
    ++dst;
  }
}

// Copies (or averages in) a w x h block without any filtering.
//
// conv_params->ref == 0: every row is copied with memcpy(); when
//   conv_params->round == CONVOLVE_OPT_NO_ROUND each copied sample is then
//   shifted left by FILTER_BITS in place.
// conv_params->ref != 0: every dst sample becomes
//   clip_pixel(ROUND_POWER_OF_TWO(dst + s, 1)), where s is the source sample
//   for CONVOLVE_OPT_ROUND and (source << FILTER_BITS) otherwise.
static void convolve_copy(const uint8_t *src, int src_stride, uint8_t *dst,
                          int dst_stride, int w, int h,
                          ConvolveParams *conv_params) {
  if (conv_params->ref == 0) {
    int r, c;
    for (r = 0; r < h; ++r) {
      memcpy(dst, src, w);
      // NOTE(review): the shifted value is written back to a uint8_t, so it
      // is narrowed implicitly -- confirm this is intended.
      if (conv_params->round == CONVOLVE_OPT_NO_ROUND)
        for (c = 0; c < w; ++c) dst[c] = dst[c] << FILTER_BITS;
      src += src_stride;
      dst += dst_stride;
    }
  } else {
    int r, c;
    for (r = 0; r < h; ++r) {
      if (conv_params->round == CONVOLVE_OPT_ROUND)
        for (c = 0; c < w; ++c)
          dst[c] = clip_pixel(ROUND_POWER_OF_TWO(dst[c] + src[c], 1));
      else
        for (c = 0; c < w; ++c)
          dst[c] = clip_pixel(
              ROUND_POWER_OF_TWO(dst[c] + (src[c] << FILTER_BITS), 1));
      src += src_stride;
      dst += dst_stride;
    }
  }
}

// Selects the implementation used for a horizontal filtering pass.
//
// - taps == SUBPEL_TAPS with CONVOLVE_OPT_ROUND: forwards to
//   aom_convolve8_horiz() (conv_params->ref == 0) or
//   aom_convolve8_avg_horiz() (otherwise), passing the kernel looked up for
//   subpel_x_q4 and NULL / -1 for the unused vertical filter arguments.
// - any other tap count with CONVOLVE_OPT_ROUND: av1_convolve_horiz().
// - rounding disabled: the plain C routine av1_convolve_horiz_c().
void av1_convolve_horiz_facade(const uint8_t *src, int src_stride, uint8_t *dst,
                               int dst_stride, int w, int h,
                               const InterpFilterParams filter_params,
                               const int subpel_x_q4, int x_step_q4,
                               ConvolveParams *conv_params) {
  if (filter_params.taps == SUBPEL_TAPS &&
      conv_params->round == CONVOLVE_OPT_ROUND) {
    const int16_t *filter_x =
        av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);
    if (conv_params->ref == 0)
      aom_convolve8_horiz(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
                          NULL, -1, w, h);
    else
      aom_convolve8_avg_horiz(src, src_stride, dst, dst_stride, filter_x,
                              x_step_q4, NULL, -1, w, h);
  } else if (conv_params->round == CONVOLVE_OPT_ROUND) {
    av1_convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params,
                       subpel_x_q4, x_step_q4, conv_params);
  } else {
    // TODO(angiebird) need SIMD implementation here
    av1_convolve_horiz_c(src, src_stride, dst, dst_stride, w, h, filter_params,
                         subpel_x_q4, x_step_q4, conv_params);
  }
}

// Selects the implementation used for a vertical filtering pass.
//
// - taps == SUBPEL_TAPS with CONVOLVE_OPT_ROUND: forwards to
//   aom_convolve8_vert() (conv_params->ref == 0) or
//   aom_convolve8_avg_vert() (otherwise), passing the kernel looked up for
//   subpel_y_q4 and NULL / -1 for the unused horizontal filter arguments.
// - any other tap count with CONVOLVE_OPT_ROUND: av1_convolve_vert().
// - rounding disabled: the plain C routine av1_convolve_vert_c().
void av1_convolve_vert_facade(const uint8_t *src, int src_stride, uint8_t *dst,
                              int dst_stride, int w, int h,
                              const InterpFilterParams filter_params,
                              const int subpel_y_q4, int y_step_q4,
                              ConvolveParams *conv_params) {
  if (filter_params.taps == SUBPEL_TAPS &&
      conv_params->round == CONVOLVE_OPT_ROUND) {
    const int16_t *filter_y =
        av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);
    if (conv_params->ref == 0) {
      aom_convolve8_vert(src, src_stride, dst, dst_stride, NULL, -1, filter_y,
                         y_step_q4, w, h);
    } else {
      aom_convolve8_avg_vert(src, src_stride, dst, dst_stride, NULL, -1,
                             filter_y, y_step_q4, w, h);
    }
  } else if (conv_params->round == CONVOLVE_OPT_ROUND) {
    av1_convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params,
                      subpel_y_q4, y_step_q4, conv_params);
  } else {
    // TODO(angiebird) need SIMD implementation here
    av1_convolve_vert_c(src, src_stride, dst, dst_stride, w, h, filter_params,
                        subpel_y_q4, y_step_q4, conv_params);
  }
}

void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
                  int dst_stride, int w, int h,
178
#if CONFIG_DUAL_FILTER
James Zern's avatar
James Zern committed
179
                  const InterpFilter *interp_filter,
180
#else
James Zern's avatar
James Zern committed
181
                  const InterpFilter interp_filter,
182
#endif
Yaowu Xu's avatar
Yaowu Xu committed
183
                  const int subpel_x_q4, int x_step_q4, const int subpel_y_q4,
184
                  int y_step_q4, ConvolveParams *conv_params) {
Angie Chiang's avatar
Angie Chiang committed
185 186
  int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0;
  int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0;
187 188 189 190 191 192

  assert(w <= MAX_BLOCK_WIDTH);
  assert(h <= MAX_BLOCK_HEIGHT);
  assert(y_step_q4 <= MAX_STEP);
  assert(x_step_q4 <= MAX_STEP);

Angie Chiang's avatar
Angie Chiang committed
193
  if (ignore_horiz && ignore_vert) {
194
    convolve_copy(src, src_stride, dst, dst_stride, w, h, conv_params);
Angie Chiang's avatar
Angie Chiang committed
195
  } else if (ignore_vert) {
196 197
#if CONFIG_DUAL_FILTER
    InterpFilterParams filter_params =
198
        av1_get_interp_filter_params(interp_filter[1 + 2 * conv_params->ref]);
199 200
#else
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
201
        av1_get_interp_filter_params(interp_filter);
202 203
#endif
    assert(filter_params.taps <= MAX_FILTER_TAP);
204
    av1_convolve_horiz_facade(src, src_stride, dst, dst_stride, w, h,
205 206
                              filter_params, subpel_x_q4, x_step_q4,
                              conv_params);
Angie Chiang's avatar
Angie Chiang committed
207
  } else if (ignore_horiz) {
208 209
#if CONFIG_DUAL_FILTER
    InterpFilterParams filter_params =
210
        av1_get_interp_filter_params(interp_filter[2 * conv_params->ref]);
211 212
#else
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
213
        av1_get_interp_filter_params(interp_filter);
214 215
#endif
    assert(filter_params.taps <= MAX_FILTER_TAP);
216
    av1_convolve_vert_facade(src, src_stride, dst, dst_stride, w, h,
217 218
                             filter_params, subpel_y_q4, y_step_q4,
                             conv_params);
Angie Chiang's avatar
Angie Chiang committed
219
  } else {
220 221 222 223 224 225
    // temp's size is set to a 256 aligned value to facilitate SIMD
    // implementation. The value is greater than (maximum possible intermediate
    // height or width) * MAX_SB_SIZE
    DECLARE_ALIGNED(16, uint8_t,
                    temp[((MAX_SB_SIZE * 2 + 16) + 16) * MAX_SB_SIZE]);
    int max_intermediate_size = ((MAX_SB_SIZE * 2 + 16) + 16);
226 227
    int filter_size;
    InterpFilterParams filter_params;
228
#if CONFIG_DUAL_FILTER
229
    InterpFilterParams filter_params_x =
230
        av1_get_interp_filter_params(interp_filter[1 + 2 * conv_params->ref]);
231
    InterpFilterParams filter_params_y =
232 233 234
        av1_get_interp_filter_params(interp_filter[0 + 2 * conv_params->ref]);
    if (interp_filter[0 + 2 * conv_params->ref] == MULTITAP_SHARP &&
        interp_filter[1 + 2 * conv_params->ref] == MULTITAP_SHARP) {
235 236 237 238
      // Avoid two directions both using 12-tap filter.
      // This will reduce hardware implementation cost.
      filter_params_y = av1_get_interp_filter_params(EIGHTTAP_SHARP);
    }
239

240 241 242 243
    // we do filter with fewer taps first to reduce hardware implementation
    // complexity
    if (filter_params_y.taps < filter_params_x.taps) {
      int intermediate_width;
244
      int temp_stride = max_intermediate_size;
245 246
      ConvolveParams temp_conv_params;
      temp_conv_params.ref = 0;
247
      temp_conv_params.round = CONVOLVE_OPT_ROUND;
248 249 250 251
      filter_params = filter_params_y;
      filter_size = filter_params_x.taps;
      intermediate_width =
          (((w - 1) * x_step_q4 + subpel_x_q4) >> SUBPEL_BITS) + filter_size;
252
      assert(intermediate_width <= max_intermediate_size);
Angie Chiang's avatar
Angie Chiang committed
253

254
      assert(filter_params.taps <= MAX_FILTER_TAP);
255

256 257
      av1_convolve_vert_facade(src - (filter_size / 2 - 1), src_stride, temp,
                               temp_stride, intermediate_width, h,
258 259
                               filter_params, subpel_y_q4, y_step_q4,
                               &temp_conv_params);
260

261 262
      filter_params = filter_params_x;
      assert(filter_params.taps <= MAX_FILTER_TAP);
263

264 265
      av1_convolve_horiz_facade(temp + (filter_size / 2 - 1), temp_stride, dst,
                                dst_stride, w, h, filter_params, subpel_x_q4,
266
                                x_step_q4, conv_params);
267
    } else
268
#endif  // CONFIG_DUAL_FILTER
269 270
    {
      int intermediate_height;
271
      int temp_stride = MAX_SB_SIZE;
272 273
      ConvolveParams temp_conv_params;
      temp_conv_params.ref = 0;
274
      temp_conv_params.round = CONVOLVE_OPT_ROUND;
275 276 277 278 279 280 281 282 283
#if CONFIG_DUAL_FILTER
      filter_params = filter_params_x;
      filter_size = filter_params_y.taps;
#else
      filter_params = av1_get_interp_filter_params(interp_filter);
      filter_size = filter_params.taps;
#endif
      intermediate_height =
          (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
284 285
      assert(intermediate_height <= max_intermediate_size);
      (void)max_intermediate_size;
286 287 288

      assert(filter_params.taps <= MAX_FILTER_TAP);

289 290 291
      av1_convolve_horiz_facade(src - src_stride * (filter_size / 2 - 1),
                                src_stride, temp, temp_stride, w,
                                intermediate_height, filter_params, subpel_x_q4,
292
                                x_step_q4, &temp_conv_params);
293 294 295 296 297 298

#if CONFIG_DUAL_FILTER
      filter_params = filter_params_y;
#endif
      assert(filter_params.taps <= MAX_FILTER_TAP);

299 300 301
      av1_convolve_vert_facade(
          temp + temp_stride * (filter_size / 2 - 1), temp_stride, dst,
          dst_stride, w, h, filter_params, subpel_y_q4, y_step_q4, conv_params);
302
    }
Angie Chiang's avatar
Angie Chiang committed
303
  }
304 305
}

Angie Chiang's avatar
Angie Chiang committed
306 307 308 309 310
// C fallback for convolve initialization; does nothing.
void av1_convolve_init_c(void) {
  // Intentionally empty: placeholder for SIMD initialization.
}

#if CONFIG_AOM_HIGHBITDEPTH
void av1_highbd_convolve_horiz_c(const uint16_t *src, int src_stride,
                                 uint16_t *dst, int dst_stride, int w, int h,
                                 const InterpFilterParams filter_params,
                                 const int subpel_x_q4, int x_step_q4, int avg,
                                 int bd) {
317
  int x, y;
318
  int filter_size = filter_params.taps;
319 320 321 322 323
  src -= filter_size / 2 - 1;
  for (y = 0; y < h; ++y) {
    int x_q4 = subpel_x_q4;
    for (x = 0; x < w; ++x) {
      const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
Yaowu Xu's avatar
Yaowu Xu committed
324
      const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
clang-format's avatar
clang-format committed
325
          filter_params, x_q4 & SUBPEL_MASK);
326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341
      int k, sum = 0;
      for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k];
      if (avg)
        dst[x] = ROUND_POWER_OF_TWO(
            dst[x] +
                clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
            1);
      else
        dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
342 343 344 345 346
void av1_highbd_convolve_vert_c(const uint16_t *src, int src_stride,
                                uint16_t *dst, int dst_stride, int w, int h,
                                const InterpFilterParams filter_params,
                                const int subpel_y_q4, int y_step_q4, int avg,
                                int bd) {
347
  int x, y;
348
  int filter_size = filter_params.taps;
349 350 351 352 353 354
  src -= src_stride * (filter_size / 2 - 1);

  for (x = 0; x < w; ++x) {
    int y_q4 = subpel_y_q4;
    for (y = 0; y < h; ++y) {
      const uint16_t *const src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
Yaowu Xu's avatar
Yaowu Xu committed
355
      const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
clang-format's avatar
clang-format committed
356
          filter_params, y_q4 & SUBPEL_MASK);
357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375
      int k, sum = 0;
      for (k = 0; k < filter_size; ++k)
        sum += src_y[k * src_stride] * y_filter[k];
      if (avg) {
        dst[y * dst_stride] = ROUND_POWER_OF_TWO(
            dst[y * dst_stride] +
                clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
            1);
      } else {
        dst[y * dst_stride] =
            clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
      }
      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}

Angie Chiang's avatar
Angie Chiang committed
376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397
// Copies (or averages in) a w x h block of 16-bit samples without filtering.
//
// avg == 0: every row is copied with memcpy() (w * sizeof(*src) bytes).
// avg != 0: every dst sample becomes
//   clip_pixel_highbd(ROUND_POWER_OF_TWO(dst + src, 1), bd).
static void highbd_convolve_copy(const uint16_t *src, int src_stride,
                                 uint16_t *dst, int dst_stride, int w, int h,
                                 int avg, int bd) {
  if (avg == 0) {
    int r;
    for (r = 0; r < h; ++r) {
      memcpy(dst, src, w * sizeof(*src));
      src += src_stride;
      dst += dst_stride;
    }
  } else {
    int r, c;
    for (r = 0; r < h; ++r) {
      for (c = 0; c < w; ++c) {
        dst[c] = clip_pixel_highbd(ROUND_POWER_OF_TWO(dst[c] + src[c], 1), bd);
      }
      src += src_stride;
      dst += dst_stride;
    }
  }
}

// Selects the implementation used for a horizontal pass on 16-bit samples.
//
// Filters whose tap count differs from SUBPEL_TAPS go to
// av1_highbd_convolve_horiz() on the CONVERT_TO_SHORTPTR() views of
// src8 / dst8. SUBPEL_TAPS filters go to aom_highbd_convolve8_horiz()
// (avg == 0) or aom_highbd_convolve8_avg_horiz() (otherwise), which receive
// the original src8 / dst8 pointers and the kernel looked up for subpel_x_q4.
void av1_highbd_convolve_horiz_facade(const uint8_t *src8, int src_stride,
                                      uint8_t *dst8, int dst_stride, int w,
                                      int h,
                                      const InterpFilterParams filter_params,
                                      const int subpel_x_q4, int x_step_q4,
                                      int avg, int bd) {
  uint16_t *src16 = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst8);
  const int16_t *kernel_x;

  if (filter_params.taps != SUBPEL_TAPS) {
    av1_highbd_convolve_horiz(src16, src_stride, dst16, dst_stride, w, h,
                              filter_params, subpel_x_q4, x_step_q4, avg, bd);
    return;
  }

  kernel_x = av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);
  if (avg != 0) {
    aom_highbd_convolve8_avg_horiz(src8, src_stride, dst8, dst_stride, kernel_x,
                                   x_step_q4, NULL, -1, w, h, bd);
  } else {
    aom_highbd_convolve8_horiz(src8, src_stride, dst8, dst_stride, kernel_x,
                               x_step_q4, NULL, -1, w, h, bd);
  }
}

void av1_highbd_convolve_vert_facade(const uint8_t *src8, int src_stride,
                                     uint8_t *dst8, int dst_stride, int w,
                                     int h,
                                     const InterpFilterParams filter_params,
                                     const int subpel_y_q4, int y_step_q4,
                                     int avg, int bd) {
  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
429

430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445
  if (filter_params.taps == SUBPEL_TAPS) {
    const int16_t *filter_y =
        av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);
    if (avg == 0) {
      aom_highbd_convolve8_vert(src8, src_stride, dst8, dst_stride, NULL, -1,
                                filter_y, y_step_q4, w, h, bd);
    } else {
      aom_highbd_convolve8_avg_vert(src8, src_stride, dst8, dst_stride, NULL,
                                    -1, filter_y, y_step_q4, w, h, bd);
    }
  } else {
    av1_highbd_convolve_vert(src, src_stride, dst, dst_stride, w, h,
                             filter_params, subpel_y_q4, y_step_q4, avg, bd);
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
446 447
void av1_highbd_convolve(const uint8_t *src8, int src_stride, uint8_t *dst8,
                         int dst_stride, int w, int h,
448
#if CONFIG_DUAL_FILTER
James Zern's avatar
James Zern committed
449
                         const InterpFilter *interp_filter,
450
#else
James Zern's avatar
James Zern committed
451
                         const InterpFilter interp_filter,
452
#endif
Yaowu Xu's avatar
Yaowu Xu committed
453 454 455
                         const int subpel_x_q4, int x_step_q4,
                         const int subpel_y_q4, int y_step_q4, int ref_idx,
                         int bd) {
Angie Chiang's avatar
Angie Chiang committed
456 457 458 459
  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0;
  int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0;
460 461 462 463 464 465

  assert(w <= MAX_BLOCK_WIDTH);
  assert(h <= MAX_BLOCK_HEIGHT);
  assert(y_step_q4 <= MAX_STEP);
  assert(x_step_q4 <= MAX_STEP);

Angie Chiang's avatar
Angie Chiang committed
466
  if (ignore_horiz && ignore_vert) {
467
    highbd_convolve_copy(src, src_stride, dst, dst_stride, w, h, ref_idx, bd);
Angie Chiang's avatar
Angie Chiang committed
468
  } else if (ignore_vert) {
469 470
#if CONFIG_DUAL_FILTER
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
471
        av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
472 473
#else
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
474
        av1_get_interp_filter_params(interp_filter);
475
#endif
476 477 478
    av1_highbd_convolve_horiz_facade(src8, src_stride, dst8, dst_stride, w, h,
                                     filter_params, subpel_x_q4, x_step_q4,
                                     ref_idx, bd);
Angie Chiang's avatar
Angie Chiang committed
479
  } else if (ignore_horiz) {
480 481
#if CONFIG_DUAL_FILTER
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
482
        av1_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
483 484
#else
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
485
        av1_get_interp_filter_params(interp_filter);
486
#endif
487 488 489
    av1_highbd_convolve_vert_facade(src8, src_stride, dst8, dst_stride, w, h,
                                    filter_params, subpel_y_q4, y_step_q4,
                                    ref_idx, bd);
Angie Chiang's avatar
Angie Chiang committed
490
  } else {
491 492 493 494 495
    // temp's size is set to a 256 aligned value to facilitate SIMD
    // implementation. The value is greater than (maximum possible intermediate
    // height or width) * MAX_SB_SIZE
    DECLARE_ALIGNED(16, uint16_t,
                    temp[((MAX_SB_SIZE * 2 + 16) + 16) * MAX_SB_SIZE]);
Yaowu Xu's avatar
Yaowu Xu committed
496
    uint8_t *temp8 = CONVERT_TO_BYTEPTR(temp);
497 498 499
    int max_intermediate_size = ((MAX_SB_SIZE * 2 + 16) + 16);
    int filter_size;
    InterpFilterParams filter_params;
500
#if CONFIG_DUAL_FILTER
Yaowu Xu's avatar
Yaowu Xu committed
501 502 503 504
    InterpFilterParams filter_params_x =
        av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
    InterpFilterParams filter_params_y =
        av1_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
505 506 507 508 509 510
    if (interp_filter[0 + 2 * ref_idx] == MULTITAP_SHARP &&
        interp_filter[1 + 2 * ref_idx] == MULTITAP_SHARP) {
      // Avoid two directions both using 12-tap filter.
      // This will reduce hardware implementation cost.
      filter_params_y = av1_get_interp_filter_params(EIGHTTAP_SHARP);
    }
511
#endif
512

513 514 515 516 517 518 519 520 521 522 523
#if CONFIG_DUAL_FILTER
    if (filter_params_y.taps < filter_params_x.taps) {
      int intermediate_width;
      int temp_stride = max_intermediate_size;
      filter_params = filter_params_y;
      filter_size = filter_params_x.taps;
      intermediate_width =
          (((w - 1) * x_step_q4 + subpel_x_q4) >> SUBPEL_BITS) + filter_size;
      assert(intermediate_width <= max_intermediate_size);

      assert(filter_params.taps <= MAX_FILTER_TAP);
Angie Chiang's avatar
Angie Chiang committed
524

525 526 527
      av1_highbd_convolve_vert_facade(
          src8 - (filter_size / 2 - 1), src_stride, temp8, temp_stride,
          intermediate_width, h, filter_params, subpel_y_q4, y_step_q4, 0, bd);
528

529 530 531 532 533 534 535 536 537 538 539
      filter_params = filter_params_x;
      assert(filter_params.taps <= MAX_FILTER_TAP);

      av1_highbd_convolve_horiz_facade(
          temp8 + (filter_size / 2 - 1), temp_stride, dst8, dst_stride, w, h,
          filter_params, subpel_x_q4, x_step_q4, ref_idx, bd);
    } else
#endif  // CONFIG_DUAL_FILTER
    {
      int intermediate_height;
      int temp_stride = MAX_SB_SIZE;
540
#if CONFIG_DUAL_FILTER
541 542 543 544 545
      filter_params = filter_params_x;
      filter_size = filter_params_y.taps;
#else
      filter_params = av1_get_interp_filter_params(interp_filter);
      filter_size = filter_params.taps;
546
#endif
547 548 549 550 551 552 553 554 555 556 557 558 559 560 561
      intermediate_height =
          (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
      assert(intermediate_height <= max_intermediate_size);
      (void)max_intermediate_size;

      av1_highbd_convolve_horiz_facade(
          src8 - src_stride * (filter_size / 2 - 1), src_stride, temp8,
          temp_stride, w, intermediate_height, filter_params, subpel_x_q4,
          x_step_q4, 0, bd);

#if CONFIG_DUAL_FILTER
      filter_params = filter_params_y;
#endif
      filter_size = filter_params.taps;
      assert(filter_params.taps <= MAX_FILTER_TAP);
562

563 564 565 566
      av1_highbd_convolve_vert_facade(
          temp8 + temp_stride * (filter_size / 2 - 1), temp_stride, dst8,
          dst_stride, w, h, filter_params, subpel_y_q4, y_step_q4, ref_idx, bd);
    }
Angie Chiang's avatar
Angie Chiang committed
567
  }
568
}
Yaowu Xu's avatar
Yaowu Xu committed
569
#endif  // CONFIG_AOM_HIGHBITDEPTH