convolve_test.cc 65.5 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1 2 3 4 5 6 7 8 9 10
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

11
#include <assert.h>
#include <string.h>

#include "test/acm_random.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "third_party/googletest/src/include/gtest/gtest.h"

#include "./vpx_config.h"
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_filter.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
John Koleszar's avatar
John Koleszar committed
22 23

namespace {
24

hkuang's avatar
hkuang committed
25
// Largest block width/height, in pixels, that the reference filters below
// size their scratch buffers for.
static const unsigned int kMaxDimension = 64;
26

27 28 29 30 31
// Common signature of the convolve functions under test: a strided source
// and destination, one kernel pointer plus a stride argument per axis, and
// the block width and height.
typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride,
                             uint8_t *dst, ptrdiff_t dst_stride,
                             const int16_t *filter_x, int filter_x_stride,
                             const int16_t *filter_y, int filter_y_stride,
                             int w, int h);
John Koleszar's avatar
John Koleszar committed
32 33

struct ConvolveFunctions {
34 35
  ConvolveFunctions(ConvolveFunc h8, ConvolveFunc h8_avg,
                    ConvolveFunc v8, ConvolveFunc v8_avg,
36 37
                    ConvolveFunc hv8, ConvolveFunc hv8_avg,
                    int bd)
John Koleszar's avatar
John Koleszar committed
38
      : h8_(h8), v8_(v8), hv8_(hv8), h8_avg_(h8_avg), v8_avg_(v8_avg),
39
        hv8_avg_(hv8_avg), use_highbd_(bd) {}
John Koleszar's avatar
John Koleszar committed
40

41 42 43 44 45 46
  ConvolveFunc h8_;
  ConvolveFunc v8_;
  ConvolveFunc hv8_;
  ConvolveFunc h8_avg_;
  ConvolveFunc v8_avg_;
  ConvolveFunc hv8_avg_;
47
  int use_highbd_;  // 0 if high bitdepth not used, else the actual bit depth.
John Koleszar's avatar
John Koleszar committed
48 49
};

50
// Test parameter: (block width, block height, functions under test).
typedef std::tr1::tuple<int, int, const ConvolveFunctions *> ConvolveParam;
51

John Koleszar's avatar
John Koleszar committed
52 53 54
// Reference 8-tap subpixel filter, slightly modified to fit into this test.
// Filter taps are fixed point: VP9_FILTER_WEIGHT (1 << VP9_FILTER_SHIFT) is
// the value the taps of a kernel sum to, and half of it is added as the
// rounding term before shifting the accumulated sum back down.
#define VP9_FILTER_WEIGHT 128
#define VP9_FILTER_SHIFT 7
James Zern's avatar
James Zern committed
55
// Clamps x to the 8-bit pixel range [0, 255].
uint8_t clip_pixel(int x) {
  return x < 0 ? 0 :
         x > 255 ? 255 :
         x;
}

James Zern's avatar
James Zern committed
61 62 63 64 65 66 67 68
void filter_block2d_8_c(const uint8_t *src_ptr,
                        const unsigned int src_stride,
                        const int16_t *HFilter,
                        const int16_t *VFilter,
                        uint8_t *dst_ptr,
                        unsigned int dst_stride,
                        unsigned int output_width,
                        unsigned int output_height) {
John Koleszar's avatar
John Koleszar committed
69 70 71 72 73
  // Between passes, we use an intermediate buffer whose height is extended to
  // have enough horizontally filtered values as input for the vertical pass.
  // This buffer is allocated to be big enough for the largest block type we
  // support.
  const int kInterp_Extend = 4;
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172
  const unsigned int intermediate_height =
      (kInterp_Extend - 1) + output_height + kInterp_Extend;
  unsigned int i, j;

  // Size of intermediate_buffer is max_intermediate_height * filter_max_width,
  // where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
  //                                 + kInterp_Extend
  //                               = 3 + 16 + 4
  //                               = 23
  // and filter_max_width          = 16
  //
  uint8_t intermediate_buffer[71 * kMaxDimension];
  const int intermediate_next_stride = 1 - intermediate_height * output_width;

  // Horizontal pass (src -> transposed intermediate).
  uint8_t *output_ptr = intermediate_buffer;
  const int src_next_row_stride = src_stride - output_width;
  src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
  for (i = 0; i < intermediate_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      // Apply filter...
      const int temp = (src_ptr[0] * HFilter[0]) +
          (src_ptr[1] * HFilter[1]) +
          (src_ptr[2] * HFilter[2]) +
          (src_ptr[3] * HFilter[3]) +
          (src_ptr[4] * HFilter[4]) +
          (src_ptr[5] * HFilter[5]) +
          (src_ptr[6] * HFilter[6]) +
          (src_ptr[7] * HFilter[7]) +
          (VP9_FILTER_WEIGHT >> 1);  // Rounding

      // Normalize back to 0-255...
      *output_ptr = clip_pixel(temp >> VP9_FILTER_SHIFT);
      ++src_ptr;
      output_ptr += intermediate_height;
    }
    src_ptr += src_next_row_stride;
    output_ptr += intermediate_next_stride;
  }

  // Vertical pass (transposed intermediate -> dst).
  src_ptr = intermediate_buffer;
  const int dst_next_row_stride = dst_stride - output_width;
  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      // Apply filter...
      const int temp = (src_ptr[0] * VFilter[0]) +
          (src_ptr[1] * VFilter[1]) +
          (src_ptr[2] * VFilter[2]) +
          (src_ptr[3] * VFilter[3]) +
          (src_ptr[4] * VFilter[4]) +
          (src_ptr[5] * VFilter[5]) +
          (src_ptr[6] * VFilter[6]) +
          (src_ptr[7] * VFilter[7]) +
          (VP9_FILTER_WEIGHT >> 1);  // Rounding

      // Normalize back to 0-255...
      *dst_ptr++ = clip_pixel(temp >> VP9_FILTER_SHIFT);
      src_ptr += intermediate_height;
    }
    src_ptr += intermediate_next_stride;
    dst_ptr += dst_next_row_stride;
  }
}

// Averages an output_width x output_height block of src into output_ptr in
// place: every destination pixel becomes the rounded-up mean of its previous
// value and the co-located source pixel.
void block2d_average_c(uint8_t *src,
                       unsigned int src_stride,
                       uint8_t *output_ptr,
                       unsigned int output_stride,
                       unsigned int output_width,
                       unsigned int output_height) {
  uint8_t *dst_row = output_ptr;
  for (unsigned int row = 0; row < output_height; ++row) {
    const uint8_t *src_row = src + row * src_stride;
    for (unsigned int col = 0; col < output_width; ++col) {
      // +1 rounds halves upward before the divide-by-two shift.
      const int sum = dst_row[col] + src_row[col] + 1;
      dst_row[col] = (uint8_t)(sum >> 1);
    }
    dst_row += output_stride;
  }
}

// Reference for the averaging convolve functions: filters the source block
// with filter_block2d_8_c() into a scratch buffer and then averages that
// result into dst_ptr with block2d_average_c().
// output_width and output_height must not exceed kMaxDimension.
void filter_average_block2d_8_c(const uint8_t *src_ptr,
                                const unsigned int src_stride,
                                const int16_t *HFilter,
                                const int16_t *VFilter,
                                uint8_t *dst_ptr,
                                unsigned int dst_stride,
                                unsigned int output_width,
                                unsigned int output_height) {
  uint8_t tmp[kMaxDimension * kMaxDimension];

  assert(output_width <= kMaxDimension);
  assert(output_height <= kMaxDimension);
  // tmp is a kMaxDimension x kMaxDimension image, so its row stride is
  // kMaxDimension; using the constant keeps the stride and the buffer size
  // in step.
  filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, kMaxDimension,
                     output_width, output_height);
  block2d_average_c(tmp, kMaxDimension, dst_ptr, dst_stride,
                    output_width, output_height);
}

#if CONFIG_VP9_HIGHBITDEPTH
173 174 175 176 177 178 179 180 181
void highbd_filter_block2d_8_c(const uint16_t *src_ptr,
                               const unsigned int src_stride,
                               const int16_t *HFilter,
                               const int16_t *VFilter,
                               uint16_t *dst_ptr,
                               unsigned int dst_stride,
                               unsigned int output_width,
                               unsigned int output_height,
                               int bd) {
182 183 184 185 186
  // Between passes, we use an intermediate buffer whose height is extended to
  // have enough horizontally filtered values as input for the vertical pass.
  // This buffer is allocated to be big enough for the largest block type we
  // support.
  const int kInterp_Extend = 4;
John Koleszar's avatar
John Koleszar committed
187
  const unsigned int intermediate_height =
James Zern's avatar
James Zern committed
188
      (kInterp_Extend - 1) + output_height + kInterp_Extend;
John Koleszar's avatar
John Koleszar committed
189 190 191 192 193 194 195 196

  /* Size of intermediate_buffer is max_intermediate_height * filter_max_width,
   * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
   *                                 + kInterp_Extend
   *                               = 3 + 16 + 4
   *                               = 23
   * and filter_max_width = 16
   */
197
  uint16_t intermediate_buffer[71 * kMaxDimension];
John Koleszar's avatar
John Koleszar committed
198 199 200 201
  const int intermediate_next_stride = 1 - intermediate_height * output_width;

  // Horizontal pass (src -> transposed intermediate).
  {
202
    uint16_t *output_ptr = intermediate_buffer;
John Koleszar's avatar
John Koleszar committed
203 204 205 206 207 208
    const int src_next_row_stride = src_stride - output_width;
    unsigned int i, j;
    src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
    for (i = 0; i < intermediate_height; ++i) {
      for (j = 0; j < output_width; ++j) {
        // Apply filter...
James Zern's avatar
James Zern committed
209 210 211 212 213 214 215 216 217
        const int temp = (src_ptr[0] * HFilter[0]) +
                         (src_ptr[1] * HFilter[1]) +
                         (src_ptr[2] * HFilter[2]) +
                         (src_ptr[3] * HFilter[3]) +
                         (src_ptr[4] * HFilter[4]) +
                         (src_ptr[5] * HFilter[5]) +
                         (src_ptr[6] * HFilter[6]) +
                         (src_ptr[7] * HFilter[7]) +
                         (VP9_FILTER_WEIGHT >> 1);  // Rounding
John Koleszar's avatar
John Koleszar committed
218 219

        // Normalize back to 0-255...
220
        *output_ptr = clip_pixel_highbd(temp >> VP9_FILTER_SHIFT, bd);
John Koleszar's avatar
John Koleszar committed
221 222 223 224 225 226 227 228 229 230
        ++src_ptr;
        output_ptr += intermediate_height;
      }
      src_ptr += src_next_row_stride;
      output_ptr += intermediate_next_stride;
    }
  }

  // Vertical pass (transposed intermediate -> dst).
  {
231
    uint16_t *src_ptr = intermediate_buffer;
John Koleszar's avatar
John Koleszar committed
232 233 234 235 236
    const int dst_next_row_stride = dst_stride - output_width;
    unsigned int i, j;
    for (i = 0; i < output_height; ++i) {
      for (j = 0; j < output_width; ++j) {
        // Apply filter...
James Zern's avatar
James Zern committed
237 238 239 240 241 242 243 244 245
        const int temp = (src_ptr[0] * VFilter[0]) +
                         (src_ptr[1] * VFilter[1]) +
                         (src_ptr[2] * VFilter[2]) +
                         (src_ptr[3] * VFilter[3]) +
                         (src_ptr[4] * VFilter[4]) +
                         (src_ptr[5] * VFilter[5]) +
                         (src_ptr[6] * VFilter[6]) +
                         (src_ptr[7] * VFilter[7]) +
                         (VP9_FILTER_WEIGHT >> 1);  // Rounding
John Koleszar's avatar
John Koleszar committed
246 247

        // Normalize back to 0-255...
248
        *dst_ptr++ = clip_pixel_highbd(temp >> VP9_FILTER_SHIFT, bd);
John Koleszar's avatar
John Koleszar committed
249 250 251 252 253 254 255 256
        src_ptr += intermediate_height;
      }
      src_ptr += intermediate_next_stride;
      dst_ptr += dst_next_row_stride;
    }
  }
}

257 258 259 260 261 262 263
// High bit depth counterpart of block2d_average_c(): averages an
// output_width x output_height block of 16-bit samples from src into
// output_ptr in place, rounding halves upward. bd is accepted for signature
// symmetry with the other highbd helpers but is not used.
void highbd_block2d_average_c(uint16_t *src,
                              unsigned int src_stride,
                              uint16_t *output_ptr,
                              unsigned int output_stride,
                              unsigned int output_width,
                              unsigned int output_height,
                              int bd) {
  unsigned int i, j;
  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
    }
    output_ptr += output_stride;
  }
}

273 274 275 276 277 278 279 280 281
void highbd_filter_average_block2d_8_c(const uint16_t *src_ptr,
                                       const unsigned int src_stride,
                                       const int16_t *HFilter,
                                       const int16_t *VFilter,
                                       uint16_t *dst_ptr,
                                       unsigned int dst_stride,
                                       unsigned int output_width,
                                       unsigned int output_height,
                                       int bd) {
282 283 284 285
  uint16_t tmp[kMaxDimension * kMaxDimension];

  assert(output_width <= kMaxDimension);
  assert(output_height <= kMaxDimension);
286 287 288 289
  highbd_filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64,
                            output_width, output_height, bd);
  highbd_block2d_average_c(tmp, 64, dst_ptr, dst_stride,
                           output_width, output_height, bd);
John Koleszar's avatar
John Koleszar committed
290
}
291
#endif  // CONFIG_VP9_HIGHBITDEPTH
John Koleszar's avatar
John Koleszar committed
292

293
// Fixture for the parameterized convolve tests.
//
// It owns shared kOuterBlockSize x kOuterBlockSize input and output images
// (8-bit, plus 16-bit ones when CONFIG_VP9_HIGHBITDEPTH is set). The block
// under test sits roughly in the middle of the outer image; everything
// around it in the output is a guard region that the functions under test
// must leave untouched.
class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
 public:
  // Allocates the buffers shared by all tests of this test case.
  static void SetUpTestCase() {
    // Force input_ to be unaligned, output to be 16 byte aligned.
    input_ = reinterpret_cast<uint8_t*>(
        vpx_memalign(kDataAlignment, kInputBufferSize + 1)) + 1;
    output_ = reinterpret_cast<uint8_t*>(
        vpx_memalign(kDataAlignment, kOutputBufferSize));
#if CONFIG_VP9_HIGHBITDEPTH
    input16_ = reinterpret_cast<uint16_t*>(
        vpx_memalign(kDataAlignment,
                     (kInputBufferSize + 1) * sizeof(uint16_t))) + 1;
    output16_ = reinterpret_cast<uint16_t*>(
        vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
#endif
  }

  // Releases the shared buffers. The input pointers were advanced by one
  // element after allocation, so step back before freeing.
  static void TearDownTestCase() {
    vpx_free(input_ - 1);
    input_ = NULL;
    vpx_free(output_);
    output_ = NULL;
#if CONFIG_VP9_HIGHBITDEPTH
    vpx_free(input16_ - 1);
    input16_ = NULL;
    vpx_free(output16_);
    output16_ = NULL;
#endif
  }

 protected:
  static const int kDataAlignment = 16;
  static const int kOuterBlockSize = 256;
  static const int kInputStride = kOuterBlockSize;
  static const int kOutputStride = kOuterBlockSize;
  static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize;
  static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize;

  // Dimensions of the block under test, taken from the test parameter.
  int Width() const { return GET_PARAM(0); }
  int Height() const { return GET_PARAM(1); }
  // Column of the block's left edge: the centred position rounded up to a
  // multiple of kDataAlignment.
  int BorderLeft() const {
    const int center = (kOuterBlockSize - Width()) / 2;
    return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1);
  }
  // Row of the block's top edge.
  int BorderTop() const { return (kOuterBlockSize - Height()) / 2; }

  // Returns true if linear index i of the outer image lies outside the block
  // under test, i.e. in the guard region.
  bool IsIndexInBorder(int i) {
    return (i < BorderTop() * kOuterBlockSize ||
            i >= (BorderTop() + Height()) * kOuterBlockSize ||
            i % kOuterBlockSize < BorderLeft() ||
            i % kOuterBlockSize >= (BorderLeft() + Width()));
  }

  // Per-test setup: selects the functions under test, paints the guard
  // region of the output image(s) and fills the input image(s).
  virtual void SetUp() {
    UUT_ = GET_PARAM(2);
#if CONFIG_VP9_HIGHBITDEPTH
    // Largest sample value for the bit depth under test.
    if (UUT_->use_highbd_ != 0)
      mask_ = (1 << UUT_->use_highbd_) - 1;
    else
      mask_ = 255;
#endif
    /* Set up guard blocks for an inner block centered in the outer block */
    for (int i = 0; i < kOutputBufferSize; ++i) {
      if (IsIndexInBorder(i)) {
        output_[i] = 255;
#if CONFIG_VP9_HIGHBITDEPTH
        // Guard the 16-bit output as well so that stray writes by high bit
        // depth functions are detected by CheckGuardBlocks().
        output16_[i] = mask_;
#endif
      } else {
        output_[i] = 0;
#if CONFIG_VP9_HIGHBITDEPTH
        output16_[i] = 0;
#endif
      }
    }

    // Odd indices get the maximum sample value, even indices random data.
    ::libvpx_test::ACMRandom prng;
    for (int i = 0; i < kInputBufferSize; ++i) {
      if (i & 1) {
        input_[i] = 255;
#if CONFIG_VP9_HIGHBITDEPTH
        input16_[i] = mask_;
#endif
      } else {
        input_[i] = prng.Rand8Extremes();
#if CONFIG_VP9_HIGHBITDEPTH
        input16_[i] = prng.Rand16() & mask_;
#endif
      }
    }
  }

  // Overwrites the whole input image(s) with a single value.
  void SetConstantInput(int value) {
    memset(input_, value, kInputBufferSize);
#if CONFIG_VP9_HIGHBITDEPTH
    vpx_memset16(input16_, value, kInputBufferSize);
#endif
  }

  // Expects every guard pixel to still hold the value SetUp() painted.
  void CheckGuardBlocks() {
    for (int i = 0; i < kOutputBufferSize; ++i) {
      if (IsIndexInBorder(i)) {
        EXPECT_EQ(255, output_[i]);
#if CONFIG_VP9_HIGHBITDEPTH
        EXPECT_EQ(mask_, output16_[i]) << "i: " << i;
#endif
      }
    }
  }

  // Pointer to the top-left pixel of the input block, in the representation
  // the functions under test take: the 8-bit image, or the 16-bit image
  // passed through CONVERT_TO_BYTEPTR for high bit depth.
  uint8_t *input() const {
#if CONFIG_VP9_HIGHBITDEPTH
    if (UUT_->use_highbd_ == 0) {
      return input_ + BorderTop() * kOuterBlockSize + BorderLeft();
    } else {
      return CONVERT_TO_BYTEPTR(input16_ + BorderTop() * kOuterBlockSize +
                                BorderLeft());
    }
#else
    return input_ + BorderTop() * kOuterBlockSize + BorderLeft();
#endif
  }

  // Pointer to the top-left pixel of the output block; see input().
  uint8_t *output() const {
#if CONFIG_VP9_HIGHBITDEPTH
    if (UUT_->use_highbd_ == 0) {
      return output_ + BorderTop() * kOuterBlockSize + BorderLeft();
    } else {
      return CONVERT_TO_BYTEPTR(output16_ + BorderTop() * kOuterBlockSize +
                                BorderLeft());
    }
#else
    return output_ + BorderTop() * kOuterBlockSize + BorderLeft();
#endif
  }

  // Reads sample `index` from a pointer obtained from input()/output() (or a
  // reference buffer prepared the same way), honouring the bit depth.
  uint16_t lookup(uint8_t *list, int index) const {
#if CONFIG_VP9_HIGHBITDEPTH
    if (UUT_->use_highbd_ == 0) {
      return list[index];
    } else {
      return CONVERT_TO_SHORTPTR(list)[index];
    }
#else
    return list[index];
#endif
  }

  // Writes sample `index`; the counterpart of lookup(). In 8-bit mode val is
  // truncated to uint8_t.
  void assign_val(uint8_t *list, int index, uint16_t val) const {
#if CONFIG_VP9_HIGHBITDEPTH
    if (UUT_->use_highbd_ == 0) {
      list[index] = (uint8_t) val;
    } else {
      CONVERT_TO_SHORTPTR(list)[index] = val;
    }
#else
    list[index] = (uint8_t) val;
#endif
  }

  // Dispatches to the 8-bit or high bit depth averaging reference filter
  // depending on the functions under test.
  void wrapper_filter_average_block2d_8_c(const uint8_t *src_ptr,
                                          const unsigned int src_stride,
                                          const int16_t *HFilter,
                                          const int16_t *VFilter,
                                          uint8_t *dst_ptr,
                                          unsigned int dst_stride,
                                          unsigned int output_width,
                                          unsigned int output_height) {
#if CONFIG_VP9_HIGHBITDEPTH
    if (UUT_->use_highbd_ == 0) {
      filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
                                 dst_ptr, dst_stride, output_width,
                                 output_height);
    } else {
      highbd_filter_average_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr),
                                        src_stride, HFilter, VFilter,
                                        CONVERT_TO_SHORTPTR(dst_ptr),
                                        dst_stride, output_width, output_height,
                                        UUT_->use_highbd_);
    }
#else
    filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
                               dst_ptr, dst_stride, output_width,
                               output_height);
#endif
  }

  // Dispatches to the 8-bit or high bit depth reference filter depending on
  // the functions under test.
  void wrapper_filter_block2d_8_c(const uint8_t *src_ptr,
                                  const unsigned int src_stride,
                                  const int16_t *HFilter,
                                  const int16_t *VFilter,
                                  uint8_t *dst_ptr,
                                  unsigned int dst_stride,
                                  unsigned int output_width,
                                  unsigned int output_height) {
#if CONFIG_VP9_HIGHBITDEPTH
    if (UUT_->use_highbd_ == 0) {
      filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
                         dst_ptr, dst_stride, output_width, output_height);
    } else {
      highbd_filter_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
                                HFilter, VFilter,
                                CONVERT_TO_SHORTPTR(dst_ptr), dst_stride,
                                output_width, output_height, UUT_->use_highbd_);
    }
#else
    filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
                       dst_ptr, dst_stride, output_width, output_height);
#endif
  }

  const ConvolveFunctions* UUT_;  // Functions under test for this parameter.
  static uint8_t* input_;
  static uint8_t* output_;
#if CONFIG_VP9_HIGHBITDEPTH
  static uint16_t* input16_;
  static uint16_t* output16_;
  int mask_;  // Largest sample value at the bit depth under test.
#endif
};
502

503 504
// Storage for the 8-bit buffers shared by all ConvolveTest instances; they
// are allocated in SetUpTestCase() and freed in TearDownTestCase().
uint8_t* ConvolveTest::input_ = NULL;
uint8_t* ConvolveTest::output_ = NULL;
505 506 507 508
#if CONFIG_VP9_HIGHBITDEPTH
// 16-bit counterparts of the shared buffers, used for high bit depth runs.
uint16_t* ConvolveTest::input16_ = NULL;
uint16_t* ConvolveTest::output16_ = NULL;
#endif
John Koleszar's avatar
John Koleszar committed
509 510 511 512 513 514 515 516

// Sanity check: the guard region painted by SetUp() is intact before any
// function under test has been called.
TEST_P(ConvolveTest, GuardBlocks) {
  CheckGuardBlocks();
}

// With a kernel whose only non-zero tap is 128 at index 3, the horizontal
// filter must reproduce the input block exactly and leave the guard region
// untouched.
TEST_P(ConvolveTest, CopyHoriz) {
  uint8_t* const in = input();
  uint8_t* const out = output();
  DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};

  ASM_REGISTER_STATE_CHECK(
      UUT_->h8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
                Width(), Height()));

  CheckGuardBlocks();

  for (int y = 0; y < Height(); ++y) {
    for (int x = 0; x < Width(); ++x) {
      ASSERT_EQ(lookup(out, y * kOutputStride + x),
                lookup(in, y * kInputStride + x))
          << "(" << x << "," << y << ")";
    }
  }
}

// With a kernel whose only non-zero tap is 128 at index 3, the vertical
// filter must reproduce the input block exactly and leave the guard region
// untouched.
TEST_P(ConvolveTest, CopyVert) {
  uint8_t* const in = input();
  uint8_t* const out = output();
  DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};

  ASM_REGISTER_STATE_CHECK(
      UUT_->v8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
                Width(), Height()));

  CheckGuardBlocks();

  for (int y = 0; y < Height(); ++y) {
    for (int x = 0; x < Width(); ++x) {
      ASSERT_EQ(lookup(out, y * kOutputStride + x),
                lookup(in, y * kInputStride + x))
          << "(" << x << "," << y << ")";
    }
  }
}

// With a kernel whose only non-zero tap is 128 at index 3 on both axes, the
// 2-D filter must reproduce the input block exactly and leave the guard
// region untouched.
TEST_P(ConvolveTest, Copy2D) {
  uint8_t* const in = input();
  uint8_t* const out = output();
  DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};

  ASM_REGISTER_STATE_CHECK(
      UUT_->hv8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
                 Width(), Height()));

  CheckGuardBlocks();

  for (int y = 0; y < Height(); ++y) {
    for (int x = 0; x < Width(); ++x) {
      ASSERT_EQ(lookup(out, y * kOutputStride + x),
                lookup(in, y * kInputStride + x))
          << "(" << x << "," << y << ")";
    }
  }
}

568
// Number of filter banks the tests iterate over; each index is cast to
// INTERP_FILTER and passed to vp9_get_interp_kernel().
const int kNumFilterBanks = 4;
569 570 571 572
// Number of kernels the tests index within each filter bank.
const int kNumFilters = 16;

// Checks, for every kernel in every bank, that the taps sum to exactly 128
// and that the pairwise partial sums, accumulated in the order
// p0, p3, p1, p2, never exceed 128.
TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
  for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
    const InterpKernel *filters =
        vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank));
    for (int i = 0; i < kNumFilters; i++) {
      // Sums of adjacent tap pairs.
      const int p0 = filters[i][0] + filters[i][1];
      const int p1 = filters[i][2] + filters[i][3];
      const int p2 = filters[i][4] + filters[i][5];
      const int p3 = filters[i][6] + filters[i][7];
      EXPECT_LE(p0, 128);
      EXPECT_LE(p1, 128);
      EXPECT_LE(p2, 128);
      EXPECT_LE(p3, 128);
      EXPECT_LE(p0 + p3, 128);
      EXPECT_LE(p0 + p3 + p1, 128);
      EXPECT_LE(p0 + p3 + p1 + p2, 128);
      EXPECT_EQ(p0 + p1 + p2 + p3, 128);
    }
  }
}
591

592 593
// All-zero kernel. MatchesReferenceSubpixelFilter passes it as the x kernel
// to v8_ and as the y kernel to h8_.
const int16_t kInvalidFilter[8] = { 0 };

John Koleszar's avatar
John Koleszar committed
594 595 596
// Compares the functions under test against the reference filter for every
// (filter bank, x kernel, y kernel) combination.
TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
  uint8_t* const in = input();
  uint8_t* const out = output();
#if CONFIG_VP9_HIGHBITDEPTH
  // Only one of the two reference buffers is used, chosen by bit depth.
  uint8_t ref8[kOutputStride * kMaxDimension];
  uint16_t ref16[kOutputStride * kMaxDimension];
  uint8_t* ref;
  if (UUT_->use_highbd_ == 0) {
    ref = ref8;
  } else {
    ref = CONVERT_TO_BYTEPTR(ref16);
  }
#else
  uint8_t ref[kOutputStride * kMaxDimension];
#endif

  for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
    const InterpKernel *filters =
        vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank));
    const InterpKernel *const eighttap_smooth =
        vp9_get_interp_kernel(EIGHTTAP_SMOOTH);

    for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
      for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
        wrapper_filter_block2d_8_c(in, kInputStride,
                                   filters[filter_x], filters[filter_y],
                                   ref, kOutputStride,
                                   Width(), Height());

        // The 2-D function is used for the smooth bank and whenever both
        // kernel indices are non-zero; otherwise the 1-D function for the
        // non-zero axis is used and the other axis gets kInvalidFilter.
        if (filters == eighttap_smooth || (filter_x && filter_y)) {
          ASM_REGISTER_STATE_CHECK(
              UUT_->hv8_(in, kInputStride, out, kOutputStride,
                         filters[filter_x], 16, filters[filter_y], 16,
                         Width(), Height()));
        } else if (filter_y) {
          ASM_REGISTER_STATE_CHECK(
              UUT_->v8_(in, kInputStride, out, kOutputStride,
                        kInvalidFilter, 16, filters[filter_y], 16,
                        Width(), Height()));
        } else {
          ASM_REGISTER_STATE_CHECK(
              UUT_->h8_(in, kInputStride, out, kOutputStride,
                        filters[filter_x], 16, kInvalidFilter, 16,
                        Width(), Height()));
        }

        CheckGuardBlocks();

        for (int y = 0; y < Height(); ++y) {
          for (int x = 0; x < Width(); ++x) {
            ASSERT_EQ(lookup(ref, y * kOutputStride + x),
                      lookup(out, y * kOutputStride + x))
                << "mismatch at (" << x << "," << y << "), "
                << "filters (" << filter_bank << ","
                << filter_x << "," << filter_y << ")";
          }
        }
      }
    }
  }
}

// Compares the averaging functions under test against the averaging
// reference filter for every (filter bank, x kernel, y kernel) combination.
// Both the output block and the reference start from the same random data,
// and each iteration averages into the result of the previous one.
TEST_P(ConvolveTest, MatchesReferenceAveragingSubpixelFilter) {
  uint8_t* const in = input();
  uint8_t* const out = output();
#if CONFIG_VP9_HIGHBITDEPTH
  // Only one of the two reference buffers is used, chosen by bit depth.
  uint8_t ref8[kOutputStride * kMaxDimension];
  uint16_t ref16[kOutputStride * kMaxDimension];
  uint8_t* ref;
  if (UUT_->use_highbd_ == 0) {
    ref = ref8;
  } else {
    ref = CONVERT_TO_BYTEPTR(ref16);
  }
#else
  uint8_t ref[kOutputStride * kMaxDimension];
#endif

  // Populate ref and out with some random data
  ::libvpx_test::ACMRandom prng;
  for (int y = 0; y < Height(); ++y) {
    for (int x = 0; x < Width(); ++x) {
      uint16_t r;
#if CONFIG_VP9_HIGHBITDEPTH
      if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
        r = prng.Rand8Extremes();
      } else {
        r = prng.Rand16() & mask_;
      }
#else
      r = prng.Rand8Extremes();
#endif

      assign_val(out, y * kOutputStride + x, r);
      assign_val(ref, y * kOutputStride + x, r);
    }
  }

  for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
    const InterpKernel *filters =
        vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank));
    const InterpKernel *const eighttap_smooth =
        vp9_get_interp_kernel(EIGHTTAP_SMOOTH);

    for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
      for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
        wrapper_filter_average_block2d_8_c(in, kInputStride,
                                           filters[filter_x], filters[filter_y],
                                           ref, kOutputStride,
                                           Width(), Height());

        // NOTE(review): unlike MatchesReferenceSubpixelFilter, the 1-D calls
        // below pass the real kernel for the other axis rather than
        // kInvalidFilter -- confirm whether that difference is intended.
        if (filters == eighttap_smooth || (filter_x && filter_y)) {
          ASM_REGISTER_STATE_CHECK(
              UUT_->hv8_avg_(in, kInputStride, out, kOutputStride,
                             filters[filter_x], 16, filters[filter_y], 16,
                             Width(), Height()));
        } else if (filter_y) {
          ASM_REGISTER_STATE_CHECK(
              UUT_->v8_avg_(in, kInputStride, out, kOutputStride,
                            filters[filter_x], 16, filters[filter_y], 16,
                            Width(), Height()));
        } else {
          ASM_REGISTER_STATE_CHECK(
              UUT_->h8_avg_(in, kInputStride, out, kOutputStride,
                            filters[filter_x], 16, filters[filter_y], 16,
                            Width(), Height()));
        }

        CheckGuardBlocks();

        for (int y = 0; y < Height(); ++y) {
          for (int x = 0; x < Width(); ++x) {
            ASSERT_EQ(lookup(ref, y * kOutputStride + x),
                      lookup(out, y * kOutputStride + x))
                << "mismatch at (" << x << "," << y << "), "
                << "filters (" << filter_bank << ","
                << filter_x << "," << filter_y << ")";
          }
        }
      }
    }
  }
}

732 733 734 735 736 737 738
/* Checks the unaveraged h8_/v8_/hv8_ functions against the C reference
 * (wrapper_filter_block2d_8_c) using 8x8 input patterns whose samples are
 * each either zero or the maximum sample value, for every filter bank and
 * every pair of sub-pixel filters. */
TEST_P(ConvolveTest, FilterExtremes) {
  uint8_t *const in = input();
  uint8_t *const out = output();
#if CONFIG_VP9_HIGHBITDEPTH
  uint8_t ref8[kOutputStride * kMaxDimension];
  uint16_t ref16[kOutputStride * kMaxDimension];
  uint8_t *ref;
  // Pick the reference buffer matching the sample width under test.
  if (UUT_->use_highbd_ == 0) {
    ref = ref8;
  } else {
    ref = CONVERT_TO_BYTEPTR(ref16);
  }
#else
  uint8_t ref[kOutputStride * kMaxDimension];
#endif

  // Populate ref and out with some random data
  ::libvpx_test::ACMRandom prng;
  for (int y = 0; y < Height(); ++y) {
    for (int x = 0; x < Width(); ++x) {
      uint16_t r;
#if CONFIG_VP9_HIGHBITDEPTH
      if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
        r = prng.Rand8Extremes();
      } else {
        r = prng.Rand16() & mask_;
      }
#else
      r = prng.Rand8Extremes();
#endif
      assign_val(out, y * kOutputStride + x, r);
      assign_val(ref, y * kOutputStride + x, r);
    }
  }

  // axis == 0 selects the bit of seed_val by column (x), axis == 1 by row (y).
  for (int axis = 0; axis < 2; axis++) {
    int seed_val = 0;
    while (seed_val < 256) {
      // Write an 8x8 pattern of 0 / max samples into the input. The input
      // buffer is addressed with the input stride. The seed_val updates below
      // are order-sensitive and are kept exactly as in the original.
      for (int y = 0; y < 8; ++y) {
        for (int x = 0; x < 8; ++x) {
#if CONFIG_VP9_HIGHBITDEPTH
          assign_val(in, y * kInputStride + x - SUBPEL_TAPS / 2 + 1,
                     ((seed_val >> (axis ? y : x)) & 1) * mask_);
#else
          assign_val(in, y * kInputStride + x - SUBPEL_TAPS / 2 + 1,
                     ((seed_val >> (axis ? y : x)) & 1) * 255);
#endif
          if (axis) seed_val++;
        }
        if (axis)
          seed_val -= 8;
        else
          seed_val++;
      }
      if (axis) seed_val += 8;

      for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
        const InterpKernel *filters =
            vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank));
        const InterpKernel *const eighttap_smooth =
            vp9_get_interp_kernel(EIGHTTAP_SMOOTH);
        for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
          for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
            // Reference result.
            wrapper_filter_block2d_8_c(in, kInputStride,
                                       filters[filter_x], filters[filter_y],
                                       ref, kOutputStride,
                                       Width(), Height());
            // The 2-D function is used for the smooth bank or when both
            // filter indices are non-zero; otherwise the 1-D function for
            // the single active direction is used.
            if (filters == eighttap_smooth || (filter_x && filter_y))
              ASM_REGISTER_STATE_CHECK(
                  UUT_->hv8_(in, kInputStride, out, kOutputStride,
                             filters[filter_x], 16, filters[filter_y], 16,
                             Width(), Height()));
            else if (filter_y)
              ASM_REGISTER_STATE_CHECK(
                  UUT_->v8_(in, kInputStride, out, kOutputStride,
                            kInvalidFilter, 16, filters[filter_y], 16,
                            Width(), Height()));
            else
              ASM_REGISTER_STATE_CHECK(
                  UUT_->h8_(in, kInputStride, out, kOutputStride,
                            filters[filter_x], 16, kInvalidFilter, 16,
                            Width(), Height()));

            for (int y = 0; y < Height(); ++y)
              for (int x = 0; x < Width(); ++x)
                ASSERT_EQ(lookup(ref, y * kOutputStride + x),
                          lookup(out, y * kOutputStride + x))
                    << "mismatch at (" << x << "," << y << "), "
                    << "filters (" << filter_bank << ","
                    << filter_x << "," << filter_y << ")";
          }
        }
      }
    }
  }
}

829
/* Sixteen 8-tap filters, each with a single non-zero tap of 128 (omitted
 * trailing taps are zero-initialized). The active tap sits at index 7 in
 * row 0 and moves one position to the left with each following row,
 * reaching index 0 in row 7; rows 8-15 repeat rows 0-7. */
DECLARE_ALIGNED(256, const int16_t, kChangeFilters[16][8]) = {
    { 0,   0,   0,   0,   0,   0,   0, 128},
    { 0,   0,   0,   0,   0,   0, 128},
    { 0,   0,   0,   0,   0, 128},
    { 0,   0,   0,   0, 128},
    { 0,   0,   0, 128},
    { 0,   0, 128},
    { 0, 128},
    { 128},
    { 0,   0,   0,   0,   0,   0,   0, 128},
    { 0,   0,   0,   0,   0,   0, 128},
    { 0,   0,   0,   0,   0, 128},
    { 0,   0,   0,   0, 128},
    { 0,   0,   0, 128},
    { 0,   0, 128},
    { 0, 128},
    { 128}
};

848
/* This test exercises the horizontal and vertical filter functions. */
849 850 851
TEST_P(ConvolveTest, ChangeFilterWorks) {
  uint8_t* const in = input();
  uint8_t* const out = output();
852 853 854 855 856 857 858 859 860 861 862

  /* Assume that the first input sample is at the 8/16th position. */
  const int kInitialSubPelOffset = 8;

  /* Filters are 8-tap, so the first filter tap will be applied to the pixel
   * at position -3 with respect to the current filtering position. Since
   * kInitialSubPelOffset is set to 8, we first select sub-pixel filter 8,
   * which is non-zero only in the last tap. So, applying the filter at the
   * current input position will result in an output equal to the pixel at
   * offset +4 (-3 + 7) with respect to the current filtering position.
   */
863
  const int kPixelSelected = 4;
John Koleszar's avatar
John Koleszar committed
864

865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880
  /* Assume that each output pixel requires us to step on by 17/16th pixels in
   * the input.
   */
  const int kInputPixelStep = 17;

  /* The filters are setup in such a way that the expected output produces
   * sets of 8 identical output samples. As the filter position moves to the
   * next 1/16th pixel position the only active (=128) filter tap moves one
   * position to the left, resulting in the same input pixel being replicated
   * in to the output for 8 consecutive samples. After each set of 8 positions
   * the filters select a different input pixel. kFilterPeriodAdjust below
   * computes which input pixel is written to the output for a specified
   * x or y position.
   */

  /* Test the horizontal filter. */
881 882 883 884
  ASM_REGISTER_STATE_CHECK(
      UUT_->h8_(in, kInputStride, out, kOutputStride,
                kChangeFilters[kInitialSubPelOffset],
                kInputPixelStep, NULL, 0, Width(), Height()));
John Koleszar's avatar
John Koleszar committed
885

886
  for (int x = 0; x < Width(); ++x) {
887
    const int kFilterPeriodAdjust = (x >> 3) << 3;
888 889 890 891
    const int ref_x =
        kPixelSelected + ((kInitialSubPelOffset
            + kFilterPeriodAdjust * kInputPixelStep)
                          >> SUBPEL_BITS);
892 893
    ASSERT_EQ(lookup(in, ref_x), lookup(out, x))
        << "x == " << x << "width = " << Width();
John Koleszar's avatar
John Koleszar committed
894 895
  }

896
  /* Test the vertical filter. */
897 898 899 900
  ASM_REGISTER_STATE_CHECK(
      UUT_->v8_(in, kInputStride, out, kOutputStride,
                NULL, 0, kChangeFilters[kInitialSubPelOffset],
                kInputPixelStep, Width(), Height()));
John Koleszar's avatar
John Koleszar committed
901

902
  for (int y = 0; y < Height(); ++y) {
903
    const int kFilterPeriodAdjust = (y >> 3) << 3;
904 905 906 907
    const int ref_y =
        kPixelSelected + ((kInitialSubPelOffset
            + kFilterPeriodAdjust * kInputPixelStep)
                          >> SUBPEL_BITS);
908 909
    ASSERT_EQ(lookup(in, ref_y * kInputStride), lookup(out, y * kInputStride))
        << "y == " << y;
John Koleszar's avatar
John Koleszar committed
910 911
  }

912
  /* Test the horizontal and vertical filters in combination. */
913 914 915 916 917
  ASM_REGISTER_STATE_CHECK(
      UUT_->hv8_(in, kInputStride, out, kOutputStride,
                 kChangeFilters[kInitialSubPelOffset], kInputPixelStep,
                 kChangeFilters[kInitialSubPelOffset], kInputPixelStep,
                 Width(), Height()));
John Koleszar's avatar
John Koleszar committed
918

919
  for (int y = 0; y < Height(); ++y) {
920
    const int kFilterPeriodAdjustY = (y >> 3) << 3;
921 922 923 924
    const int ref_y =
        kPixelSelected + ((kInitialSubPelOffset
            + kFilterPeriodAdjustY * kInputPixelStep)
                          >> SUBPEL_BITS);
925
    for (int x = 0; x < Width(); ++x) {
926
      const int kFilterPeriodAdjustX = (x >> 3) << 3;
927 928 929 930
      const int ref_x =
          kPixelSelected + ((kInitialSubPelOffset
              + kFilterPeriodAdjustX * kInputPixelStep)
                            >> SUBPEL_BITS);
931

932 933
      ASSERT_EQ(lookup(in, ref_y * kInputStride + ref_x),
                lookup(out, y * kOutputStride + x))
John Koleszar's avatar
John Koleszar committed
934 935 936 937 938
          << "x == " << x << ", y == " << y;
    }
  }
}

939 940 941 942 943
/* This test exercises that enough rows and columns are filtered with every
   possible initial fractional positions and scaling steps. The input is set
   to a constant value, and the test expects every output sample to equal the
   corresponding input sample for each (frac, step) combination. */
TEST_P(ConvolveTest, CheckScalingFiltering) {
  uint8_t *const in = input();
  uint8_t *const out = output();
  const InterpKernel *const eighttap = vp9_get_interp_kernel(EIGHTTAP);

  SetConstantInput(127);

  for (int frac = 0; frac < 16; ++frac) {
    for (int step = 1; step <= 32; ++step) {
      /* Test the horizontal and vertical filters in combination. */
      ASM_REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,
                                          eighttap[frac], step,
                                          eighttap[frac], step,
                                          Width(), Height()));

      CheckGuardBlocks();

      for (int y = 0; y < Height(); ++y) {
        for (int x = 0; x < Width(); ++x) {
          ASSERT_EQ(lookup(in, y * kInputStride + x),
                    lookup(out, y * kOutputStride + x))
              << "x == " << x << ", y == " << y
              << ", frac == " << frac << ", step == " << step;
        }
      }
    }
  }
}

John Koleszar's avatar
John Koleszar committed
970 971
using std::tr1::make_tuple;

972
#if CONFIG_VP9_HIGHBITDEPTH
973 974 975 976 977 978 979 980
#if HAVE_SSE2 && ARCH_X86_64
void wrap_convolve8_horiz_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
                                 uint8_t *dst, ptrdiff_t dst_stride,
                                 const int16_t *filter_x,
                                 int filter_x_stride,
                                 const int16_t *filter_y,
                                 int filter_y_stride,
                                 int w, int h) {
981 982 983
  vp9_highbd_convolve8_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
                                  filter_x_stride, filter_y, filter_y_stride,
                                  w, h, 8);
984 985 986 987 988 989 990 991 992
}

void wrap_convolve8_avg_horiz_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
                                     uint8_t *dst, ptrdiff_t dst_stride,
                                     const int16_t *filter_x,
                                     int filter_x_stride,
                                     const int16_t *filter_y,
                                     int filter_y_stride,
                                     int w, int h) {
993 994 995
  vp9_highbd_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride,
                                      filter_x, filter_x_stride,
                                      filter_y, filter_y_stride, w, h, 8);
996 997 998 999 1000 1001 1002 1003 1004
}

void wrap_convolve8_vert_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
                                uint8_t *dst, ptrdiff_t dst_stride,
                                const int16_t *filter_x,
                                int filter_x_stride,
                                const int16_t *filter_y,
                                int filter_y_stride,
                                int w, int h) {
1005 1006 1007
  vp9_highbd_convolve8_vert_sse2(src, src_stride, dst, dst_stride,
                                 filter_x, filter_x_stride,
                                 filter_y, filter_y_stride, w, h, 8);
1008 1009 1010 1011 1012 1013 1014 1015 1016
}

void wrap_convolve8_avg_vert_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
                                    uint8_t *dst, ptrdiff_t dst_stride,
                                    const int16_t *filter_x,
                                    int filter_x_stride,
                                    const int16_t *filter_y,
                                    int filter_y_stride,
                                    int w, int h) {
1017 1018 1019
  vp9_highbd_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride,
                                     filter_x, filter_x_stride,
                                     filter_y, filter_y_stride, w, h, 8);
1020 1021 1022 1023 1024 1025 1026 1027 1028
}

void wrap_convolve8_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
                           const int16_t *filter_x,
                           int filter_x_stride,
                           const int16_t *filter_y,
                           int filter_y_stride,
                           int w, int h) {
1029 1030 1031
  vp9_highbd_convolve8_sse2(src, src_stride, dst, dst_stride,
                            filter_x, filter_x_stride,
                            filter_y, filter_y_stride, w, h, 8);
1032 1033 1034 1035 1036 1037 1038 1039 1040
}

void wrap_convolve8_avg_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
                               const int16_t *filter_x,
                               int filter_x_stride,
                               const int16_t *filter_y,
                               int filter_y_stride,
                               int w, int h) {
1041 1042 1043
  vp9_highbd_convolve8_avg_sse2(src, src_stride, dst, dst_stride,
                                filter_x, filter_x_stride,
                                filter_y, filter_y_stride, w, h, 8);
1044 1045 1046 1047 1048 1049 1050 1051 1052
}

void wrap_convolve8_horiz_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
                                  uint8_t *dst, ptrdiff_t dst_stride,
                                  const int16_t *filter_x,
                                  int filter_x_stride,
                                  const int16_t *filter_y,
                                  int filter_y_stride,
                                  int w, int h) {
1053 1054 1055
  vp9_highbd_convolve8_horiz_sse2(src, src_stride, dst, dst_stride,
                                  filter_x, filter_x_stride,
                                  filter_y, filter_y_stride, w, h, 10);
1056 1057 1058 1059 1060 1061 1062 1063 1064
}

void wrap_convolve8_avg_horiz_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
                                      uint8_t *dst, ptrdiff_t dst_stride,
                                      const int16_t *filter_x,
                                      int filter_x_stride,
                                      const int16_t *filter_y,
                                      int filter_y_stride,
                                      int w, int h) {
1065 1066 1067
  vp9_highbd_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride,
                                      filter_x, filter_x_stride,
                                      filter_y, filter_y_stride, w, h, 10);
1068 1069 1070 1071 1072 1073 1074 1075 1076
}

void wrap_convolve8_vert_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
                                 uint8_t *dst, ptrdiff_t dst_stride,
                                 const int16_t *filter_x,
                                 int filter_x_stride,
                                 const int16_t *filter_y,
                                 int filter_y_stride,
                                 int w, int h) {
1077 1078 1079
  vp9_highbd_convolve8_vert_sse2(src, src_stride, dst, dst_stride,
                                 filter_x, filter_x_stride,
                                 filter_y, filter_y_stride, w, h, 10);
1080 1081 1082 1083 1084 1085 1086 1087 1088
}

void wrap_convolve8_avg_vert_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
                                     uint8_t *dst, ptrdiff_t dst_stride,
                                     const int16_t *filter_x,
                                     int filter_x_stride,
                                     const int16_t *filter_y,
                                     int filter_y_stride,
                                     int w, int h) {
1089 1090 1091
  vp9_highbd_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride,
                                     filter_x, filter_x_stride,
                                     filter_y, filter_y_stride, w, h, 10);
1092 1093 1094 1095 1096 1097 1098 1099 1100
}

void wrap_convolve8_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
                            uint8_t *dst, ptrdiff_t dst_stride,
                            const int16_t *filter_x,
                            int filter_x_stride,
                            const int16_t *filter_y,
                            int filter_y_stride,
                            int w, int h) {
1101 1102 1103
  vp9_highbd_convolve8_sse2(src, src_stride, dst, dst_stride,
                            filter_x, filter_x_stride,
                            filter_y, filter_y_stride, w, h, 10);
1104 1105 1106 1107 1108 1109 1110 1111 1112
}

void wrap_convolve8_avg_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
                                uint8_t *dst, ptrdiff_t dst_stride,
                                const int16_t *filter_x,
                                int filter_x_stride,
                                const int16_t *filter_y,
                                int filter_y_stride,
                                int w, int h) {
1113 1114 1115
  vp9_highbd_convolve8_avg_sse2(src, src_stride, dst, dst_stride,
                                filter_x, filter_x_stride,
                                filter_y, filter_y_stride, w, h, 10);
1116 1117 1118 1119 1120 1121 1122 1123 1124
}

void wrap_convolve8_horiz_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
                                  uint8_t *dst, ptrdiff_t dst_stride,
                                  const int16_t *filter_x,
                                  int filter_x_stride,
                                  const int16_t *filter_y,
                                  int filter_y_stride,
                                  int w, int h) {
1125 1126 1127
  vp9_highbd_convolve8_horiz_sse2(src, src_stride, dst, dst_stride,
                                  filter_x, filter_x_stride,
                                  filter_y, filter_y_stride, w, h, 12);
1128 1129 1130 1131 1132 1133 1134 1135 1136
}

void wrap_convolve8_avg_horiz_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
                                      uint8_t *dst, ptrdiff_t dst_stride,
                                      const int16_t *filter_x,
                                      int filter_x_stride,
                                      const int16_t *filter_y,
                                      int filter_y_stride,
                                      int w, int h) {
1137 1138 1139
  vp9_highbd_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride,
                                      filter_x, filter_x_stride,
                                      filter_y, filter_y_stride, w, h, 12);
1140 1141 1142 1143 1144 1145 1146 1147 1148
}

void wrap_convolve8_vert_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
                                 uint8_t *dst, ptrdiff_t dst_stride,
                                 const int16_t *filter_x,
                                 int filter_x_stride,
                                 const int16_t *filter_y,
                                 int filter_y_stride,
                                 int w, int h) {
1149 1150 1151
  vp9_highbd_convolve8_vert_sse2(src, src_stride, dst, dst_stride,
                                 filter_x, filter_x_stride,
                                 filter_y, filter_y_stride, w, h, 12);
1152 1153 1154 1155 1156 1157 1158 1159 1160
}

void wrap_convolve8_avg_vert_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
                                     uint8_t *dst, ptrdiff_t dst_stride,
                                     const int16_t *filter_x,
                                     int filter_x_stride,
                                     const int16_t *filter_y,
                                     int filter_y_stride,
                                     int w, int h) {
1161 1162 1163
  vp9_highbd_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride,
                                     filter_x, filter_x_stride,
                                     filter_y, filter_y_stride, w, h, 12);
1164 1165 1166 1167 1168 1169 1170 1171 1172
}

void wrap_convolve8_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
                            uint8_t *dst, ptrdiff_t dst_stride,
                            const int16_t *filter_x,
                            int filter_x_stride,
                            const int16_t *filter_y,
                            int filter_y_stride,
                            int w, int h) {
1173 1174 1175
  vp9_highbd_convolve8_sse2(src, src_stride, dst, dst_stride,
                            filter_x, filter_x_stride,
                            filter_y, filter_y_stride, w, h, 12);
1176 1177 1178 1179 1180 1181 1182 1183 1184
}

void wrap_convolve8_avg_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
                                uint8_t *dst, ptrdiff_t dst_stride,
                                const int16_t *filter_x,
                                int filter_x_stride,
                                const int16_t *filter_y,
                                int filter_y_stride,
                                int w, int h) {
1185 1186 1187
  vp9_highbd_convolve8_avg_sse2(src, src_stride, dst, dst_stride,
                                filter_x, filter_x_stride,
                                filter_y, filter_y_stride, w, h, 12);
1188 1189 1190 1191 1192 1193 1194 1195 1196 1197
}
#endif  // HAVE_SSE2 && ARCH_X86_64

void wrap_convolve8_horiz_c_8(const uint8_t *src, ptrdiff_t src_stride,
                              uint8_t *dst, ptrdiff_t dst_stride,
                              const int16_t *filter_x,
                              int filter_x_stride,
                              const int16_t *filter_y,
                              int filter_y_stride,
                              int w, int h) {
1198 1199 1200
  vp9_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride,
                               filter_x, filter_x_stride,
                               filter_y, filter_y_stride, w, h, 8);
1201 1202 1203 1204 1205 1206 1207 1208 1209
}

void wrap_convolve8_avg_horiz_c_8(const uint8_t *src, ptrdiff_t src_stride,
                                  uint8_t *dst, ptrdiff_t dst_stride,
                                  const int16_t *filter_x,
                                  int filter_x_stride,
                                  const int16_t *filter_y,
                                  int filter_y_stride,
                                  int w, int h) {
1210 1211 1212
  vp9_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
                                   filter_x, filter_x_stride,
                                   filter_y, filter_y_stride, w, h, 8);
1213 1214 1215 1216 1217 1218 1219 1220 1221
}

void wrap_convolve8_vert_c_8(const uint8_t *src, ptrdiff_t src_stride,
                             uint8_t *dst, ptrdiff_t dst_stride,
                             const int16_t *filter_x,
                             int filter_x_stride,
                             const int16_t *filter_y,
                             int filter_y_stride,
                             int w, int h) {
1222 1223 1224
  vp9_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride,
                              filter_x, filter_x_stride,
                              filter_y, filter_y_stride, w, h, 8);
1225 1226 1227 1228 1229 1230 1231 1232 1233
}

void wrap_convolve8_avg_vert_c_8(const uint8_t *src, ptrdiff_t src_stride,
                                 uint8_t *dst, ptrdiff_t dst_stride,
                                 const int16_t *filter_x,
                                 int filter_x_stride,
                                 const int16_t *filter_y,
                                 int filter_y_stride,
                                 int w, int h) {
1234 1235 1236
  vp9_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
                                  filter_x, filter_x_stride,
                                  filter_y, filter_y_stride, w, h, 8);
1237 1238 1239 1240 1241