convolve_test.cc 47 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
John Koleszar's avatar
John Koleszar committed
3
 *
4 5 6 7 8 9
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
Johann's avatar
Johann committed
10
 */
John Koleszar's avatar
John Koleszar committed
11

12
#include <string.h>
13

14
#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
John Koleszar's avatar
John Koleszar committed
15

16 17
#include "./aom_config.h"
#include "./aom_dsp_rtcd.h"
18 19 20 21
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
22 23 24
#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/aom_filter.h"
#include "aom_mem/aom_mem.h"
25
#include "aom_ports/mem.h"
26
#include "aom_ports/aom_timer.h"
Angie Chiang's avatar
Angie Chiang committed
27
#include "av1/common/filter.h"
John Koleszar's avatar
John Koleszar committed
28 29

namespace {
30

31
static const unsigned int kMaxDimension = MAX_SB_SIZE;
32

33 34 35 36 37
typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride,
                             uint8_t *dst, ptrdiff_t dst_stride,
                             const int16_t *filter_x, int filter_x_stride,
                             const int16_t *filter_y, int filter_y_stride,
                             int w, int h);
John Koleszar's avatar
John Koleszar committed
38 39

struct ConvolveFunctions {
clang-format's avatar
clang-format committed
40 41 42 43 44 45
  ConvolveFunctions(ConvolveFunc copy, ConvolveFunc avg, ConvolveFunc h8,
                    ConvolveFunc h8_avg, ConvolveFunc v8, ConvolveFunc v8_avg,
                    ConvolveFunc hv8, ConvolveFunc hv8_avg, ConvolveFunc sh8,
                    ConvolveFunc sh8_avg, ConvolveFunc sv8,
                    ConvolveFunc sv8_avg, ConvolveFunc shv8,
                    ConvolveFunc shv8_avg, int bd)
46
      : copy_(copy), avg_(avg), h8_(h8), v8_(v8), hv8_(hv8), h8_avg_(h8_avg),
47 48 49
        v8_avg_(v8_avg), hv8_avg_(hv8_avg), sh8_(sh8), sv8_(sv8), shv8_(shv8),
        sh8_avg_(sh8_avg), sv8_avg_(sv8_avg), shv8_avg_(shv8_avg),
        use_highbd_(bd) {}
John Koleszar's avatar
John Koleszar committed
50

51 52
  ConvolveFunc copy_;
  ConvolveFunc avg_;
53 54 55 56 57 58
  ConvolveFunc h8_;
  ConvolveFunc v8_;
  ConvolveFunc hv8_;
  ConvolveFunc h8_avg_;
  ConvolveFunc v8_avg_;
  ConvolveFunc hv8_avg_;
clang-format's avatar
clang-format committed
59 60 61 62 63 64
  ConvolveFunc sh8_;       // scaled horiz
  ConvolveFunc sv8_;       // scaled vert
  ConvolveFunc shv8_;      // scaled horiz/vert
  ConvolveFunc sh8_avg_;   // scaled avg horiz
  ConvolveFunc sv8_avg_;   // scaled avg vert
  ConvolveFunc shv8_avg_;  // scaled avg horiz/vert
65
  int use_highbd_;  // 0 if high bitdepth not used, else the actual bit depth.
John Koleszar's avatar
John Koleszar committed
66 67
};

68
typedef std::tr1::tuple<int, int, const ConvolveFunctions *> ConvolveParam;
69

70
#define ALL_SIZES_64(convolve_fn)                                         \
clang-format's avatar
clang-format committed
71 72 73 74 75 76 77
  make_tuple(4, 4, &convolve_fn), make_tuple(8, 4, &convolve_fn),         \
      make_tuple(4, 8, &convolve_fn), make_tuple(8, 8, &convolve_fn),     \
      make_tuple(16, 8, &convolve_fn), make_tuple(8, 16, &convolve_fn),   \
      make_tuple(16, 16, &convolve_fn), make_tuple(32, 16, &convolve_fn), \
      make_tuple(16, 32, &convolve_fn), make_tuple(32, 32, &convolve_fn), \
      make_tuple(64, 32, &convolve_fn), make_tuple(32, 64, &convolve_fn), \
      make_tuple(64, 64, &convolve_fn)
78 79 80 81 82 83 84

#if CONFIG_AV1 && CONFIG_EXT_PARTITION
#define ALL_SIZES(convolve_fn)                                          \
  make_tuple(128, 64, &convolve_fn), make_tuple(64, 128, &convolve_fn), \
      make_tuple(128, 128, &convolve_fn), ALL_SIZES_64(convolve_fn)
#else
#define ALL_SIZES ALL_SIZES_64
85
#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
86

John Koleszar's avatar
John Koleszar committed
87
// Reference 8-tap subpixel filter, slightly modified to fit into this test.
88 89
#define AV1_FILTER_WEIGHT 128
#define AV1_FILTER_SHIFT 7
clang-format's avatar
clang-format committed
90
uint8_t clip_pixel(int x) { return x < 0 ? 0 : x > 255 ? 255 : x; }
John Koleszar's avatar
John Koleszar committed
91

Yaowu Xu's avatar
Yaowu Xu committed
92
void filter_block2d_8_c(const uint8_t *src_ptr, unsigned int src_stride,
clang-format's avatar
clang-format committed
93 94 95
                        const int16_t *HFilter, const int16_t *VFilter,
                        uint8_t *dst_ptr, unsigned int dst_stride,
                        unsigned int output_width, unsigned int output_height) {
John Koleszar's avatar
John Koleszar committed
96 97 98 99 100
  // Between passes, we use an intermediate buffer whose height is extended to
  // have enough horizontally filtered values as input for the vertical pass.
  // This buffer is allocated to be big enough for the largest block type we
  // support.
  const int kInterp_Extend = 4;
101 102 103 104
  const unsigned int intermediate_height =
      (kInterp_Extend - 1) + output_height + kInterp_Extend;
  unsigned int i, j;

105 106
  assert(intermediate_height > 7);

107 108 109 110 111 112 113
  // Size of intermediate_buffer is max_intermediate_height * filter_max_width,
  // where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
  //                                 + kInterp_Extend
  //                               = 3 + 16 + 4
  //                               = 23
  // and filter_max_width          = 16
  //
clang-format's avatar
clang-format committed
114
  uint8_t intermediate_buffer[(kMaxDimension + 8) * kMaxDimension];
115 116
  const int intermediate_next_stride =
      1 - static_cast<int>(intermediate_height * output_width);
117 118 119 120 121 122 123 124

  // Horizontal pass (src -> transposed intermediate).
  uint8_t *output_ptr = intermediate_buffer;
  const int src_next_row_stride = src_stride - output_width;
  src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
  for (i = 0; i < intermediate_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      // Apply filter...
clang-format's avatar
clang-format committed
125 126 127 128
      const int temp = (src_ptr[0] * HFilter[0]) + (src_ptr[1] * HFilter[1]) +
                       (src_ptr[2] * HFilter[2]) + (src_ptr[3] * HFilter[3]) +
                       (src_ptr[4] * HFilter[4]) + (src_ptr[5] * HFilter[5]) +
                       (src_ptr[6] * HFilter[6]) + (src_ptr[7] * HFilter[7]) +
129
                       (AV1_FILTER_WEIGHT >> 1);  // Rounding
130 131

      // Normalize back to 0-255...
132
      *output_ptr = clip_pixel(temp >> AV1_FILTER_SHIFT);
133 134 135 136 137 138 139 140 141 142 143 144 145
      ++src_ptr;
      output_ptr += intermediate_height;
    }
    src_ptr += src_next_row_stride;
    output_ptr += intermediate_next_stride;
  }

  // Vertical pass (transposed intermediate -> dst).
  src_ptr = intermediate_buffer;
  const int dst_next_row_stride = dst_stride - output_width;
  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      // Apply filter...
clang-format's avatar
clang-format committed
146 147 148 149
      const int temp = (src_ptr[0] * VFilter[0]) + (src_ptr[1] * VFilter[1]) +
                       (src_ptr[2] * VFilter[2]) + (src_ptr[3] * VFilter[3]) +
                       (src_ptr[4] * VFilter[4]) + (src_ptr[5] * VFilter[5]) +
                       (src_ptr[6] * VFilter[6]) + (src_ptr[7] * VFilter[7]) +
150
                       (AV1_FILTER_WEIGHT >> 1);  // Rounding
151 152

      // Normalize back to 0-255...
153
      *dst_ptr++ = clip_pixel(temp >> AV1_FILTER_SHIFT);
154 155 156 157 158 159 160
      src_ptr += intermediate_height;
    }
    src_ptr += intermediate_next_stride;
    dst_ptr += dst_next_row_stride;
  }
}

clang-format's avatar
clang-format committed
161 162 163
void block2d_average_c(uint8_t *src, unsigned int src_stride,
                       uint8_t *output_ptr, unsigned int output_stride,
                       unsigned int output_width, unsigned int output_height) {
164 165 166 167 168 169 170 171 172 173 174
  unsigned int i, j;
  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
    }
    output_ptr += output_stride;
  }
}

void filter_average_block2d_8_c(const uint8_t *src_ptr,
                                const unsigned int src_stride,
clang-format's avatar
clang-format committed
175 176
                                const int16_t *HFilter, const int16_t *VFilter,
                                uint8_t *dst_ptr, unsigned int dst_stride,
177 178 179 180 181 182
                                unsigned int output_width,
                                unsigned int output_height) {
  uint8_t tmp[kMaxDimension * kMaxDimension];

  assert(output_width <= kMaxDimension);
  assert(output_height <= kMaxDimension);
183
  filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, kMaxDimension,
184
                     output_width, output_height);
clang-format's avatar
clang-format committed
185 186
  block2d_average_c(tmp, kMaxDimension, dst_ptr, dst_stride, output_width,
                    output_height);
187 188
}

189 190
void highbd_filter_block2d_8_c(const uint16_t *src_ptr,
                               const unsigned int src_stride,
clang-format's avatar
clang-format committed
191 192
                               const int16_t *HFilter, const int16_t *VFilter,
                               uint16_t *dst_ptr, unsigned int dst_stride,
193
                               unsigned int output_width,
clang-format's avatar
clang-format committed
194
                               unsigned int output_height, int bd) {
195 196 197 198 199
  // Between passes, we use an intermediate buffer whose height is extended to
  // have enough horizontally filtered values as input for the vertical pass.
  // This buffer is allocated to be big enough for the largest block type we
  // support.
  const int kInterp_Extend = 4;
John Koleszar's avatar
John Koleszar committed
200
  const unsigned int intermediate_height =
James Zern's avatar
James Zern committed
201
      (kInterp_Extend - 1) + output_height + kInterp_Extend;
John Koleszar's avatar
John Koleszar committed
202 203 204 205 206 207 208 209

  /* Size of intermediate_buffer is max_intermediate_height * filter_max_width,
   * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
   *                                 + kInterp_Extend
   *                               = 3 + 16 + 4
   *                               = 23
   * and filter_max_width = 16
   */
210
  uint16_t intermediate_buffer[(kMaxDimension + 8) * kMaxDimension] = { 0 };
211 212
  const int intermediate_next_stride =
      1 - static_cast<int>(intermediate_height * output_width);
John Koleszar's avatar
John Koleszar committed
213 214 215

  // Horizontal pass (src -> transposed intermediate).
  {
216
    uint16_t *output_ptr = intermediate_buffer;
John Koleszar's avatar
John Koleszar committed
217 218 219 220 221 222
    const int src_next_row_stride = src_stride - output_width;
    unsigned int i, j;
    src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
    for (i = 0; i < intermediate_height; ++i) {
      for (j = 0; j < output_width; ++j) {
        // Apply filter...
clang-format's avatar
clang-format committed
223 224 225 226
        const int temp = (src_ptr[0] * HFilter[0]) + (src_ptr[1] * HFilter[1]) +
                         (src_ptr[2] * HFilter[2]) + (src_ptr[3] * HFilter[3]) +
                         (src_ptr[4] * HFilter[4]) + (src_ptr[5] * HFilter[5]) +
                         (src_ptr[6] * HFilter[6]) + (src_ptr[7] * HFilter[7]) +
227
                         (AV1_FILTER_WEIGHT >> 1);  // Rounding
John Koleszar's avatar
John Koleszar committed
228 229

        // Normalize back to 0-255...
230
        *output_ptr = clip_pixel_highbd(temp >> AV1_FILTER_SHIFT, bd);
John Koleszar's avatar
John Koleszar committed
231 232 233 234 235 236 237 238 239 240
        ++src_ptr;
        output_ptr += intermediate_height;
      }
      src_ptr += src_next_row_stride;
      output_ptr += intermediate_next_stride;
    }
  }

  // Vertical pass (transposed intermediate -> dst).
  {
241
    const uint16_t *interm_ptr = intermediate_buffer;
John Koleszar's avatar
John Koleszar committed
242 243 244 245 246
    const int dst_next_row_stride = dst_stride - output_width;
    unsigned int i, j;
    for (i = 0; i < output_height; ++i) {
      for (j = 0; j < output_width; ++j) {
        // Apply filter...
247 248 249 250 251 252
        const int temp =
            (interm_ptr[0] * VFilter[0]) + (interm_ptr[1] * VFilter[1]) +
            (interm_ptr[2] * VFilter[2]) + (interm_ptr[3] * VFilter[3]) +
            (interm_ptr[4] * VFilter[4]) + (interm_ptr[5] * VFilter[5]) +
            (interm_ptr[6] * VFilter[6]) + (interm_ptr[7] * VFilter[7]) +
            (AV1_FILTER_WEIGHT >> 1);  // Rounding
John Koleszar's avatar
John Koleszar committed
253 254

        // Normalize back to 0-255...
255
        *dst_ptr++ = clip_pixel_highbd(temp >> AV1_FILTER_SHIFT, bd);
256
        interm_ptr += intermediate_height;
John Koleszar's avatar
John Koleszar committed
257
      }
258
      interm_ptr += intermediate_next_stride;
John Koleszar's avatar
John Koleszar committed
259 260 261 262 263
      dst_ptr += dst_next_row_stride;
    }
  }
}

clang-format's avatar
clang-format committed
264 265
void highbd_block2d_average_c(uint16_t *src, unsigned int src_stride,
                              uint16_t *output_ptr, unsigned int output_stride,
266
                              unsigned int output_width,
267
                              unsigned int output_height) {
John Koleszar's avatar
John Koleszar committed
268 269 270 271 272 273 274 275 276
  unsigned int i, j;
  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
    }
    output_ptr += output_stride;
  }
}

clang-format's avatar
clang-format committed
277
void highbd_filter_average_block2d_8_c(
Yaowu Xu's avatar
Yaowu Xu committed
278 279 280
    const uint16_t *src_ptr, unsigned int src_stride, const int16_t *HFilter,
    const int16_t *VFilter, uint16_t *dst_ptr, unsigned int dst_stride,
    unsigned int output_width, unsigned int output_height, int bd) {
281 282 283 284
  uint16_t tmp[kMaxDimension * kMaxDimension];

  assert(output_width <= kMaxDimension);
  assert(output_height <= kMaxDimension);
clang-format's avatar
clang-format committed
285 286
  highbd_filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp,
                            kMaxDimension, output_width, output_height, bd);
287
  highbd_block2d_average_c(tmp, kMaxDimension, dst_ptr, dst_stride,
288
                           output_width, output_height);
John Koleszar's avatar
John Koleszar committed
289 290
}

291
class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
292 293 294
 public:
  static void SetUpTestCase() {
    // Force input_ to be unaligned, output to be 16 byte aligned.
clang-format's avatar
clang-format committed
295
    input_ = reinterpret_cast<uint8_t *>(
296
                 aom_memalign(kDataAlignment, kInputBufferSize + 1)) +
clang-format's avatar
clang-format committed
297 298
             1;
    output_ = reinterpret_cast<uint8_t *>(
299
        aom_memalign(kDataAlignment, kOutputBufferSize));
clang-format's avatar
clang-format committed
300
    output_ref_ = reinterpret_cast<uint8_t *>(
301 302
        aom_memalign(kDataAlignment, kOutputBufferSize));
    input16_ = reinterpret_cast<uint16_t *>(aom_memalign(
clang-format's avatar
clang-format committed
303 304 305
                   kDataAlignment, (kInputBufferSize + 1) * sizeof(uint16_t))) +
               1;
    output16_ = reinterpret_cast<uint16_t *>(
306
        aom_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
clang-format's avatar
clang-format committed
307
    output16_ref_ = reinterpret_cast<uint16_t *>(
308
        aom_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
309 310
  }

311
  virtual void TearDown() { libaom_test::ClearSystemState(); }
312

313
  static void TearDownTestCase() {
314
    aom_free(input_ - 1);
315
    input_ = NULL;
316
    aom_free(output_);
317
    output_ = NULL;
318
    aom_free(output_ref_);
319
    output_ref_ = NULL;
320
    aom_free(input16_ - 1);
321
    input16_ = NULL;
322
    aom_free(output16_);
323
    output16_ = NULL;
324
    aom_free(output16_ref_);
325
    output16_ref_ = NULL;
326 327
  }

James Zern's avatar
James Zern committed
328 329
 protected:
  static const int kDataAlignment = 16;
clang-format's avatar
clang-format committed
330
  static const int kOuterBlockSize = 4 * kMaxDimension;
James Zern's avatar
James Zern committed
331 332 333 334 335 336 337 338 339 340 341 342
  static const int kInputStride = kOuterBlockSize;
  static const int kOutputStride = kOuterBlockSize;
  static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize;
  static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize;

  int Width() const { return GET_PARAM(0); }
  int Height() const { return GET_PARAM(1); }
  int BorderLeft() const {
    const int center = (kOuterBlockSize - Width()) / 2;
    return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1);
  }
  int BorderTop() const { return (kOuterBlockSize - Height()) / 2; }
John Koleszar's avatar
John Koleszar committed
343

James Zern's avatar
James Zern committed
344 345 346 347 348 349
  bool IsIndexInBorder(int i) {
    return (i < BorderTop() * kOuterBlockSize ||
            i >= (BorderTop() + Height()) * kOuterBlockSize ||
            i % kOuterBlockSize < BorderLeft() ||
            i % kOuterBlockSize >= (BorderLeft() + Width()));
  }
John Koleszar's avatar
John Koleszar committed
350

James Zern's avatar
James Zern committed
351 352
  virtual void SetUp() {
    UUT_ = GET_PARAM(2);
353 354
    if (UUT_->use_highbd_ != 0)
      mask_ = (1 << UUT_->use_highbd_) - 1;
355 356
    else
      mask_ = 255;
Johann's avatar
Johann committed
357
    /* Set up guard blocks for an inner block centered in the outer block */
James Zern's avatar
James Zern committed
358
    for (int i = 0; i < kOutputBufferSize; ++i) {
359
      if (IsIndexInBorder(i)) {
James Zern's avatar
James Zern committed
360
        output_[i] = 255;
361 362
        output16_[i] = mask_;
      } else {
James Zern's avatar
James Zern committed
363
        output_[i] = 0;
364 365
        output16_[i] = 0;
      }
John Koleszar's avatar
John Koleszar committed
366 367
    }

368
    ::libaom_test::ACMRandom prng;
369
    for (int i = 0; i < kInputBufferSize; ++i) {
370
      if (i & 1) {
371
        input_[i] = 255;
372 373
        input16_[i] = mask_;
      } else {
374
        input_[i] = prng.Rand8Extremes();
375 376
        input16_[i] = prng.Rand16() & mask_;
      }
377
    }
James Zern's avatar
James Zern committed
378
  }
John Koleszar's avatar
John Koleszar committed
379

380 381
  void SetConstantInput(int value) {
    memset(input_, value, kInputBufferSize);
382
    aom_memset16(input16_, value, kInputBufferSize);
383 384
  }

385
  void CopyOutputToRef() {
James Zern's avatar
James Zern committed
386
    memcpy(output_ref_, output_, kOutputBufferSize);
387 388
    // Copy 16-bit pixels values. The effective number of bytes is double.
    memcpy(output16_ref_, output16_, sizeof(output16_[0]) * kOutputBufferSize);
389 390
  }

James Zern's avatar
James Zern committed
391 392
  void CheckGuardBlocks() {
    for (int i = 0; i < kOutputBufferSize; ++i) {
393 394 395
      if (IsIndexInBorder(i)) {
        EXPECT_EQ(255, output_[i]);
      }
John Koleszar's avatar
John Koleszar committed
396
    }
James Zern's avatar
James Zern committed
397
  }
John Koleszar's avatar
John Koleszar committed
398

399
  uint8_t *input() const {
400
    const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
401
    if (UUT_->use_highbd_ == 0) {
402
      return input_ + offset;
403
    } else {
404
      return CONVERT_TO_BYTEPTR(input16_) + offset;
405
    }
James Zern's avatar
James Zern committed
406
  }
John Koleszar's avatar
John Koleszar committed
407

408
  uint8_t *output() const {
409
    const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
410
    if (UUT_->use_highbd_ == 0) {
411
      return output_ + offset;
412
    } else {
413
      return CONVERT_TO_BYTEPTR(output16_) + offset;
414 415 416
    }
  }

417
  uint8_t *output_ref() const {
418
    const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
419
    if (UUT_->use_highbd_ == 0) {
420
      return output_ref_ + offset;
421
    } else {
422
      return CONVERT_TO_BYTEPTR(output16_ref_) + offset;
423 424 425
    }
  }

426
  uint16_t lookup(uint8_t *list, int index) const {
427
    if (UUT_->use_highbd_ == 0) {
428 429 430 431 432 433 434
      return list[index];
    } else {
      return CONVERT_TO_SHORTPTR(list)[index];
    }
  }

  void assign_val(uint8_t *list, int index, uint16_t val) const {
435
    if (UUT_->use_highbd_ == 0) {
clang-format's avatar
clang-format committed
436
      list[index] = (uint8_t)val;
437 438 439 440 441
    } else {
      CONVERT_TO_SHORTPTR(list)[index] = val;
    }
  }

clang-format's avatar
clang-format committed
442
  void wrapper_filter_average_block2d_8_c(
Yaowu Xu's avatar
Yaowu Xu committed
443 444 445
      const uint8_t *src_ptr, unsigned int src_stride, const int16_t *HFilter,
      const int16_t *VFilter, uint8_t *dst_ptr, unsigned int dst_stride,
      unsigned int output_width, unsigned int output_height) {
446
    if (UUT_->use_highbd_ == 0) {
clang-format's avatar
clang-format committed
447 448
      filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
                                 dst_stride, output_width, output_height);
449
    } else {
clang-format's avatar
clang-format committed
450 451 452 453
      highbd_filter_average_block2d_8_c(
          CONVERT_TO_SHORTPTR(src_ptr), src_stride, HFilter, VFilter,
          CONVERT_TO_SHORTPTR(dst_ptr), dst_stride, output_width, output_height,
          UUT_->use_highbd_);
454 455 456
    }
  }

Yaowu Xu's avatar
Yaowu Xu committed
457 458 459 460
  void wrapper_filter_block2d_8_c(
      const uint8_t *src_ptr, unsigned int src_stride, const int16_t *HFilter,
      const int16_t *VFilter, uint8_t *dst_ptr, unsigned int dst_stride,
      unsigned int output_width, unsigned int output_height) {
461
    if (UUT_->use_highbd_ == 0) {
clang-format's avatar
clang-format committed
462 463
      filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
                         dst_stride, output_width, output_height);
464
    } else {
465
      highbd_filter_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
clang-format's avatar
clang-format committed
466 467 468
                                HFilter, VFilter, CONVERT_TO_SHORTPTR(dst_ptr),
                                dst_stride, output_width, output_height,
                                UUT_->use_highbd_);
469
    }
James Zern's avatar
James Zern committed
470 471
  }

clang-format's avatar
clang-format committed
472 473 474 475 476 477 478
  const ConvolveFunctions *UUT_;
  static uint8_t *input_;
  static uint8_t *output_;
  static uint8_t *output_ref_;
  static uint16_t *input16_;
  static uint16_t *output16_;
  static uint16_t *output16_ref_;
479
  int mask_;
John Koleszar's avatar
John Koleszar committed
480
};
481

clang-format's avatar
clang-format committed
482 483 484 485 486 487
uint8_t *ConvolveTest::input_ = NULL;
uint8_t *ConvolveTest::output_ = NULL;
uint8_t *ConvolveTest::output_ref_ = NULL;
uint16_t *ConvolveTest::input16_ = NULL;
uint16_t *ConvolveTest::output16_ = NULL;
uint16_t *ConvolveTest::output16_ref_ = NULL;
John Koleszar's avatar
John Koleszar committed
488

clang-format's avatar
clang-format committed
489
TEST_P(ConvolveTest, GuardBlocks) { CheckGuardBlocks(); }
John Koleszar's avatar
John Koleszar committed
490

491
TEST_P(ConvolveTest, Copy) {
clang-format's avatar
clang-format committed
492 493
  uint8_t *const in = input();
  uint8_t *const out = output();
494

clang-format's avatar
clang-format committed
495 496
  ASM_REGISTER_STATE_CHECK(UUT_->copy_(in, kInputStride, out, kOutputStride,
                                       NULL, 0, NULL, 0, Width(), Height()));
497 498 499 500 501 502 503 504 505 506 507

  CheckGuardBlocks();

  for (int y = 0; y < Height(); ++y)
    for (int x = 0; x < Width(); ++x)
      ASSERT_EQ(lookup(out, y * kOutputStride + x),
                lookup(in, y * kInputStride + x))
          << "(" << x << "," << y << ")";
}

TEST_P(ConvolveTest, Avg) {
clang-format's avatar
clang-format committed
508 509 510
  uint8_t *const in = input();
  uint8_t *const out = output();
  uint8_t *const out_ref = output_ref();
511 512
  CopyOutputToRef();

clang-format's avatar
clang-format committed
513 514
  ASM_REGISTER_STATE_CHECK(UUT_->avg_(in, kInputStride, out, kOutputStride,
                                      NULL, 0, NULL, 0, Width(), Height()));
515 516 517 518 519 520 521

  CheckGuardBlocks();

  for (int y = 0; y < Height(); ++y)
    for (int x = 0; x < Width(); ++x)
      ASSERT_EQ(lookup(out, y * kOutputStride + x),
                ROUND_POWER_OF_TWO(lookup(in, y * kInputStride + x) +
clang-format's avatar
clang-format committed
522 523
                                       lookup(out_ref, y * kOutputStride + x),
                                   1))
524 525 526
          << "(" << x << "," << y << ")";
}

John Koleszar's avatar
John Koleszar committed
527
TEST_P(ConvolveTest, CopyHoriz) {
clang-format's avatar
clang-format committed
528 529 530 531
  uint8_t *const in = input();
  uint8_t *const out = output();
  DECLARE_ALIGNED(256, const int16_t,
                  filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };
John Koleszar's avatar
John Koleszar committed
532

clang-format's avatar
clang-format committed
533 534 535
  ASM_REGISTER_STATE_CHECK(UUT_->sh8_(in, kInputStride, out, kOutputStride,
                                      filter8, 16, filter8, 16, Width(),
                                      Height()));
John Koleszar's avatar
John Koleszar committed
536 537 538 539 540

  CheckGuardBlocks();

  for (int y = 0; y < Height(); ++y)
    for (int x = 0; x < Width(); ++x)
541 542
      ASSERT_EQ(lookup(out, y * kOutputStride + x),
                lookup(in, y * kInputStride + x))
John Koleszar's avatar
John Koleszar committed
543 544 545 546
          << "(" << x << "," << y << ")";
}

TEST_P(ConvolveTest, CopyVert) {
clang-format's avatar
clang-format committed
547 548 549 550
  uint8_t *const in = input();
  uint8_t *const out = output();
  DECLARE_ALIGNED(256, const int16_t,
                  filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };
John Koleszar's avatar
John Koleszar committed
551

clang-format's avatar
clang-format committed
552 553 554
  ASM_REGISTER_STATE_CHECK(UUT_->sv8_(in, kInputStride, out, kOutputStride,
                                      filter8, 16, filter8, 16, Width(),
                                      Height()));
John Koleszar's avatar
John Koleszar committed
555 556 557 558 559

  CheckGuardBlocks();

  for (int y = 0; y < Height(); ++y)
    for (int x = 0; x < Width(); ++x)
560 561
      ASSERT_EQ(lookup(out, y * kOutputStride + x),
                lookup(in, y * kInputStride + x))
John Koleszar's avatar
John Koleszar committed
562 563 564 565
          << "(" << x << "," << y << ")";
}

TEST_P(ConvolveTest, Copy2D) {
clang-format's avatar
clang-format committed
566 567 568 569
  uint8_t *const in = input();
  uint8_t *const out = output();
  DECLARE_ALIGNED(256, const int16_t,
                  filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };
John Koleszar's avatar
John Koleszar committed
570

clang-format's avatar
clang-format committed
571 572 573
  ASM_REGISTER_STATE_CHECK(UUT_->shv8_(in, kInputStride, out, kOutputStride,
                                       filter8, 16, filter8, 16, Width(),
                                       Height()));
John Koleszar's avatar
John Koleszar committed
574 575 576 577 578

  CheckGuardBlocks();

  for (int y = 0; y < Height(); ++y)
    for (int x = 0; x < Width(); ++x)
579 580
      ASSERT_EQ(lookup(out, y * kOutputStride + x),
                lookup(in, y * kInputStride + x))
John Koleszar's avatar
John Koleszar committed
581 582 583
          << "(" << x << "," << y << ")";
}

Angie Chiang's avatar
Angie Chiang committed
584
const int kNumFilterBanks = SWITCHABLE_FILTERS;
585 586 587 588
const int kNumFilters = 16;

TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
  for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
589
    const InterpFilter filter = (InterpFilter)filter_bank;
590
    const InterpKernel *filters =
591
        (const InterpKernel *)av1_get_interp_filter_kernel(filter);
592
#if CONFIG_DUAL_FILTER
Angie Chiang's avatar
Angie Chiang committed
593
    const InterpFilterParams filter_params =
594
        av1_get_interp_filter_params(filter);
Angie Chiang's avatar
Angie Chiang committed
595 596
    if (filter_params.taps != SUBPEL_TAPS) continue;
#endif
597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612
    for (int i = 0; i < kNumFilters; i++) {
      const int p0 = filters[i][0] + filters[i][1];
      const int p1 = filters[i][2] + filters[i][3];
      const int p2 = filters[i][4] + filters[i][5];
      const int p3 = filters[i][6] + filters[i][7];
      EXPECT_LE(p0, 128);
      EXPECT_LE(p1, 128);
      EXPECT_LE(p2, 128);
      EXPECT_LE(p3, 128);
      EXPECT_LE(p0 + p3, 128);
      EXPECT_LE(p0 + p3 + p1, 128);
      EXPECT_LE(p0 + p3 + p1 + p2, 128);
      EXPECT_EQ(p0 + p1 + p2 + p3, 128);
    }
  }
}
613

614 615
const int16_t kInvalidFilter[8] = { 0 };

John Koleszar's avatar
John Koleszar committed
616
TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
clang-format's avatar
clang-format committed
617 618
  uint8_t *const in = input();
  uint8_t *const out = output();
619 620
  uint8_t ref8[kOutputStride * kMaxDimension];
  uint16_t ref16[kOutputStride * kMaxDimension];
clang-format's avatar
clang-format committed
621
  uint8_t *ref;
622
  if (UUT_->use_highbd_ == 0) {
623 624 625 626
    ref = ref8;
  } else {
    ref = CONVERT_TO_BYTEPTR(ref16);
  }
627 628

  for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
629
    const InterpFilter filter = (InterpFilter)filter_bank;
630
    const InterpKernel *filters =
631
        (const InterpKernel *)av1_get_interp_filter_kernel(filter);
632
#if CONFIG_DUAL_FILTER
Angie Chiang's avatar
Angie Chiang committed
633
    const InterpFilterParams filter_params =
634
        av1_get_interp_filter_params(filter);
Angie Chiang's avatar
Angie Chiang committed
635 636
    if (filter_params.taps != SUBPEL_TAPS) continue;
#endif
637

638 639
    for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
      for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
clang-format's avatar
clang-format committed
640 641
        wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x],
                                   filters[filter_y], ref, kOutputStride,
642
                                   Width(), Height());
643

644
        if (filter_x && filter_y)
clang-format's avatar
clang-format committed
645 646 647
          ASM_REGISTER_STATE_CHECK(UUT_->hv8_(
              in, kInputStride, out, kOutputStride, filters[filter_x], 16,
              filters[filter_y], 16, Width(), Height()));
648
        else if (filter_y)
649
          ASM_REGISTER_STATE_CHECK(
clang-format's avatar
clang-format committed
650 651
              UUT_->v8_(in, kInputStride, out, kOutputStride, kInvalidFilter,
                        16, filters[filter_y], 16, Width(), Height()));
652
        else if (filter_x)
653
          ASM_REGISTER_STATE_CHECK(
clang-format's avatar
clang-format committed
654 655
              UUT_->h8_(in, kInputStride, out, kOutputStride, filters[filter_x],
                        16, kInvalidFilter, 16, Width(), Height()));
656 657
        else
          ASM_REGISTER_STATE_CHECK(
clang-format's avatar
clang-format committed
658 659
              UUT_->copy_(in, kInputStride, out, kOutputStride, kInvalidFilter,
                          0, kInvalidFilter, 0, Width(), Height()));
660 661 662 663 664

        CheckGuardBlocks();

        for (int y = 0; y < Height(); ++y)
          for (int x = 0; x < Width(); ++x)
665 666
            ASSERT_EQ(lookup(ref, y * kOutputStride + x),
                      lookup(out, y * kOutputStride + x))
667
                << "mismatch at (" << x << "," << y << "), "
clang-format's avatar
clang-format committed
668 669
                << "filters (" << filter_bank << "," << filter_x << ","
                << filter_y << ")";
670
      }
John Koleszar's avatar
John Koleszar committed
671 672 673 674 675
    }
  }
}

TEST_P(ConvolveTest, MatchesReferenceAveragingSubpixelFilter) {
clang-format's avatar
clang-format committed
676 677
  uint8_t *const in = input();
  uint8_t *const out = output();
678 679
  uint8_t ref8[kOutputStride * kMaxDimension];
  uint16_t ref16[kOutputStride * kMaxDimension];
clang-format's avatar
clang-format committed
680
  uint8_t *ref;
681
  if (UUT_->use_highbd_ == 0) {
682 683 684 685
    ref = ref8;
  } else {
    ref = CONVERT_TO_BYTEPTR(ref16);
  }
John Koleszar's avatar
John Koleszar committed
686 687

  // Populate ref and out with some random data
688
  ::libaom_test::ACMRandom prng;
John Koleszar's avatar
John Koleszar committed
689 690
  for (int y = 0; y < Height(); ++y) {
    for (int x = 0; x < Width(); ++x) {
691
      uint16_t r;
692
      if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
693 694 695 696 697 698
        r = prng.Rand8Extremes();
      } else {
        r = prng.Rand16() & mask_;
      }
      assign_val(out, y * kOutputStride + x, r);
      assign_val(ref, y * kOutputStride + x, r);
John Koleszar's avatar
John Koleszar committed
699 700 701
    }
  }

702
  for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
703
    const InterpFilter filter = (InterpFilter)filter_bank;
704
    const InterpKernel *filters =
705
        (const InterpKernel *)av1_get_interp_filter_kernel(filter);
706
#if CONFIG_DUAL_FILTER
Angie Chiang's avatar
Angie Chiang committed
707
    const InterpFilterParams filter_params =
708
        av1_get_interp_filter_params(filter);
Angie Chiang's avatar
Angie Chiang committed
709 710
    if (filter_params.taps != SUBPEL_TAPS) continue;
#endif
711 712 713

    for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
      for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
clang-format's avatar
clang-format committed
714 715 716
        wrapper_filter_average_block2d_8_c(in, kInputStride, filters[filter_x],
                                           filters[filter_y], ref,
                                           kOutputStride, Width(), Height());
717

718
        if (filter_x && filter_y)
clang-format's avatar
clang-format committed
719 720 721
          ASM_REGISTER_STATE_CHECK(UUT_->hv8_avg_(
              in, kInputStride, out, kOutputStride, filters[filter_x], 16,
              filters[filter_y], 16, Width(), Height()));
722
        else if (filter_y)
clang-format's avatar
clang-format committed
723 724 725
          ASM_REGISTER_STATE_CHECK(UUT_->v8_avg_(
              in, kInputStride, out, kOutputStride, kInvalidFilter, 16,
              filters[filter_y], 16, Width(), Height()));
726
        else if (filter_x)
clang-format's avatar
clang-format committed
727 728 729
          ASM_REGISTER_STATE_CHECK(UUT_->h8_avg_(
              in, kInputStride, out, kOutputStride, filters[filter_x], 16,
              kInvalidFilter, 16, Width(), Height()));
730 731
        else
          ASM_REGISTER_STATE_CHECK(
clang-format's avatar
clang-format committed
732 733
              UUT_->avg_(in, kInputStride, out, kOutputStride, kInvalidFilter,
                         0, kInvalidFilter, 0, Width(), Height()));
734 735 736 737 738

        CheckGuardBlocks();

        for (int y = 0; y < Height(); ++y)
          for (int x = 0; x < Width(); ++x)
739 740
            ASSERT_EQ(lookup(ref, y * kOutputStride + x),
                      lookup(out, y * kOutputStride + x))
741
                << "mismatch at (" << x << "," << y << "), "
clang-format's avatar
clang-format committed
742 743
                << "filters (" << filter_bank << "," << filter_x << ","
                << filter_y << ")";
744
      }
John Koleszar's avatar
John Koleszar committed
745 746 747 748
    }
  }
}

749 750 751 752 753 754
TEST_P(ConvolveTest, FilterExtremes) {
  uint8_t *const in = input();
  uint8_t *const out = output();
  uint8_t ref8[kOutputStride * kMaxDimension];
  uint16_t ref16[kOutputStride * kMaxDimension];
  uint8_t *ref;
755
  if (UUT_->use_highbd_ == 0) {
756 757 758 759 760 761
    ref = ref8;
  } else {
    ref = CONVERT_TO_BYTEPTR(ref16);
  }

  // Populate ref and out with some random data
762
  ::libaom_test::ACMRandom prng;
763 764 765
  for (int y = 0; y < Height(); ++y) {
    for (int x = 0; x < Width(); ++x) {
      uint16_t r;
766
      if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
767 768 769 770 771 772 773 774 775 776 777 778 779 780
        r = prng.Rand8Extremes();
      } else {
        r = prng.Rand16() & mask_;
      }
      assign_val(out, y * kOutputStride + x, r);
      assign_val(ref, y * kOutputStride + x, r);
    }
  }

  for (int axis = 0; axis < 2; axis++) {
    int seed_val = 0;
    while (seed_val < 256) {
      for (int y = 0; y < 8; ++y) {
        for (int x = 0; x < 8; ++x) {
clang-format's avatar
clang-format committed
781 782
          assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
                     ((seed_val >> (axis ? y : x)) & 1) * mask_);
783 784 785
          if (axis) seed_val++;
        }
        if (axis)
clang-format's avatar
clang-format committed
786
          seed_val -= 8;
787 788 789 790 791 792
        else
          seed_val++;
      }
      if (axis) seed_val += 8;

      for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
793
        const InterpFilter filter = (InterpFilter)filter_bank;
794
        const InterpKernel *filters =
795
            (const InterpKernel *)av1_get_interp_filter_kernel(filter);
796
#if CONFIG_DUAL_FILTER
Angie Chiang's avatar
Angie Chiang committed
797
        const InterpFilterParams filter_params =
798
            av1_get_interp_filter_params(filter);
Angie Chiang's avatar
Angie Chiang committed
799 800
        if (filter_params.taps != SUBPEL_TAPS) continue;
#endif
801 802
        for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
          for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
clang-format's avatar
clang-format committed
803 804
            wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x],
                                       filters[filter_y], ref, kOutputStride,
805
                                       Width(), Height());
806
            if (filter_x && filter_y)
clang-format's avatar
clang-format committed
807 808 809
              ASM_REGISTER_STATE_CHECK(UUT_->hv8_(
                  in, kInputStride, out, kOutputStride, filters[filter_x], 16,
                  filters[filter_y], 16, Width(), Height()));
810
            else if (filter_y)
clang-format's avatar
clang-format committed
811 812 813
              ASM_REGISTER_STATE_CHECK(UUT_->v8_(
                  in, kInputStride, out, kOutputStride, kInvalidFilter, 16,
                  filters[filter_y], 16, Width(), Height()));
814
            else if (filter_x)
clang-format's avatar
clang-format committed
815 816 817
              ASM_REGISTER_STATE_CHECK(UUT_->h8_(
                  in, kInputStride, out, kOutputStride, filters[filter_x], 16,
                  kInvalidFilter, 16, Width(), Height()));
818
            else
clang-format's avatar
clang-format committed
819 820 821
              ASM_REGISTER_STATE_CHECK(UUT_->copy_(
                  in, kInputStride, out, kOutputStride, kInvalidFilter, 0,
                  kInvalidFilter, 0, Width(), Height()));
822 823 824 825 826 827

            for (int y = 0; y < Height(); ++y)
              for (int x = 0; x < Width(); ++x)
                ASSERT_EQ(lookup(ref, y * kOutputStride + x),
                          lookup(out, y * kOutputStride + x))
                    << "mismatch at (" << x << "," << y << "), "
clang-format's avatar
clang-format committed
828 829
                    << "filters (" << filter_bank << "," << filter_x << ","
                    << filter_y << ")";
830 831 832 833 834 835 836
          }
        }
      }
    }
  }
}

837 838 839
/* This test exercises that enough rows and columns are filtered with every
   possible initial fractional positions and scaling steps. */
TEST_P(ConvolveTest, CheckScalingFiltering) {
clang-format's avatar
clang-format committed
840 841
  uint8_t *const in = input();
  uint8_t *const out = output();
Angie Chiang's avatar
Angie Chiang committed
842 843
  const InterpKernel *const eighttap =
      (const InterpKernel *)av1_get_interp_filter_kernel(EIGHTTAP_REGULAR);
844 845 846 847 848 849

  SetConstantInput(127);

  for (int frac = 0; frac < 16; ++frac) {
    for (int step = 1; step <= 32; ++step) {
      /* Test the horizontal and vertical filters in combination. */
850
      ASM_REGISTER_STATE_CHECK(UUT_->shv8_(in, kInputStride, out, kOutputStride,
clang-format's avatar
clang-format committed
851 852
                                           eighttap[frac], step, eighttap[frac],
                                           step, Width(), Height()));
853 854 855 856 857

      CheckGuardBlocks();

      for (int y = 0; y < Height(); ++y) {
        for (int x = 0; x < Width(); ++x) {
858 859
          ASSERT_EQ(lookup(in, y * kInputStride + x),
                    lookup(out, y * kOutputStride + x))
clang-format's avatar
clang-format committed
860 861
              << "x == " << x << ", y == " << y << ", frac == " << frac
              << ", step == " << step;
862 863 864 865 866 867
        }
      }
    }
  }
}

868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907
TEST_P(ConvolveTest, DISABLED_Copy_Speed) {
  const uint8_t *const in = input();
  uint8_t *const out = output();
  const int kNumTests = 5000000;
  const int width = Width();
  const int height = Height();
  aom_usec_timer timer;

  aom_usec_timer_start(&timer);
  for (int n = 0; n < kNumTests; ++n) {
    UUT_->copy_(in, kInputStride, out, kOutputStride, NULL, 0, NULL, 0, width,
                height);
  }
  aom_usec_timer_mark(&timer);

  const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
  printf("convolve_copy_%dx%d_%d: %d us\n", width, height,
         UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time);
}

TEST_P(ConvolveTest, DISABLED_Avg_Speed) {
  const uint8_t *const in = input();
  uint8_t *const out = output();
  const int kNumTests = 5000000;
  const int width = Width();
  const int height = Height();
  aom_usec_timer timer;

  aom_usec_timer_start(&timer);
  for (int n = 0; n < kNumTests; ++n) {
    UUT_->avg_(in, kInputStride, out, kOutputStride, NULL, 0, NULL, 0, width,
               height);
  }
  aom_usec_timer_mark(&timer);

  const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
  printf("convolve_avg_%dx%d_%d: %d us\n", width, height,
         UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time);
}

908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982
TEST_P(ConvolveTest, DISABLED_Speed) {
  uint8_t *const in = input();
  uint8_t *const out = output();
  uint8_t ref8[kOutputStride * kMaxDimension];
  uint16_t ref16[kOutputStride * kMaxDimension];
  uint8_t *ref;
  if (UUT_->use_highbd_ == 0) {
    ref = ref8;
  } else {
    ref = CONVERT_TO_BYTEPTR(ref16);
  }

  // Populate ref and out with some random data
  ::libaom_test::ACMRandom prng;
  for (int y = 0; y < Height(); ++y) {
    for (int x = 0; x < Width(); ++x) {
      uint16_t r;
      if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
        r = prng.Rand8Extremes();
      } else {
        r = prng.Rand16() & mask_;
      }
      assign_val(out, y * kOutputStride + x, r);
      assign_val(ref, y * kOutputStride + x, r);
    }
  }

  const InterpFilter filter = (InterpFilter)1;
  const InterpKernel *filters =
      (const InterpKernel *)av1_get_interp_filter_kernel(filter);
  wrapper_filter_average_block2d_8_c(in, kInputStride, filters[1], filters[1],
                                     out, kOutputStride, Width(), Height());

  aom_usec_timer timer;
  int tests_num = 1000;

  aom_usec_timer_start(&timer);
  while (tests_num > 0) {
    for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
      const InterpFilter filter = (InterpFilter)filter_bank;
      const InterpKernel *filters =
          (const InterpKernel *)av1_get_interp_filter_kernel(filter);
#if CONFIG_DUAL_FILTER
      const InterpFilterParams filter_params =
          av1_get_interp_filter_params(filter);
      if (filter_params.taps != SUBPEL_TAPS) continue;
#endif

      for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
        for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
          if (filter_x && filter_y)
            ASM_REGISTER_STATE_CHECK(UUT_->hv8_(
                in, kInputStride, out, kOutputStride, filters[filter_x], 16,
                filters[filter_y], 16, Width(), Height()));
          if (filter_y)
            ASM_REGISTER_STATE_CHECK(
                UUT_->v8_(in, kInputStride, out, kOutputStride, kInvalidFilter,
                          16, filters[filter_y], 16, Width(), Height()));
          else if (filter_x)
            ASM_REGISTER_STATE_CHECK(UUT_->h8_(
                in, kInputStride, out, kOutputStride, filters[filter_x], 16,
                kInvalidFilter, 16, Width(), Height()));
        }
      }
    }
    tests_num--;
  }
  aom_usec_timer_mark(&timer);

  const int elapsed_time =
      static_cast<int>(aom_usec_timer_elapsed(&timer) / 1000);
  printf("%dx%d (bitdepth %d) time: %5d ms\n", Width(), Height(),
         UUT_->use_highbd_, elapsed_time);
}

John Koleszar's avatar
John Koleszar committed
983 984
using std::tr1::make_tuple;

clang-format's avatar
clang-format committed
985 986 987 988 989
#define WRAP(func, bd)                                                       \
  void wrap_##func##_##bd(                                                   \
      const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,                \
      ptrdiff_t dst_stride, const int16_t *filter_x, int filter_x_stride,    \
      const int16_t *filter_y, int filter_y_stride, int w, int h) {          \
990
    aom_highbd_##func(src, src_stride, dst, dst_stride, filter_x,            \
clang-format's avatar
clang-format committed
991 992
                      filter_x_stride, filter_y, filter_y_stride, w, h, bd); \
  }
993
#if HAVE_SSE2 && ARCH_X86_64
994 995 996 997 998 999
WRAP(convolve_copy_sse2, 8)
WRAP(convolve_avg_sse2, 8)
WRAP(convolve_copy_sse2, 10)
WRAP(convolve_avg_sse2, 10)
WRAP(convolve_copy_sse2, 12)
WRAP(convolve_avg_sse2, 12)
1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017
WRAP(convolve8_horiz_sse2, 8)
WRAP(convolve8_avg_horiz_sse2, 8)
WRAP(convolve8_vert_sse2, 8)
WRAP(convolve8_avg_vert_sse2, 8)
WRAP(convolve8_sse2, 8)
WRAP(convolve8_avg_sse2, 8)
WRAP(convolve8_horiz_sse2, 10)
WRAP(convolve8_avg_horiz_sse2, 10)
WRAP(convolve8_vert_sse2, 10)
WRAP(convolve8_avg_vert_sse2, 10)
WRAP(convolve8_sse2, 10)
WRAP(convolve8_avg_sse2, 10)
WRAP(convolve8_horiz_sse2, 12)
WRAP(convolve8_avg_horiz_sse2, 12)
WRAP(convolve8_vert_sse2, 12)
WRAP(convolve8_avg_vert_sse2, 12)
WRAP(convolve8_sse2, 12)
WRAP(convolve8_avg_sse2, 12)
1018 1019
#endif  // HAVE_SSE2 && ARCH_X86_64

1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043
WRAP(convolve_copy_c, 8)
WRAP(convolve_avg_c, 8)
WRAP(convolve8_horiz_c, 8)
WRAP(convolve8_avg_horiz_c, 8)
WRAP(convolve8_vert_c, 8)
WRAP(convolve8_avg_vert_c, 8)
WRAP(convolve8_c, 8)
WRAP(convolve8_avg_c, 8)
WRAP(convolve_copy_c, 10)
WRAP(convolve_avg_c, 10)
WRAP(convolve8_horiz_c, 10)
WRAP(convolve8_avg_horiz_c, 10)
WRAP(convolve8_vert_c, 10)
WRAP(convolve8_avg_vert_c, 10)
WRAP(convolve8_c, 10)
WRAP(convolve8_avg_c, 10)
WRAP(convolve_copy_c, 12)
WRAP(convolve_avg_c, 12)
WRAP(convolve8_horiz_c, 12)
WRAP(convolve8_avg_horiz_c, 12)
WRAP(convolve8_vert_c, 12)
WRAP(convolve8_avg_vert_c, 12)
WRAP(convolve8_c, 12)
WRAP(convolve8_avg_c, 12)
1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073

#if HAVE_AVX2
WRAP(convolve_copy_avx2, 8)
WRAP(convolve_avg_avx2, 8)
WRAP(convolve8_horiz_avx2, 8)
WRAP(convolve8_avg_horiz_avx2, 8)
WRAP(convolve8_vert_avx2, 8)
WRAP(convolve8_avg_vert_avx2, 8)
WRAP(convolve8_avx2, 8)
WRAP(convolve8_avg_avx2, 8)

WRAP(convolve_copy_avx2, 10)
WRAP(convolve_avg_avx2, 10)
WRAP(convolve8_avx2, 10)
WRAP(convolve8_horiz_avx2, 10)
WRAP(convolve8_vert_avx2, 10)
WRAP(convolve8_avg_avx2, 10)
WRAP(convolve8_avg_horiz_avx2, 10)
WRAP(convolve8_avg_vert_avx2, 10)

WRAP(convolve_copy_avx2, 12)
WRAP(convolve_avg_avx2, 12)
WRAP(convolve8_avx2, 12)
WRAP(convolve8_horiz_avx2, 12)
WRAP(convolve8_vert_avx2, 12)
WRAP(convolve8_avg_avx2, 12)
WRAP(convolve8_avg_horiz_avx2, 12)
WRAP(convolve8_avg_vert_avx2, 12)
#endif  // HAVE_AVX2

1074
#undef WRAP
1075 1076

const ConvolveFunctions convolve8_c(
clang-format's avatar
clang-format committed
1077 1078 1079
    wrap_convolve_copy_c_8, wrap_convolve_avg_c_8, wrap_convolve8_horiz_c_8,
    wrap_convolve8_avg_horiz_c_8, wrap_convolve8_vert_c_8,
    wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8, wrap_convolve8_avg_c_8,
1080
    wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8,
clang-format's avatar
clang-format committed
1081 1082
    wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8,
    wrap_convolve8_avg_c_8, 8);
1083
const ConvolveFunctions convolve10_c(
clang-format's avatar
clang-format committed
1084 1085 1086
    wrap_convolve_copy_c_10, wrap_convolve_avg_c_10, wrap_convolve8_horiz_c_10,
    wrap_convolve8_avg_horiz_c_10, wrap_convolve8_vert_c_10,
    wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10, wrap_convolve8_avg_c_10,
1087
    wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
clang-format's avatar
clang-format committed
1088 1089
    wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10,
    wrap_convolve8_avg_c_10, 10);
1090
const ConvolveFunctions convolve12_c(
clang-format's avatar
clang-format committed
1091 1092 1093
    wrap_convolve_copy_c_12, wrap_convolve_avg_c_12, wrap_convolve8_horiz_c_12,
    wrap_convolve8_avg_horiz_c_12, wrap_convolve8_vert_c_12,
    wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12, wrap_convolve8_avg_c_12,
1094
    wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
clang-format's avatar
clang-format committed
1095 1096
    wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12,
    wrap_convolve8_avg_c_12, 12);
1097
const ConvolveParam kArrayConvolve_c[] = {
clang-format's avatar
clang-format committed
1098
  ALL_SIZES(convolve8_c), ALL_SIZES(convolve10_c), ALL_SIZES(convolve12_c)
1099
};
1100

clang-format's avatar
clang-format committed
1101
INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::ValuesIn(kArrayConvolve_c));
1102

1103
#if HAVE_SSE2 && ARCH_X86_64
1104
const ConvolveFunctions convolve8_sse2(
1105
    wrap_convolve_copy_sse2_8, wrap_convolve_avg_sse2_8,
1106 1107
    wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
    wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
1108 1109 1110
    wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8,
    wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
    wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
1111
    wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, 8);
1112
const ConvolveFunctions convolve10_sse2(