/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include "third_party/googletest/src/include/gtest/gtest.h"

#include "./av1_rtcd.h"
#include "./aom_dsp_rtcd.h"
#include "test/acm_random.h"
#include "av1/common/filter.h"
#include "av1/common/convolve.h"
#include "aom_dsp/aom_dsp_common.h"
#include "aom_ports/mem.h"

using libaom_test::ACMRandom;

namespace {
// Test fixture helper: when the build has SSSE3 and runtime CPU detection,
// redirects av1_convolve_horiz / av1_convolve_vert to their `_c` variants so
// the tests below exercise those versions. A no-op in every other
// configuration.
void setup_convolve() {
#if HAVE_SSSE3 && CONFIG_RUNTIME_CPU_DETECT
  av1_convolve_horiz = av1_convolve_horiz_c;
  av1_convolve_vert = av1_convolve_vert_c;
#endif
}

// Convolves a single output pixel with av1_convolve() (subpel_x_q4 = 3,
// subpel_y_q4 = 2, step 16 in both directions, no averaging) and expects the
// result to equal what aom_convolve8_c() produces when given the sub-pixel
// kernels looked up for the same filter and positions.
TEST(AV1ConvolveTest, av1_convolve8) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
#if CONFIG_DUAL_FILTER
  InterpFilter interp_filter[4] = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR,
                                    EIGHTTAP_REGULAR, EIGHTTAP_REGULAR };
  InterpFilterParams filter_params =
      av1_get_interp_filter_params(interp_filter[0]);
#else
  InterpFilter interp_filter = EIGHTTAP_REGULAR;
  InterpFilterParams filter_params =
      av1_get_interp_filter_params(interp_filter);
#endif
  int filter_size = filter_params.taps;
  int filter_center = filter_size / 2 - 1;
  // Storage is sized for a 12x12 window; only filter_size * filter_size
  // entries are filled and the stride equals the tap count.
  uint8_t src[12 * 12];
  int src_stride = filter_size;
  uint8_t dst[1] = { 0 };
  uint8_t dst1[1] = { 0 };
  int dst_stride = 1;
  int x_step_q4 = 16;
  int y_step_q4 = 16;
  int subpel_x_q4 = 3;
  int subpel_y_q4 = 2;
  int avg = 0;

  int w = 1;
  int h = 1;

  setup_convolve();

  for (int i = 0; i < filter_size * filter_size; i++) {
    src[i] = rnd.Rand16() % (1 << 8);
  }

  // Both calls start at (filter_center, filter_center) inside the window.
  const uint8_t *const src_center =
      src + src_stride * filter_center + filter_center;

  av1_convolve(src_center, src_stride, dst, dst_stride, w, h, interp_filter,
               subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg);

  const int16_t *x_filter =
      av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);
  const int16_t *y_filter =
      av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);

  aom_convolve8_c(src_center, src_stride, dst1, dst_stride, x_filter,
                  x_step_q4, y_filter, y_step_q4, w, h);
  EXPECT_EQ(dst[0], dst1[0]);
}
// For every (subpel_x_q4, subpel_y_q4) pair in [0, 16) x [0, 16), convolves a
// single output pixel with av1_convolve() and compares it against a reference
// computed inline: each source row is filtered with the x kernel, rounded by
// FILTER_BITS and clipped, then the row results are combined with the y
// kernel, rounded and clipped again.
TEST(AV1ConvolveTest, av1_convolve) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
#if CONFIG_DUAL_FILTER
  InterpFilter interp_filter[4] = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR,
                                    EIGHTTAP_REGULAR, EIGHTTAP_REGULAR };
  InterpFilterParams filter_params =
      av1_get_interp_filter_params(interp_filter[0]);
#else
  InterpFilter interp_filter = EIGHTTAP_REGULAR;
  InterpFilterParams filter_params =
      av1_get_interp_filter_params(interp_filter);
#endif
  int filter_size = filter_params.taps;
  int filter_center = filter_size / 2 - 1;
  uint8_t src[12 * 12];
  int src_stride = filter_size;
  uint8_t dst[1] = { 0 };
  int dst_stride = 1;
  int x_step_q4 = 16;
  int y_step_q4 = 16;
  int avg = 0;
  int w = 1;
  int h = 1;

  setup_convolve();

  for (int i = 0; i < filter_size * filter_size; i++) {
    src[i] = rnd.Rand16() % (1 << 8);
  }

  for (int subpel_x_q4 = 0; subpel_x_q4 < 16; subpel_x_q4++) {
    for (int subpel_y_q4 = 0; subpel_y_q4 < 16; subpel_y_q4++) {
      av1_convolve(src + src_stride * filter_center + filter_center, src_stride,
                   dst, dst_stride, w, h, interp_filter, subpel_x_q4, x_step_q4,
                   subpel_y_q4, y_step_q4, avg);

      const int16_t *x_filter =
          av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);
      const int16_t *y_filter =
          av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);

      // Reference: horizontal pass per row, then vertical accumulation.
      int temp[12];
      int dst_ref = 0;
      for (int r = 0; r < filter_size; r++) {
        temp[r] = 0;
        for (int c = 0; c < filter_size; c++) {
          temp[r] += x_filter[c] * src[r * filter_size + c];
        }
        temp[r] = clip_pixel(ROUND_POWER_OF_TWO(temp[r], FILTER_BITS));
        dst_ref += temp[r] * y_filter[r];
      }
      dst_ref = clip_pixel(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS));
      EXPECT_EQ(dst[0], dst_ref);
    }
  }
}

// Checks the `avg` argument of av1_convolve(). For every sub-pixel position
// pair, two random sources are convolved separately without averaging (dst0,
// dst1). Then src0 is convolved into dst without averaging and src1 is
// convolved into the same dst with avg = 1. The test expects
// dst == ROUND_POWER_OF_TWO(dst0 + dst1, 1).
TEST(AV1ConvolveTest, av1_convolve_avg) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
#if CONFIG_DUAL_FILTER
  InterpFilter interp_filter[4] = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR,
                                    EIGHTTAP_REGULAR, EIGHTTAP_REGULAR };
  InterpFilterParams filter_params =
      av1_get_interp_filter_params(interp_filter[0]);
#else
  InterpFilter interp_filter = EIGHTTAP_REGULAR;
  InterpFilterParams filter_params =
      av1_get_interp_filter_params(interp_filter);
#endif
  int filter_size = filter_params.taps;
  int filter_center = filter_size / 2 - 1;
  uint8_t src0[12 * 12];
  uint8_t src1[12 * 12];
  int src_stride = filter_size;
  uint8_t dst0[1] = { 0 };
  uint8_t dst1[1] = { 0 };
  uint8_t dst[1] = { 0 };
  int dst_stride = 1;
  int x_step_q4 = 16;
  int y_step_q4 = 16;
  const int kNoAvg = 0;
  const int kAvg = 1;

  int w = 1;
  int h = 1;

  setup_convolve();

  for (int i = 0; i < filter_size * filter_size; i++) {
    src0[i] = rnd.Rand16() % (1 << 8);
    src1[i] = rnd.Rand16() % (1 << 8);
  }

  int offset = filter_size * filter_center + filter_center;

  for (int subpel_x_q4 = 0; subpel_x_q4 < 16; subpel_x_q4++) {
    for (int subpel_y_q4 = 0; subpel_y_q4 < 16; subpel_y_q4++) {
      // Individual, non-averaged results for each source.
      av1_convolve(src0 + offset, src_stride, dst0, dst_stride, w, h,
                   interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
                   y_step_q4, kNoAvg);
      av1_convolve(src1 + offset, src_stride, dst1, dst_stride, w, h,
                   interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
                   y_step_q4, kNoAvg);

      // Write src0's result into dst, then average src1's result into it.
      av1_convolve(src0 + offset, src_stride, dst, dst_stride, w, h,
                   interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
                   y_step_q4, kNoAvg);
      av1_convolve(src1 + offset, src_stride, dst, dst_stride, w, h,
                   interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
                   y_step_q4, kAvg);

      EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1));
    }
  }
}

#if CONFIG_AOM_HIGHBITDEPTH
// High bit-depth counterpart of the av1_convolve test (bd = 10). For every
// sub-pixel position pair, convolves one output pixel with
// av1_highbd_convolve() and compares it against an inline reference: rows
// filtered with the x kernel, rounded by FILTER_BITS and clipped with
// clip_pixel_highbd(), then combined with the y kernel, rounded and clipped.
TEST(AV1ConvolveTest, av1_highbd_convolve) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
#if CONFIG_DUAL_FILTER
  InterpFilter interp_filter[4] = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR,
                                    EIGHTTAP_REGULAR, EIGHTTAP_REGULAR };
  InterpFilterParams filter_params =
      av1_get_interp_filter_params(interp_filter[0]);
#else
  InterpFilter interp_filter = EIGHTTAP_REGULAR;
  InterpFilterParams filter_params =
      av1_get_interp_filter_params(interp_filter);
#endif
  int filter_size = filter_params.taps;
  int filter_center = filter_size / 2 - 1;
  uint16_t src[12 * 12];
  int src_stride = filter_size;
  uint16_t dst[1] = { 0 };
  int dst_stride = 1;
  int x_step_q4 = 16;
  int y_step_q4 = 16;
  int avg = 0;
  int bd = 10;
  int w = 1;
  int h = 1;

  for (int i = 0; i < filter_size * filter_size; i++) {
    src[i] = rnd.Rand16() % (1 << bd);
  }

  for (int subpel_x_q4 = 0; subpel_x_q4 < 16; subpel_x_q4++) {
    for (int subpel_y_q4 = 0; subpel_y_q4 < 16; subpel_y_q4++) {
      av1_highbd_convolve(
          CONVERT_TO_BYTEPTR(src + src_stride * filter_center + filter_center),
          src_stride, CONVERT_TO_BYTEPTR(dst), dst_stride, w, h, interp_filter,
          subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg, bd);

      const int16_t *x_filter =
          av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);
      const int16_t *y_filter =
          av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);

      // Reference: horizontal pass per row, then vertical accumulation.
      int temp[12];
      int dst_ref = 0;
      for (int r = 0; r < filter_size; r++) {
        temp[r] = 0;
        for (int c = 0; c < filter_size; c++) {
          temp[r] += x_filter[c] * src[r * filter_size + c];
        }
        temp[r] =
            clip_pixel_highbd(ROUND_POWER_OF_TWO(temp[r], FILTER_BITS), bd);
        dst_ref += temp[r] * y_filter[r];
      }
      dst_ref = clip_pixel_highbd(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS), bd);
      EXPECT_EQ(dst[0], dst_ref);
    }
  }
}

// High bit-depth counterpart of the av1_convolve_avg test (bd = 10). For
// every sub-pixel position pair, dst0 and dst1 hold the non-averaged results
// for src0 and src1. dst is written from src0 without averaging and then
// updated from src1 with avg = 1. The test expects
// dst == ROUND_POWER_OF_TWO(dst0 + dst1, 1).
TEST(AV1ConvolveTest, av1_highbd_convolve_avg) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
#if CONFIG_DUAL_FILTER
  InterpFilter interp_filter[4] = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR,
                                    EIGHTTAP_REGULAR, EIGHTTAP_REGULAR };
  InterpFilterParams filter_params =
      av1_get_interp_filter_params(interp_filter[0]);
#else
  InterpFilter interp_filter = EIGHTTAP_REGULAR;
  InterpFilterParams filter_params =
      av1_get_interp_filter_params(interp_filter);
#endif
  int filter_size = filter_params.taps;
  int filter_center = filter_size / 2 - 1;
  uint16_t src0[12 * 12];
  uint16_t src1[12 * 12];
  int src_stride = filter_size;
  uint16_t dst0[1] = { 0 };
  uint16_t dst1[1] = { 0 };
  uint16_t dst[1] = { 0 };
  int dst_stride = 1;
  int x_step_q4 = 16;
  int y_step_q4 = 16;
  const int kNoAvg = 0;
  const int kAvg = 1;
  int bd = 10;

  int w = 1;
  int h = 1;

  for (int i = 0; i < filter_size * filter_size; i++) {
    src0[i] = rnd.Rand16() % (1 << bd);
    src1[i] = rnd.Rand16() % (1 << bd);
  }

  // Loop-invariant: position of (filter_center, filter_center) in the window.
  int offset = filter_size * filter_center + filter_center;

  for (int subpel_x_q4 = 0; subpel_x_q4 < 16; subpel_x_q4++) {
    for (int subpel_y_q4 = 0; subpel_y_q4 < 16; subpel_y_q4++) {
      // Individual, non-averaged results for each source.
      av1_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride,
                          CONVERT_TO_BYTEPTR(dst0), dst_stride, w, h,
                          interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
                          y_step_q4, kNoAvg, bd);
      av1_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride,
                          CONVERT_TO_BYTEPTR(dst1), dst_stride, w, h,
                          interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
                          y_step_q4, kNoAvg, bd);

      // Write src0's result into dst, then average src1's result into it.
      av1_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride,
                          CONVERT_TO_BYTEPTR(dst), dst_stride, w, h,
                          interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
                          y_step_q4, kNoAvg, bd);
      av1_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride,
                          CONVERT_TO_BYTEPTR(dst), dst_stride, w, h,
                          interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
                          y_step_q4, kAvg, bd);

      EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1));
    }
  }
}
#endif  // CONFIG_AOM_HIGHBITDEPTH

#define CONVOLVE_SPEED_TEST 0
#if CONVOLVE_SPEED_TEST
// Defines a gtest case named <func>_speed_<block_size>_<frame_size> that
// calls the high bit-depth convolve function `func` 100000 times on
// block_size x block_size blocks of a (frame_size + 7)-stride buffer filled
// with random 10-bit samples. Nothing is asserted; the case exists only to be
// timed.
//
// The block position advances by `w` along a row (row_offset) and by `h` to
// the next row of blocks (col_offset), wrapping back to the origin. The row
// test uses row_offset: the earlier form compared the constant `offset`, which
// is never below frame_size, so row_offset never advanced.
//
// Field, enum and kernel-lookup names match the ones used by the functional
// tests in this file (taps, EIGHTTAP_REGULAR,
// av1_get_interp_filter_subpel_kernel).
#define highbd_convolve_speed(func, block_size, frame_size)                  \
  TEST(AV1ConvolveTest, func##_speed_##block_size##_##frame_size) {          \
    ACMRandom rnd(ACMRandom::DeterministicSeed());                           \
    InterpFilter interp_filter = EIGHTTAP_REGULAR;                           \
    InterpFilterParams filter_params =                                       \
        av1_get_interp_filter_params(interp_filter);                         \
    int filter_size = filter_params.taps;                                    \
    int filter_center = filter_size / 2 - 1;                                 \
    DECLARE_ALIGNED(16, uint16_t,                                            \
                    src[(frame_size + 7) * (frame_size + 7)]) = { 0 };       \
    int src_stride = frame_size + 7;                                         \
    DECLARE_ALIGNED(16, uint16_t, dst[frame_size * frame_size]) = { 0 };     \
    int dst_stride = frame_size;                                             \
    int x_step_q4 = 16;                                                      \
    int y_step_q4 = 16;                                                      \
    int subpel_x_q4 = 8;                                                     \
    int subpel_y_q4 = 6;                                                     \
    int bd = 10;                                                             \
                                                                             \
    int w = block_size;                                                      \
    int h = block_size;                                                      \
                                                                             \
    const int16_t *filter_x =                                                \
        av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);     \
    const int16_t *filter_y =                                                \
        av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);     \
                                                                             \
    for (int i = 0; i < src_stride * src_stride; i++) {                      \
      src[i] = rnd.Rand16() % (1 << bd);                                     \
    }                                                                        \
                                                                             \
    int offset = filter_center * src_stride + filter_center;                 \
    int row_offset = 0;                                                      \
    int col_offset = 0;                                                      \
    for (int i = 0; i < 100000; i++) {                                       \
      int src_total_offset = offset + col_offset * src_stride + row_offset;  \
      int dst_total_offset = col_offset * dst_stride + row_offset;           \
      func(CONVERT_TO_BYTEPTR(src + src_total_offset), src_stride,           \
           CONVERT_TO_BYTEPTR(dst + dst_total_offset), dst_stride, filter_x, \
           x_step_q4, filter_y, y_step_q4, w, h, bd);                        \
      if (row_offset + w + w < frame_size) {                                 \
        row_offset += w;                                                     \
      } else {                                                               \
        row_offset = 0;                                                      \
        col_offset += h;                                                     \
      }                                                                      \
      if (col_offset + h >= frame_size) {                                    \
        col_offset = 0;                                                      \
      }                                                                      \
    }                                                                        \
  }

// Defines a gtest case named <func>_speed_l_<block_size>_<frame_size> that
// calls the 8-bit convolve function `func` 100000 times on
// block_size x block_size blocks of a (frame_size + 7)-stride buffer filled
// with random 8-bit samples. Nothing is asserted; the case exists only to be
// timed.
//
// The block position advances by `w` along a row (row_offset) and by `h` to
// the next row of blocks (col_offset), wrapping back to the origin, and is
// applied to both the source and destination pointers, mirroring
// highbd_convolve_speed. Previously the offsets were computed but never used,
// so every iteration touched the same block, and the row test compared the
// constant `offset` instead of row_offset.
//
// Field, enum and kernel-lookup names match the ones used by the functional
// tests in this file (taps, EIGHTTAP_REGULAR,
// av1_get_interp_filter_subpel_kernel).
#define lowbd_convolve_speed(func, block_size, frame_size)                  \
  TEST(AV1ConvolveTest, func##_speed_l_##block_size##_##frame_size) {       \
    ACMRandom rnd(ACMRandom::DeterministicSeed());                          \
    InterpFilter interp_filter = EIGHTTAP_REGULAR;                          \
    InterpFilterParams filter_params =                                      \
        av1_get_interp_filter_params(interp_filter);                        \
    int filter_size = filter_params.taps;                                   \
    int filter_center = filter_size / 2 - 1;                                \
    DECLARE_ALIGNED(16, uint8_t, src[(frame_size + 7) * (frame_size + 7)]); \
    int src_stride = frame_size + 7;                                        \
    DECLARE_ALIGNED(16, uint8_t, dst[frame_size * frame_size]);             \
    int dst_stride = frame_size;                                            \
    int x_step_q4 = 16;                                                     \
    int y_step_q4 = 16;                                                     \
    int subpel_x_q4 = 8;                                                    \
    int subpel_y_q4 = 6;                                                    \
    int bd = 8;                                                             \
                                                                            \
    int w = block_size;                                                     \
    int h = block_size;                                                     \
                                                                            \
    const int16_t *filter_x =                                               \
        av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);    \
    const int16_t *filter_y =                                               \
        av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);    \
                                                                            \
    for (int i = 0; i < src_stride * src_stride; i++) {                     \
      src[i] = rnd.Rand16() % (1 << bd);                                    \
    }                                                                       \
                                                                            \
    int offset = filter_center * src_stride + filter_center;                \
    int row_offset = 0;                                                     \
    int col_offset = 0;                                                     \
    for (int i = 0; i < 100000; i++) {                                      \
      int src_total_offset = offset + col_offset * src_stride + row_offset; \
      int dst_total_offset = col_offset * dst_stride + row_offset;          \
      func(src + src_total_offset, src_stride, dst + dst_total_offset,      \
           dst_stride, filter_x, x_step_q4, filter_y, y_step_q4, w, h);     \
      if (row_offset + w + w < frame_size) {                                \
        row_offset += w;                                                    \
      } else {                                                              \
        row_offset = 0;                                                     \
        col_offset += h;                                                    \
      }                                                                     \
      if (col_offset + h >= frame_size) {                                   \
        col_offset = 0;                                                     \
      }                                                                     \
    }                                                                       \
  }

// This experiment shows that when the frame size is 64x64,
// aom_highbd_convolve8_sse2 and aom_convolve8_sse2 run at similar speeds.
// However, when the frame size becomes 1024x1024,
// aom_highbd_convolve8_sse2 is around 50% slower than aom_convolve8_sse2.
// We think the bottleneck is memory I/O.
highbd_convolve_speed(aom_highbd_convolve8_sse2, 8, 64);
highbd_convolve_speed(aom_highbd_convolve8_sse2, 16, 64);
highbd_convolve_speed(aom_highbd_convolve8_sse2, 32, 64);
highbd_convolve_speed(aom_highbd_convolve8_sse2, 64, 64);

lowbd_convolve_speed(aom_convolve8_sse2, 8, 64);
lowbd_convolve_speed(aom_convolve8_sse2, 16, 64);
lowbd_convolve_speed(aom_convolve8_sse2, 32, 64);
lowbd_convolve_speed(aom_convolve8_sse2, 64, 64);

highbd_convolve_speed(aom_highbd_convolve8_sse2, 8, 1024);
highbd_convolve_speed(aom_highbd_convolve8_sse2, 16, 1024);
highbd_convolve_speed(aom_highbd_convolve8_sse2, 32, 1024);
highbd_convolve_speed(aom_highbd_convolve8_sse2, 64, 1024);

lowbd_convolve_speed(aom_convolve8_sse2, 8, 1024);
lowbd_convolve_speed(aom_convolve8_sse2, 16, 1024);
lowbd_convolve_speed(aom_convolve8_sse2, 32, 1024);
lowbd_convolve_speed(aom_convolve8_sse2, 64, 1024);
#endif  // CONVOLVE_SPEED_TEST
}  // namespace