av1_convolve_test.cc 22.1 KB
Newer Older
Yaowu Xu's avatar
Yaowu Xu committed
1
2
3
4
5
6
7
8
9
10
11
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

12
13
#include "third_party/googletest/src/include/gtest/gtest.h"

Yaowu Xu's avatar
Yaowu Xu committed
14
15
#include "./av1_rtcd.h"
#include "./aom_dsp_rtcd.h"
16
#include "test/acm_random.h"
17
#include "av1/common/filter.h"
18
#include "av1/common/convolve.h"
Yaowu Xu's avatar
Yaowu Xu committed
19
#include "aom_dsp/aom_dsp_common.h"
20
#include "aom_ports/mem.h"
21

22
using libaom_test::ACMRandom;
23
24

namespace {
25
// Points av1_convolve_horiz / av1_convolve_vert at their plain-C versions.
// Only compiled in when both HAVE_SSSE3 and CONFIG_RUNTIME_CPU_DETECT are set;
// otherwise this function is a no-op.
// NOTE(review): presumably these are RTCD dispatch pointers and the override
// keeps the tests comparing against the C reference path -- confirm.
void setup_convolve() {
#if HAVE_SSSE3 && CONFIG_RUNTIME_CPU_DETECT
  av1_convolve_vert = av1_convolve_vert_c;
  av1_convolve_horiz = av1_convolve_horiz_c;
#endif
}

Yaowu Xu's avatar
Yaowu Xu committed
32
// Checks that av1_convolve() and aom_convolve8_c() produce the same value for
// a single output pixel (w = h = 1) at the fixed sub-pixel position
// (subpel_x_q4, subpel_y_q4) = (3, 2) using the EIGHTTAP_REGULAR filter.
TEST(AV1ConvolveTest, av1_convolve8) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
#if CONFIG_DUAL_FILTER
  InterpFilter interp_filter[4] = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR,
                                    EIGHTTAP_REGULAR, EIGHTTAP_REGULAR };
  InterpFilterParams filter_params =
      av1_get_interp_filter_params(interp_filter[0]);
#else
  InterpFilter interp_filter = EIGHTTAP_REGULAR;
  InterpFilterParams filter_params =
      av1_get_interp_filter_params(interp_filter);
#endif
  int filter_size = filter_params.taps;
  int filter_center = filter_size / 2 - 1;
  uint8_t src[12 * 12];
  // The source window is filter_size x filter_size, stored contiguously.
  int src_stride = filter_size;
  uint8_t dst[1] = { 0 };
  uint8_t dst1[1] = { 0 };
  int dst_stride = 1;
  int x_step_q4 = 16;
  int y_step_q4 = 16;
  int subpel_x_q4 = 3;
  int subpel_y_q4 = 2;
  const int plane = 0;

  int w = 1;
  int h = 1;

  ConvolveParams conv_params = get_conv_params(0, plane);

  // src holds at most 12 x 12 samples; bail out before indexing it with a
  // larger tap count (same guard as the av1_convolve test).
  ASSERT_LE(filter_size, 12);
  setup_convolve();

  for (int i = 0; i < filter_size * filter_size; i++) {
    src[i] = rnd.Rand16() % (1 << 8);
  }

  av1_convolve(src + src_stride * filter_center + filter_center, src_stride,
               dst, dst_stride, w, h, interp_filter, subpel_x_q4, x_step_q4,
               subpel_y_q4, y_step_q4, &conv_params);

  const int16_t *x_filter =
      av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);
  const int16_t *y_filter =
      av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);

  // Reference result; uses the same step sizes that were given to
  // av1_convolve() above.
  aom_convolve8_c(src + src_stride * filter_center + filter_center, src_stride,
                  dst1, dst_stride, x_filter, x_step_q4, y_filter, y_step_q4,
                  w, h);
  EXPECT_EQ(dst[0], dst1[0]);
}
Yaowu Xu's avatar
Yaowu Xu committed
81
// Sweeps every (subpel_x_q4, subpel_y_q4) pair in [0, SUBPEL_SHIFTS) and
// checks that av1_convolve() on a single output pixel equals a reference
// computed here: filter each row horizontally, round and clip, then combine
// the row results vertically, round and clip again.
TEST(AV1ConvolveTest, av1_convolve) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
#if CONFIG_DUAL_FILTER
  InterpFilter interp_filter[4] = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR,
                                    EIGHTTAP_REGULAR, EIGHTTAP_REGULAR };
  InterpFilterParams filter_params =
      av1_get_interp_filter_params(interp_filter[0]);
#else
  InterpFilter interp_filter = EIGHTTAP_REGULAR;
  InterpFilterParams filter_params =
      av1_get_interp_filter_params(interp_filter);
#endif
  const int filter_size = filter_params.taps;
  const int filter_center = filter_size / 2 - 1;
  const int src_stride = filter_size;
  const int dst_stride = 1;
  const int x_step_q4 = 16;
  const int y_step_q4 = 16;
  const int w = 1;
  const int h = 1;
  const int plane = 0;
  uint8_t src[12 * 12];
  uint8_t dst[1] = { 0 };

  ConvolveParams conv_params = get_conv_params(0, plane);

  // src is 12 x 12, so the tap count must not exceed 12.
  ASSERT_LE(filter_size, 12);
  setup_convolve();

  const int src_len = static_cast<int>(sizeof(src) / sizeof(src[0]));
  for (int idx = 0; idx < src_len; ++idx) {
    src[idx] = rnd.Rand16() % (1 << 8);
  }

  // Sample that lines up with the centre tap of both filters.
  uint8_t *src_center = src + src_stride * filter_center + filter_center;

  for (int subpel_x_q4 = 0; subpel_x_q4 < SUBPEL_SHIFTS; ++subpel_x_q4) {
    for (int subpel_y_q4 = 0; subpel_y_q4 < SUBPEL_SHIFTS; ++subpel_y_q4) {
      av1_convolve(src_center, src_stride, dst, dst_stride, w, h,
                   interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
                   y_step_q4, &conv_params);

      const int16_t *x_filter =
          av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);
      const int16_t *y_filter =
          av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);

      int dst_ref = 0;
      for (int row = 0; row < filter_size; ++row) {
        // Horizontal pass over one source row.
        int row_sum = 0;
        for (int col = 0; col < filter_size; ++col) {
          row_sum += x_filter[col] * src[row * filter_size + col];
        }
        const int row_pixel =
            clip_pixel(ROUND_POWER_OF_TWO(row_sum, FILTER_BITS));
        // Vertical pass accumulates the filtered rows.
        dst_ref += row_pixel * y_filter[row];
      }
      dst_ref = clip_pixel(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS));
      EXPECT_EQ(dst[0], dst_ref);
    }
  }
}

144
#if CONFIG_DUAL_FILTER
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
// Dual-filter case with different filters per direction: the x kernel comes
// from interp_filter[1] (MULTITAP_SHARP) and the y kernel from
// interp_filter[0] (EIGHTTAP_REGULAR). For every non-zero sub-pixel pair the
// output of av1_convolve() must equal a reference that filters each column
// vertically first, rounds and clips, and then combines the column results
// horizontally.
TEST(AV1ConvolveTest, av1_convolve_vert_first) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  InterpFilter interp_filter[4] = { EIGHTTAP_REGULAR, MULTITAP_SHARP,
                                    EIGHTTAP_REGULAR, MULTITAP_SHARP };
  InterpFilterParams filter_params_x =
      av1_get_interp_filter_params(interp_filter[1]);
  InterpFilterParams filter_params_y =
      av1_get_interp_filter_params(interp_filter[0]);
  const int filter_size_x = filter_params_x.taps;
  const int filter_size_y = filter_params_y.taps;
  const int filter_center_x = filter_size_x / 2 - 1;
  const int filter_center_y = filter_size_y / 2 - 1;
  const int src_stride = filter_size_x;
  const int dst_stride = 1;
  const int x_step_q4 = 16;
  const int y_step_q4 = 16;
  const int w = 1;
  const int h = 1;
  const int plane = 0;
  uint8_t src[12 * 12];
  uint8_t dst[1] = { 0 };

  ConvolveParams conv_params = get_conv_params(0, plane);

  // src is 12 x 12, so neither tap count may exceed 12.
  ASSERT_LE(filter_size_x, 12);
  ASSERT_LE(filter_size_y, 12);
  setup_convolve();

  const int src_len = static_cast<int>(sizeof(src) / sizeof(src[0]));
  for (int idx = 0; idx < src_len; ++idx) {
    src[idx] = rnd.Rand16() % (1 << 8);
  }

  uint8_t *src_center = src + src_stride * filter_center_y + filter_center_x;

  // Both sweeps start at 1: the zero sub-pixel offsets are not covered here.
  for (int subpel_x_q4 = 1; subpel_x_q4 < SUBPEL_SHIFTS; ++subpel_x_q4) {
    for (int subpel_y_q4 = 1; subpel_y_q4 < SUBPEL_SHIFTS; ++subpel_y_q4) {
      av1_convolve(src_center, src_stride, dst, dst_stride, w, h,
                   interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
                   y_step_q4, &conv_params);

      const int16_t *x_filter =
          av1_get_interp_filter_subpel_kernel(filter_params_x, subpel_x_q4);
      const int16_t *y_filter =
          av1_get_interp_filter_subpel_kernel(filter_params_y, subpel_y_q4);

      int dst_ref = 0;
      for (int col = 0; col < filter_size_x; ++col) {
        // Vertical pass over one source column.
        int col_sum = 0;
        for (int row = 0; row < filter_size_y; ++row) {
          col_sum += y_filter[row] * src[row * filter_size_x + col];
        }
        const int col_pixel =
            clip_pixel(ROUND_POWER_OF_TWO(col_sum, FILTER_BITS));
        // Horizontal pass accumulates the filtered columns.
        dst_ref += col_pixel * x_filter[col];
      }
      dst_ref = clip_pixel(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS));
      EXPECT_EQ(dst[0], dst_ref);
    }
  }
}
#endif

Yaowu Xu's avatar
Yaowu Xu committed
209
// For every sub-pixel pair, convolves two independent random sources
// separately (dst0, dst1) and then back-to-back into one destination with
// conv_params.ref set to 0 and then 1. The test expects the combined result
// to equal the rounded average of the two separate results.
TEST(AV1ConvolveTest, av1_convolve_avg) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
#if CONFIG_DUAL_FILTER
  InterpFilter interp_filter[4] = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR,
                                    EIGHTTAP_REGULAR, EIGHTTAP_REGULAR };
  InterpFilterParams filter_params =
      av1_get_interp_filter_params(interp_filter[0]);
#else
  InterpFilter interp_filter = EIGHTTAP_REGULAR;
  InterpFilterParams filter_params =
      av1_get_interp_filter_params(interp_filter);
#endif
  int filter_size = filter_params.taps;
  int filter_center = filter_size / 2 - 1;
  uint8_t src0[12 * 12];
  uint8_t src1[12 * 12];
  int src_stride = filter_size;
  uint8_t dst0[1] = { 0 };
  uint8_t dst1[1] = { 0 };
  uint8_t dst[1] = { 0 };
  int dst_stride = 1;
  int x_step_q4 = 16;
  int y_step_q4 = 16;

  int w = 1;
  int h = 1;
  const int plane = 0;

  int subpel_x_q4;
  int subpel_y_q4;

  ConvolveParams conv_params = get_conv_params(0, plane);

  // src0/src1 hold at most 12 x 12 samples; bail out before indexing them
  // with a larger tap count (same guard as the av1_convolve test).
  ASSERT_LE(filter_size, 12);
  setup_convolve();

  for (int i = 0; i < filter_size * filter_size; i++) {
    src0[i] = rnd.Rand16() % (1 << 8);
    src1[i] = rnd.Rand16() % (1 << 8);
  }

  // Index of the sample aligned with the centre tap of both filters.
  int offset = filter_size * filter_center + filter_center;

  for (subpel_x_q4 = 0; subpel_x_q4 < SUBPEL_SHIFTS; subpel_x_q4++) {
    for (subpel_y_q4 = 0; subpel_y_q4 < SUBPEL_SHIFTS; subpel_y_q4++) {
      // Individual predictions, each written with ref = 0.
      conv_params.ref = 0;
      av1_convolve(src0 + offset, src_stride, dst0, dst_stride, w, h,
                   interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
                   y_step_q4, &conv_params);
      conv_params.ref = 0;
      av1_convolve(src1 + offset, src_stride, dst1, dst_stride, w, h,
                   interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
                   y_step_q4, &conv_params);

      // Combined prediction: first source with ref = 0, second with ref = 1,
      // both into the same destination.
      conv_params.ref = 0;
      av1_convolve(src0 + offset, src_stride, dst, dst_stride, w, h,
                   interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
                   y_step_q4, &conv_params);
      conv_params.ref = 1;
      av1_convolve(src1 + offset, src_stride, dst, dst_stride, w, h,
                   interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
                   y_step_q4, &conv_params);

      EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
276
277
#if CONFIG_AOM_HIGHBITDEPTH
// High bit-depth (bd = 10) counterpart of the av1_convolve test: for every
// sub-pixel pair, av1_highbd_convolve() on a single output pixel must equal a
// reference computed here (horizontal pass per row, round and clip to bd
// bits, then vertical pass, round and clip again).
TEST(AV1ConvolveTest, av1_highbd_convolve) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
#if CONFIG_DUAL_FILTER
  InterpFilter interp_filter[4] = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR,
                                    EIGHTTAP_REGULAR, EIGHTTAP_REGULAR };
  InterpFilterParams filter_params =
      av1_get_interp_filter_params(interp_filter[0]);
#else
  InterpFilter interp_filter = EIGHTTAP_REGULAR;
  InterpFilterParams filter_params =
      av1_get_interp_filter_params(interp_filter);
#endif
  int filter_size = filter_params.taps;
  int filter_center = filter_size / 2 - 1;
  uint16_t src[12 * 12];
  int src_stride = filter_size;
  uint16_t dst[1] = { 0 };
  int dst_stride = 1;
  int x_step_q4 = 16;
  int y_step_q4 = 16;
  int avg = 0;
  int bd = 10;
  int w = 1;
  int h = 1;

  int subpel_x_q4;
  int subpel_y_q4;

  // src (12 x 12) and temp (12 entries) below are both indexed by
  // filter_size; bail out before overrunning them.
  ASSERT_LE(filter_size, 12);

  for (int i = 0; i < filter_size * filter_size; i++) {
    src[i] = rnd.Rand16() % (1 << bd);
  }

  for (subpel_x_q4 = 0; subpel_x_q4 < SUBPEL_SHIFTS; subpel_x_q4++) {
    for (subpel_y_q4 = 0; subpel_y_q4 < SUBPEL_SHIFTS; subpel_y_q4++) {
      av1_highbd_convolve(
          CONVERT_TO_BYTEPTR(src + src_stride * filter_center + filter_center),
          src_stride, CONVERT_TO_BYTEPTR(dst), dst_stride, w, h, interp_filter,
          subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg, bd);

      const int16_t *x_filter =
          av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);
      const int16_t *y_filter =
          av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);

      int temp[12];
      int dst_ref = 0;
      for (int r = 0; r < filter_size; r++) {
        // Horizontal pass over row r.
        temp[r] = 0;
        for (int c = 0; c < filter_size; c++) {
          temp[r] += x_filter[c] * src[r * filter_size + c];
        }
        temp[r] =
            clip_pixel_highbd(ROUND_POWER_OF_TWO(temp[r], FILTER_BITS), bd);
        // Vertical pass accumulates the filtered rows.
        dst_ref += temp[r] * y_filter[r];
      }
      dst_ref = clip_pixel_highbd(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS), bd);
      EXPECT_EQ(dst[0], dst_ref);
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
338
// High bit-depth (bd = 10) counterpart of the av1_convolve_avg test: two
// random sources are convolved separately with avg = 0 (dst0, dst1) and then
// back-to-back into one destination with avg = 0 followed by avg = 1. The
// test expects the combined result to equal the rounded average of the two
// separate results.
TEST(AV1ConvolveTest, av1_highbd_convolve_avg) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
#if CONFIG_DUAL_FILTER
  InterpFilter interp_filter[4] = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR,
                                    EIGHTTAP_REGULAR, EIGHTTAP_REGULAR };
  InterpFilterParams filter_params =
      av1_get_interp_filter_params(interp_filter[0]);
#else
  InterpFilter interp_filter = EIGHTTAP_REGULAR;
  InterpFilterParams filter_params =
      av1_get_interp_filter_params(interp_filter);
#endif
  int filter_size = filter_params.taps;
  int filter_center = filter_size / 2 - 1;
  uint16_t src0[12 * 12];
  uint16_t src1[12 * 12];
  int src_stride = filter_size;
  uint16_t dst0[1] = { 0 };
  uint16_t dst1[1] = { 0 };
  uint16_t dst[1] = { 0 };
  int dst_stride = 1;
  int x_step_q4 = 16;
  int y_step_q4 = 16;
  int avg = 0;
  int bd = 10;

  int w = 1;
  int h = 1;

  int subpel_x_q4;
  int subpel_y_q4;

  // src0/src1 hold at most 12 x 12 samples; bail out before indexing them
  // with a larger tap count.
  ASSERT_LE(filter_size, 12);

  for (int i = 0; i < filter_size * filter_size; i++) {
    src0[i] = rnd.Rand16() % (1 << bd);
    src1[i] = rnd.Rand16() % (1 << bd);
  }

  // Index of the sample aligned with the centre tap of both filters; it does
  // not depend on the sub-pixel position, so compute it once.
  const int offset = filter_size * filter_center + filter_center;

  for (subpel_x_q4 = 0; subpel_x_q4 < SUBPEL_SHIFTS; subpel_x_q4++) {
    for (subpel_y_q4 = 0; subpel_y_q4 < SUBPEL_SHIFTS; subpel_y_q4++) {
      // Individual predictions, each written with avg = 0.
      avg = 0;
      av1_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride,
                          CONVERT_TO_BYTEPTR(dst0), dst_stride, w, h,
                          interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
                          y_step_q4, avg, bd);
      avg = 0;
      av1_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride,
                          CONVERT_TO_BYTEPTR(dst1), dst_stride, w, h,
                          interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
                          y_step_q4, avg, bd);

      // Combined prediction: first source with avg = 0, second with avg = 1,
      // both into the same destination.
      avg = 0;
      av1_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride,
                          CONVERT_TO_BYTEPTR(dst), dst_stride, w, h,
                          interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
                          y_step_q4, avg, bd);
      avg = 1;
      av1_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride,
                          CONVERT_TO_BYTEPTR(dst), dst_stride, w, h,
                          interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
                          y_step_q4, avg, bd);

      EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1));
    }
  }
}
Yaowu Xu's avatar
Yaowu Xu committed
405
#endif  // CONFIG_AOM_HIGHBITDEPTH
Angie Chiang's avatar
Angie Chiang committed
406
407
408
409

#define CONVOLVE_SPEED_TEST 0
#if CONVOLVE_SPEED_TEST
// Defines a gtest case that calls the high bit-depth convolve function `func`
// 100000 times on block_size x block_size blocks, stepping the block position
// across a frame_size x frame_size destination.
//   func       - function under test; called with byte-pointer-converted
//                src/dst, strides, the x/y kernels and steps, w, h and bd.
//   block_size - width and height of each convolved block.
//   frame_size - width and height of the destination; the source carries 7
//                extra rows/columns of filter margin.
// Filter lookups use the same names as the correctness tests in this file
// (EIGHTTAP_REGULAR, .taps, av1_get_interp_filter_subpel_kernel).
#define highbd_convolve_speed(func, block_size, frame_size)                  \
  TEST(AV1ConvolveTest, func##_speed_##block_size##_##frame_size) {          \
    ACMRandom rnd(ACMRandom::DeterministicSeed());                           \
    InterpFilter interp_filter = EIGHTTAP_REGULAR;                           \
    InterpFilterParams filter_params =                                       \
        av1_get_interp_filter_params(interp_filter);                         \
    int filter_size = filter_params.taps;                                    \
    int filter_center = filter_size / 2 - 1;                                 \
    DECLARE_ALIGNED(16, uint16_t,                                            \
                    src[(frame_size + 7) * (frame_size + 7)]) = { 0 };       \
    int src_stride = frame_size + 7;                                         \
    DECLARE_ALIGNED(16, uint16_t, dst[frame_size * frame_size]) = { 0 };     \
    int dst_stride = frame_size;                                             \
    int x_step_q4 = 16;                                                      \
    int y_step_q4 = 16;                                                      \
    int subpel_x_q4 = 8;                                                     \
    int subpel_y_q4 = 6;                                                     \
    int bd = 10;                                                             \
                                                                             \
    int w = block_size;                                                      \
    int h = block_size;                                                      \
                                                                             \
    const int16_t *filter_x =                                                \
        av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);     \
    const int16_t *filter_y =                                                \
        av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);     \
                                                                             \
    for (int i = 0; i < src_stride * src_stride; i++) {                      \
      src[i] = rnd.Rand16() % (1 << bd);                                     \
    }                                                                        \
                                                                             \
    int offset = filter_center * src_stride + filter_center;                 \
    int row_offset = 0;                                                      \
    int col_offset = 0;                                                      \
    for (int i = 0; i < 100000; i++) {                                       \
      int src_total_offset = offset + col_offset * src_stride + row_offset;  \
      int dst_total_offset = col_offset * dst_stride + row_offset;           \
      func(CONVERT_TO_BYTEPTR(src + src_total_offset), src_stride,           \
           CONVERT_TO_BYTEPTR(dst + dst_total_offset), dst_stride, filter_x, \
           x_step_q4, filter_y, y_step_q4, w, h, bd);                        \
      if (row_offset + w + w < frame_size) {                                 \
        row_offset += w;                                                     \
      } else {                                                               \
        row_offset = 0;                                                      \
        col_offset += h;                                                     \
      }                                                                      \
      if (col_offset + h >= frame_size) {                                    \
        col_offset = 0;                                                      \
      }                                                                      \
    }                                                                        \
  }

// Defines a gtest case that calls the 8-bit convolve function `func` 100000
// times on block_size x block_size blocks, stepping the block position across
// a frame_size x frame_size destination (mirrors highbd_convolve_speed).
//   func       - function under test; called with src/dst, strides, the x/y
//                kernels and steps, w and h.
//   block_size - width and height of each convolved block.
//   frame_size - width and height of the destination; the source carries 7
//                extra rows/columns of filter margin.
// Filter lookups use the same names as the correctness tests in this file
// (EIGHTTAP_REGULAR, .taps, av1_get_interp_filter_subpel_kernel).
#define lowbd_convolve_speed(func, block_size, frame_size)                  \
  TEST(AV1ConvolveTest, func##_speed_l_##block_size##_##frame_size) {       \
    ACMRandom rnd(ACMRandom::DeterministicSeed());                          \
    InterpFilter interp_filter = EIGHTTAP_REGULAR;                          \
    InterpFilterParams filter_params =                                      \
        av1_get_interp_filter_params(interp_filter);                        \
    int filter_size = filter_params.taps;                                   \
    int filter_center = filter_size / 2 - 1;                                \
    DECLARE_ALIGNED(16, uint8_t, src[(frame_size + 7) * (frame_size + 7)]); \
    int src_stride = frame_size + 7;                                        \
    DECLARE_ALIGNED(16, uint8_t, dst[frame_size * frame_size]);             \
    int dst_stride = frame_size;                                            \
    int x_step_q4 = 16;                                                     \
    int y_step_q4 = 16;                                                     \
    int subpel_x_q4 = 8;                                                    \
    int subpel_y_q4 = 6;                                                    \
    int bd = 8;                                                             \
                                                                            \
    int w = block_size;                                                     \
    int h = block_size;                                                     \
                                                                            \
    const int16_t *filter_x =                                               \
        av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);    \
    const int16_t *filter_y =                                               \
        av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);    \
                                                                            \
    for (int i = 0; i < src_stride * src_stride; i++) {                     \
      src[i] = rnd.Rand16() % (1 << bd);                                    \
    }                                                                       \
                                                                            \
    int offset = filter_center * src_stride + filter_center;                \
    int row_offset = 0;                                                     \
    int col_offset = 0;                                                     \
    for (int i = 0; i < 100000; i++) {                                      \
      int src_total_offset = offset + col_offset * src_stride + row_offset; \
      int dst_total_offset = col_offset * dst_stride + row_offset;          \
      func(src + src_total_offset, src_stride, dst + dst_total_offset,      \
           dst_stride, filter_x, x_step_q4, filter_y, y_step_q4, w, h);     \
      if (row_offset + w + w < frame_size) {                                \
        row_offset += w;                                                    \
      } else {                                                              \
        row_offset = 0;                                                     \
        col_offset += h;                                                    \
      }                                                                     \
      if (col_offset + h >= frame_size) {                                   \
        col_offset = 0;                                                     \
      }                                                                     \
    }                                                                       \
  }

// This experiment shows that when the frame size is 64x64,
// aom_highbd_convolve8_sse2 and aom_convolve8_sse2 run at similar speeds.
// However, when the frame size becomes 1024x1024,
// aom_highbd_convolve8_sse2 is around 50% slower than aom_convolve8_sse2;
// we think the bottleneck is memory I/O.
// Speed-test instantiations. Macro arguments are
// (function under test, block_size, frame_size).

// 64x64 frame, high bit-depth, block sizes 8/16/32/64.
highbd_convolve_speed(aom_highbd_convolve8_sse2, 8, 64);
highbd_convolve_speed(aom_highbd_convolve8_sse2, 16, 64);
highbd_convolve_speed(aom_highbd_convolve8_sse2, 32, 64);
highbd_convolve_speed(aom_highbd_convolve8_sse2, 64, 64);

// 64x64 frame, low bit-depth, block sizes 8/16/32/64.
lowbd_convolve_speed(aom_convolve8_sse2, 8, 64);
lowbd_convolve_speed(aom_convolve8_sse2, 16, 64);
lowbd_convolve_speed(aom_convolve8_sse2, 32, 64);
lowbd_convolve_speed(aom_convolve8_sse2, 64, 64);

// 1024x1024 frame, high bit-depth, block sizes 8/16/32/64.
highbd_convolve_speed(aom_highbd_convolve8_sse2, 8, 1024);
highbd_convolve_speed(aom_highbd_convolve8_sse2, 16, 1024);
highbd_convolve_speed(aom_highbd_convolve8_sse2, 32, 1024);
highbd_convolve_speed(aom_highbd_convolve8_sse2, 64, 1024);

// 1024x1024 frame, low bit-depth, block sizes 8/16/32/64.
lowbd_convolve_speed(aom_convolve8_sse2, 8, 1024);
lowbd_convolve_speed(aom_convolve8_sse2, 16, 1024);
lowbd_convolve_speed(aom_convolve8_sse2, 32, 1024);
lowbd_convolve_speed(aom_convolve8_sse2, 64, 1024);
Angie Chiang's avatar
Angie Chiang committed
533
#endif  // CONVOLVE_SPEED_TEST
534
}  // namespace