dering_test.cc 15.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/

#include <cstdlib>
#include <string>

#include "third_party/googletest/src/googletest/include/gtest/gtest.h"

#include "./aom_config.h"
#include "./av1_rtcd.h"
#include "aom_ports/aom_timer.h"
#include "av1/common/od_dering.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"

using libaom_test::ACMRandom;

namespace {

30 31 32
// Parameter tuple for the directional dering filter tests:
//   <0> the filter implementation under test,
//   <1> the reference implementation it is compared against,
//   <2> an integer block size (4 or 8 in the instantiations in this file).
typedef std::tr1::tuple<od_filter_dering_direction_func,
                        od_filter_dering_direction_func, int>
    dering_dir_param_t;
33

34
// Value-parameterized fixture for the directional dering filter. Each
// parameter supplies the implementation under test, the reference
// implementation and a block size; SetUp() copies them into the members
// below so that the TEST_P bodies can use them directly.
class CDEFDeringDirTest : public ::testing::TestWithParam<dering_dir_param_t> {
 public:
  virtual ~CDEFDeringDirTest() {}

  // Unpacks the parameter tuple into the fixture members.
  virtual void SetUp() {
    bsize = GET_PARAM(2);
    ref_dering = GET_PARAM(1);
    dering = GET_PARAM(0);
  }

  // Clears system state after every test via the shared test helper.
  virtual void TearDown() { libaom_test::ClearSystemState(); }

 protected:
  od_filter_dering_direction_func dering;      // Implementation under test.
  od_filter_dering_direction_func ref_dering;  // Reference implementation.
  int bsize;                                   // Block size parameter.
};

51
// Same fixture under a second name, so that the speed test can be declared
// and instantiated separately from the mismatch test.
typedef CDEFDeringDirTest CDEFDeringSpeedTest;
52 53

void test_dering(int bsize, int iterations,
54 55
                 od_filter_dering_direction_func dering,
                 od_filter_dering_direction_func ref_dering) {
56 57 58 59 60 61 62 63 64 65
  const int size = 8;
  const int ysize = size + 2 * OD_FILT_VBORDER;
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  DECLARE_ALIGNED(16, uint16_t, s[ysize * OD_FILT_BSTRIDE]);
  DECLARE_ALIGNED(16, static uint16_t, d[size * size]);
  DECLARE_ALIGNED(16, static uint16_t, ref_d[size * size]);
  memset(ref_d, 0, sizeof(ref_d));
  memset(d, 0, sizeof(d));

  int error = 0, threshold = 0, dir;
66 67
  int boundary, damping, depth, bits, level, count,
      errdepth = 0, errthreshold = 0, errboundary = 0, errdamping = 0;
68 69
  unsigned int pos = 0;

70 71
  for (boundary = 0; boundary < 16; boundary++) {
    for (depth = 8; depth <= 12; depth += 2) {
72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101
      for (damping = 5 + depth - 8; damping < 7 + depth - 8; damping++) {
        for (count = 0; count < iterations; count++) {
          for (level = 0; level < (1 << depth) && !error;
               level += (1 + 4 * !!boundary) << (depth - 8)) {
            for (bits = 1; bits <= depth && !error; bits++) {
              for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++)
                s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0,
                             (1 << depth) - 1);
              if (boundary) {
                if (boundary & 1) {  // Left
                  for (int i = 0; i < ysize; i++)
                    for (int j = 0; j < OD_FILT_HBORDER; j++)
                      s[i * OD_FILT_BSTRIDE + j] = OD_DERING_VERY_LARGE;
                }
                if (boundary & 2) {  // Right
                  for (int i = 0; i < ysize; i++)
                    for (int j = OD_FILT_HBORDER + size; j < OD_FILT_BSTRIDE;
                         j++)
                      s[i * OD_FILT_BSTRIDE + j] = OD_DERING_VERY_LARGE;
                }
                if (boundary & 4) {  // Above
                  for (int i = 0; i < OD_FILT_VBORDER; i++)
                    for (int j = 0; j < OD_FILT_BSTRIDE; j++)
                      s[i * OD_FILT_BSTRIDE + j] = OD_DERING_VERY_LARGE;
                }
                if (boundary & 8) {  // Below
                  for (int i = OD_FILT_VBORDER + size; i < ysize; i++)
                    for (int j = 0; j < OD_FILT_BSTRIDE; j++)
                      s[i * OD_FILT_BSTRIDE + j] = OD_DERING_VERY_LARGE;
                }
102
              }
103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124
              for (dir = 0; dir < 8; dir++) {
                for (threshold = 0; threshold < 64 << (depth - 8) && !error;
                     threshold += (1 + 4 * !!boundary) << (depth - 8)) {
                  ref_dering(ref_d, size, s + OD_FILT_HBORDER +
                                              OD_FILT_VBORDER * OD_FILT_BSTRIDE,
                             threshold, dir, damping);
                  // If dering and ref_dering are the same, we're just testing
                  // speed
                  if (dering != ref_dering)
                    ASM_REGISTER_STATE_CHECK(dering(
                        d, size,
                        s + OD_FILT_HBORDER + OD_FILT_VBORDER * OD_FILT_BSTRIDE,
                        threshold, dir, damping));
                  if (ref_dering != dering) {
                    for (pos = 0; pos < sizeof(d) / sizeof(*d) && !error;
                         pos++) {
                      error = ref_d[pos] != d[pos];
                      errdepth = depth;
                      errthreshold = threshold;
                      errboundary = boundary;
                      errdamping = damping;
                    }
125
                  }
126 127 128 129 130 131 132 133 134 135
                }
              }
            }
          }
        }
      }
    }
  }

  pos--;
136
  EXPECT_EQ(0, error) << "Error: CDEFDeringDirTest, SIMD and C mismatch."
137 138 139 140 141
                      << std::endl
                      << "First error at " << pos % size << "," << pos / size
                      << " (" << (int16_t)ref_d[pos] << " : " << (int16_t)d[pos]
                      << ") " << std::endl
                      << "threshold: " << errthreshold << std::endl
142
                      << "damping: " << errdamping << std::endl
143 144
                      << "depth: " << errdepth << std::endl
                      << "size: " << bsize << std::endl
145
                      << "boundary: " << errboundary << std::endl
146 147 148 149
                      << std::endl;
}

void test_dering_speed(int bsize, int iterations,
150 151
                       od_filter_dering_direction_func dering,
                       od_filter_dering_direction_func ref_dering) {
152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
  aom_usec_timer ref_timer;
  aom_usec_timer timer;

  aom_usec_timer_start(&ref_timer);
  test_dering(bsize, iterations, ref_dering, ref_dering);
  aom_usec_timer_mark(&ref_timer);
  int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);

  aom_usec_timer_start(&timer);
  test_dering(bsize, iterations, dering, dering);
  aom_usec_timer_mark(&timer);
  int elapsed_time = (int)aom_usec_timer_elapsed(&timer);

#if 0
  std::cout << "[          ] C time = " << ref_elapsed_time / 1000
            << " ms, SIMD time = " << elapsed_time / 1000 << " ms" << std::endl;
#endif

  EXPECT_GT(ref_elapsed_time, elapsed_time)
171
      << "Error: CDEFDeringSpeedTest, SIMD slower than C." << std::endl
172 173 174 175 176 177 178 179 180
      << "C time: " << ref_elapsed_time << " us" << std::endl
      << "SIMD time: " << elapsed_time << " us" << std::endl;
}

// Signature shared by the direction search functions exercised below: takes
// an image pointer, its stride, an output variance pointer and a coefficient
// shift, and returns an int.
typedef int (*find_dir_t)(const od_dering_in *img, int stride, int32_t *var,
                          int coeff_shift);

// Parameter tuple for the direction search tests:
//   <0> the implementation under test, <1> the reference implementation.
typedef std::tr1::tuple<find_dir_t, find_dir_t> find_dir_param_t;

181 182
// Value-parameterized fixture for the direction search. Each parameter
// supplies the implementation under test and the reference implementation;
// SetUp() copies them into the members below for the TEST_P bodies.
class CDEFDeringFindDirTest
    : public ::testing::TestWithParam<find_dir_param_t> {
 public:
  virtual ~CDEFDeringFindDirTest() {}

  // Unpacks the parameter tuple into the fixture members.
  virtual void SetUp() {
    ref_finddir = GET_PARAM(1);
    finddir = GET_PARAM(0);
  }

  // Clears system state after every test via the shared test helper.
  virtual void TearDown() { libaom_test::ClearSystemState(); }

 protected:
  find_dir_t ref_finddir;  // Reference implementation.
  find_dir_t finddir;      // Implementation under test.
};

197
// Same fixture under a second name, so that the speed test can be declared
// and instantiated separately from the mismatch test.
typedef CDEFDeringFindDirTest CDEFDeringFindDirSpeedTest;
198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232

void test_finddir(int (*finddir)(const od_dering_in *img, int stride,
                                 int32_t *var, int coeff_shift),
                  int (*ref_finddir)(const od_dering_in *img, int stride,
                                     int32_t *var, int coeff_shift)) {
  const int size = 8;
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  DECLARE_ALIGNED(16, uint16_t, s[size * size]);

  int error = 0;
  int depth, bits, level, count, errdepth = 0;
  int ref_res = 0, res = 0;
  int32_t ref_var = 0, var = 0;

  for (depth = 8; depth <= 12 && !error; depth += 2) {
    for (count = 0; count < 512 && !error; count++) {
      for (level = 0; level < (1 << depth) && !error;
           level += 1 << (depth - 8)) {
        for (bits = 1; bits <= depth && !error; bits++) {
          for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++)
            s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0,
                         (1 << depth) - 1);
          for (int c = 0; c < 1 + 9 * (finddir == ref_finddir); c++)
            ref_res = ref_finddir(s, size, &ref_var, depth - 8);
          if (finddir != ref_finddir)
            ASM_REGISTER_STATE_CHECK(res = finddir(s, size, &var, depth - 8));
          if (ref_finddir != finddir) {
            if (res != ref_res || var != ref_var) error = 1;
            errdepth = depth;
          }
        }
      }
    }
  }

233
  EXPECT_EQ(0, error) << "Error: CDEFDeringFindDirTest, SIMD and C mismatch."
234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263
                      << std::endl
                      << "return: " << res << " : " << ref_res << std::endl
                      << "var: " << var << " : " << ref_var << std::endl
                      << "depth: " << errdepth << std::endl
                      << std::endl;
}

void test_finddir_speed(int (*finddir)(const od_dering_in *img, int stride,
                                       int32_t *var, int coeff_shift),
                        int (*ref_finddir)(const od_dering_in *img, int stride,
                                           int32_t *var, int coeff_shift)) {
  aom_usec_timer ref_timer;
  aom_usec_timer timer;

  aom_usec_timer_start(&ref_timer);
  test_finddir(ref_finddir, ref_finddir);
  aom_usec_timer_mark(&ref_timer);
  int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);

  aom_usec_timer_start(&timer);
  test_finddir(finddir, finddir);
  aom_usec_timer_mark(&timer);
  int elapsed_time = (int)aom_usec_timer_elapsed(&timer);

#if 0
  std::cout << "[          ] C time = " << ref_elapsed_time / 1000
            << " ms, SIMD time = " << elapsed_time / 1000 << " ms" << std::endl;
#endif

  EXPECT_GT(ref_elapsed_time, elapsed_time)
264
      << "Error: CDEFDeringFindDirSpeedTest, SIMD slower than C." << std::endl
265 266 267 268
      << "C time: " << ref_elapsed_time << " us" << std::endl
      << "SIMD time: " << elapsed_time << " us" << std::endl;
}

269
// Compares the filter under test against the reference with a single pass
// over the parameter sweep.
TEST_P(CDEFDeringDirTest, TestSIMDNoMismatch) {
  const int iterations = 1;
  test_dering(bsize, iterations, dering, ref_dering);
}

273
// Timing comparison of the filter under test against the reference, using
// four passes over the parameter sweep. Disabled by default via the
// DISABLED_ prefix.
TEST_P(CDEFDeringSpeedTest, DISABLED_TestSpeed) {
  const int iterations = 4;
  test_dering_speed(bsize, iterations, dering, ref_dering);
}

277
// Compares the direction search under test against the reference.
TEST_P(CDEFDeringFindDirTest, TestSIMDNoMismatch) {
  // Fixture members: implementation under test first, reference second.
  test_finddir(finddir, ref_finddir);
}

281
// Timing comparison of the direction search under test against the
// reference. Disabled by default via the DISABLED_ prefix.
TEST_P(CDEFDeringFindDirSpeedTest, DISABLED_TestSpeed) {
  // Fixture members: implementation under test first, reference second.
  test_finddir_speed(finddir, ref_finddir);
}

using std::tr1::make_tuple;

// VS compiling for 32 bit targets does not support vector types in
// structs as arguments, which makes the v256 type of the intrinsics
// hard to support, so optimizations for this target are disabled.
#if defined(_WIN64) || !defined(_MSC_VER) || defined(__clang__)
#if HAVE_SSE2
// SSE2 4x4 and 8x8 direction filters, each paired with its C counterpart.
INSTANTIATE_TEST_CASE_P(
    SSE2, CDEFDeringDirTest,
    ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_sse2,
                                 &od_filter_dering_direction_4x4_c, 4),
                      make_tuple(&od_filter_dering_direction_8x8_sse2,
                                 &od_filter_dering_direction_8x8_c, 8)));

// SSE2 direction search paired with its C counterpart.
INSTANTIATE_TEST_CASE_P(SSE2, CDEFDeringFindDirTest,
                        ::testing::Values(make_tuple(&od_dir_find8_sse2,
                                                     &od_dir_find8_c)));
#endif  // HAVE_SSE2
#if HAVE_SSSE3
// SSSE3 4x4 and 8x8 direction filters, each paired with its C counterpart.
INSTANTIATE_TEST_CASE_P(
    SSSE3, CDEFDeringDirTest,
    ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_ssse3,
                                 &od_filter_dering_direction_4x4_c, 4),
                      make_tuple(&od_filter_dering_direction_8x8_ssse3,
                                 &od_filter_dering_direction_8x8_c, 8)));

// SSSE3 direction search paired with its C counterpart.
INSTANTIATE_TEST_CASE_P(SSSE3, CDEFDeringFindDirTest,
                        ::testing::Values(make_tuple(&od_dir_find8_ssse3,
                                                     &od_dir_find8_c)));
#endif  // HAVE_SSSE3

#if HAVE_SSE4_1
// SSE4.1 4x4 and 8x8 direction filters, each paired with its C counterpart.
INSTANTIATE_TEST_CASE_P(
    SSE4_1, CDEFDeringDirTest,
    ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_sse4_1,
                                 &od_filter_dering_direction_4x4_c, 4),
                      make_tuple(&od_filter_dering_direction_8x8_sse4_1,
                                 &od_filter_dering_direction_8x8_c, 8)));

// SSE4.1 direction search paired with its C counterpart.
INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFDeringFindDirTest,
                        ::testing::Values(make_tuple(&od_dir_find8_sse4_1,
                                                     &od_dir_find8_c)));
#endif  // HAVE_SSE4_1

#if HAVE_NEON
// NEON 4x4 and 8x8 direction filters, each paired with its C counterpart.
INSTANTIATE_TEST_CASE_P(
    NEON, CDEFDeringDirTest,
    ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_neon,
                                 &od_filter_dering_direction_4x4_c, 4),
                      make_tuple(&od_filter_dering_direction_8x8_neon,
                                 &od_filter_dering_direction_8x8_c, 8)));

// NEON direction search paired with its C counterpart.
INSTANTIATE_TEST_CASE_P(NEON, CDEFDeringFindDirTest,
                        ::testing::Values(make_tuple(&od_dir_find8_neon,
                                                     &od_dir_find8_c)));
#endif  // HAVE_NEON

// Test speed for all supported architectures
#if HAVE_SSE2
// Speed test parameters: SSE2 direction filters and their C counterparts.
INSTANTIATE_TEST_CASE_P(
    SSE2, CDEFDeringSpeedTest,
    ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_sse2,
                                 &od_filter_dering_direction_4x4_c, 4),
                      make_tuple(&od_filter_dering_direction_8x8_sse2,
                                 &od_filter_dering_direction_8x8_c, 8)));

// Speed test parameters: SSE2 direction search and its C counterpart.
INSTANTIATE_TEST_CASE_P(SSE2, CDEFDeringFindDirSpeedTest,
                        ::testing::Values(make_tuple(&od_dir_find8_sse2,
                                                     &od_dir_find8_c)));
#endif  // HAVE_SSE2

#if HAVE_SSSE3
// Speed test parameters: SSSE3 direction filters and their C counterparts.
INSTANTIATE_TEST_CASE_P(
    SSSE3, CDEFDeringSpeedTest,
    ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_ssse3,
                                 &od_filter_dering_direction_4x4_c, 4),
                      make_tuple(&od_filter_dering_direction_8x8_ssse3,
                                 &od_filter_dering_direction_8x8_c, 8)));

// Speed test parameters: SSSE3 direction search and its C counterpart.
INSTANTIATE_TEST_CASE_P(SSSE3, CDEFDeringFindDirSpeedTest,
                        ::testing::Values(make_tuple(&od_dir_find8_ssse3,
                                                     &od_dir_find8_c)));
#endif  // HAVE_SSSE3

#if HAVE_SSE4_1
// Speed test parameters: SSE4.1 direction filters and their C counterparts.
INSTANTIATE_TEST_CASE_P(
    SSE4_1, CDEFDeringSpeedTest,
    ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_sse4_1,
                                 &od_filter_dering_direction_4x4_c, 4),
                      make_tuple(&od_filter_dering_direction_8x8_sse4_1,
                                 &od_filter_dering_direction_8x8_c, 8)));

// Speed test parameters: SSE4.1 direction search and its C counterpart.
INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFDeringFindDirSpeedTest,
                        ::testing::Values(make_tuple(&od_dir_find8_sse4_1,
                                                     &od_dir_find8_c)));
#endif  // HAVE_SSE4_1

#if HAVE_NEON
// Speed test parameters: NEON direction filters and their C counterparts.
INSTANTIATE_TEST_CASE_P(
    NEON, CDEFDeringSpeedTest,
    ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_neon,
                                 &od_filter_dering_direction_4x4_c, 4),
                      make_tuple(&od_filter_dering_direction_8x8_neon,
                                 &od_filter_dering_direction_8x8_c, 8)));

// Speed test parameters: NEON direction search and its C counterpart.
INSTANTIATE_TEST_CASE_P(NEON, CDEFDeringFindDirSpeedTest,
                        ::testing::Values(make_tuple(&od_dir_find8_neon,
                                                     &od_dir_find8_c)));
#endif  // HAVE_NEON

#endif  // defined(_WIN64) || !defined(_MSC_VER) || defined(__clang__)
}  // namespace