dct16x16_test.cc 29.4 KB
Newer Older
Daniel Kang's avatar
Daniel Kang committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <math.h>
#include <stdlib.h>
#include <string.h>

#include "third_party/googletest/src/include/gtest/gtest.h"
16 17 18 19
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
Daniel Kang's avatar
Daniel Kang committed
20

Yaowu Xu's avatar
Yaowu Xu committed
21
#include "./vp9_rtcd.h"
22
#include "./vpx_dsp_rtcd.h"
23
#include "vp9/common/vp9_entropy.h"
Scott LaVarnway's avatar
Scott LaVarnway committed
24
#include "vp9/common/vp9_scan.h"
25
#include "vpx/vpx_codec.h"
26
#include "vpx/vpx_integer.h"
27
#include "vpx_ports/mem.h"
28

Daniel Kang's avatar
Daniel Kang committed
29 30 31
using libvpx_test::ACMRandom;

namespace {
32 33 34 35

#ifdef _MSC_VER
// Local round() compiled only for MSVC builds.
// Rounds to the nearest integer with ties going away from zero and returns
// the result as an int.
static int round(double x) {
  // Negative inputs are shifted down before ceil(), non-negative inputs are
  // shifted up before floor(), so +/-0.5 map to +/-1.
  return static_cast<int>(x < 0 ? ceil(x - 0.5) : floor(x + 0.5));
}
#endif
Daniel Kang's avatar
Daniel Kang committed
41

42
// Number of coefficients in one 16x16 transform block (16 * 16).
const int kNumCoeffs = 256;
Daniel Kang's avatar
Daniel Kang committed
43 44 45 46 47 48 49 50
// Approximation of pi used by the floating-point reference inverse DCT.
const double PI = 3.1415926535898;

// Floating-point reference 16x16 2-D inverse DCT, evaluated directly from the
// cosine basis with four nested loops (no fast factorisation).
//
//   input  - 256 coefficients; input[i * 16 + j] is weighted by
//            cos(PI * j * (l + 0.5) / 16) * cos(PI * i * (k + 0.5) / 16).
//   output - 256 reconstructed values, written to output[k * 16 + l].
//
// Each term is divided by 256 and multiplied by sqrt(2) once for every one of
// i and j that is non-zero.
void reference2_16x16_idct_2d(double *input, double *output) {
  for (int l = 0; l < 16; ++l) {
    for (int k = 0; k < 16; ++k) {
      double s = 0;
      for (int i = 0; i < 16; ++i) {
        for (int j = 0; j < 16; ++j) {
          double x = cos(PI * j * (l + 0.5) / 16.0) *
                     cos(PI * i * (k + 0.5) / 16.0) *
                     input[i * 16 + j] / 256;
          if (i != 0) x *= sqrt(2.0);
          if (j != 0) x *= sqrt(2.0);
          s += x;
        }
      }
      output[k * 16 + l] = s;
    }
  }
}

66

67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
// Cosine table for the butterfly DCT: Ck == cos(k * pi / 32).
const double C1 = 0.995184726672197;
const double C2 = 0.98078528040323;
const double C3 = 0.956940335732209;
const double C4 = 0.923879532511287;
const double C5 = 0.881921264348355;
const double C6 = 0.831469612302545;
const double C7 = 0.773010453362737;
const double C8 = 0.707106781186548;
const double C9 = 0.634393284163646;
const double C10 = 0.555570233019602;
const double C11 = 0.471396736825998;
const double C12 = 0.38268343236509;
const double C13 = 0.290284677254462;
const double C14 = 0.195090322016128;
const double C15 = 0.098017140329561;

// Floating-point 16-point 1-D forward DCT built from four butterfly stages.
//
//   input  - 16 samples (read only).
//   output - 16 transform values; also used as scratch between stages, so it
//            must not alias `input`.
//
// Scaling: output[0] is the plain sum of the inputs; for an impulse at
// input[0] the remaining outputs are sqrt(2) * Ck.
void butterfly_16x16_dct_1d(double input[16], double output[16]) {
  double step[16];
  double intermediate[16];

  // step 1: fold the input around its centre (sums in 0..7, differences in
  // 8..15).
  for (int i = 0; i < 8; ++i) {
    step[i] = input[i] + input[15 - i];
    step[15 - i] = input[i] - input[15 - i];
  }

  // step 2: fold the even half again and rotate the odd half.
  for (int i = 0; i < 4; ++i) {
    output[i] = step[i] + step[7 - i];
    output[7 - i] = step[i] - step[7 - i];
  }

  output[8] = step[8] * C7 + step[15] * C9;
  output[9] = step[9] * C11 - step[14] * C5;
  output[10] = step[10] * C3 + step[13] * C13;
  output[11] = step[11] * C15 - step[12] * C1;
  output[12] = step[12] * C15 + step[11] * C1;
  output[13] = step[13] * C3 - step[10] * C13;
  output[14] = step[14] * C11 + step[9] * C5;
  output[15] = step[15] * C7 - step[8] * C9;

  // step 3
  step[0] = output[0] + output[3];
  step[1] = output[1] + output[2];
  step[2] = output[1] - output[2];
  step[3] = output[0] - output[3];

  step[4] = output[4] * C14 + output[7] * C2;
  step[5] = output[5] * C10 + output[6] * C6;
  step[6] = output[6] * C10 - output[5] * C6;
  step[7] = output[7] * C14 - output[4] * C2;

  step[8] = output[8] + output[11];
  step[9] = output[9] + output[10];
  step[10] = output[9] - output[10];
  step[11] = output[8] - output[11];

  step[12] = output[12] + output[15];
  step[13] = output[13] + output[14];
  step[14] = output[13] - output[14];
  step[15] = output[12] - output[15];

  // step 4: produce the final coefficients.
  output[0] = (step[0] + step[1]);
  output[8] = (step[0] - step[1]);

  output[4] = 2 * ((step[2] * C12 + step[3] * C4) * C8);
  output[12] = 2 * ((step[3] * C12 - step[2] * C4) * C8);

  output[2] = 2 * ((step[4] + step[5]) * C8);
  output[14] = 2 * ((step[7] - step[6]) * C8);

  const double even_diff = step[4] - step[5];
  const double even_sum = step[6] + step[7];
  output[6] = (even_diff + even_sum);
  output[10] = (even_diff - even_sum);

  intermediate[8] = step[8] + step[14];
  intermediate[9] = step[9] + step[15];

  output[3] = 2 * ((intermediate[8] * C12 - intermediate[9] * C4) * C8);
  output[13] = 2 * ((intermediate[9] * C12 + intermediate[8] * C4) * C8);

  output[9] = 2 * ((step[10] + step[11]) * C8);

  intermediate[11] = step[10] - step[11];
  intermediate[12] = step[12] + step[13];
  intermediate[13] = step[12] - step[13];
  intermediate[14] = step[8] - step[14];
  intermediate[15] = step[9] - step[15];

  output[15] = (intermediate[11] + intermediate[12]);
  output[1] = -(intermediate[11] - intermediate[12]);

  output[7] = 2 * (intermediate[13] * C8);

  output[11] = -2 * ((intermediate[14] * C12 - intermediate[15] * C4) * C8);
  output[5] = 2 * ((intermediate[15] * C12 + intermediate[14] * C4) * C8);
}

239
void reference_16x16_dct_2d(int16_t input[256], double output[256]) {
Daniel Kang's avatar
Daniel Kang committed
240 241 242 243
  // First transform columns
  for (int i = 0; i < 16; ++i) {
    double temp_in[16], temp_out[16];
    for (int j = 0; j < 16; ++j)
244
      temp_in[j] = input[j * 16 + i];
Daniel Kang's avatar
Daniel Kang committed
245 246
    butterfly_16x16_dct_1d(temp_in, temp_out);
    for (int j = 0; j < 16; ++j)
247
      output[j * 16 + i] = temp_out[j];
Daniel Kang's avatar
Daniel Kang committed
248 249 250 251 252
  }
  // Then transform rows
  for (int i = 0; i < 16; ++i) {
    double temp_in[16], temp_out[16];
    for (int j = 0; j < 16; ++j)
253
      temp_in[j] = output[j + i * 16];
Daniel Kang's avatar
Daniel Kang committed
254 255 256
    butterfly_16x16_dct_1d(temp_in, temp_out);
    // Scale by some magic number
    for (int j = 0; j < 16; ++j)
257
      output[j + i * 16] = temp_out[j]/2;
Daniel Kang's avatar
Daniel Kang committed
258 259 260
  }
}

261 262 263
// Signatures of the transform entry points exercised by these tests.
// The "Fht"/"Iht" variants take an additional transform-type selector.
typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
                        int tx_type);
typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
                        int tx_type);

// Test parameters: (forward transform, inverse transform, tx_type, bit depth).
typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct16x16Param;
typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht16x16Param;
// Test parameters: (reference inverse, inverse under test, coefficient
// threshold, bit depth).
typedef std::tr1::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t>
    Idct16x16Param;
272

273
void fdct16x16_ref(const int16_t *in, tran_low_t *out, int stride,
274
                   int /*tx_type*/) {
275
  vp9_fdct16x16_c(in, out, stride);
276 277
}

278
void idct16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,
279
                   int /*tx_type*/) {
280 281 282
  vp9_idct16x16_256_add_c(in, dest, stride);
}

283 284
void fht16x16_ref(const int16_t *in, tran_low_t *out, int stride,
                  int tx_type) {
285
  vp9_fht16x16_c(in, out, stride, tx_type);
286 287
}

288 289
void iht16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,
                  int tx_type) {
290 291 292
  vp9_iht16x16_256_add_c(in, dest, stride, tx_type);
}

293 294
#if CONFIG_VP9_HIGHBITDEPTH
void idct16x16_10(const tran_low_t *in, uint8_t *out, int stride) {
295
  vp9_highbd_idct16x16_256_add_c(in, out, stride, 10);
296 297 298
}

void idct16x16_12(const tran_low_t *in, uint8_t *out, int stride) {
299
  vp9_highbd_idct16x16_256_add_c(in, out, stride, 12);
300 301 302 303 304 305 306 307 308 309 310 311 312
}

void idct16x16_10_ref(const tran_low_t *in, uint8_t *out, int stride,
                      int tx_type) {
  idct16x16_10(in, out, stride);
}

void idct16x16_12_ref(const tran_low_t *in, uint8_t *out, int stride,
                      int tx_type) {
  idct16x16_12(in, out, stride);
}

void iht16x16_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
313
  vp9_highbd_iht16x16_256_add_c(in, out, stride, tx_type, 10);
314 315 316
}

void iht16x16_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
317
  vp9_highbd_iht16x16_256_add_c(in, out, stride, tx_type, 12);
318
}
319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345

// IdctFunc-shaped wrapper: vp9_highbd_idct16x16_10_add_c at bit depth 10.
// NOTE(review): the "10_add" variant presumably handles blocks with at most
// 10 non-zero coefficients (cf. eob = 10 in CompareInvReference) - confirm.
void idct16x16_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
  vp9_highbd_idct16x16_10_add_c(in, out, stride, 10);
}

// IdctFunc-shaped wrapper: vp9_highbd_idct16x16_10_add_c at bit depth 12.
void idct16x16_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
  vp9_highbd_idct16x16_10_add_c(in, out, stride, 12);
}

#if HAVE_SSE2
// SSE2 counterparts of the high-bit-depth wrappers: each binds the trailing
// bit-depth argument (10 or 12) so the function fits the IdctFunc signature.
void idct16x16_256_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vp9_highbd_idct16x16_256_add_sse2(in, out, stride, 10);
}

void idct16x16_256_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vp9_highbd_idct16x16_256_add_sse2(in, out, stride, 12);
}

void idct16x16_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vp9_highbd_idct16x16_10_add_sse2(in, out, stride, 10);
}

void idct16x16_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vp9_highbd_idct16x16_10_add_sse2(in, out, stride, 12);
}
#endif  // HAVE_SSE2
#endif  // CONFIG_VP9_HIGHBITDEPTH
346

347
class Trans16x16TestBase {
348
 public:
349
  virtual ~Trans16x16TestBase() {}
350

351
 protected:
352
  virtual void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) = 0;
353

354
  virtual void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) = 0;
355 356 357

  void RunAccuracyCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
358 359
    uint32_t max_error = 0;
    int64_t total_error = 0;
360 361
    const int count_test_block = 10000;
    for (int i = 0; i < count_test_block; ++i) {
362 363 364 365
      DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
      DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
      DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
      DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
366
#if CONFIG_VP9_HIGHBITDEPTH
367 368
      DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
      DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
369
#endif
370

371
      // Initialize a test block with input range [-mask_, mask_].
372
      for (int j = 0; j < kNumCoeffs; ++j) {
373 374 375 376 377 378 379 380 381 382 383
        if (bit_depth_ == VPX_BITS_8) {
          src[j] = rnd.Rand8();
          dst[j] = rnd.Rand8();
          test_input_block[j] = src[j] - dst[j];
#if CONFIG_VP9_HIGHBITDEPTH
        } else {
          src16[j] = rnd.Rand16() & mask_;
          dst16[j] = rnd.Rand16() & mask_;
          test_input_block[j] = src16[j] - dst16[j];
#endif
        }
384 385
      }

386 387
      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block,
                                          test_temp_block, pitch_));
388 389 390 391 392 393 394 395 396
      if (bit_depth_ == VPX_BITS_8) {
        ASM_REGISTER_STATE_CHECK(
            RunInvTxfm(test_temp_block, dst, pitch_));
#if CONFIG_VP9_HIGHBITDEPTH
      } else {
        ASM_REGISTER_STATE_CHECK(
            RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
#endif
      }
397 398

      for (int j = 0; j < kNumCoeffs; ++j) {
399 400 401 402
#if CONFIG_VP9_HIGHBITDEPTH
        const uint32_t diff =
            bit_depth_ == VPX_BITS_8 ?  dst[j] - src[j] : dst16[j] - src16[j];
#else
403
        const uint32_t diff = dst[j] - src[j];
404
#endif
405
        const uint32_t error = diff * diff;
406 407 408 409
        if (max_error < error)
          max_error = error;
        total_error += error;
      }
Scott LaVarnway's avatar
Scott LaVarnway committed
410
    }
Daniel Kang's avatar
Daniel Kang committed
411

412
    EXPECT_GE(1u  << 2 * (bit_depth_ - 8), max_error)
413 414
        << "Error: 16x16 FHT/IHT has an individual round trip error > 1";

415
    EXPECT_GE(count_test_block << 2 * (bit_depth_ - 8), total_error)
416
        << "Error: 16x16 FHT/IHT has average round trip error > 1 per block";
417 418
  }

419
  void RunCoeffCheck() {
420 421
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
422 423 424
    DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
425

426
    for (int i = 0; i < count_test_block; ++i) {
427
      // Initialize a test block with input range [-mask_, mask_].
428
      for (int j = 0; j < kNumCoeffs; ++j)
429
        input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
430 431

      fwd_txfm_ref(input_block, output_ref_block, pitch_, tx_type_);
432
      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_));
433 434 435 436 437 438 439 440 441 442

      // The minimum quant value is 4.
      for (int j = 0; j < kNumCoeffs; ++j)
        EXPECT_EQ(output_block[j], output_ref_block[j]);
    }
  }

  void RunMemCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
443 444 445
    DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
Scott LaVarnway's avatar
Scott LaVarnway committed
446

447
    for (int i = 0; i < count_test_block; ++i) {
448
      // Initialize a test block with input range [-mask_, mask_].
449
      for (int j = 0; j < kNumCoeffs; ++j) {
450
        input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
451
      }
452
      if (i == 0) {
453
        for (int j = 0; j < kNumCoeffs; ++j)
454
          input_extreme_block[j] = mask_;
455
      } else if (i == 1) {
456
        for (int j = 0; j < kNumCoeffs; ++j)
457
          input_extreme_block[j] = -mask_;
458
      }
459

460
      fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);
461 462
      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block,
                                          output_block, pitch_));
463 464 465

      // The minimum quant value is 4.
      for (int j = 0; j < kNumCoeffs; ++j) {
466
        EXPECT_EQ(output_block[j], output_ref_block[j]);
467
        EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block[j]))
468 469
            << "Error: 16x16 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
      }
470
    }
471 472
  }

473 474
  void RunQuantCheck(int dc_thred, int ac_thred) {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
475
    const int count_test_block = 100000;
476 477
    DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
478

479 480
    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
481
#if CONFIG_VP9_HIGHBITDEPTH
482 483
    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
484
#endif
485 486

    for (int i = 0; i < count_test_block; ++i) {
487
      // Initialize a test block with input range [-mask_, mask_].
488
      for (int j = 0; j < kNumCoeffs; ++j) {
489
        input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
490 491 492
      }
      if (i == 0)
        for (int j = 0; j < kNumCoeffs; ++j)
493
          input_extreme_block[j] = mask_;
494 495
      if (i == 1)
        for (int j = 0; j < kNumCoeffs; ++j)
496
          input_extreme_block[j] = -mask_;
497 498 499 500

      fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);

      // clear reconstructed pixel buffers
James Zern's avatar
James Zern committed
501 502
      memset(dst, 0, kNumCoeffs * sizeof(uint8_t));
      memset(ref, 0, kNumCoeffs * sizeof(uint8_t));
503
#if CONFIG_VP9_HIGHBITDEPTH
James Zern's avatar
James Zern committed
504 505
      memset(dst16, 0, kNumCoeffs * sizeof(uint16_t));
      memset(ref16, 0, kNumCoeffs * sizeof(uint16_t));
506
#endif
507 508 509 510 511

      // quantization with maximum allowed step sizes
      output_ref_block[0] = (output_ref_block[0] / dc_thred) * dc_thred;
      for (int j = 1; j < kNumCoeffs; ++j)
        output_ref_block[j] = (output_ref_block[j] / ac_thred) * ac_thred;
512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531
      if (bit_depth_ == VPX_BITS_8) {
        inv_txfm_ref(output_ref_block, ref, pitch_, tx_type_);
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(output_ref_block, dst, pitch_));
#if CONFIG_VP9_HIGHBITDEPTH
      } else {
        inv_txfm_ref(output_ref_block, CONVERT_TO_BYTEPTR(ref16), pitch_,
                     tx_type_);
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(output_ref_block,
                                            CONVERT_TO_BYTEPTR(dst16), pitch_));
#endif
      }
      if (bit_depth_ == VPX_BITS_8) {
        for (int j = 0; j < kNumCoeffs; ++j)
          EXPECT_EQ(ref[j], dst[j]);
#if CONFIG_VP9_HIGHBITDEPTH
      } else {
        for (int j = 0; j < kNumCoeffs; ++j)
          EXPECT_EQ(ref16[j], dst16[j]);
#endif
      }
532 533 534
    }
  }

535 536 537
  void RunInvAccuracyCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
538 539 540 541
    DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
542
#if CONFIG_VP9_HIGHBITDEPTH
543 544
    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
545
#endif  // CONFIG_VP9_HIGHBITDEPTH
Daniel Kang's avatar
Daniel Kang committed
546

547 548
    for (int i = 0; i < count_test_block; ++i) {
      double out_r[kNumCoeffs];
Daniel Kang's avatar
Daniel Kang committed
549

550 551
      // Initialize a test block with input range [-255, 255].
      for (int j = 0; j < kNumCoeffs; ++j) {
552 553 554 555 556 557 558 559 560
        if (bit_depth_ == VPX_BITS_8) {
          src[j] = rnd.Rand8();
          dst[j] = rnd.Rand8();
          in[j] = src[j] - dst[j];
#if CONFIG_VP9_HIGHBITDEPTH
        } else {
          src16[j] = rnd.Rand16() & mask_;
          dst16[j] = rnd.Rand16() & mask_;
          in[j] = src16[j] - dst16[j];
561
#endif  // CONFIG_VP9_HIGHBITDEPTH
562
        }
563 564 565 566
      }

      reference_16x16_dct_2d(in, out_r);
      for (int j = 0; j < kNumCoeffs; ++j)
567
        coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
568

569 570 571 572 573 574
      if (bit_depth_ == VPX_BITS_8) {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, 16));
#if CONFIG_VP9_HIGHBITDEPTH
      } else {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16),
                                            16));
575
#endif  // CONFIG_VP9_HIGHBITDEPTH
576
      }
577 578

      for (int j = 0; j < kNumCoeffs; ++j) {
579 580 581 582
#if CONFIG_VP9_HIGHBITDEPTH
        const uint32_t diff =
            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
#else
583
        const uint32_t diff = dst[j] - src[j];
584
#endif  // CONFIG_VP9_HIGHBITDEPTH
585 586
        const uint32_t error = diff * diff;
        EXPECT_GE(1u, error)
587 588 589
            << "Error: 16x16 IDCT has error " << error
            << " at index " << j;
      }
Daniel Kang's avatar
Daniel Kang committed
590 591
    }
  }
592 593 594 595 596 597

  void CompareInvReference(IdctFunc ref_txfm, int thresh) {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 10000;
    const int eob = 10;
    const int16_t *scan = vp9_default_scan_orders[TX_16X16].scan;
598 599 600
    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
601
#if CONFIG_VP9_HIGHBITDEPTH
602 603
    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649
#endif  // CONFIG_VP9_HIGHBITDEPTH

    for (int i = 0; i < count_test_block; ++i) {
      for (int j = 0; j < kNumCoeffs; ++j) {
        if (j < eob) {
          // Random values less than the threshold, either positive or negative
          coeff[scan[j]] = rnd(thresh) * (1 - 2 * (i % 2));
        } else {
          coeff[scan[j]] = 0;
        }
        if (bit_depth_ == VPX_BITS_8) {
          dst[j] = 0;
          ref[j] = 0;
#if CONFIG_VP9_HIGHBITDEPTH
        } else {
          dst16[j] = 0;
          ref16[j] = 0;
#endif  // CONFIG_VP9_HIGHBITDEPTH
        }
      }
      if (bit_depth_ == VPX_BITS_8) {
        ref_txfm(coeff, ref, pitch_);
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
      } else {
#if CONFIG_VP9_HIGHBITDEPTH
        ref_txfm(coeff, CONVERT_TO_BYTEPTR(ref16), pitch_);
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16),
                                 pitch_));
#endif  // CONFIG_VP9_HIGHBITDEPTH
      }

      for (int j = 0; j < kNumCoeffs; ++j) {
#if CONFIG_VP9_HIGHBITDEPTH
        const uint32_t diff =
            bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
#else
        const uint32_t diff = dst[j] - ref[j];
#endif  // CONFIG_VP9_HIGHBITDEPTH
        const uint32_t error = diff * diff;
        EXPECT_EQ(0u, error)
            << "Error: 16x16 IDCT Comparison has error " << error
            << " at index " << j;
      }
    }
  }

650 651
  int pitch_;
  int tx_type_;
652 653
  vpx_bit_depth_t bit_depth_;
  int mask_;
654 655
  FhtFunc fwd_txfm_ref;
  IhtFunc inv_txfm_ref;
656
};
Daniel Kang's avatar
Daniel Kang committed
657

658 659
class Trans16x16DCT
    : public Trans16x16TestBase,
660
      public ::testing::TestWithParam<Dct16x16Param> {
661 662
 public:
  virtual ~Trans16x16DCT() {}
Daniel Kang's avatar
Daniel Kang committed
663

664 665 666 667
  virtual void SetUp() {
    fwd_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
    tx_type_  = GET_PARAM(2);
668
    bit_depth_ = GET_PARAM(3);
669
    pitch_    = 16;
670
    fwd_txfm_ref = fdct16x16_ref;
671
    inv_txfm_ref = idct16x16_ref;
672 673 674
    mask_ = (1 << bit_depth_) - 1;
#if CONFIG_VP9_HIGHBITDEPTH
    switch (bit_depth_) {
675
      case VPX_BITS_10:
676 677
        inv_txfm_ref = idct16x16_10_ref;
        break;
678
      case VPX_BITS_12:
679 680 681 682 683 684 685 686 687
        inv_txfm_ref = idct16x16_12_ref;
        break;
      default:
        inv_txfm_ref = idct16x16_ref;
        break;
    }
#else
    inv_txfm_ref = idct16x16_ref;
#endif
688 689
  }
  virtual void TearDown() { libvpx_test::ClearSystemState(); }
Daniel Kang's avatar
Daniel Kang committed
690

691
 protected:
692
  void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
693 694
    fwd_txfm_(in, out, stride);
  }
695
  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
696
    inv_txfm_(out, dst, stride);
Daniel Kang's avatar
Daniel Kang committed
697
  }
698

699 700
  FdctFunc fwd_txfm_;
  IdctFunc inv_txfm_;
701 702 703 704
};

// Each test forwards to the shared check of the same name in
// Trans16x16TestBase.
TEST_P(Trans16x16DCT, AccuracyCheck) {
  RunAccuracyCheck();
}

TEST_P(Trans16x16DCT, CoeffCheck) {
  RunCoeffCheck();
}

TEST_P(Trans16x16DCT, MemCheck) {
  RunMemCheck();
}

TEST_P(Trans16x16DCT, QuantCheck) {
  // Use maximally allowed quantization step sizes for DC and AC
  // coefficients respectively.
  RunQuantCheck(1336, 1828);
}

TEST_P(Trans16x16DCT, InvAccuracyCheck) {
  RunInvAccuracyCheck();
}

725 726
class Trans16x16HT
    : public Trans16x16TestBase,
727
      public ::testing::TestWithParam<Ht16x16Param> {
728 729 730 731 732 733 734
 public:
  virtual ~Trans16x16HT() {}

  virtual void SetUp() {
    fwd_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
    tx_type_  = GET_PARAM(2);
735
    bit_depth_ = GET_PARAM(3);
736 737
    pitch_    = 16;
    fwd_txfm_ref = fht16x16_ref;
738
    inv_txfm_ref = iht16x16_ref;
739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754
    mask_ = (1 << bit_depth_) - 1;
#if CONFIG_VP9_HIGHBITDEPTH
    switch (bit_depth_) {
      case VPX_BITS_10:
        inv_txfm_ref = iht16x16_10;
        break;
      case VPX_BITS_12:
        inv_txfm_ref = iht16x16_12;
        break;
      default:
        inv_txfm_ref = iht16x16_ref;
        break;
    }
#else
    inv_txfm_ref = iht16x16_ref;
#endif
755
  }
756 757 758
  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
759
  void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
760
    fwd_txfm_(in, out, stride, tx_type_);
761
  }
762
  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
763
    inv_txfm_(out, dst, stride, tx_type_);
764 765
  }

766 767
  FhtFunc fwd_txfm_;
  IhtFunc inv_txfm_;
768 769 770 771 772 773
};

// Each test forwards to the shared check of the same name in
// Trans16x16TestBase.
TEST_P(Trans16x16HT, AccuracyCheck) {
  RunAccuracyCheck();
}

TEST_P(Trans16x16HT, CoeffCheck) {
  RunCoeffCheck();
}

TEST_P(Trans16x16HT, MemCheck) {
  RunMemCheck();
}

TEST_P(Trans16x16HT, QuantCheck) {
  // The encoder skips any non-DC intra prediction modes,
  // when the quantization step size goes beyond 988.
  RunQuantCheck(429, 729);
}

788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818
// Fixture comparing an inverse DCT against a reference inverse DCT.
// Parameters (Idct16x16Param): reference inverse, inverse under test,
// coefficient threshold, bit depth.
class InvTrans16x16DCT
    : public Trans16x16TestBase,
      public ::testing::TestWithParam<Idct16x16Param> {
 public:
  virtual ~InvTrans16x16DCT() {}

  virtual void SetUp() {
    ref_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
    thresh_ = GET_PARAM(2);
    bit_depth_ = GET_PARAM(3);
    pitch_ = 16;
    mask_ = (1 << bit_depth_) - 1;
  }
  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
  // This fixture has no forward transform; the override is a no-op.
  void RunFwdTxfm(int16_t * /*in*/, tran_low_t * /*out*/, int /*stride*/) {}
  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
    inv_txfm_(out, dst, stride);
  }

  IdctFunc ref_txfm_;  // Reference inverse transform.
  IdctFunc inv_txfm_;  // Inverse transform under test.
  int thresh_;         // Bound on the random coefficient magnitudes.
};

// Runs the sparse-block comparison of the inverse transform under test
// against the fixture's reference transform.
TEST_P(InvTrans16x16DCT, CompareReference) {
  CompareInvReference(ref_txfm_, thresh_);
}

819 820
using std::tr1::make_tuple;

821 822 823 824
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    C, Trans16x16DCT,
    ::testing::Values(
825 826
        make_tuple(&vp9_highbd_fdct16x16_c, &idct16x16_10, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fdct16x16_c, &idct16x16_12, 0, VPX_BITS_12),
827 828
        make_tuple(&vp9_fdct16x16_c, &vp9_idct16x16_256_add_c, 0, VPX_BITS_8)));
#else
829 830 831
INSTANTIATE_TEST_CASE_P(
    C, Trans16x16DCT,
    ::testing::Values(
832
        make_tuple(&vp9_fdct16x16_c, &vp9_idct16x16_256_add_c, 0, VPX_BITS_8)));
833
#endif  // CONFIG_VP9_HIGHBITDEPTH
834 835

// C implementations of the 16x16 hybrid transform for tx_type 0..3;
// high-bit-depth builds add the 10- and 12-bit variants to the 8-bit cases.
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    C, Trans16x16HT,
    ::testing::Values(
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 1, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 2, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 3, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 0, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 1, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 2, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 3, VPX_BITS_12),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8)));
#else
INSTANTIATE_TEST_CASE_P(
    C, Trans16x16HT,
    ::testing::Values(
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8)));
#endif  // CONFIG_VP9_HIGHBITDEPTH
860

// Registers Trans16x16DCT under the instantiation name "NEON", pairing
// vp9_fdct16x16_c with vp9_idct16x16_256_add_neon. Compiled only when
// HAVE_NEON_ASM is set and both CONFIG_VP9_HIGHBITDEPTH and
// CONFIG_EMULATE_HARDWARE are off.
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    NEON, Trans16x16DCT,
    ::testing::Values(
        make_tuple(&vp9_fdct16x16_c,
                   &vp9_idct16x16_256_add_neon, 0, VPX_BITS_8)));
#endif

// Registers Trans16x16DCT and Trans16x16HT under the instantiation name
// "SSE2", pairing the *_sse2 forward and inverse functions. Compiled only
// when HAVE_SSE2 is set and both CONFIG_VP9_HIGHBITDEPTH and
// CONFIG_EMULATE_HARDWARE are off.
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16DCT,
    ::testing::Values(
        make_tuple(&vp9_fdct16x16_sse2,
                   &vp9_idct16x16_256_add_sse2, 0, VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16HT,
    ::testing::Values(
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 0,
                   VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 1,
                   VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 2,
                   VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 3,
                   VPX_BITS_8)));
#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

// SSE2 instantiations for builds with CONFIG_VP9_HIGHBITDEPTH enabled.
// In the Trans16x16DCT list each VPX_BITS_10 / VPX_BITS_12 size appears
// twice: once with the SSE2 forward function and the idct16x16_NN inverse,
// and once with the C forward function and the SSE2 inverse. The VPX_BITS_8
// entries pair the SSE2 forward functions with the C inverse functions.
#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16DCT,
    ::testing::Values(
        make_tuple(&vp9_highbd_fdct16x16_sse2,
                   &idct16x16_10, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fdct16x16_c,
                   &idct16x16_256_add_10_sse2, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fdct16x16_sse2,
                   &idct16x16_12, 0, VPX_BITS_12),
        make_tuple(&vp9_highbd_fdct16x16_c,
                   &idct16x16_256_add_12_sse2, 0, VPX_BITS_12),
        make_tuple(&vp9_fdct16x16_sse2,
                   &vp9_idct16x16_256_add_c, 0, VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16HT,
    ::testing::Values(
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 3,
                   VPX_BITS_8)));
// Optimizations take effect at a threshold of 3155, so we use a value close to
// that to test both branches.
// NOTE(review): the InvTrans16x16DCT tuples appear to be (reference inverse
// transform, inverse transform under test, threshold, bit depth) -- confirm
// against the fixture's SetUp(), which is outside this view.
INSTANTIATE_TEST_CASE_P(
    SSE2, InvTrans16x16DCT,
    ::testing::Values(
        make_tuple(&idct16x16_10_add_10_c,
                   &idct16x16_10_add_10_sse2, 3167, VPX_BITS_10),
        make_tuple(&idct16x16_10,
                   &idct16x16_256_add_10_sse2, 3167, VPX_BITS_10),
        make_tuple(&idct16x16_10_add_12_c,
                   &idct16x16_10_add_12_sse2, 3167, VPX_BITS_12),
        make_tuple(&idct16x16_12,
                   &idct16x16_256_add_12_sse2, 3167, VPX_BITS_12)));
#endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

// TODO(jingning) Re-enable the mips/msa unit test.
// The trailing "&& 0" in the condition below makes it always false, so the
// MSA instantiations are currently compiled out regardless of HAVE_MSA.
#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE && 0
INSTANTIATE_TEST_CASE_P(
    MSA, Trans16x16DCT,
    ::testing::Values(
        make_tuple(&vp9_fdct16x16_msa,
                   &vp9_idct16x16_256_add_msa, 0, VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
    MSA, Trans16x16HT,
    ::testing::Values(
        make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 0, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 1, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 2, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 3,
                   VPX_BITS_8)));
#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
}  // namespace