/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <math.h>
#include <stdlib.h>
#include <string.h>

#include "third_party/googletest/src/include/gtest/gtest.h"

#include "./vp10_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "vp10/common/entropy.h"
#include "vp10/common/scan.h"
#include "vpx/vpx_codec.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"

using libvpx_test::ACMRandom;

namespace {

#ifdef _MSC_VER
// Fallback round() used only when building with MSVC (presumably because the
// targeted MSVC math library does not provide one -- confirm before removing).
// Rounds to the nearest integer, with half-way cases rounded away from zero.
static int round(double x) {
  if (x < 0)
    return static_cast<int>(ceil(x - 0.5));
  else
    return static_cast<int>(floor(x + 0.5));
}
#endif

// Number of coefficients in one 16x16 block.
const int kNumCoeffs = 256;

// Cosine table used by the reference butterfly: Ck == cos(k * pi / 32).
const double C1 = 0.995184726672197;
const double C2 = 0.98078528040323;
const double C3 = 0.956940335732209;
const double C4 = 0.923879532511287;
const double C5 = 0.881921264348355;
const double C6 = 0.831469612302545;
const double C7 = 0.773010453362737;
const double C8 = 0.707106781186548;
const double C9 = 0.634393284163646;
const double C10 = 0.555570233019602;
const double C11 = 0.471396736825998;
const double C12 = 0.38268343236509;
const double C13 = 0.290284677254462;
const double C14 = 0.195090322016128;
const double C15 = 0.098017140329561;

// Double-precision 16-point 1-D forward transform built from four butterfly
// stages. Used as the floating-point reference for the integer transforms.
//
//   input  - 16 samples; only read during step 1.
//   output - 16 coefficients; also used as scratch between stages, so every
//            element is overwritten.
//
// output[0] is the plain (unnormalised) sum of the 16 inputs.
void butterfly_16x16_dct_1d(double input[16], double output[16]) {
  double step[16];
  double intermediate[16];
  double temp1, temp2;

  // step 1: fold the input around its centre (sums in 0..7, differences in
  // 8..15).
  step[ 0] = input[0] + input[15];
  step[ 1] = input[1] + input[14];
  step[ 2] = input[2] + input[13];
  step[ 3] = input[3] + input[12];
  step[ 4] = input[4] + input[11];
  step[ 5] = input[5] + input[10];
  step[ 6] = input[6] + input[ 9];
  step[ 7] = input[7] + input[ 8];
  step[ 8] = input[7] - input[ 8];
  step[ 9] = input[6] - input[ 9];
  step[10] = input[5] - input[10];
  step[11] = input[4] - input[11];
  step[12] = input[3] - input[12];
  step[13] = input[2] - input[13];
  step[14] = input[1] - input[14];
  step[15] = input[0] - input[15];

  // step 2: fold the sums again; rotate the differences.
  output[0] = step[0] + step[7];
  output[1] = step[1] + step[6];
  output[2] = step[2] + step[5];
  output[3] = step[3] + step[4];
  output[4] = step[3] - step[4];
  output[5] = step[2] - step[5];
  output[6] = step[1] - step[6];
  output[7] = step[0] - step[7];

  temp1 = step[ 8] * C7;
  temp2 = step[15] * C9;
  output[ 8] = temp1 + temp2;

  temp1 = step[ 9] * C11;
  temp2 = step[14] * C5;
  output[ 9] = temp1 - temp2;

  temp1 = step[10] * C3;
  temp2 = step[13] * C13;
  output[10] = temp1 + temp2;

  temp1 = step[11] * C15;
  temp2 = step[12] * C1;
  output[11] = temp1 - temp2;

  temp1 = step[11] * C1;
  temp2 = step[12] * C15;
  output[12] = temp2 + temp1;

  temp1 = step[10] * C13;
  temp2 = step[13] * C3;
  output[13] = temp2 - temp1;

  temp1 = step[ 9] * C5;
  temp2 = step[14] * C11;
  output[14] = temp2 + temp1;

  temp1 = step[ 8] * C9;
  temp2 = step[15] * C7;
  output[15] = temp2 - temp1;

  // step 3
  step[ 0] = output[0] + output[3];
  step[ 1] = output[1] + output[2];
  step[ 2] = output[1] - output[2];
  step[ 3] = output[0] - output[3];

  temp1 = output[4] * C14;
  temp2 = output[7] * C2;
  step[ 4] = temp1 + temp2;

  temp1 = output[5] * C10;
  temp2 = output[6] * C6;
  step[ 5] = temp1 + temp2;

  temp1 = output[5] * C6;
  temp2 = output[6] * C10;
  step[ 6] = temp2 - temp1;

  temp1 = output[4] * C2;
  temp2 = output[7] * C14;
  step[ 7] = temp2 - temp1;

  step[ 8] = output[ 8] + output[11];
  step[ 9] = output[ 9] + output[10];
  step[10] = output[ 9] - output[10];
  step[11] = output[ 8] - output[11];

  step[12] = output[12] + output[15];
  step[13] = output[13] + output[14];
  step[14] = output[13] - output[14];
  step[15] = output[12] - output[15];

  // step 4: final combinations, written straight to their output positions.
  output[ 0] = (step[ 0] + step[ 1]);
  output[ 8] = (step[ 0] - step[ 1]);

  temp1 = step[2] * C12;
  temp2 = step[3] * C4;
  temp1 = temp1 + temp2;
  output[ 4] = 2*(temp1 * C8);

  temp1 = step[2] * C4;
  temp2 = step[3] * C12;
  temp1 = temp2 - temp1;
  output[12] = 2 * (temp1 * C8);

  output[ 2] = 2 * ((step[4] + step[ 5]) * C8);
  output[14] = 2 * ((step[7] - step[ 6]) * C8);

  temp1 = step[4] - step[5];
  temp2 = step[6] + step[7];
  output[ 6] = (temp1 + temp2);
  output[10] = (temp1 - temp2);

  intermediate[8] = step[8] + step[14];
  intermediate[9] = step[9] + step[15];

  temp1 = intermediate[8] * C12;
  temp2 = intermediate[9] * C4;
  temp1 = temp1 - temp2;
  output[3] = 2 * (temp1 * C8);

  temp1 = intermediate[8] * C4;
  temp2 = intermediate[9] * C12;
  temp1 = temp2 + temp1;
  output[13] = 2 * (temp1 * C8);

  output[ 9] = 2 * ((step[10] + step[11]) * C8);

  intermediate[11] = step[10] - step[11];
  intermediate[12] = step[12] + step[13];
  intermediate[13] = step[12] - step[13];
  intermediate[14] = step[ 8] - step[14];
  intermediate[15] = step[ 9] - step[15];

  output[15] = (intermediate[11] + intermediate[12]);
  output[ 1] = -(intermediate[11] - intermediate[12]);

  output[ 7] = 2 * (intermediate[13] * C8);

  temp1 = intermediate[14] * C12;
  temp2 = intermediate[15] * C4;
  temp1 = temp1 - temp2;
  output[11] = -2 * (temp1 * C8);

  temp1 = intermediate[14] * C4;
  temp2 = intermediate[15] * C12;
  temp1 = temp2 + temp1;
  output[ 5] = 2 * (temp1 * C8);
}

void reference_16x16_dct_2d(int16_t input[256], double output[256]) {
Daniel Kang's avatar
Daniel Kang committed
216 217 218 219
  // First transform columns
  for (int i = 0; i < 16; ++i) {
    double temp_in[16], temp_out[16];
    for (int j = 0; j < 16; ++j)
220
      temp_in[j] = input[j * 16 + i];
Daniel Kang's avatar
Daniel Kang committed
221 222
    butterfly_16x16_dct_1d(temp_in, temp_out);
    for (int j = 0; j < 16; ++j)
223
      output[j * 16 + i] = temp_out[j];
Daniel Kang's avatar
Daniel Kang committed
224 225 226 227 228
  }
  // Then transform rows
  for (int i = 0; i < 16; ++i) {
    double temp_in[16], temp_out[16];
    for (int j = 0; j < 16; ++j)
229
      temp_in[j] = output[j + i * 16];
Daniel Kang's avatar
Daniel Kang committed
230 231 232
    butterfly_16x16_dct_1d(temp_in, temp_out);
    // Scale by some magic number
    for (int j = 0; j < 16; ++j)
233
      output[j + i * 16] = temp_out[j]/2;
Daniel Kang's avatar
Daniel Kang committed
234 235 236
  }
}

237 238 239
// Signatures of the transforms under test. The hybrid-transform variants take
// an extra tx_type selector.
typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
                        int tx_type);
typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
                        int tx_type);

// Test parameters: (forward transform, inverse transform, tx_type, bit depth).
typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct16x16Param;
typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht16x16Param;
// Test parameters: (reference inverse transform, inverse transform under test,
// coefficient magnitude threshold, bit depth).
typedef std::tr1::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t>
    Idct16x16Param;

void fdct16x16_ref(const int16_t *in, tran_low_t *out, int stride,
250
                   int /*tx_type*/) {
251
  vpx_fdct16x16_c(in, out, stride);
252 253
}

254
void idct16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,
255
                   int /*tx_type*/) {
256
  vpx_idct16x16_256_add_c(in, dest, stride);
257 258
}

259 260
void fht16x16_ref(const int16_t *in, tran_low_t *out, int stride,
                  int tx_type) {
261
  vp10_fht16x16_c(in, out, stride, tx_type);
262 263
}

264 265
void iht16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,
                  int tx_type) {
266
  vp10_iht16x16_256_add_c(in, dest, stride, tx_type);
267 268
}

269
#if CONFIG_VPX_HIGHBITDEPTH
// The high-bit-depth transforms take the bit depth as a trailing argument.
// The wrappers below bind it to 10 or 12 so the functions fit the IdctFunc /
// IhtFunc signatures used by the test parameters.
void idct16x16_10(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct16x16_256_add_c(in, out, stride, 10);
}

void idct16x16_12(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct16x16_256_add_c(in, out, stride, 12);
}

// IhtFunc-shaped adapters around the wrappers above; tx_type is ignored.
void idct16x16_10_ref(const tran_low_t *in, uint8_t *out, int stride,
                      int /*tx_type*/) {
  idct16x16_10(in, out, stride);
}

void idct16x16_12_ref(const tran_low_t *in, uint8_t *out, int stride,
                      int /*tx_type*/) {
  idct16x16_12(in, out, stride);
}

void iht16x16_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
  vp10_highbd_iht16x16_256_add_c(in, out, stride, tx_type, 10);
}

void iht16x16_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
  vp10_highbd_iht16x16_256_add_c(in, out, stride, tx_type, 12);
}

void idct16x16_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct16x16_10_add_c(in, out, stride, 10);
}

void idct16x16_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct16x16_10_add_c(in, out, stride, 12);
}

#if HAVE_SSE2
void idct16x16_256_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct16x16_256_add_sse2(in, out, stride, 10);
}

void idct16x16_256_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct16x16_256_add_sse2(in, out, stride, 12);
}

void idct16x16_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct16x16_10_add_sse2(in, out, stride, 10);
}

void idct16x16_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct16x16_10_add_sse2(in, out, stride, 12);
}
#endif  // HAVE_SSE2
#endif  // CONFIG_VPX_HIGHBITDEPTH

class Trans16x16TestBase {
324
 public:
325
  virtual ~Trans16x16TestBase() {}
326

327
 protected:
328
  virtual void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) = 0;
329

330
  virtual void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) = 0;
331 332 333

  void RunAccuracyCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
334 335
    uint32_t max_error = 0;
    int64_t total_error = 0;
336 337
    const int count_test_block = 10000;
    for (int i = 0; i < count_test_block; ++i) {
338 339 340 341
      DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
      DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
      DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
      DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
342
#if CONFIG_VPX_HIGHBITDEPTH
343 344
      DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
      DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
345
#endif
346

347
      // Initialize a test block with input range [-mask_, mask_].
348
      for (int j = 0; j < kNumCoeffs; ++j) {
349 350 351 352
        if (bit_depth_ == VPX_BITS_8) {
          src[j] = rnd.Rand8();
          dst[j] = rnd.Rand8();
          test_input_block[j] = src[j] - dst[j];
353
#if CONFIG_VPX_HIGHBITDEPTH
354 355 356 357 358 359
        } else {
          src16[j] = rnd.Rand16() & mask_;
          dst16[j] = rnd.Rand16() & mask_;
          test_input_block[j] = src16[j] - dst16[j];
#endif
        }
360 361
      }

362 363
      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block,
                                          test_temp_block, pitch_));
364 365 366
      if (bit_depth_ == VPX_BITS_8) {
        ASM_REGISTER_STATE_CHECK(
            RunInvTxfm(test_temp_block, dst, pitch_));
367
#if CONFIG_VPX_HIGHBITDEPTH
368 369 370 371 372
      } else {
        ASM_REGISTER_STATE_CHECK(
            RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
#endif
      }
373 374

      for (int j = 0; j < kNumCoeffs; ++j) {
375
#if CONFIG_VPX_HIGHBITDEPTH
376 377 378
        const uint32_t diff =
            bit_depth_ == VPX_BITS_8 ?  dst[j] - src[j] : dst16[j] - src16[j];
#else
379
        const uint32_t diff = dst[j] - src[j];
380
#endif
381
        const uint32_t error = diff * diff;
382 383 384 385
        if (max_error < error)
          max_error = error;
        total_error += error;
      }
Scott LaVarnway's avatar
Scott LaVarnway committed
386
    }
Daniel Kang's avatar
Daniel Kang committed
387

388
    EXPECT_GE(1u  << 2 * (bit_depth_ - 8), max_error)
389 390
        << "Error: 16x16 FHT/IHT has an individual round trip error > 1";

391
    EXPECT_GE(count_test_block << 2 * (bit_depth_ - 8), total_error)
392
        << "Error: 16x16 FHT/IHT has average round trip error > 1 per block";
393 394
  }

395
  void RunCoeffCheck() {
396 397
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
398 399 400
    DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
401

402
    for (int i = 0; i < count_test_block; ++i) {
403
      // Initialize a test block with input range [-mask_, mask_].
404
      for (int j = 0; j < kNumCoeffs; ++j)
405
        input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
406 407

      fwd_txfm_ref(input_block, output_ref_block, pitch_, tx_type_);
408
      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_));
409 410 411 412 413 414 415 416 417 418

      // The minimum quant value is 4.
      for (int j = 0; j < kNumCoeffs; ++j)
        EXPECT_EQ(output_block[j], output_ref_block[j]);
    }
  }

  void RunMemCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
419 420 421
    DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
Scott LaVarnway's avatar
Scott LaVarnway committed
422

423
    for (int i = 0; i < count_test_block; ++i) {
424
      // Initialize a test block with input range [-mask_, mask_].
425
      for (int j = 0; j < kNumCoeffs; ++j) {
426
        input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
427
      }
428
      if (i == 0) {
429
        for (int j = 0; j < kNumCoeffs; ++j)
430
          input_extreme_block[j] = mask_;
431
      } else if (i == 1) {
432
        for (int j = 0; j < kNumCoeffs; ++j)
433
          input_extreme_block[j] = -mask_;
434
      }
435

436
      fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);
437 438
      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block,
                                          output_block, pitch_));
439 440 441

      // The minimum quant value is 4.
      for (int j = 0; j < kNumCoeffs; ++j) {
442
        EXPECT_EQ(output_block[j], output_ref_block[j]);
443
        EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block[j]))
444 445
            << "Error: 16x16 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
      }
446
    }
447 448
  }

449 450
  void RunQuantCheck(int dc_thred, int ac_thred) {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
451
    const int count_test_block = 100000;
452 453
    DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
454

455 456
    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
457
#if CONFIG_VPX_HIGHBITDEPTH
458 459
    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
460
#endif
461 462

    for (int i = 0; i < count_test_block; ++i) {
463
      // Initialize a test block with input range [-mask_, mask_].
464
      for (int j = 0; j < kNumCoeffs; ++j) {
465
        input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
466 467 468
      }
      if (i == 0)
        for (int j = 0; j < kNumCoeffs; ++j)
469
          input_extreme_block[j] = mask_;
470 471
      if (i == 1)
        for (int j = 0; j < kNumCoeffs; ++j)
472
          input_extreme_block[j] = -mask_;
473 474 475 476

      fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);

      // clear reconstructed pixel buffers
James Zern's avatar
James Zern committed
477 478
      memset(dst, 0, kNumCoeffs * sizeof(uint8_t));
      memset(ref, 0, kNumCoeffs * sizeof(uint8_t));
479
#if CONFIG_VPX_HIGHBITDEPTH
James Zern's avatar
James Zern committed
480 481
      memset(dst16, 0, kNumCoeffs * sizeof(uint16_t));
      memset(ref16, 0, kNumCoeffs * sizeof(uint16_t));
482
#endif
483 484 485 486 487

      // quantization with maximum allowed step sizes
      output_ref_block[0] = (output_ref_block[0] / dc_thred) * dc_thred;
      for (int j = 1; j < kNumCoeffs; ++j)
        output_ref_block[j] = (output_ref_block[j] / ac_thred) * ac_thred;
488 489 490
      if (bit_depth_ == VPX_BITS_8) {
        inv_txfm_ref(output_ref_block, ref, pitch_, tx_type_);
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(output_ref_block, dst, pitch_));
491
#if CONFIG_VPX_HIGHBITDEPTH
492 493 494 495 496 497 498 499 500 501
      } else {
        inv_txfm_ref(output_ref_block, CONVERT_TO_BYTEPTR(ref16), pitch_,
                     tx_type_);
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(output_ref_block,
                                            CONVERT_TO_BYTEPTR(dst16), pitch_));
#endif
      }
      if (bit_depth_ == VPX_BITS_8) {
        for (int j = 0; j < kNumCoeffs; ++j)
          EXPECT_EQ(ref[j], dst[j]);
502
#if CONFIG_VPX_HIGHBITDEPTH
503 504 505 506 507
      } else {
        for (int j = 0; j < kNumCoeffs; ++j)
          EXPECT_EQ(ref16[j], dst16[j]);
#endif
      }
508 509 510
    }
  }

511 512 513
  void RunInvAccuracyCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
514 515 516 517
    DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
518
#if CONFIG_VPX_HIGHBITDEPTH
519 520
    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
521
#endif  // CONFIG_VPX_HIGHBITDEPTH
Daniel Kang's avatar
Daniel Kang committed
522

523 524
    for (int i = 0; i < count_test_block; ++i) {
      double out_r[kNumCoeffs];
Daniel Kang's avatar
Daniel Kang committed
525

526 527
      // Initialize a test block with input range [-255, 255].
      for (int j = 0; j < kNumCoeffs; ++j) {
528 529 530 531
        if (bit_depth_ == VPX_BITS_8) {
          src[j] = rnd.Rand8();
          dst[j] = rnd.Rand8();
          in[j] = src[j] - dst[j];
532
#if CONFIG_VPX_HIGHBITDEPTH
533 534 535 536
        } else {
          src16[j] = rnd.Rand16() & mask_;
          dst16[j] = rnd.Rand16() & mask_;
          in[j] = src16[j] - dst16[j];
537
#endif  // CONFIG_VPX_HIGHBITDEPTH
538
        }
539 540 541 542
      }

      reference_16x16_dct_2d(in, out_r);
      for (int j = 0; j < kNumCoeffs; ++j)
543
        coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
544

545 546
      if (bit_depth_ == VPX_BITS_8) {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, 16));
547
#if CONFIG_VPX_HIGHBITDEPTH
548 549 550
      } else {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16),
                                            16));
551
#endif  // CONFIG_VPX_HIGHBITDEPTH
552
      }
553 554

      for (int j = 0; j < kNumCoeffs; ++j) {
555
#if CONFIG_VPX_HIGHBITDEPTH
556 557 558
        const uint32_t diff =
            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
#else
559
        const uint32_t diff = dst[j] - src[j];
560
#endif  // CONFIG_VPX_HIGHBITDEPTH
561 562
        const uint32_t error = diff * diff;
        EXPECT_GE(1u, error)
563 564 565
            << "Error: 16x16 IDCT has error " << error
            << " at index " << j;
      }
Daniel Kang's avatar
Daniel Kang committed
566 567
    }
  }
568 569 570 571 572

  void CompareInvReference(IdctFunc ref_txfm, int thresh) {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 10000;
    const int eob = 10;
573
    const int16_t *scan = vp10_default_scan_orders[TX_16X16].scan;
574 575 576
    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
577
#if CONFIG_VPX_HIGHBITDEPTH
578 579
    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
580
#endif  // CONFIG_VPX_HIGHBITDEPTH
581 582 583 584 585 586 587 588 589 590 591 592

    for (int i = 0; i < count_test_block; ++i) {
      for (int j = 0; j < kNumCoeffs; ++j) {
        if (j < eob) {
          // Random values less than the threshold, either positive or negative
          coeff[scan[j]] = rnd(thresh) * (1 - 2 * (i % 2));
        } else {
          coeff[scan[j]] = 0;
        }
        if (bit_depth_ == VPX_BITS_8) {
          dst[j] = 0;
          ref[j] = 0;
593
#if CONFIG_VPX_HIGHBITDEPTH
594 595 596
        } else {
          dst16[j] = 0;
          ref16[j] = 0;
597
#endif  // CONFIG_VPX_HIGHBITDEPTH
598 599 600 601 602 603
        }
      }
      if (bit_depth_ == VPX_BITS_8) {
        ref_txfm(coeff, ref, pitch_);
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
      } else {
604
#if CONFIG_VPX_HIGHBITDEPTH
605 606 607
        ref_txfm(coeff, CONVERT_TO_BYTEPTR(ref16), pitch_);
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16),
                                 pitch_));
608
#endif  // CONFIG_VPX_HIGHBITDEPTH
609 610 611
      }

      for (int j = 0; j < kNumCoeffs; ++j) {
612
#if CONFIG_VPX_HIGHBITDEPTH
613 614 615 616
        const uint32_t diff =
            bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
#else
        const uint32_t diff = dst[j] - ref[j];
617
#endif  // CONFIG_VPX_HIGHBITDEPTH
618 619 620 621 622 623 624 625
        const uint32_t error = diff * diff;
        EXPECT_EQ(0u, error)
            << "Error: 16x16 IDCT Comparison has error " << error
            << " at index " << j;
      }
    }
  }

626 627
  int pitch_;
  int tx_type_;
628 629
  vpx_bit_depth_t bit_depth_;
  int mask_;
630 631
  FhtFunc fwd_txfm_ref;
  IhtFunc inv_txfm_ref;
632
};
Daniel Kang's avatar
Daniel Kang committed
633

634 635
class Trans16x16DCT
    : public Trans16x16TestBase,
636
      public ::testing::TestWithParam<Dct16x16Param> {
637 638
 public:
  virtual ~Trans16x16DCT() {}
Daniel Kang's avatar
Daniel Kang committed
639

640 641 642 643
  virtual void SetUp() {
    fwd_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
    tx_type_  = GET_PARAM(2);
644
    bit_depth_ = GET_PARAM(3);
645
    pitch_    = 16;
646
    fwd_txfm_ref = fdct16x16_ref;
647
    inv_txfm_ref = idct16x16_ref;
648
    mask_ = (1 << bit_depth_) - 1;
649
#if CONFIG_VPX_HIGHBITDEPTH
650
    switch (bit_depth_) {
651
      case VPX_BITS_10:
652 653
        inv_txfm_ref = idct16x16_10_ref;
        break;
654
      case VPX_BITS_12:
655 656 657 658 659 660 661 662 663
        inv_txfm_ref = idct16x16_12_ref;
        break;
      default:
        inv_txfm_ref = idct16x16_ref;
        break;
    }
#else
    inv_txfm_ref = idct16x16_ref;
#endif
664 665
  }
  virtual void TearDown() { libvpx_test::ClearSystemState(); }
Daniel Kang's avatar
Daniel Kang committed
666

667
 protected:
668
  void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
669 670
    fwd_txfm_(in, out, stride);
  }
671
  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
672
    inv_txfm_(out, dst, stride);
Daniel Kang's avatar
Daniel Kang committed
673
  }
674

675 676
  FdctFunc fwd_txfm_;
  IdctFunc inv_txfm_;
677 678 679 680
};

TEST_P(Trans16x16DCT, AccuracyCheck) {
  RunAccuracyCheck();
}

TEST_P(Trans16x16DCT, CoeffCheck) {
  RunCoeffCheck();
}

TEST_P(Trans16x16DCT, MemCheck) {
  RunMemCheck();
}

TEST_P(Trans16x16DCT, QuantCheck) {
  // Use maximally allowed quantization step sizes for DC and AC
  // coefficients respectively.
  RunQuantCheck(1336, 1828);
}

TEST_P(Trans16x16DCT, InvAccuracyCheck) {
  RunInvAccuracyCheck();
}

class Trans16x16HT
    : public Trans16x16TestBase,
703
      public ::testing::TestWithParam<Ht16x16Param> {
704 705 706 707 708 709 710
 public:
  virtual ~Trans16x16HT() {}

  virtual void SetUp() {
    fwd_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
    tx_type_  = GET_PARAM(2);
711
    bit_depth_ = GET_PARAM(3);
712 713
    pitch_    = 16;
    fwd_txfm_ref = fht16x16_ref;
714
    inv_txfm_ref = iht16x16_ref;
715
    mask_ = (1 << bit_depth_) - 1;
716
#if CONFIG_VPX_HIGHBITDEPTH
717 718 719 720 721 722 723 724 725 726 727 728 729 730
    switch (bit_depth_) {
      case VPX_BITS_10:
        inv_txfm_ref = iht16x16_10;
        break;
      case VPX_BITS_12:
        inv_txfm_ref = iht16x16_12;
        break;
      default:
        inv_txfm_ref = iht16x16_ref;
        break;
    }
#else
    inv_txfm_ref = iht16x16_ref;
#endif
731
  }
732 733 734
  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
735
  void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
736
    fwd_txfm_(in, out, stride, tx_type_);
737
  }
738
  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
739
    inv_txfm_(out, dst, stride, tx_type_);
740 741
  }

742 743
  FhtFunc fwd_txfm_;
  IhtFunc inv_txfm_;
744 745 746 747 748 749
};

TEST_P(Trans16x16HT, AccuracyCheck) {
  RunAccuracyCheck();
}

TEST_P(Trans16x16HT, CoeffCheck) {
  RunCoeffCheck();
}

TEST_P(Trans16x16HT, MemCheck) {
  RunMemCheck();
}

TEST_P(Trans16x16HT, QuantCheck) {
  // The encoder skips any non-DC intra prediction modes,
  // when the quantization step size goes beyond 988.
  RunQuantCheck(429, 729);
}

// Fixture that compares one inverse DCT against another. Parameters are
// (reference inverse transform, inverse transform under test, coefficient
// magnitude threshold, bit depth). Only the inverse direction is exercised,
// so the forward hook is a no-op.
class InvTrans16x16DCT
    : public Trans16x16TestBase,
      public ::testing::TestWithParam<Idct16x16Param> {
 public:
  virtual ~InvTrans16x16DCT() {}

  virtual void SetUp() {
    pitch_ = 16;
    ref_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
    thresh_ = GET_PARAM(2);
    bit_depth_ = GET_PARAM(3);
    mask_ = (1 << bit_depth_) - 1;
  }

  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
  // Intentionally empty: this fixture has no forward transform.
  void RunFwdTxfm(int16_t * /*in*/, tran_low_t * /*out*/, int /*stride*/) {}

  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
    inv_txfm_(out, dst, stride);
  }

  IdctFunc ref_txfm_;  // reference inverse transform
  IdctFunc inv_txfm_;  // inverse transform under test
  int thresh_;         // upper bound on generated coefficient magnitudes
};

// Checks the inverse transform under test against ref_txfm_ on sparse
// coefficient blocks whose magnitudes are below thresh_.
TEST_P(InvTrans16x16DCT, CompareReference) {
  CompareInvReference(ref_txfm_, thresh_);
}

795 796
using std::tr1::make_tuple;

797
#if CONFIG_VPX_HIGHBITDEPTH
798 799 800
INSTANTIATE_TEST_CASE_P(
    C, Trans16x16DCT,
    ::testing::Values(
801 802
        make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_10, 0, VPX_BITS_10),
        make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_12, 0, VPX_BITS_12),
803
        make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c, 0, VPX_BITS_8)));
804
#else
805 806 807
INSTANTIATE_TEST_CASE_P(
    C, Trans16x16DCT,
    ::testing::Values(
808
        make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c, 0, VPX_BITS_8)));
809
#endif  // CONFIG_VPX_HIGHBITDEPTH
810

811
// C implementations of the hybrid transform, one entry per tx_type (0..3) and
// bit depth.
#if CONFIG_VPX_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    C, Trans16x16HT,
    ::testing::Values(
        make_tuple(&vp10_highbd_fht16x16_c, &iht16x16_10, 0, VPX_BITS_10),
        make_tuple(&vp10_highbd_fht16x16_c, &iht16x16_10, 1, VPX_BITS_10),
        make_tuple(&vp10_highbd_fht16x16_c, &iht16x16_10, 2, VPX_BITS_10),
        make_tuple(&vp10_highbd_fht16x16_c, &iht16x16_10, 3, VPX_BITS_10),
        make_tuple(&vp10_highbd_fht16x16_c, &iht16x16_12, 0, VPX_BITS_12),
        make_tuple(&vp10_highbd_fht16x16_c, &iht16x16_12, 1, VPX_BITS_12),
        make_tuple(&vp10_highbd_fht16x16_c, &iht16x16_12, 2, VPX_BITS_12),
        make_tuple(&vp10_highbd_fht16x16_c, &iht16x16_12, 3, VPX_BITS_12),
        make_tuple(&vp10_fht16x16_c, &vp10_iht16x16_256_add_c, 0, VPX_BITS_8),
        make_tuple(&vp10_fht16x16_c, &vp10_iht16x16_256_add_c, 1, VPX_BITS_8),
        make_tuple(&vp10_fht16x16_c, &vp10_iht16x16_256_add_c, 2, VPX_BITS_8),
        make_tuple(&vp10_fht16x16_c, &vp10_iht16x16_256_add_c, 3, VPX_BITS_8)));
#else
INSTANTIATE_TEST_CASE_P(
    C, Trans16x16HT,
    ::testing::Values(
        make_tuple(&vp10_fht16x16_c, &vp10_iht16x16_256_add_c, 0, VPX_BITS_8),
        make_tuple(&vp10_fht16x16_c, &vp10_iht16x16_256_add_c, 1, VPX_BITS_8),
        make_tuple(&vp10_fht16x16_c, &vp10_iht16x16_256_add_c, 2, VPX_BITS_8),
        make_tuple(&vp10_fht16x16_c, &vp10_iht16x16_256_add_c, 3, VPX_BITS_8)));
#endif  // CONFIG_VPX_HIGHBITDEPTH

// NEON inverse DCT paired with the C forward DCT.
#if HAVE_NEON_ASM && !CONFIG_VPX_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    NEON, Trans16x16DCT,
    ::testing::Values(
        make_tuple(&vpx_fdct16x16_c,
                   &vpx_idct16x16_256_add_neon, 0, VPX_BITS_8)));
#endif  // HAVE_NEON_ASM && !CONFIG_VPX_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

// SSE2 forward and inverse transforms in 8-bit-only builds.
#if HAVE_SSE2 && !CONFIG_VPX_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16DCT,
    ::testing::Values(
        make_tuple(&vpx_fdct16x16_sse2,
                   &vpx_idct16x16_256_add_sse2, 0, VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16HT,
    ::testing::Values(
        make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_sse2, 0,
                   VPX_BITS_8),
        make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_sse2, 1,
                   VPX_BITS_8),
        make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_sse2, 2,
                   VPX_BITS_8),
        make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_sse2, 3,
                   VPX_BITS_8)));
#endif  // HAVE_SSE2 && !CONFIG_VPX_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

// SSE2 transforms in high-bit-depth builds. Each entry pairs one SSE2
// function with a C counterpart for the other direction.
#if HAVE_SSE2 && CONFIG_VPX_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16DCT,
    ::testing::Values(
        make_tuple(&vpx_highbd_fdct16x16_sse2,
                   &idct16x16_10, 0, VPX_BITS_10),
        make_tuple(&vpx_highbd_fdct16x16_c,
                   &idct16x16_256_add_10_sse2, 0, VPX_BITS_10),
        make_tuple(&vpx_highbd_fdct16x16_sse2,
                   &idct16x16_12, 0, VPX_BITS_12),
        make_tuple(&vpx_highbd_fdct16x16_c,
                   &idct16x16_256_add_12_sse2, 0, VPX_BITS_12),
        make_tuple(&vpx_fdct16x16_sse2,
                   &vpx_idct16x16_256_add_c, 0, VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16HT,
    ::testing::Values(
        make_tuple(&vp10_fht16x16_sse2,
                   &vp10_iht16x16_256_add_c, 0, VPX_BITS_8),
        make_tuple(&vp10_fht16x16_sse2,
                   &vp10_iht16x16_256_add_c, 1, VPX_BITS_8),
        make_tuple(&vp10_fht16x16_sse2,
                   &vp10_iht16x16_256_add_c, 2, VPX_BITS_8),
        make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_c, 3,
                   VPX_BITS_8)));
// Optimizations take effect at a threshold of 3155, so we use a value close to
// that to test both branches.
INSTANTIATE_TEST_CASE_P(
    SSE2, InvTrans16x16DCT,
    ::testing::Values(
        make_tuple(&idct16x16_10_add_10_c,
                   &idct16x16_10_add_10_sse2, 3167, VPX_BITS_10),
        make_tuple(&idct16x16_10,
                   &idct16x16_256_add_10_sse2, 3167, VPX_BITS_10),
        make_tuple(&idct16x16_10_add_12_c,
                   &idct16x16_10_add_12_sse2, 3167, VPX_BITS_12),
        make_tuple(&idct16x16_12,
                   &idct16x16_256_add_12_sse2, 3167, VPX_BITS_12)));
#endif  // HAVE_SSE2 && CONFIG_VPX_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

// MSA forward and inverse transforms in 8-bit-only builds.
#if HAVE_MSA && !CONFIG_VPX_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    MSA, Trans16x16DCT,
    ::testing::Values(
        make_tuple(&vpx_fdct16x16_msa,
                   &vpx_idct16x16_256_add_msa, 0, VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
    MSA, Trans16x16HT,
    ::testing::Values(
        make_tuple(&vp10_fht16x16_msa,
                   &vp10_iht16x16_256_add_msa, 0, VPX_BITS_8),
        make_tuple(&vp10_fht16x16_msa,
                   &vp10_iht16x16_256_add_msa, 1, VPX_BITS_8),
        make_tuple(&vp10_fht16x16_msa,
                   &vp10_iht16x16_256_add_msa, 2, VPX_BITS_8),
        make_tuple(&vp10_fht16x16_msa,
                   &vp10_iht16x16_256_add_msa, 3, VPX_BITS_8)));
#endif  // HAVE_MSA && !CONFIG_VPX_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
}  // namespace