dct16x16_test.cc 29 KB
Newer Older
Daniel Kang's avatar
Daniel Kang committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <math.h>
#include <stdlib.h>
#include <string.h>

#include "third_party/googletest/src/include/gtest/gtest.h"
16

17
#include "./vp10_rtcd.h"
18
#include "./vpx_dsp_rtcd.h"
19
20
21
22
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
23
24
#include "vp10/common/entropy.h"
#include "vp10/common/scan.h"
25
#include "vpx/vpx_codec.h"
26
#include "vpx/vpx_integer.h"
27
#include "vpx_ports/mem.h"
28

Daniel Kang's avatar
Daniel Kang committed
29
30
31
using libvpx_test::ACMRandom;

namespace {
32
33
34
35

#ifdef _MSC_VER
// Rounds x to the nearest integer, with halfway cases going away from zero.
// Only compiled when _MSC_VER is defined; other toolchains are expected to
// pick up round() from <math.h>.
static int round(double x) {
  const double nearest = (x < 0) ? ceil(x - 0.5) : floor(x + 0.5);
  return static_cast<int>(nearest);
}
#endif  // _MSC_VER
Daniel Kang's avatar
Daniel Kang committed
41

42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
// Number of coefficients in one 16x16 block.
const int kNumCoeffs = 256;

// Cosine table for the floating-point reference DCT: Ck == cos(k * pi / 32).
const double C1 = 0.995184726672197;   // cos( 1 * pi / 32)
const double C2 = 0.98078528040323;    // cos( 2 * pi / 32)
const double C3 = 0.956940335732209;   // cos( 3 * pi / 32)
const double C4 = 0.923879532511287;   // cos( 4 * pi / 32)
const double C5 = 0.881921264348355;   // cos( 5 * pi / 32)
const double C6 = 0.831469612302545;   // cos( 6 * pi / 32)
const double C7 = 0.773010453362737;   // cos( 7 * pi / 32)
const double C8 = 0.707106781186548;   // cos( 8 * pi / 32)
const double C9 = 0.634393284163646;   // cos( 9 * pi / 32)
const double C10 = 0.555570233019602;  // cos(10 * pi / 32)
const double C11 = 0.471396736825998;  // cos(11 * pi / 32)
const double C12 = 0.38268343236509;   // cos(12 * pi / 32)
const double C13 = 0.290284677254462;  // cos(13 * pi / 32)
const double C14 = 0.195090322016128;  // cos(14 * pi / 32)
const double C15 = 0.098017140329561;  // cos(15 * pi / 32)
58

Jingning Han's avatar
Jingning Han committed
59
void butterfly_16x16_dct_1d(double input[16], double output[16]) {
Daniel Kang's avatar
Daniel Kang committed
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
  double step[16];
  double intermediate[16];
  double temp1, temp2;

  // step 1
  step[ 0] = input[0] + input[15];
  step[ 1] = input[1] + input[14];
  step[ 2] = input[2] + input[13];
  step[ 3] = input[3] + input[12];
  step[ 4] = input[4] + input[11];
  step[ 5] = input[5] + input[10];
  step[ 6] = input[6] + input[ 9];
  step[ 7] = input[7] + input[ 8];
  step[ 8] = input[7] - input[ 8];
  step[ 9] = input[6] - input[ 9];
  step[10] = input[5] - input[10];
  step[11] = input[4] - input[11];
  step[12] = input[3] - input[12];
  step[13] = input[2] - input[13];
  step[14] = input[1] - input[14];
  step[15] = input[0] - input[15];

  // step 2
  output[0] = step[0] + step[7];
  output[1] = step[1] + step[6];
  output[2] = step[2] + step[5];
  output[3] = step[3] + step[4];
  output[4] = step[3] - step[4];
  output[5] = step[2] - step[5];
  output[6] = step[1] - step[6];
  output[7] = step[0] - step[7];

92
93
  temp1 = step[ 8] * C7;
  temp2 = step[15] * C9;
Daniel Kang's avatar
Daniel Kang committed
94
95
  output[ 8] = temp1 + temp2;

96
97
  temp1 = step[ 9] * C11;
  temp2 = step[14] * C5;
Daniel Kang's avatar
Daniel Kang committed
98
99
  output[ 9] = temp1 - temp2;

100
101
  temp1 = step[10] * C3;
  temp2 = step[13] * C13;
Daniel Kang's avatar
Daniel Kang committed
102
103
  output[10] = temp1 + temp2;

104
105
  temp1 = step[11] * C15;
  temp2 = step[12] * C1;
Daniel Kang's avatar
Daniel Kang committed
106
107
  output[11] = temp1 - temp2;

108
109
  temp1 = step[11] * C1;
  temp2 = step[12] * C15;
Daniel Kang's avatar
Daniel Kang committed
110
111
  output[12] = temp2 + temp1;

112
113
  temp1 = step[10] * C13;
  temp2 = step[13] * C3;
Daniel Kang's avatar
Daniel Kang committed
114
115
  output[13] = temp2 - temp1;

116
117
  temp1 = step[ 9] * C5;
  temp2 = step[14] * C11;
Daniel Kang's avatar
Daniel Kang committed
118
119
  output[14] = temp2 + temp1;

120
121
  temp1 = step[ 8] * C9;
  temp2 = step[15] * C7;
Daniel Kang's avatar
Daniel Kang committed
122
123
124
125
126
127
128
129
  output[15] = temp2 - temp1;

  // step 3
  step[ 0] = output[0] + output[3];
  step[ 1] = output[1] + output[2];
  step[ 2] = output[1] - output[2];
  step[ 3] = output[0] - output[3];

130
131
  temp1 = output[4] * C14;
  temp2 = output[7] * C2;
Daniel Kang's avatar
Daniel Kang committed
132
133
  step[ 4] = temp1 + temp2;

134
135
  temp1 = output[5] * C10;
  temp2 = output[6] * C6;
Daniel Kang's avatar
Daniel Kang committed
136
137
  step[ 5] = temp1 + temp2;

138
139
  temp1 = output[5] * C6;
  temp2 = output[6] * C10;
Daniel Kang's avatar
Daniel Kang committed
140
141
  step[ 6] = temp2 - temp1;

142
143
  temp1 = output[4] * C2;
  temp2 = output[7] * C14;
Daniel Kang's avatar
Daniel Kang committed
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
  step[ 7] = temp2 - temp1;

  step[ 8] = output[ 8] + output[11];
  step[ 9] = output[ 9] + output[10];
  step[10] = output[ 9] - output[10];
  step[11] = output[ 8] - output[11];

  step[12] = output[12] + output[15];
  step[13] = output[13] + output[14];
  step[14] = output[13] - output[14];
  step[15] = output[12] - output[15];

  // step 4
  output[ 0] = (step[ 0] + step[ 1]);
  output[ 8] = (step[ 0] - step[ 1]);

160
161
  temp1 = step[2] * C12;
  temp2 = step[3] * C4;
Daniel Kang's avatar
Daniel Kang committed
162
  temp1 = temp1 + temp2;
163
  output[ 4] = 2*(temp1 * C8);
Daniel Kang's avatar
Daniel Kang committed
164

165
166
  temp1 = step[2] * C4;
  temp2 = step[3] * C12;
Daniel Kang's avatar
Daniel Kang committed
167
  temp1 = temp2 - temp1;
168
  output[12] = 2 * (temp1 * C8);
Daniel Kang's avatar
Daniel Kang committed
169

170
171
  output[ 2] = 2 * ((step[4] + step[ 5]) * C8);
  output[14] = 2 * ((step[7] - step[ 6]) * C8);
Daniel Kang's avatar
Daniel Kang committed
172
173
174
175
176
177
178
179
180

  temp1 = step[4] - step[5];
  temp2 = step[6] + step[7];
  output[ 6] = (temp1 + temp2);
  output[10] = (temp1 - temp2);

  intermediate[8] = step[8] + step[14];
  intermediate[9] = step[9] + step[15];

181
182
  temp1 = intermediate[8] * C12;
  temp2 = intermediate[9] * C4;
Daniel Kang's avatar
Daniel Kang committed
183
  temp1 = temp1 - temp2;
184
  output[3] = 2 * (temp1 * C8);
Daniel Kang's avatar
Daniel Kang committed
185

186
187
  temp1 = intermediate[8] * C4;
  temp2 = intermediate[9] * C12;
Daniel Kang's avatar
Daniel Kang committed
188
  temp1 = temp2 + temp1;
189
  output[13] = 2 * (temp1 * C8);
Daniel Kang's avatar
Daniel Kang committed
190

191
  output[ 9] = 2 * ((step[10] + step[11]) * C8);
Daniel Kang's avatar
Daniel Kang committed
192
193
194
195
196
197
198
199
200
201

  intermediate[11] = step[10] - step[11];
  intermediate[12] = step[12] + step[13];
  intermediate[13] = step[12] - step[13];
  intermediate[14] = step[ 8] - step[14];
  intermediate[15] = step[ 9] - step[15];

  output[15] = (intermediate[11] + intermediate[12]);
  output[ 1] = -(intermediate[11] - intermediate[12]);

202
  output[ 7] = 2 * (intermediate[13] * C8);
Daniel Kang's avatar
Daniel Kang committed
203

204
205
  temp1 = intermediate[14] * C12;
  temp2 = intermediate[15] * C4;
Daniel Kang's avatar
Daniel Kang committed
206
  temp1 = temp1 - temp2;
207
  output[11] = -2 * (temp1 * C8);
Daniel Kang's avatar
Daniel Kang committed
208

209
210
  temp1 = intermediate[14] * C4;
  temp2 = intermediate[15] * C12;
Daniel Kang's avatar
Daniel Kang committed
211
  temp1 = temp2 + temp1;
212
  output[ 5] = 2 * (temp1 * C8);
Daniel Kang's avatar
Daniel Kang committed
213
214
}

215
void reference_16x16_dct_2d(int16_t input[256], double output[256]) {
Daniel Kang's avatar
Daniel Kang committed
216
217
218
219
  // First transform columns
  for (int i = 0; i < 16; ++i) {
    double temp_in[16], temp_out[16];
    for (int j = 0; j < 16; ++j)
220
      temp_in[j] = input[j * 16 + i];
Daniel Kang's avatar
Daniel Kang committed
221
222
    butterfly_16x16_dct_1d(temp_in, temp_out);
    for (int j = 0; j < 16; ++j)
223
      output[j * 16 + i] = temp_out[j];
Daniel Kang's avatar
Daniel Kang committed
224
225
226
227
228
  }
  // Then transform rows
  for (int i = 0; i < 16; ++i) {
    double temp_in[16], temp_out[16];
    for (int j = 0; j < 16; ++j)
229
      temp_in[j] = output[j + i * 16];
Daniel Kang's avatar
Daniel Kang committed
230
231
232
    butterfly_16x16_dct_1d(temp_in, temp_out);
    // Scale by some magic number
    for (int j = 0; j < 16; ++j)
233
      output[j + i * 16] = temp_out[j]/2;
Daniel Kang's avatar
Daniel Kang committed
234
235
236
  }
}

237
238
239
// Signatures of the transforms exercised by this file.
// Plain DCT, forward and inverse:
typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
// Variants that additionally take a tx_type argument:
typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
                        int tx_type);
typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
                        int tx_type);

// Test parameter tuples. The fixtures read element 0 and 1 as function
// pointers, element 2 as tx_type (or a threshold for Idct16x16Param) and
// element 3 as the bit depth.
typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct16x16Param;
typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht16x16Param;
typedef std::tr1::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t>
    Idct16x16Param;
248

249
void fdct16x16_ref(const int16_t *in, tran_low_t *out, int stride,
250
                   int /*tx_type*/) {
251
  vpx_fdct16x16_c(in, out, stride);
252
253
}

254
void idct16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,
255
                   int /*tx_type*/) {
256
  vpx_idct16x16_256_add_c(in, dest, stride);
257
258
}

259
260
void fht16x16_ref(const int16_t *in, tran_low_t *out, int stride,
                  int tx_type) {
261
  vp10_fht16x16_c(in, out, stride, tx_type);
262
263
}

264
265
void iht16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,
                  int tx_type) {
266
  vp10_iht16x16_256_add_c(in, dest, stride, tx_type);
267
268
}

269
#if CONFIG_VPX_HIGHBITDEPTH
270
void idct16x16_10(const tran_low_t *in, uint8_t *out, int stride) {
271
  vpx_highbd_idct16x16_256_add_c(in, out, stride, 10);
272
273
274
}

void idct16x16_12(const tran_low_t *in, uint8_t *out, int stride) {
275
  vpx_highbd_idct16x16_256_add_c(in, out, stride, 12);
276
277
278
279
280
281
282
283
284
285
286
287
288
}

// Gives idct16x16_10 the IhtFunc signature so it can be stored in
// inv_txfm_ref. The tx_type argument is ignored; its name is commented out,
// matching the other *_ref adapters, to avoid an unused-parameter warning.
void idct16x16_10_ref(const tran_low_t *in, uint8_t *out, int stride,
                      int /*tx_type*/) {
  idct16x16_10(in, out, stride);
}

// Gives idct16x16_12 the IhtFunc signature so it can be stored in
// inv_txfm_ref. The tx_type argument is ignored; its name is commented out,
// matching the other *_ref adapters, to avoid an unused-parameter warning.
void idct16x16_12_ref(const tran_low_t *in, uint8_t *out, int stride,
                      int /*tx_type*/) {
  idct16x16_12(in, out, stride);
}

void iht16x16_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
289
  vp10_highbd_iht16x16_256_add_c(in, out, stride, tx_type, 10);
290
291
292
}

void iht16x16_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
293
  vp10_highbd_iht16x16_256_add_c(in, out, stride, tx_type, 12);
294
}
295
296

void idct16x16_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
297
  vpx_highbd_idct16x16_10_add_c(in, out, stride, 10);
298
299
300
}

void idct16x16_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
301
  vpx_highbd_idct16x16_10_add_c(in, out, stride, 12);
302
303
304
305
}

#if HAVE_SSE2
void idct16x16_256_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
306
  vpx_highbd_idct16x16_256_add_sse2(in, out, stride, 10);
307
308
309
}

void idct16x16_256_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
310
  vpx_highbd_idct16x16_256_add_sse2(in, out, stride, 12);
311
312
313
}

void idct16x16_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
314
  vpx_highbd_idct16x16_10_add_sse2(in, out, stride, 10);
315
316
317
}

void idct16x16_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
318
  vpx_highbd_idct16x16_10_add_sse2(in, out, stride, 12);
319
320
}
#endif  // HAVE_SSE2
321
#endif  // CONFIG_VPX_HIGHBITDEPTH
322

323
class Trans16x16TestBase {
324
 public:
325
  virtual ~Trans16x16TestBase() {}
326

327
 protected:
328
  virtual void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) = 0;
329

330
  virtual void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) = 0;
331
332
333

  void RunAccuracyCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
334
335
    uint32_t max_error = 0;
    int64_t total_error = 0;
336
337
    const int count_test_block = 10000;
    for (int i = 0; i < count_test_block; ++i) {
338
339
340
341
      DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
      DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
      DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
      DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
342
#if CONFIG_VPX_HIGHBITDEPTH
343
344
      DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
      DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
345
#endif
346

347
      // Initialize a test block with input range [-mask_, mask_].
348
      for (int j = 0; j < kNumCoeffs; ++j) {
349
350
351
352
        if (bit_depth_ == VPX_BITS_8) {
          src[j] = rnd.Rand8();
          dst[j] = rnd.Rand8();
          test_input_block[j] = src[j] - dst[j];
353
#if CONFIG_VPX_HIGHBITDEPTH
354
355
356
357
358
359
        } else {
          src16[j] = rnd.Rand16() & mask_;
          dst16[j] = rnd.Rand16() & mask_;
          test_input_block[j] = src16[j] - dst16[j];
#endif
        }
360
361
      }

362
363
      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block,
                                          test_temp_block, pitch_));
364
365
366
      if (bit_depth_ == VPX_BITS_8) {
        ASM_REGISTER_STATE_CHECK(
            RunInvTxfm(test_temp_block, dst, pitch_));
367
#if CONFIG_VPX_HIGHBITDEPTH
368
369
370
371
372
      } else {
        ASM_REGISTER_STATE_CHECK(
            RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
#endif
      }
373
374

      for (int j = 0; j < kNumCoeffs; ++j) {
375
#if CONFIG_VPX_HIGHBITDEPTH
376
377
378
        const uint32_t diff =
            bit_depth_ == VPX_BITS_8 ?  dst[j] - src[j] : dst16[j] - src16[j];
#else
379
        const uint32_t diff = dst[j] - src[j];
380
#endif
381
        const uint32_t error = diff * diff;
382
383
384
385
        if (max_error < error)
          max_error = error;
        total_error += error;
      }
Scott LaVarnway's avatar
Scott LaVarnway committed
386
    }
Daniel Kang's avatar
Daniel Kang committed
387

388
    EXPECT_GE(1u  << 2 * (bit_depth_ - 8), max_error)
389
390
        << "Error: 16x16 FHT/IHT has an individual round trip error > 1";

391
    EXPECT_GE(count_test_block << 2 * (bit_depth_ - 8), total_error)
392
        << "Error: 16x16 FHT/IHT has average round trip error > 1 per block";
393
394
  }

395
  void RunCoeffCheck() {
396
397
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
398
399
400
    DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
401

402
    for (int i = 0; i < count_test_block; ++i) {
403
      // Initialize a test block with input range [-mask_, mask_].
404
      for (int j = 0; j < kNumCoeffs; ++j)
405
        input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
406
407

      fwd_txfm_ref(input_block, output_ref_block, pitch_, tx_type_);
408
      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_));
409
410
411
412
413
414
415
416
417
418

      // The minimum quant value is 4.
      for (int j = 0; j < kNumCoeffs; ++j)
        EXPECT_EQ(output_block[j], output_ref_block[j]);
    }
  }

  void RunMemCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
419
420
421
    DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
Scott LaVarnway's avatar
Scott LaVarnway committed
422

423
    for (int i = 0; i < count_test_block; ++i) {
424
      // Initialize a test block with input range [-mask_, mask_].
425
      for (int j = 0; j < kNumCoeffs; ++j) {
426
        input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
427
      }
428
      if (i == 0) {
429
        for (int j = 0; j < kNumCoeffs; ++j)
430
          input_extreme_block[j] = mask_;
431
      } else if (i == 1) {
432
        for (int j = 0; j < kNumCoeffs; ++j)
433
          input_extreme_block[j] = -mask_;
434
      }
435

436
      fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);
437
438
      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block,
                                          output_block, pitch_));
439
440
441

      // The minimum quant value is 4.
      for (int j = 0; j < kNumCoeffs; ++j) {
442
        EXPECT_EQ(output_block[j], output_ref_block[j]);
443
        EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block[j]))
444
445
            << "Error: 16x16 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
      }
446
    }
447
448
  }

449
450
  void RunQuantCheck(int dc_thred, int ac_thred) {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
451
    const int count_test_block = 100000;
452
453
    DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
454

455
456
    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
457
#if CONFIG_VPX_HIGHBITDEPTH
458
459
    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
460
#endif
461
462

    for (int i = 0; i < count_test_block; ++i) {
463
      // Initialize a test block with input range [-mask_, mask_].
464
      for (int j = 0; j < kNumCoeffs; ++j) {
465
        input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
466
467
468
      }
      if (i == 0)
        for (int j = 0; j < kNumCoeffs; ++j)
469
          input_extreme_block[j] = mask_;
470
471
      if (i == 1)
        for (int j = 0; j < kNumCoeffs; ++j)
472
          input_extreme_block[j] = -mask_;
473
474
475
476

      fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);

      // clear reconstructed pixel buffers
James Zern's avatar
James Zern committed
477
478
      memset(dst, 0, kNumCoeffs * sizeof(uint8_t));
      memset(ref, 0, kNumCoeffs * sizeof(uint8_t));
479
#if CONFIG_VPX_HIGHBITDEPTH
James Zern's avatar
James Zern committed
480
481
      memset(dst16, 0, kNumCoeffs * sizeof(uint16_t));
      memset(ref16, 0, kNumCoeffs * sizeof(uint16_t));
482
#endif
483
484
485
486
487

      // quantization with maximum allowed step sizes
      output_ref_block[0] = (output_ref_block[0] / dc_thred) * dc_thred;
      for (int j = 1; j < kNumCoeffs; ++j)
        output_ref_block[j] = (output_ref_block[j] / ac_thred) * ac_thred;
488
489
490
      if (bit_depth_ == VPX_BITS_8) {
        inv_txfm_ref(output_ref_block, ref, pitch_, tx_type_);
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(output_ref_block, dst, pitch_));
491
#if CONFIG_VPX_HIGHBITDEPTH
492
493
494
495
496
497
498
499
500
501
      } else {
        inv_txfm_ref(output_ref_block, CONVERT_TO_BYTEPTR(ref16), pitch_,
                     tx_type_);
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(output_ref_block,
                                            CONVERT_TO_BYTEPTR(dst16), pitch_));
#endif
      }
      if (bit_depth_ == VPX_BITS_8) {
        for (int j = 0; j < kNumCoeffs; ++j)
          EXPECT_EQ(ref[j], dst[j]);
502
#if CONFIG_VPX_HIGHBITDEPTH
503
504
505
506
507
      } else {
        for (int j = 0; j < kNumCoeffs; ++j)
          EXPECT_EQ(ref16[j], dst16[j]);
#endif
      }
508
509
510
    }
  }

511
512
513
  void RunInvAccuracyCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
514
515
516
517
    DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
518
#if CONFIG_VPX_HIGHBITDEPTH
519
520
    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
521
#endif  // CONFIG_VPX_HIGHBITDEPTH
Daniel Kang's avatar
Daniel Kang committed
522

523
524
    for (int i = 0; i < count_test_block; ++i) {
      double out_r[kNumCoeffs];
Daniel Kang's avatar
Daniel Kang committed
525

526
527
      // Initialize a test block with input range [-255, 255].
      for (int j = 0; j < kNumCoeffs; ++j) {
528
529
530
531
        if (bit_depth_ == VPX_BITS_8) {
          src[j] = rnd.Rand8();
          dst[j] = rnd.Rand8();
          in[j] = src[j] - dst[j];
532
#if CONFIG_VPX_HIGHBITDEPTH
533
534
535
536
        } else {
          src16[j] = rnd.Rand16() & mask_;
          dst16[j] = rnd.Rand16() & mask_;
          in[j] = src16[j] - dst16[j];
537
#endif  // CONFIG_VPX_HIGHBITDEPTH
538
        }
539
540
541
542
      }

      reference_16x16_dct_2d(in, out_r);
      for (int j = 0; j < kNumCoeffs; ++j)
543
        coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
544

545
546
      if (bit_depth_ == VPX_BITS_8) {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, 16));
547
#if CONFIG_VPX_HIGHBITDEPTH
548
549
550
      } else {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16),
                                            16));
551
#endif  // CONFIG_VPX_HIGHBITDEPTH
552
      }
553
554

      for (int j = 0; j < kNumCoeffs; ++j) {
555
#if CONFIG_VPX_HIGHBITDEPTH
556
557
558
        const uint32_t diff =
            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
#else
559
        const uint32_t diff = dst[j] - src[j];
560
#endif  // CONFIG_VPX_HIGHBITDEPTH
561
562
        const uint32_t error = diff * diff;
        EXPECT_GE(1u, error)
563
564
565
            << "Error: 16x16 IDCT has error " << error
            << " at index " << j;
      }
Daniel Kang's avatar
Daniel Kang committed
566
567
    }
  }
568
569
570
571
572

  void CompareInvReference(IdctFunc ref_txfm, int thresh) {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 10000;
    const int eob = 10;
573
    const int16_t *scan = vp10_default_scan_orders[TX_16X16].scan;
574
575
576
    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
577
#if CONFIG_VPX_HIGHBITDEPTH
578
579
    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
580
#endif  // CONFIG_VPX_HIGHBITDEPTH
581
582
583
584
585
586
587
588
589
590
591
592

    for (int i = 0; i < count_test_block; ++i) {
      for (int j = 0; j < kNumCoeffs; ++j) {
        if (j < eob) {
          // Random values less than the threshold, either positive or negative
          coeff[scan[j]] = rnd(thresh) * (1 - 2 * (i % 2));
        } else {
          coeff[scan[j]] = 0;
        }
        if (bit_depth_ == VPX_BITS_8) {
          dst[j] = 0;
          ref[j] = 0;
593
#if CONFIG_VPX_HIGHBITDEPTH
594
595
596
        } else {
          dst16[j] = 0;
          ref16[j] = 0;
597
#endif  // CONFIG_VPX_HIGHBITDEPTH
598
599
600
601
602
603
        }
      }
      if (bit_depth_ == VPX_BITS_8) {
        ref_txfm(coeff, ref, pitch_);
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
      } else {
604
#if CONFIG_VPX_HIGHBITDEPTH
605
606
607
        ref_txfm(coeff, CONVERT_TO_BYTEPTR(ref16), pitch_);
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16),
                                 pitch_));
608
#endif  // CONFIG_VPX_HIGHBITDEPTH
609
610
611
      }

      for (int j = 0; j < kNumCoeffs; ++j) {
612
#if CONFIG_VPX_HIGHBITDEPTH
613
614
615
616
        const uint32_t diff =
            bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
#else
        const uint32_t diff = dst[j] - ref[j];
617
#endif  // CONFIG_VPX_HIGHBITDEPTH
618
619
620
621
622
623
624
625
        const uint32_t error = diff * diff;
        EXPECT_EQ(0u, error)
            << "Error: 16x16 IDCT Comparison has error " << error
            << " at index " << j;
      }
    }
  }

626
627
  int pitch_;
  int tx_type_;
628
629
  vpx_bit_depth_t bit_depth_;
  int mask_;
630
631
  FhtFunc fwd_txfm_ref;
  IhtFunc inv_txfm_ref;
632
};
Daniel Kang's avatar
Daniel Kang committed
633

634
635
class Trans16x16DCT
    : public Trans16x16TestBase,
636
      public ::testing::TestWithParam<Dct16x16Param> {
637
638
 public:
  virtual ~Trans16x16DCT() {}
Daniel Kang's avatar
Daniel Kang committed
639

640
641
642
643
  virtual void SetUp() {
    fwd_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
    tx_type_  = GET_PARAM(2);
644
    bit_depth_ = GET_PARAM(3);
645
    pitch_    = 16;
646
    fwd_txfm_ref = fdct16x16_ref;
647
    inv_txfm_ref = idct16x16_ref;
648
    mask_ = (1 << bit_depth_) - 1;
649
#if CONFIG_VPX_HIGHBITDEPTH
650
    switch (bit_depth_) {
651
      case VPX_BITS_10:
652
653
        inv_txfm_ref = idct16x16_10_ref;
        break;
654
      case VPX_BITS_12:
655
656
657
658
659
660
661
662
663
        inv_txfm_ref = idct16x16_12_ref;
        break;
      default:
        inv_txfm_ref = idct16x16_ref;
        break;
    }
#else
    inv_txfm_ref = idct16x16_ref;
#endif
664
665
  }
  virtual void TearDown() { libvpx_test::ClearSystemState(); }
Daniel Kang's avatar
Daniel Kang committed
666

667
 protected:
668
  void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
669
670
    fwd_txfm_(in, out, stride);
  }
671
  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
672
    inv_txfm_(out, dst, stride);
Daniel Kang's avatar
Daniel Kang committed
673
  }
674

675
676
  FdctFunc fwd_txfm_;
  IdctFunc inv_txfm_;
677
678
679
680
};

// Forward + inverse round trip on random blocks; see
// Trans16x16TestBase::RunAccuracyCheck for the error bounds.
TEST_P(Trans16x16DCT, AccuracyCheck) {
  RunAccuracyCheck();
}
682

683
684
685
686
687
688
// Forward transform under test must match the reference forward transform
// exactly on random input; see Trans16x16TestBase::RunCoeffCheck.
TEST_P(Trans16x16DCT, CoeffCheck) {
  RunCoeffCheck();
}

// Forward transform on extreme-valued blocks; see
// Trans16x16TestBase::RunMemCheck.
TEST_P(Trans16x16DCT, MemCheck) {
  RunMemCheck();
}

691
692
693
694
695
696
// Inverse transform under test must match the reference inverse transform
// after coarse quantization; see Trans16x16TestBase::RunQuantCheck.
TEST_P(Trans16x16DCT, QuantCheck) {
  // Use maximally allowed quantization step sizes for DC and AC
  // coefficients respectively.
  RunQuantCheck(1336, 1828);
}

697
698
699
700
// Inverse transform under test applied to coefficients from the
// floating-point reference DCT; see Trans16x16TestBase::RunInvAccuracyCheck.
TEST_P(Trans16x16DCT, InvAccuracyCheck) {
  RunInvAccuracyCheck();
}

701
702
class Trans16x16HT
    : public Trans16x16TestBase,
703
      public ::testing::TestWithParam<Ht16x16Param> {
704
705
706
707
708
709
710
 public:
  virtual ~Trans16x16HT() {}

  virtual void SetUp() {
    fwd_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
    tx_type_  = GET_PARAM(2);
711
    bit_depth_ = GET_PARAM(3);
712
713
    pitch_    = 16;
    fwd_txfm_ref = fht16x16_ref;
714
    inv_txfm_ref = iht16x16_ref;
715
    mask_ = (1 << bit_depth_) - 1;
716
#if CONFIG_VPX_HIGHBITDEPTH
717
718
719
720
721
722
723
724
725
726
727
728
729
730
    switch (bit_depth_) {
      case VPX_BITS_10:
        inv_txfm_ref = iht16x16_10;
        break;
      case VPX_BITS_12:
        inv_txfm_ref = iht16x16_12;
        break;
      default:
        inv_txfm_ref = iht16x16_ref;
        break;
    }
#else
    inv_txfm_ref = iht16x16_ref;
#endif
731
  }
732
733
734
  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
735
  void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
736
    fwd_txfm_(in, out, stride, tx_type_);
737
  }
738
  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
739
    inv_txfm_(out, dst, stride, tx_type_);
740
741
  }

742
743
  FhtFunc fwd_txfm_;
  IhtFunc inv_txfm_;
744
745
746
747
748
749
};

// Forward + inverse round trip on random blocks; see
// Trans16x16TestBase::RunAccuracyCheck for the error bounds.
TEST_P(Trans16x16HT, AccuracyCheck) {
  RunAccuracyCheck();
}

750
751
752
753
754
755
// Forward transform under test must match the reference forward transform
// exactly on random input; see Trans16x16TestBase::RunCoeffCheck.
TEST_P(Trans16x16HT, CoeffCheck) {
  RunCoeffCheck();
}

// Forward transform on extreme-valued blocks; see
// Trans16x16TestBase::RunMemCheck.
TEST_P(Trans16x16HT, MemCheck) {
  RunMemCheck();
}

758
759
760
// Inverse transform under test must match the reference inverse transform
// after quantization with step sizes 429 (DC) and 729 (AC).
TEST_P(Trans16x16HT, QuantCheck) {
  // The encoder skips any non-DC intra prediction modes,
  // when the quantization step size goes beyond 988.
  const int kDcStep = 429;
  const int kAcStep = 729;
  RunQuantCheck(kDcStep, kAcStep);
}

764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
// Fixture that compares an inverse transform under test against a reference
// inverse transform. Test parameters, in order: reference inverse transform,
// inverse transform under test, coefficient threshold handed to
// CompareInvReference, bit depth.
class InvTrans16x16DCT
    : public Trans16x16TestBase,
      public ::testing::TestWithParam<Idct16x16Param> {
 public:
  virtual ~InvTrans16x16DCT() {}

  virtual void SetUp() {
    ref_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
    thresh_ = GET_PARAM(2);
    bit_depth_ = GET_PARAM(3);
    pitch_ = 16;
    mask_ = (1 << bit_depth_) - 1;
    // This fixture has no tx_type and no forward/inverse reference in the
    // FhtFunc/IhtFunc form; give the inherited members defined values rather
    // than leaving them unset.
    tx_type_ = 0;
    fwd_txfm_ref = NULL;
    inv_txfm_ref = NULL;
  }
  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
  // No forward transform is exercised by this fixture; intentionally empty.
  void RunFwdTxfm(int16_t * /*in*/, tran_low_t * /*out*/, int /*stride*/) {}
  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
    inv_txfm_(out, dst, stride);
  }

  IdctFunc ref_txfm_;  // reference inverse transform
  IdctFunc inv_txfm_;  // inverse transform under test
  int thresh_;         // upper bound for the random coefficients
};

// Inverse transform under test must reproduce the reference inverse
// transform exactly; see Trans16x16TestBase::CompareInvReference.
TEST_P(InvTrans16x16DCT, CompareReference) {
  CompareInvReference(ref_txfm_, thresh_);
}

795
796
using std::tr1::make_tuple;

797
#if CONFIG_VPX_HIGHBITDEPTH
798
799
800
INSTANTIATE_TEST_CASE_P(
    C, Trans16x16DCT,
    ::testing::Values(
801
802
        make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_10, 0, VPX_BITS_10),
        make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_12, 0, VPX_BITS_12),
803
        make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c, 0, VPX_BITS_8)));
804
#else
805
806
807
INSTANTIATE_TEST_CASE_P(
    C, Trans16x16DCT,
    ::testing::Values(
808
        make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c, 0, VPX_BITS_8)));
809
#endif  // CONFIG_VPX_HIGHBITDEPTH
810

811
// C implementations of the tx_type transforms, one case per tx_type 0..3.
// High-bit-depth builds add 10- and 12-bit cases in front of the 8-bit ones.
#if CONFIG_VPX_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    C, Trans16x16HT,
    ::testing::Values(
        make_tuple(&vp10_highbd_fht16x16_c, &iht16x16_10, 0, VPX_BITS_10),
        make_tuple(&vp10_highbd_fht16x16_c, &iht16x16_10, 1, VPX_BITS_10),
        make_tuple(&vp10_highbd_fht16x16_c, &iht16x16_10, 2, VPX_BITS_10),
        make_tuple(&vp10_highbd_fht16x16_c, &iht16x16_10, 3, VPX_BITS_10),
        make_tuple(&vp10_highbd_fht16x16_c, &iht16x16_12, 0, VPX_BITS_12),
        make_tuple(&vp10_highbd_fht16x16_c, &iht16x16_12, 1, VPX_BITS_12),
        make_tuple(&vp10_highbd_fht16x16_c, &iht16x16_12, 2, VPX_BITS_12),
        make_tuple(&vp10_highbd_fht16x16_c, &iht16x16_12, 3, VPX_BITS_12),
        make_tuple(&vp10_fht16x16_c, &vp10_iht16x16_256_add_c, 0,
                   VPX_BITS_8),
        make_tuple(&vp10_fht16x16_c, &vp10_iht16x16_256_add_c, 1,
                   VPX_BITS_8),
        make_tuple(&vp10_fht16x16_c, &vp10_iht16x16_256_add_c, 2,
                   VPX_BITS_8),
        make_tuple(&vp10_fht16x16_c, &vp10_iht16x16_256_add_c, 3,
                   VPX_BITS_8)));
#else
INSTANTIATE_TEST_CASE_P(
    C, Trans16x16HT,
    ::testing::Values(
        make_tuple(&vp10_fht16x16_c, &vp10_iht16x16_256_add_c, 0,
                   VPX_BITS_8),
        make_tuple(&vp10_fht16x16_c, &vp10_iht16x16_256_add_c, 1,
                   VPX_BITS_8),
        make_tuple(&vp10_fht16x16_c, &vp10_iht16x16_256_add_c, 2,
                   VPX_BITS_8),
        make_tuple(&vp10_fht16x16_c, &vp10_iht16x16_256_add_c, 3,
                   VPX_BITS_8)));
#endif  // CONFIG_VPX_HIGHBITDEPTH
836

837
// NEON inverse DCT, paired with the C forward DCT (8-bit only).
#if HAVE_NEON_ASM && !CONFIG_VPX_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    NEON, Trans16x16DCT,
    ::testing::Values(
        make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_neon, 0,
                   VPX_BITS_8)));
#endif  // HAVE_NEON_ASM && !CONFIG_VPX_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

845
// SSE2 forward and inverse transforms in builds without high bit depth.
#if HAVE_SSE2 && !CONFIG_VPX_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16DCT,
    ::testing::Values(
        make_tuple(&vpx_fdct16x16_sse2, &vpx_idct16x16_256_add_sse2, 0,
                   VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16HT,
    ::testing::Values(
        make_tuple(&vp10_fht16x16_sse2,
                   &vp10_iht16x16_256_add_sse2, 0, VPX_BITS_8),
        make_tuple(&vp10_fht16x16_sse2,
                   &vp10_iht16x16_256_add_sse2, 1, VPX_BITS_8),
        make_tuple(&vp10_fht16x16_sse2,
                   &vp10_iht16x16_256_add_sse2, 2, VPX_BITS_8),
        make_tuple(&vp10_fht16x16_sse2,
                   &vp10_iht16x16_256_add_sse2, 3, VPX_BITS_8)));
#endif  // HAVE_SSE2 && !CONFIG_VPX_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
863

864
// SSE2 transforms in high-bit-depth builds. Each DCT case pairs one SSE2
// function with a C counterpart.
#if HAVE_SSE2 && CONFIG_VPX_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16DCT,
    ::testing::Values(
        make_tuple(&vpx_highbd_fdct16x16_sse2, &idct16x16_10, 0, VPX_BITS_10),
        make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_256_add_10_sse2, 0,
                   VPX_BITS_10),
        make_tuple(&vpx_highbd_fdct16x16_sse2, &idct16x16_12, 0, VPX_BITS_12),
        make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_256_add_12_sse2, 0,
                   VPX_BITS_12),
        make_tuple(&vpx_fdct16x16_sse2, &vpx_idct16x16_256_add_c, 0,
                   VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16HT,
    ::testing::Values(
        make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_c, 0,
                   VPX_BITS_8),
        make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_c, 1,
                   VPX_BITS_8),
        make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_c, 2,
                   VPX_BITS_8),
        make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_c, 3,
                   VPX_BITS_8)));
// Optimizations take effect at a threshold of 3155, so we use a value close to
// that to test both branches.
INSTANTIATE_TEST_CASE_P(
    SSE2, InvTrans16x16DCT,
    ::testing::Values(
        make_tuple(&idct16x16_10_add_10_c, &idct16x16_10_add_10_sse2, 3167,
                   VPX_BITS_10),
        make_tuple(&idct16x16_10, &idct16x16_256_add_10_sse2, 3167,
                   VPX_BITS_10),
        make_tuple(&idct16x16_10_add_12_c, &idct16x16_10_add_12_sse2, 3167,
                   VPX_BITS_12),
        make_tuple(&idct16x16_12, &idct16x16_256_add_12_sse2, 3167,
                   VPX_BITS_12)));
#endif  // HAVE_SSE2 && CONFIG_VPX_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
903

904
// MSA forward and inverse transforms (8-bit only).
#if HAVE_MSA && !CONFIG_VPX_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    MSA, Trans16x16DCT,
    ::testing::Values(
        make_tuple(&vpx_fdct16x16_msa, &vpx_idct16x16_256_add_msa, 0,
                   VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
    MSA, Trans16x16HT,
    ::testing::Values(
        make_tuple(&vp10_fht16x16_msa, &vp10_iht16x16_256_add_msa, 0,
                   VPX_BITS_8),
        make_tuple(&vp10_fht16x16_msa, &vp10_iht16x16_256_add_msa, 1,
                   VPX_BITS_8),
        make_tuple(&vp10_fht16x16_msa, &vp10_iht16x16_256_add_msa, 2,
                   VPX_BITS_8),
        make_tuple(&vp10_fht16x16_msa, &vp10_iht16x16_256_add_msa, 3,
                   VPX_BITS_8)));
#endif  // HAVE_MSA && !CONFIG_VPX_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
Daniel Kang's avatar
Daniel Kang committed
922
}  // namespace