/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <math.h>

Yaowu Xu's avatar
Yaowu Xu committed
14
#include "./aom_dsp_rtcd.h"
Geza Lore's avatar
Geza Lore committed
15 16 17 18
#include "./av1_rtcd.h"
#include "aom_dsp/inv_txfm.h"
#include "aom_ports/mem.h"
#include "av1/common/av1_inv_txfm2d_cfg.h"
19 20 21
#include "av1/common/blockd.h"
#include "av1/common/enums.h"
#include "av1/common/idct.h"
Jingning Han's avatar
Jingning Han committed
22

23 24
int get_tx_scale(const MACROBLOCKD *const xd, const TX_TYPE tx_type,
                 const TX_SIZE tx_size) {
clang-format's avatar
clang-format committed
25
  (void)tx_type;
Yaowu Xu's avatar
Yaowu Xu committed
26
#if CONFIG_AOM_HIGHBITDEPTH
27
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
28
    return txsize_sqr_up_map[tx_size] == TX_32X32;
29 30 31 32
  }
#else
  (void)xd;
#endif
33
  return txsize_sqr_up_map[tx_size] == TX_32X32;
34 35
}

36 37 38
// NOTE: The implementation of all inverses need to be aware of the fact
// that input and output could be the same buffer.

39
#if CONFIG_EXT_TX
Debargha Mukherjee's avatar
Debargha Mukherjee committed
40 41 42 43 44 45 46 47
// 4-point identity pass: every coefficient is multiplied by Sqrt2 and run
// through dct_const_round_shift(). Each element is read before the matching
// output element is written, so input and output may be the same buffer.
static void iidtx4_c(const tran_low_t *input, tran_low_t *output) {
  int idx;
  for (idx = 0; idx < 4; ++idx) {
    const tran_low_t coeff = input[idx];
    output[idx] = (tran_low_t)dct_const_round_shift(coeff * Sqrt2);
  }
}

// 8-point identity pass: every coefficient is doubled.
static void iidtx8_c(const tran_low_t *input, tran_low_t *output) {
  int idx;
  for (idx = 0; idx < 8; ++idx) {
    const tran_low_t coeff = input[idx];
    output[idx] = coeff * 2;
  }
}

// 16-point identity pass: every coefficient is multiplied by 2 * Sqrt2 and
// run through dct_const_round_shift().
static void iidtx16_c(const tran_low_t *input, tran_low_t *output) {
  int idx;
  for (idx = 0; idx < 16; ++idx) {
    const tran_low_t coeff = input[idx];
    output[idx] = (tran_low_t)dct_const_round_shift(coeff * 2 * Sqrt2);
  }
}

// 32-point identity pass: every coefficient is multiplied by 4.
static void iidtx32_c(const tran_low_t *input, tran_low_t *output) {
  int idx;
  for (idx = 0; idx < 32; ++idx) {
    const tran_low_t coeff = input[idx];
    output[idx] = coeff * 4;
  }
}

62
// For use in lieu of ADST
63 64 65 66 67 68 69
// 32-point pass used in place of an ADST.
//  - output[0..15]  = input[16..31] * 4
//  - output[16..31] = idct16_c() applied to input[0..15] after scaling by
//    Sqrt2 (through dct_const_round_shift()).
// The first half of the input is copied into a local buffer before any
// output element is written, so input and output may be the same buffer.
// Note overall scaling factor is 4 times orthogonal.
static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
  tran_low_t scaled_lo[16];
  const tran_low_t *const in_hi = input + 16;
  tran_low_t *const out_hi = output + 16;
  int k;

  // Scale the low half of the input by sqrt(2) into scratch storage.
  for (k = 0; k < 16; ++k) {
    scaled_lo[k] = (tran_low_t)dct_const_round_shift(input[k] * Sqrt2);
  }

  // The high half of the input is passed through with a gain of 4.
  for (k = 0; k < 16; ++k) {
    output[k] = in_hi[k] * 4;
  }

  idct16_c(scaled_lo, out_hi);
}

Yaowu Xu's avatar
Yaowu Xu committed
77
#if CONFIG_AOM_HIGHBITDEPTH
Debargha Mukherjee's avatar
Debargha Mukherjee committed
78 79 80 81
// High-bitdepth 4-point identity pass: every coefficient is multiplied by
// Sqrt2, run through highbd_dct_const_round_shift() and wrapped with
// HIGHBD_WRAPLOW() for bit depth bd.
static void highbd_iidtx4_c(const tran_low_t *input, tran_low_t *output,
                            int bd) {
  int idx;
  for (idx = 0; idx < 4; ++idx) {
    const tran_low_t coeff = input[idx];
    output[idx] =
        HIGHBD_WRAPLOW(highbd_dct_const_round_shift(coeff * Sqrt2), bd);
  }
}

// High-bitdepth 8-point identity pass: every coefficient is doubled.
// bd is accepted only to match the other high-bitdepth 1-D passes.
static void highbd_iidtx8_c(const tran_low_t *input, tran_low_t *output,
                            int bd) {
  int idx;
  (void)bd;
  for (idx = 0; idx < 8; ++idx) {
    const tran_low_t coeff = input[idx];
    output[idx] = coeff * 2;
  }
}

// High-bitdepth 16-point identity pass: every coefficient is multiplied by
// 2 * Sqrt2, run through highbd_dct_const_round_shift() and wrapped with
// HIGHBD_WRAPLOW() for bit depth bd.
static void highbd_iidtx16_c(const tran_low_t *input, tran_low_t *output,
                             int bd) {
  int idx;
  for (idx = 0; idx < 16; ++idx) {
    const tran_low_t coeff = input[idx];
    output[idx] =
        HIGHBD_WRAPLOW(highbd_dct_const_round_shift(coeff * 2 * Sqrt2), bd);
  }
}

// High-bitdepth 32-point identity pass: every coefficient is multiplied by 4.
// bd is accepted only to match the other high-bitdepth 1-D passes.
static void highbd_iidtx32_c(const tran_low_t *input, tran_low_t *output,
                             int bd) {
  int idx;
  (void)bd;
  for (idx = 0; idx < 32; ++idx) {
    const tran_low_t coeff = input[idx];
    output[idx] = coeff * 4;
  }
}

108 109 110 111 112 113
// High-bitdepth counterpart of the 32-point "half right" pass.
//  - output[0..15]  = input[16..31] * 4
//  - output[16..31] = aom_highbd_idct16_c() applied to input[0..15] after
//    scaling by Sqrt2 (highbd_dct_const_round_shift() + HIGHBD_WRAPLOW()).
// The low half of the input is copied to a local buffer before any output
// element is written, so input and output may be the same buffer.
// Note overall scaling factor is 4 times orthogonal.
static void highbd_ihalfright32_c(const tran_low_t *input, tran_low_t *output,
                                  int bd) {
  tran_low_t scaled_lo[16];
  const tran_low_t *const in_hi = input + 16;
  tran_low_t *const out_hi = output + 16;
  int k;

  // Scale the low half of the input by sqrt(2) into scratch storage.
  for (k = 0; k < 16; ++k) {
    scaled_lo[k] =
        HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[k] * Sqrt2), bd);
  }

  // The high half of the input is passed through with a gain of 4.
  for (k = 0; k < 16; ++k) {
    output[k] = in_hi[k] * 4;
  }

  aom_highbd_idct16_c(scaled_lo, out_hi, bd);
}
Yaowu Xu's avatar
Yaowu Xu committed
123
#endif  // CONFIG_AOM_HIGHBITDEPTH
124

Jingning Han's avatar
Jingning Han committed
125
// Inverse identity transform and add.
126
static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
Jingning Han's avatar
Jingning Han committed
127
                           int bs, int tx_type) {
128 129
  int r, c;
  const int shift = bs < 32 ? 3 : 2;
Debargha Mukherjee's avatar
Debargha Mukherjee committed
130
  if (tx_type == IDTX) {
Jingning Han's avatar
Jingning Han committed
131 132
    for (r = 0; r < bs; ++r) {
      for (c = 0; c < bs; ++c)
Debargha Mukherjee's avatar
Debargha Mukherjee committed
133 134 135
        dest[c] = clip_pixel_add(dest[c], input[c] >> shift);
      dest += stride;
      input += bs;
Jingning Han's avatar
Jingning Han committed
136
    }
137 138 139
  }
}

clang-format's avatar
clang-format committed
140 141 142 143 144
// Turns a (pointer, stride) pair describing `size` rows into one that walks
// the same rows bottom-up: the pointer is moved to the last row and the
// stride is negated. Both dest and stride must be modifiable lvalues.
#define FLIPUD_PTR(dest, stride, size)   \
  do {                                   \
    (dest) += ((size)-1) * (stride);     \
    (stride) = -(stride);                \
  } while (0)
145

clang-format's avatar
clang-format committed
146 147 148
// Applies the flips implied by tx_type to the destination and/or source
// (pointer, stride) pairs, in place.
//
// Note that the transpose of src will be added to dst. In order to LR
// flip the addends (in dst coordinates), we UD flip the src. To UD flip
// the addends, we UD flip the dst.
//
// sizey is the row count used when flipping dst, sizex the row count used
// when flipping src. An unknown tx_type trips assert(0).
static void maybe_flip_strides(uint8_t **dst, int *dstride, tran_low_t **src,
                               int *sstride, int tx_type, int sizey,
                               int sizex) {
  int flip_dst = 0;  // UD flip of the output
  int flip_src = 0;  // LR flip of the output (UD flip of src)

  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case IDTX:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
      break;
    case FLIPADST_DCT:
    case FLIPADST_ADST:
    case V_FLIPADST:
      flip_dst = 1;
      break;
    case DCT_FLIPADST:
    case ADST_FLIPADST:
    case H_FLIPADST:
      flip_src = 1;
      break;
    case FLIPADST_FLIPADST:
      flip_dst = 1;
      flip_src = 1;
      break;
    default:
      assert(0);
      break;
  }

  if (flip_dst) {
    FLIPUD_PTR(*dst, *dstride, sizey);
  }
  if (flip_src) {
    FLIPUD_PTR(*src, *sstride, sizex);
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
184
#if CONFIG_AOM_HIGHBITDEPTH
185
static void highbd_inv_idtx_add_c(const tran_low_t *input, uint8_t *dest8,
186
                                  int stride, int bs, int tx_type, int bd) {
187 188 189
  int r, c;
  const int shift = bs < 32 ? 3 : 2;
  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
190

Debargha Mukherjee's avatar
Debargha Mukherjee committed
191
  if (tx_type == IDTX) {
192 193
    for (r = 0; r < bs; ++r) {
      for (c = 0; c < bs; ++c)
Debargha Mukherjee's avatar
Debargha Mukherjee committed
194 195 196
        dest[c] = highbd_clip_pixel_add(dest[c], input[c] >> shift, bd);
      dest += stride;
      input += bs;
197
    }
198 199
  }
}
200

clang-format's avatar
clang-format committed
201 202 203
// 16-bit destination variant of maybe_flip_strides(): applies the flips
// implied by tx_type to the destination and/or source (pointer, stride)
// pairs, in place.
//
// Note that the transpose of src will be added to dst. In order to LR
// flip the addends (in dst coordinates), we UD flip the src. To UD flip
// the addends, we UD flip the dst.
//
// sizey is the row count used when flipping dst, sizex the row count used
// when flipping src. An unknown tx_type trips assert(0).
static void maybe_flip_strides16(uint16_t **dst, int *dstride, tran_low_t **src,
                                 int *sstride, int tx_type, int sizey,
                                 int sizex) {
  int flip_dst = 0;  // UD flip of the output
  int flip_src = 0;  // LR flip of the output (UD flip of src)

  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case IDTX:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
      break;
    case FLIPADST_DCT:
    case FLIPADST_ADST:
    case V_FLIPADST:
      flip_dst = 1;
      break;
    case DCT_FLIPADST:
    case ADST_FLIPADST:
    case H_FLIPADST:
      flip_src = 1;
      break;
    case FLIPADST_FLIPADST:
      flip_dst = 1;
      flip_src = 1;
      break;
    default:
      assert(0);
      break;
  }

  if (flip_dst) {
    FLIPUD_PTR(*dst, *dstride, sizey);
  }
  if (flip_src) {
    FLIPUD_PTR(*src, *sstride, sizex);
  }
}
Yaowu Xu's avatar
Yaowu Xu committed
238
#endif  // CONFIG_AOM_HIGHBITDEPTH
239 240
#endif  // CONFIG_EXT_TX

Yaowu Xu's avatar
Yaowu Xu committed
241 242
void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         int tx_type) {
243
  static const transform_2d IHT_4[] = {
clang-format's avatar
clang-format committed
244 245 246
    { idct4_c, idct4_c },    // DCT_DCT
    { iadst4_c, idct4_c },   // ADST_DCT
    { idct4_c, iadst4_c },   // DCT_ADST
247
    { iadst4_c, iadst4_c },  // ADST_ADST
248
#if CONFIG_EXT_TX
clang-format's avatar
clang-format committed
249 250
    { iadst4_c, idct4_c },   // FLIPADST_DCT
    { idct4_c, iadst4_c },   // DCT_FLIPADST
251 252 253 254
    { iadst4_c, iadst4_c },  // FLIPADST_FLIPADST
    { iadst4_c, iadst4_c },  // ADST_FLIPADST
    { iadst4_c, iadst4_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx4_c },  // IDTX
clang-format's avatar
clang-format committed
255 256
    { idct4_c, iidtx4_c },   // V_DCT
    { iidtx4_c, idct4_c },   // H_DCT
257 258 259 260
    { iadst4_c, iidtx4_c },  // V_ADST
    { iidtx4_c, iadst4_c },  // H_ADST
    { iadst4_c, iidtx4_c },  // V_FLIPADST
    { iidtx4_c, iadst4_c },  // H_FLIPADST
clang-format's avatar
clang-format committed
261
#endif                       // CONFIG_EXT_TX
262 263 264
  };

  int i, j;
265 266 267 268
  tran_low_t tmp;
  tran_low_t out[4][4];
  tran_low_t *outp = &out[0][0];
  int outstride = 4;
269 270 271

  // inverse transform row vectors
  for (i = 0; i < 4; ++i) {
272
    IHT_4[tx_type].rows(input, out[i]);
clang-format's avatar
clang-format committed
273
    input += 4;
274 275 276
  }

  // transpose
clang-format's avatar
clang-format committed
277
  for (i = 1; i < 4; i++) {
278
    for (j = 0; j < i; j++) {
clang-format's avatar
clang-format committed
279
      tmp = out[i][j];
280 281 282
      out[i][j] = out[j][i];
      out[j][i] = tmp;
    }
283 284 285 286
  }

  // inverse transform column vectors
  for (i = 0; i < 4; ++i) {
287 288 289 290
    IHT_4[tx_type].cols(out[i], out[i]);
  }

#if CONFIG_EXT_TX
291
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4, 4);
292 293 294 295
#endif

  // Sum with the destination
  for (i = 0; i < 4; ++i) {
296
    for (j = 0; j < 4; ++j) {
297 298 299
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
300 301 302 303
    }
  }
}

304
#if CONFIG_EXT_TX
Yaowu Xu's avatar
Yaowu Xu committed
305 306
void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         int tx_type) {
307
  static const transform_2d IHT_4x8[] = {
clang-format's avatar
clang-format committed
308 309 310
    { idct8_c, idct4_c },    // DCT_DCT
    { iadst8_c, idct4_c },   // ADST_DCT
    { idct8_c, iadst4_c },   // DCT_ADST
311
    { iadst8_c, iadst4_c },  // ADST_ADST
clang-format's avatar
clang-format committed
312 313
    { iadst8_c, idct4_c },   // FLIPADST_DCT
    { idct8_c, iadst4_c },   // DCT_FLIPADST
314 315 316 317
    { iadst8_c, iadst4_c },  // FLIPADST_FLIPADST
    { iadst8_c, iadst4_c },  // ADST_FLIPADST
    { iadst8_c, iadst4_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx4_c },  // IDTX
clang-format's avatar
clang-format committed
318 319
    { idct8_c, iidtx4_c },   // V_DCT
    { iidtx8_c, idct4_c },   // H_DCT
320 321 322 323 324 325
    { iadst8_c, iidtx4_c },  // V_ADST
    { iidtx8_c, iadst4_c },  // H_ADST
    { iadst8_c, iidtx4_c },  // V_FLIPADST
    { iidtx8_c, iadst4_c },  // H_FLIPADST
  };

326 327
  const int n = 4;
  const int n2 = 8;
328 329 330
  int i, j;
  tran_low_t out[4][8], outtmp[4];
  tran_low_t *outp = &out[0][0];
331
  int outstride = n2;
332 333

  // inverse transform row vectors and transpose
334
  for (i = 0; i < n2; ++i) {
335
    IHT_4x8[tx_type].rows(input, outtmp);
336
    for (j = 0; j < n; ++j)
337
      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
338
    input += n;
339 340 341
  }

  // inverse transform column vectors
342
  for (i = 0; i < n; ++i) {
343 344 345
    IHT_4x8[tx_type].cols(out[i], out[i]);
  }

346
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
347 348

  // Sum with the destination
349 350
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
351 352 353 354 355 356 357
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
358 359
void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         int tx_type) {
360
  static const transform_2d IHT_8x4[] = {
clang-format's avatar
clang-format committed
361 362 363
    { idct4_c, idct8_c },    // DCT_DCT
    { iadst4_c, idct8_c },   // ADST_DCT
    { idct4_c, iadst8_c },   // DCT_ADST
364
    { iadst4_c, iadst8_c },  // ADST_ADST
clang-format's avatar
clang-format committed
365 366
    { iadst4_c, idct8_c },   // FLIPADST_DCT
    { idct4_c, iadst8_c },   // DCT_FLIPADST
367 368 369 370
    { iadst4_c, iadst8_c },  // FLIPADST_FLIPADST
    { iadst4_c, iadst8_c },  // ADST_FLIPADST
    { iadst4_c, iadst8_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx8_c },  // IDTX
clang-format's avatar
clang-format committed
371 372
    { idct4_c, iidtx8_c },   // V_DCT
    { iidtx4_c, idct8_c },   // H_DCT
373 374 375 376 377
    { iadst4_c, iidtx8_c },  // V_ADST
    { iidtx4_c, iadst8_c },  // H_ADST
    { iadst4_c, iidtx8_c },  // V_FLIPADST
    { iidtx4_c, iadst8_c },  // H_FLIPADST
  };
378 379
  const int n = 4;
  const int n2 = 8;
380 381 382 383

  int i, j;
  tran_low_t out[8][4], outtmp[8];
  tran_low_t *outp = &out[0][0];
384
  int outstride = n;
385 386

  // inverse transform row vectors and transpose
387
  for (i = 0; i < n; ++i) {
388
    IHT_8x4[tx_type].rows(input, outtmp);
389
    for (j = 0; j < n2; ++j)
390
      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
391
    input += n2;
392 393 394
  }

  // inverse transform column vectors
395
  for (i = 0; i < n2; ++i) {
396 397 398
    IHT_8x4[tx_type].cols(out[i], out[i]);
  }

399
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
400 401

  // Sum with the destination
402 403
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
404 405 406 407 408 409 410
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
411 412
void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                           int tx_type) {
413
  static const transform_2d IHT_8x16[] = {
clang-format's avatar
clang-format committed
414 415 416
    { idct16_c, idct8_c },    // DCT_DCT
    { iadst16_c, idct8_c },   // ADST_DCT
    { idct16_c, iadst8_c },   // DCT_ADST
417
    { iadst16_c, iadst8_c },  // ADST_ADST
clang-format's avatar
clang-format committed
418 419
    { iadst16_c, idct8_c },   // FLIPADST_DCT
    { idct16_c, iadst8_c },   // DCT_FLIPADST
420 421 422 423
    { iadst16_c, iadst8_c },  // FLIPADST_FLIPADST
    { iadst16_c, iadst8_c },  // ADST_FLIPADST
    { iadst16_c, iadst8_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx8_c },  // IDTX
clang-format's avatar
clang-format committed
424 425
    { idct16_c, iidtx8_c },   // V_DCT
    { iidtx16_c, idct8_c },   // H_DCT
426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443
    { iadst16_c, iidtx8_c },  // V_ADST
    { iidtx16_c, iadst8_c },  // H_ADST
    { iadst16_c, iidtx8_c },  // V_FLIPADST
    { iidtx16_c, iadst8_c },  // H_FLIPADST
  };

  const int n = 8;
  const int n2 = 16;
  int i, j;
  tran_low_t out[8][16], outtmp[8];
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
    IHT_8x16[tx_type].rows(input, outtmp);
    for (j = 0; j < n; ++j)
      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
444
    input += n;
445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
    IHT_8x16[tx_type].cols(out[i], out[i]);
  }

  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
464 465
void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                           int tx_type) {
466
  static const transform_2d IHT_16x8[] = {
clang-format's avatar
clang-format committed
467 468 469
    { idct8_c, idct16_c },    // DCT_DCT
    { iadst8_c, idct16_c },   // ADST_DCT
    { idct8_c, iadst16_c },   // DCT_ADST
470
    { iadst8_c, iadst16_c },  // ADST_ADST
clang-format's avatar
clang-format committed
471 472
    { iadst8_c, idct16_c },   // FLIPADST_DCT
    { idct8_c, iadst16_c },   // DCT_FLIPADST
473 474 475 476
    { iadst8_c, iadst16_c },  // FLIPADST_FLIPADST
    { iadst8_c, iadst16_c },  // ADST_FLIPADST
    { iadst8_c, iadst16_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx16_c },  // IDTX
clang-format's avatar
clang-format committed
477 478
    { idct8_c, iidtx16_c },   // V_DCT
    { iidtx8_c, idct16_c },   // H_DCT
479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496
    { iadst8_c, iidtx16_c },  // V_ADST
    { iidtx8_c, iadst16_c },  // H_ADST
    { iadst8_c, iidtx16_c },  // V_FLIPADST
    { iidtx8_c, iadst16_c },  // H_FLIPADST
  };
  const int n = 8;
  const int n2 = 16;

  int i, j;
  tran_low_t out[16][8], outtmp[16];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_16x8[tx_type].rows(input, outtmp);
    for (j = 0; j < n2; ++j)
      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
497
    input += n2;
498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) {
    IHT_16x8[tx_type].cols(out[i], out[i]);
  }

  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
517 518
void av1_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                            int tx_type) {
519
  static const transform_2d IHT_16x32[] = {
clang-format's avatar
clang-format committed
520 521 522
    { idct32_c, idct16_c },         // DCT_DCT
    { ihalfright32_c, idct16_c },   // ADST_DCT
    { idct32_c, iadst16_c },        // DCT_ADST
523
    { ihalfright32_c, iadst16_c },  // ADST_ADST
clang-format's avatar
clang-format committed
524 525
    { ihalfright32_c, idct16_c },   // FLIPADST_DCT
    { idct32_c, iadst16_c },        // DCT_FLIPADST
526 527 528
    { ihalfright32_c, iadst16_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, iadst16_c },  // ADST_FLIPADST
    { ihalfright32_c, iadst16_c },  // FLIPADST_ADST
clang-format's avatar
clang-format committed
529 530 531
    { iidtx32_c, iidtx16_c },       // IDTX
    { idct32_c, iidtx16_c },        // V_DCT
    { iidtx32_c, idct16_c },        // H_DCT
532
    { ihalfright32_c, iidtx16_c },  // V_ADST
clang-format's avatar
clang-format committed
533
    { iidtx32_c, iadst16_c },       // H_ADST
534
    { ihalfright32_c, iidtx16_c },  // V_FLIPADST
clang-format's avatar
clang-format committed
535
    { iidtx32_c, iadst16_c },       // H_FLIPADST
536 537 538 539 540 541 542 543 544 545 546 547 548 549
  };

  const int n = 16;
  const int n2 = 32;
  int i, j;
  tran_low_t out[16][32], outtmp[16];
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
    IHT_16x32[tx_type].rows(input, outtmp);
    for (j = 0; j < n; ++j)
      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
550
    input += n;
551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
    IHT_16x32[tx_type].cols(out[i], out[i]);
  }

  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
570 571
void av1_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                            int tx_type) {
572
  static const transform_2d IHT_32x16[] = {
clang-format's avatar
clang-format committed
573 574 575
    { idct16_c, idct32_c },         // DCT_DCT
    { iadst16_c, idct32_c },        // ADST_DCT
    { idct16_c, ihalfright32_c },   // DCT_ADST
576
    { iadst16_c, ihalfright32_c },  // ADST_ADST
clang-format's avatar
clang-format committed
577 578
    { iadst16_c, idct32_c },        // FLIPADST_DCT
    { idct16_c, ihalfright32_c },   // DCT_FLIPADST
579 580 581
    { iadst16_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { iadst16_c, ihalfright32_c },  // ADST_FLIPADST
    { iadst16_c, ihalfright32_c },  // FLIPADST_ADST
clang-format's avatar
clang-format committed
582 583 584 585
    { iidtx16_c, iidtx32_c },       // IDTX
    { idct16_c, iidtx32_c },        // V_DCT
    { iidtx16_c, idct32_c },        // H_DCT
    { iadst16_c, iidtx32_c },       // V_ADST
586
    { iidtx16_c, ihalfright32_c },  // H_ADST
clang-format's avatar
clang-format committed
587
    { iadst16_c, iidtx32_c },       // V_FLIPADST
588 589 590 591 592 593 594 595 596 597 598 599 600 601 602
    { iidtx16_c, ihalfright32_c },  // H_FLIPADST
  };
  const int n = 16;
  const int n2 = 32;

  int i, j;
  tran_low_t out[32][16], outtmp[32];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_32x16[tx_type].rows(input, outtmp);
    for (j = 0; j < n2; ++j)
      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
603
    input += n2;
604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) {
    IHT_32x16[tx_type].cols(out[i], out[i]);
  }

  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}
#endif  // CONFIG_EXT_TX

Yaowu Xu's avatar
Yaowu Xu committed
624 625
void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         int tx_type) {
626
  static const transform_2d IHT_8[] = {
clang-format's avatar
clang-format committed
627 628 629
    { idct8_c, idct8_c },    // DCT_DCT
    { iadst8_c, idct8_c },   // ADST_DCT
    { idct8_c, iadst8_c },   // DCT_ADST
630
    { iadst8_c, iadst8_c },  // ADST_ADST
631
#if CONFIG_EXT_TX
clang-format's avatar
clang-format committed
632 633
    { iadst8_c, idct8_c },   // FLIPADST_DCT
    { idct8_c, iadst8_c },   // DCT_FLIPADST
634 635 636 637
    { iadst8_c, iadst8_c },  // FLIPADST_FLIPADST
    { iadst8_c, iadst8_c },  // ADST_FLIPADST
    { iadst8_c, iadst8_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx8_c },  // IDTX
clang-format's avatar
clang-format committed
638 639
    { idct8_c, iidtx8_c },   // V_DCT
    { iidtx8_c, idct8_c },   // H_DCT
640 641 642 643
    { iadst8_c, iidtx8_c },  // V_ADST
    { iidtx8_c, iadst8_c },  // H_ADST
    { iadst8_c, iidtx8_c },  // V_FLIPADST
    { iidtx8_c, iadst8_c },  // H_FLIPADST
clang-format's avatar
clang-format committed
644
#endif                       // CONFIG_EXT_TX
645 646
  };

Jingning Han's avatar
Jingning Han committed
647
  int i, j;
648 649 650 651
  tran_low_t tmp;
  tran_low_t out[8][8];
  tran_low_t *outp = &out[0][0];
  int outstride = 8;
Jingning Han's avatar
Jingning Han committed
652 653 654

  // inverse transform row vectors
  for (i = 0; i < 8; ++i) {
655
    IHT_8[tx_type].rows(input, out[i]);
clang-format's avatar
clang-format committed
656
    input += 8;
657 658 659
  }

  // transpose
clang-format's avatar
clang-format committed
660
  for (i = 1; i < 8; i++) {
661
    for (j = 0; j < i; j++) {
clang-format's avatar
clang-format committed
662
      tmp = out[i][j];
663 664 665
      out[i][j] = out[j][i];
      out[j][i] = tmp;
    }
Jingning Han's avatar
Jingning Han committed
666 667 668 669
  }

  // inverse transform column vectors
  for (i = 0; i < 8; ++i) {
670 671 672 673
    IHT_8[tx_type].cols(out[i], out[i]);
  }

#if CONFIG_EXT_TX
674
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 8, 8);
675 676 677 678
#endif

  // Sum with the destination
  for (i = 0; i < 8; ++i) {
Jingning Han's avatar
Jingning Han committed
679
    for (j = 0; j < 8; ++j) {
680 681 682
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
Jingning Han's avatar
Jingning Han committed
683 684 685 686
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
687 688
void av1_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                            int tx_type) {
689
  static const transform_2d IHT_16[] = {
clang-format's avatar
clang-format committed
690 691 692
    { idct16_c, idct16_c },    // DCT_DCT
    { iadst16_c, idct16_c },   // ADST_DCT
    { idct16_c, iadst16_c },   // DCT_ADST
693
    { iadst16_c, iadst16_c },  // ADST_ADST
694
#if CONFIG_EXT_TX
clang-format's avatar
clang-format committed
695 696
    { iadst16_c, idct16_c },   // FLIPADST_DCT
    { idct16_c, iadst16_c },   // DCT_FLIPADST
697 698 699 700
    { iadst16_c, iadst16_c },  // FLIPADST_FLIPADST
    { iadst16_c, iadst16_c },  // ADST_FLIPADST
    { iadst16_c, iadst16_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx16_c },  // IDTX
clang-format's avatar
clang-format committed
701 702
    { idct16_c, iidtx16_c },   // V_DCT
    { iidtx16_c, idct16_c },   // H_DCT
703 704 705 706
    { iadst16_c, iidtx16_c },  // V_ADST
    { iidtx16_c, iadst16_c },  // H_ADST
    { iadst16_c, iidtx16_c },  // V_FLIPADST
    { iidtx16_c, iadst16_c },  // H_FLIPADST
clang-format's avatar
clang-format committed
707
#endif                         // CONFIG_EXT_TX
708
  };
709

Jingning Han's avatar
Jingning Han committed
710
  int i, j;
711 712 713 714 715 716 717 718
  tran_low_t tmp;
  tran_low_t out[16][16];
  tran_low_t *outp = &out[0][0];
  int outstride = 16;

  // inverse transform row vectors
  for (i = 0; i < 16; ++i) {
    IHT_16[tx_type].rows(input, out[i]);
clang-format's avatar
clang-format committed
719
    input += 16;
720 721 722
  }

  // transpose
clang-format's avatar
clang-format committed
723
  for (i = 1; i < 16; i++) {
724
    for (j = 0; j < i; j++) {
clang-format's avatar
clang-format committed
725
      tmp = out[i][j];
726 727 728 729
      out[i][j] = out[j][i];
      out[j][i] = tmp;
    }
  }
Jingning Han's avatar
Jingning Han committed
730

731
  // inverse transform column vectors
Jingning Han's avatar
Jingning Han committed
732
  for (i = 0; i < 16; ++i) {
733
    IHT_16[tx_type].cols(out[i], out[i]);
Jingning Han's avatar
Jingning Han committed
734 735
  }

736
#if CONFIG_EXT_TX
737
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 16, 16);
738 739 740
#endif

  // Sum with the destination
Jingning Han's avatar
Jingning Han committed
741 742
  for (i = 0; i < 16; ++i) {
    for (j = 0; j < 16; ++j) {
743 744 745
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
Jingning Han's avatar
Jingning Han committed
746 747 748 749
    }
  }
}

750
#if CONFIG_EXT_TX
Yaowu Xu's avatar
Yaowu Xu committed
751 752
void av1_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                             int tx_type) {
753
  static const transform_2d IHT_32[] = {
clang-format's avatar
clang-format committed
754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769
    { idct32_c, idct32_c },              // DCT_DCT
    { ihalfright32_c, idct32_c },        // ADST_DCT
    { idct32_c, ihalfright32_c },        // DCT_ADST
    { ihalfright32_c, ihalfright32_c },  // ADST_ADST
    { ihalfright32_c, idct32_c },        // FLIPADST_DCT
    { idct32_c, ihalfright32_c },        // DCT_FLIPADST
    { ihalfright32_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, ihalfright32_c },  // ADST_FLIPADST
    { ihalfright32_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx32_c },            // IDTX
    { idct32_c, iidtx32_c },             // V_DCT
    { iidtx32_c, idct32_c },             // H_DCT
    { ihalfright32_c, iidtx16_c },       // V_ADST
    { iidtx16_c, ihalfright32_c },       // H_ADST
    { ihalfright32_c, iidtx16_c },       // V_FLIPADST
    { iidtx16_c, ihalfright32_c },       // H_FLIPADST
770 771 772 773 774 775 776 777 778 779 780
  };

  int i, j;
  tran_low_t tmp;
  tran_low_t out[32][32];
  tran_low_t *outp = &out[0][0];
  int outstride = 32;

  // inverse transform row vectors
  for (i = 0; i < 32; ++i) {
    IHT_32[tx_type].rows(input, out[i]);
clang-format's avatar
clang-format committed
781
    input += 32;
782 783 784
  }

  // transpose
clang-format's avatar
clang-format committed
785
  for (i = 1; i < 32; i++) {
786
    for (j = 0; j < i; j++) {
clang-format's avatar
clang-format committed
787
      tmp = out[i][j];
788 789 790 791 792 793 794 795 796 797
      out[i][j] = out[j][i];
      out[j][i] = tmp;
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 32; ++i) {
    IHT_32[tx_type].cols(out[i], out[i]);
  }

798
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 32, 32);
799 800 801 802 803 804 805 806 807 808 809 810

  // Sum with the destination
  for (i = 0; i < 32; ++i) {
    for (j = 0; j < 32; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}
#endif  // CONFIG_EXT_TX

Jingning Han's avatar
Jingning Han committed
811
// idct
Yaowu Xu's avatar
Yaowu Xu committed
812 813
// Inverse 4x4 DCT dispatcher: forwards to one of two aom_idct4x4_*_add
// kernels depending on eob. Values of eob up to and including 1 take the
// "_1" kernel (presumably the DC-only path); anything larger takes the full
// "_16" kernel.
void av1_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                     int eob) {
  if (eob <= 1) {
    aom_idct4x4_1_add(input, dest, stride);
    return;
  }
  aom_idct4x4_16_add(input, dest, stride);
}

Yaowu Xu's avatar
Yaowu Xu committed
820 821
// Inverse 4x4 WHT dispatcher: forwards to one of two aom_iwht4x4_*_add
// kernels depending on eob. Values of eob up to and including 1 take the
// "_1" kernel; anything larger takes the full "_16" kernel.
void av1_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                     int eob) {
  if (eob <= 1) {
    aom_iwht4x4_1_add(input, dest, stride);
    return;
  }
  aom_iwht4x4_16_add(input, dest, stride);
}

Yaowu Xu's avatar
Yaowu Xu committed
828 829
// Inverse 8x8 DCT dispatcher. The fewer non-zero coefficients there are, the
// cheaper the kernel that can be used, so eob picks the variant:
//   eob == 1  -> DC-only kernel. In that case input[0] already holds the
//                reconstructed value and needs no dequantization; the DC
//                coefficient is itself counted in eob, hence eob >= 1.
//   eob <= 12 -> reduced kernel (only when CONFIG_ADAPT_SCAN is disabled).
//   otherwise -> full 64-coefficient kernel.
// TODO(yunqingwang): "eobs = 1" case is also handled in av1_short_idct8x8_c.
// Combine that with code here.
void av1_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
                     int eob) {
  if (eob == 1) {
    aom_idct8x8_1_add(input, dest, stride);
    return;
  }
#if !CONFIG_ADAPT_SCAN
  if (eob <= 12) {
    aom_idct8x8_12_add(input, dest, stride);
    return;
  }
#endif
  aom_idct8x8_64_add(input, dest, stride);
}

Yaowu Xu's avatar
Yaowu Xu committed
848 849
// Inverse 16x16 DCT dispatcher. A block with few non-zero coefficients can
// use a cheaper kernel, so eob selects the variant:
//   eob == 1  -> DC-only kernel.
//   eob <= 10 -> reduced kernel (only when CONFIG_ADAPT_SCAN is disabled).
//   otherwise -> full 256-coefficient kernel.
void av1_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
                       int eob) {
  if (eob == 1) {
    aom_idct16x16_1_add(input, dest, stride);
    return;
  }
#if !CONFIG_ADAPT_SCAN
  if (eob <= 10) {
    aom_idct16x16_10_add(input, dest, stride);
    return;
  }
#endif
  aom_idct16x16_256_add(input, dest, stride);
}

Yaowu Xu's avatar
Yaowu Xu committed
862 863
// Inverse 32x32 DCT dispatcher; eob selects the kernel variant:
//   eob == 1  -> "_1" kernel.
//   eob <= 34 -> reduced kernel for blocks whose non-zero coefficients lie
//                only in the upper-left 8x8 (only when CONFIG_ADAPT_SCAN is
//                disabled).
//   otherwise -> full 1024-coefficient kernel.
void av1_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
                       int eob) {
  if (eob == 1) {
    aom_idct32x32_1_add(input, dest, stride);
    return;
  }
#if !CONFIG_ADAPT_SCAN
  if (eob <= 34) {
    aom_idct32x32_34_add(input, dest, stride);
    return;
  }
#endif
  aom_idct32x32_1024_add(input, dest, stride);
}

Yaowu Xu's avatar
Yaowu Xu committed
874 875
void av1_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, int stride,
                          int eob, TX_TYPE tx_type, int lossless) {
876 877
  if (lossless) {
    assert(tx_type == DCT_DCT);
Yaowu Xu's avatar
Yaowu Xu committed
878
    av1_iwht4x4_add(input, dest, stride, eob);
879 880 881 882
    return;
  }

  switch (tx_type) {
Yaowu Xu's avatar
Yaowu Xu committed
883
    case DCT_DCT: av1_idct4x4_add(input, dest, stride, eob); break;
884 885
    case ADST_DCT:
    case DCT_ADST:
Yaowu Xu's avatar
Yaowu Xu committed
886
    case ADST_ADST: av1_iht4x4_16_add(input, dest, stride, tx_type); break;
887 888 889 890 891
#if CONFIG_EXT_TX
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
Yaowu Xu's avatar
Yaowu Xu committed
892
    case FLIPADST_ADST: av1_iht4x4_16_add(input, dest, stride, tx_type); break;
Debargha Mukherjee's avatar
Debargha Mukherjee committed
893
    case V_DCT:
894 895 896 897 898
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST: