/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <math.h>

Yaowu Xu's avatar
Yaowu Xu committed
14
#include "./aom_dsp_rtcd.h"
Geza Lore's avatar
Geza Lore committed
15 16 17 18
#include "./av1_rtcd.h"
#include "aom_dsp/inv_txfm.h"
#include "aom_ports/mem.h"
#include "av1/common/av1_inv_txfm2d_cfg.h"
19 20 21
#include "av1/common/blockd.h"
#include "av1/common/enums.h"
#include "av1/common/idct.h"
Jingning Han's avatar
Jingning Han committed
22

23 24
int get_tx_scale(const MACROBLOCKD *const xd, const TX_TYPE tx_type,
                 const TX_SIZE tx_size) {
clang-format's avatar
clang-format committed
25
  (void)tx_type;
Yaowu Xu's avatar
Yaowu Xu committed
26
#if CONFIG_AOM_HIGHBITDEPTH
27
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
28
    return txsize_sqr_up_map[tx_size] == TX_32X32;
29 30 31 32
  }
#else
  (void)xd;
#endif
33
  return txsize_sqr_up_map[tx_size] == TX_32X32;
34 35
}

36
#if CONFIG_EXT_TX
Debargha Mukherjee's avatar
Debargha Mukherjee committed
37 38 39 40 41 42 43 44
// 4-point inverse identity transform: every coefficient is multiplied by
// Sqrt2 and passed through dct_const_round_shift().
static void iidtx4_c(const tran_low_t *input, tran_low_t *output) {
  int k = 0;
  while (k < 4) {
    output[k] = (tran_low_t)dct_const_round_shift(input[k] * Sqrt2);
    ++k;
  }
}

// 8-point inverse identity transform: every coefficient is doubled.
static void iidtx8_c(const tran_low_t *input, tran_low_t *output) {
  int k = 0;
  while (k < 8) {
    output[k] = input[k] * 2;
    ++k;
  }
}

// 16-point inverse identity transform: every coefficient is multiplied by
// 2 * Sqrt2 and passed through dct_const_round_shift().
static void iidtx16_c(const tran_low_t *input, tran_low_t *output) {
  int k = 0;
  while (k < 16) {
    output[k] = (tran_low_t)dct_const_round_shift(input[k] * 2 * Sqrt2);
    ++k;
  }
}

// 32-point inverse identity transform: every coefficient is multiplied by 4.
static void iidtx32_c(const tran_low_t *input, tran_low_t *output) {
  int k = 0;
  while (k < 32) {
    output[k] = input[k] * 4;
    ++k;
  }
}

59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
// For use in lieu of DST.
//
// Writes input[16..31] * 4 to output[0..15], and the result of idct16_c() on
// the Sqrt2-scaled input[0..15] to output[16..31].
//
// The scaled copy of input[0..15] is taken before anything is stored to
// output. The 32-point column pass in this file calls its kernel as
// cols(out[i], out[i]), i.e. with input == output; storing output[0..15]
// first would overwrite input[0..15] before it had been read.
static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  tran_low_t inputhalf[16];
  // Multiply input by sqrt(2). Must run before the stores to output below so
  // that in-place calls still see the original low half.
  for (i = 0; i < 16; ++i) {
    inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
  }
  // Reads input[16..31] and writes output[0..15]: no overlap even in place.
  for (i = 0; i < 16; ++i) {
    output[i] = input[16 + i] * 4;
  }
  idct16_c(inputhalf, output + 16);
  // Note overall scaling factor is 4 times orthogonal
}

Yaowu Xu's avatar
Yaowu Xu committed
74
#if CONFIG_AOM_HIGHBITDEPTH
Debargha Mukherjee's avatar
Debargha Mukherjee committed
75 76 77 78
// High-bitdepth 4-point inverse identity transform: every coefficient is
// multiplied by Sqrt2, rounded with highbd_dct_const_round_shift() and
// wrapped with HIGHBD_WRAPLOW() for bit depth bd.
static void highbd_iidtx4_c(const tran_low_t *input, tran_low_t *output,
                            int bd) {
  int k = 0;
  while (k < 4) {
    output[k] =
        HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[k] * Sqrt2), bd);
    ++k;
  }
}

// High-bitdepth 8-point inverse identity transform: every coefficient is
// doubled. bd is accepted for signature compatibility and not used.
static void highbd_iidtx8_c(const tran_low_t *input, tran_low_t *output,
                            int bd) {
  int k = 0;
  (void)bd;
  while (k < 8) {
    output[k] = input[k] * 2;
    ++k;
  }
}

// High-bitdepth 16-point inverse identity transform: every coefficient is
// multiplied by 2 * Sqrt2, rounded with highbd_dct_const_round_shift() and
// wrapped with HIGHBD_WRAPLOW() for bit depth bd.
static void highbd_iidtx16_c(const tran_low_t *input, tran_low_t *output,
                             int bd) {
  int k = 0;
  while (k < 16) {
    output[k] =
        HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[k] * 2 * Sqrt2), bd);
    ++k;
  }
}

// High-bitdepth 32-point inverse identity transform: every coefficient is
// multiplied by 4. bd is accepted for signature compatibility and not used.
static void highbd_iidtx32_c(const tran_low_t *input, tran_low_t *output,
                             int bd) {
  int k = 0;
  (void)bd;
  while (k < 32) {
    output[k] = input[k] * 4;
    ++k;
  }
}

105 106 107 108 109 110 111 112 113
// High-bitdepth counterpart of ihalfright32_c().
//
// Writes input[16..31] * 4 to output[0..15], and the result of
// aom_highbd_idct16_c() on the Sqrt2-scaled input[0..15] to output[16..31].
//
// The scaled copy of input[0..15] is taken before anything is stored to
// output, so the result is the same when input and output alias. Storing
// output[0..15] first would overwrite input[0..15] before it had been read.
static void highbd_ihalfright32_c(const tran_low_t *input, tran_low_t *output,
                                  int bd) {
  int i;
  tran_low_t inputhalf[16];
  // Multiply input by sqrt(2). Must run before the stores to output below so
  // that in-place calls still see the original low half.
  for (i = 0; i < 16; ++i) {
    inputhalf[i] =
        HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[i] * Sqrt2), bd);
  }
  // Reads input[16..31] and writes output[0..15]: no overlap even in place.
  for (i = 0; i < 16; ++i) {
    output[i] = input[16 + i] * 4;
  }
  aom_highbd_idct16_c(inputhalf, output + 16, bd);
  // Note overall scaling factor is 4 times orthogonal
}
Yaowu Xu's avatar
Yaowu Xu committed
120
#endif  // CONFIG_AOM_HIGHBITDEPTH
121

Jingning Han's avatar
Jingning Han committed
122
// Inverse identity transform and add.
123
static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
Jingning Han's avatar
Jingning Han committed
124
                           int bs, int tx_type) {
125 126
  int r, c;
  const int shift = bs < 32 ? 3 : 2;
Debargha Mukherjee's avatar
Debargha Mukherjee committed
127
  if (tx_type == IDTX) {
Jingning Han's avatar
Jingning Han committed
128 129
    for (r = 0; r < bs; ++r) {
      for (c = 0; c < bs; ++c)
Debargha Mukherjee's avatar
Debargha Mukherjee committed
130 131 132
        dest[c] = clip_pixel_add(dest[c], input[c] >> shift);
      dest += stride;
      input += bs;
Jingning Han's avatar
Jingning Han committed
133
    }
134 135 136
  }
}

clang-format's avatar
clang-format committed
137 138 139 140 141
// Repoints dest at the first element of the last of `size` rows and negates
// stride, so that stepping by stride afterwards walks the rows bottom-up.
#define FLIPUD_PTR(dest, stride, size)  \
  do {                                  \
    (dest) += ((size)-1) * (stride);    \
    (stride) = -(stride);               \
  } while (0)
142

clang-format's avatar
clang-format committed
143 144 145
// Applies FLIPUD_PTR to the destination and/or source pointer/stride pair as
// required by the flipped variants of tx_type.
//
// Note that the transpose of src will be added to dst. In order to LR
// flip the addends (in dst coordinates), we UD flip the src. To UD flip
// the addends, we UD flip the dst.
static void maybe_flip_strides(uint8_t **dst, int *dstride, tran_low_t **src,
                               int *sstride, int tx_type, int sizey,
                               int sizex) {
  int flip_ud = 0;  // flip the destination vertically
  int flip_lr = 0;  // flip the source vertically (LR in dst coordinates)

  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case IDTX:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST: break;
    case FLIPADST_DCT:
    case FLIPADST_ADST:
    case V_FLIPADST: flip_ud = 1; break;
    case DCT_FLIPADST:
    case ADST_FLIPADST:
    case H_FLIPADST: flip_lr = 1; break;
    case FLIPADST_FLIPADST:
      flip_ud = 1;
      flip_lr = 1;
      break;
    default: assert(0); break;
  }

  if (flip_ud) {
    FLIPUD_PTR(*dst, *dstride, sizey);
  }
  if (flip_lr) {
    FLIPUD_PTR(*src, *sstride, sizex);
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
181
#if CONFIG_AOM_HIGHBITDEPTH
182
static void highbd_inv_idtx_add_c(const tran_low_t *input, uint8_t *dest8,
183
                                  int stride, int bs, int tx_type, int bd) {
184 185 186
  int r, c;
  const int shift = bs < 32 ? 3 : 2;
  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
187

Debargha Mukherjee's avatar
Debargha Mukherjee committed
188
  if (tx_type == IDTX) {
189 190
    for (r = 0; r < bs; ++r) {
      for (c = 0; c < bs; ++c)
Debargha Mukherjee's avatar
Debargha Mukherjee committed
191 192 193
        dest[c] = highbd_clip_pixel_add(dest[c], input[c] >> shift, bd);
      dest += stride;
      input += bs;
194
    }
195 196
  }
}
197

clang-format's avatar
clang-format committed
198 199 200
// 16-bit destination variant: applies FLIPUD_PTR to the destination and/or
// source pointer/stride pair as required by the flipped variants of tx_type.
//
// Note that the transpose of src will be added to dst. In order to LR
// flip the addends (in dst coordinates), we UD flip the src. To UD flip
// the addends, we UD flip the dst.
static void maybe_flip_strides16(uint16_t **dst, int *dstride, tran_low_t **src,
                                 int *sstride, int tx_type, int sizey,
                                 int sizex) {
  int flip_ud = 0;  // flip the destination vertically
  int flip_lr = 0;  // flip the source vertically (LR in dst coordinates)

  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case IDTX:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST: break;
    case FLIPADST_DCT:
    case FLIPADST_ADST:
    case V_FLIPADST: flip_ud = 1; break;
    case DCT_FLIPADST:
    case ADST_FLIPADST:
    case H_FLIPADST: flip_lr = 1; break;
    case FLIPADST_FLIPADST:
      flip_ud = 1;
      flip_lr = 1;
      break;
    default: assert(0); break;
  }

  if (flip_ud) {
    FLIPUD_PTR(*dst, *dstride, sizey);
  }
  if (flip_lr) {
    FLIPUD_PTR(*src, *sstride, sizex);
  }
}
Yaowu Xu's avatar
Yaowu Xu committed
235
#endif  // CONFIG_AOM_HIGHBITDEPTH
236 237
#endif  // CONFIG_EXT_TX

Yaowu Xu's avatar
Yaowu Xu committed
238 239
void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         int tx_type) {
240
  static const transform_2d IHT_4[] = {
clang-format's avatar
clang-format committed
241 242 243
    { idct4_c, idct4_c },    // DCT_DCT
    { iadst4_c, idct4_c },   // ADST_DCT
    { idct4_c, iadst4_c },   // DCT_ADST
244
    { iadst4_c, iadst4_c },  // ADST_ADST
245
#if CONFIG_EXT_TX
clang-format's avatar
clang-format committed
246 247
    { iadst4_c, idct4_c },   // FLIPADST_DCT
    { idct4_c, iadst4_c },   // DCT_FLIPADST
248 249 250 251
    { iadst4_c, iadst4_c },  // FLIPADST_FLIPADST
    { iadst4_c, iadst4_c },  // ADST_FLIPADST
    { iadst4_c, iadst4_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx4_c },  // IDTX
clang-format's avatar
clang-format committed
252 253
    { idct4_c, iidtx4_c },   // V_DCT
    { iidtx4_c, idct4_c },   // H_DCT
254 255 256 257
    { iadst4_c, iidtx4_c },  // V_ADST
    { iidtx4_c, iadst4_c },  // H_ADST
    { iadst4_c, iidtx4_c },  // V_FLIPADST
    { iidtx4_c, iadst4_c },  // H_FLIPADST
clang-format's avatar
clang-format committed
258
#endif                       // CONFIG_EXT_TX
259 260 261
  };

  int i, j;
262 263 264 265
  tran_low_t tmp;
  tran_low_t out[4][4];
  tran_low_t *outp = &out[0][0];
  int outstride = 4;
266 267 268

  // inverse transform row vectors
  for (i = 0; i < 4; ++i) {
269
    IHT_4[tx_type].rows(input, out[i]);
clang-format's avatar
clang-format committed
270
    input += 4;
271 272 273
  }

  // transpose
clang-format's avatar
clang-format committed
274
  for (i = 1; i < 4; i++) {
275
    for (j = 0; j < i; j++) {
clang-format's avatar
clang-format committed
276
      tmp = out[i][j];
277 278 279
      out[i][j] = out[j][i];
      out[j][i] = tmp;
    }
280 281 282 283
  }

  // inverse transform column vectors
  for (i = 0; i < 4; ++i) {
284 285 286 287
    IHT_4[tx_type].cols(out[i], out[i]);
  }

#if CONFIG_EXT_TX
288
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4, 4);
289 290 291 292
#endif

  // Sum with the destination
  for (i = 0; i < 4; ++i) {
293
    for (j = 0; j < 4; ++j) {
294 295 296
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
297 298 299 300
    }
  }
}

301
#if CONFIG_EXT_TX
Yaowu Xu's avatar
Yaowu Xu committed
302 303
void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         int tx_type) {
304
  static const transform_2d IHT_4x8[] = {
clang-format's avatar
clang-format committed
305 306 307
    { idct8_c, idct4_c },    // DCT_DCT
    { iadst8_c, idct4_c },   // ADST_DCT
    { idct8_c, iadst4_c },   // DCT_ADST
308
    { iadst8_c, iadst4_c },  // ADST_ADST
clang-format's avatar
clang-format committed
309 310
    { iadst8_c, idct4_c },   // FLIPADST_DCT
    { idct8_c, iadst4_c },   // DCT_FLIPADST
311 312 313 314
    { iadst8_c, iadst4_c },  // FLIPADST_FLIPADST
    { iadst8_c, iadst4_c },  // ADST_FLIPADST
    { iadst8_c, iadst4_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx4_c },  // IDTX
clang-format's avatar
clang-format committed
315 316
    { idct8_c, iidtx4_c },   // V_DCT
    { iidtx8_c, idct4_c },   // H_DCT
317 318 319 320 321 322
    { iadst8_c, iidtx4_c },  // V_ADST
    { iidtx8_c, iadst4_c },  // H_ADST
    { iadst8_c, iidtx4_c },  // V_FLIPADST
    { iidtx8_c, iadst4_c },  // H_FLIPADST
  };

323 324
  const int n = 4;
  const int n2 = 8;
325 326 327
  int i, j;
  tran_low_t out[4][8], outtmp[4];
  tran_low_t *outp = &out[0][0];
328
  int outstride = n2;
329 330

  // inverse transform row vectors and transpose
331
  for (i = 0; i < n2; ++i) {
332
    IHT_4x8[tx_type].rows(input, outtmp);
333
    for (j = 0; j < n; ++j)
334
      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
335
    input += n;
336 337 338
  }

  // inverse transform column vectors
339
  for (i = 0; i < n; ++i) {
340 341 342
    IHT_4x8[tx_type].cols(out[i], out[i]);
  }

343
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
344 345

  // Sum with the destination
346 347
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
348 349 350 351 352 353 354
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
355 356
void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         int tx_type) {
357
  static const transform_2d IHT_8x4[] = {
clang-format's avatar
clang-format committed
358 359 360
    { idct4_c, idct8_c },    // DCT_DCT
    { iadst4_c, idct8_c },   // ADST_DCT
    { idct4_c, iadst8_c },   // DCT_ADST
361
    { iadst4_c, iadst8_c },  // ADST_ADST
clang-format's avatar
clang-format committed
362 363
    { iadst4_c, idct8_c },   // FLIPADST_DCT
    { idct4_c, iadst8_c },   // DCT_FLIPADST
364 365 366 367
    { iadst4_c, iadst8_c },  // FLIPADST_FLIPADST
    { iadst4_c, iadst8_c },  // ADST_FLIPADST
    { iadst4_c, iadst8_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx8_c },  // IDTX
clang-format's avatar
clang-format committed
368 369
    { idct4_c, iidtx8_c },   // V_DCT
    { iidtx4_c, idct8_c },   // H_DCT
370 371 372 373 374
    { iadst4_c, iidtx8_c },  // V_ADST
    { iidtx4_c, iadst8_c },  // H_ADST
    { iadst4_c, iidtx8_c },  // V_FLIPADST
    { iidtx4_c, iadst8_c },  // H_FLIPADST
  };
375 376
  const int n = 4;
  const int n2 = 8;
377 378 379 380

  int i, j;
  tran_low_t out[8][4], outtmp[8];
  tran_low_t *outp = &out[0][0];
381
  int outstride = n;
382 383

  // inverse transform row vectors and transpose
384
  for (i = 0; i < n; ++i) {
385
    IHT_8x4[tx_type].rows(input, outtmp);
386
    for (j = 0; j < n2; ++j)
387
      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
388
    input += n2;
389 390 391
  }

  // inverse transform column vectors
392
  for (i = 0; i < n2; ++i) {
393 394 395
    IHT_8x4[tx_type].cols(out[i], out[i]);
  }

396
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
397 398

  // Sum with the destination
399 400
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
401 402 403 404 405 406 407
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
408 409
void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                           int tx_type) {
410
  static const transform_2d IHT_8x16[] = {
clang-format's avatar
clang-format committed
411 412 413
    { idct16_c, idct8_c },    // DCT_DCT
    { iadst16_c, idct8_c },   // ADST_DCT
    { idct16_c, iadst8_c },   // DCT_ADST
414
    { iadst16_c, iadst8_c },  // ADST_ADST
clang-format's avatar
clang-format committed
415 416
    { iadst16_c, idct8_c },   // FLIPADST_DCT
    { idct16_c, iadst8_c },   // DCT_FLIPADST
417 418 419 420
    { iadst16_c, iadst8_c },  // FLIPADST_FLIPADST
    { iadst16_c, iadst8_c },  // ADST_FLIPADST
    { iadst16_c, iadst8_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx8_c },  // IDTX
clang-format's avatar
clang-format committed
421 422
    { idct16_c, iidtx8_c },   // V_DCT
    { iidtx16_c, idct8_c },   // H_DCT
423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440
    { iadst16_c, iidtx8_c },  // V_ADST
    { iidtx16_c, iadst8_c },  // H_ADST
    { iadst16_c, iidtx8_c },  // V_FLIPADST
    { iidtx16_c, iadst8_c },  // H_FLIPADST
  };

  const int n = 8;
  const int n2 = 16;
  int i, j;
  tran_low_t out[8][16], outtmp[8];
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
    IHT_8x16[tx_type].rows(input, outtmp);
    for (j = 0; j < n; ++j)
      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
441
    input += n;
442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
    IHT_8x16[tx_type].cols(out[i], out[i]);
  }

  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
461 462
void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                           int tx_type) {
463
  static const transform_2d IHT_16x8[] = {
clang-format's avatar
clang-format committed
464 465 466
    { idct8_c, idct16_c },    // DCT_DCT
    { iadst8_c, idct16_c },   // ADST_DCT
    { idct8_c, iadst16_c },   // DCT_ADST
467
    { iadst8_c, iadst16_c },  // ADST_ADST
clang-format's avatar
clang-format committed
468 469
    { iadst8_c, idct16_c },   // FLIPADST_DCT
    { idct8_c, iadst16_c },   // DCT_FLIPADST
470 471 472 473
    { iadst8_c, iadst16_c },  // FLIPADST_FLIPADST
    { iadst8_c, iadst16_c },  // ADST_FLIPADST
    { iadst8_c, iadst16_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx16_c },  // IDTX
clang-format's avatar
clang-format committed
474 475
    { idct8_c, iidtx16_c },   // V_DCT
    { iidtx8_c, idct16_c },   // H_DCT
476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493
    { iadst8_c, iidtx16_c },  // V_ADST
    { iidtx8_c, iadst16_c },  // H_ADST
    { iadst8_c, iidtx16_c },  // V_FLIPADST
    { iidtx8_c, iadst16_c },  // H_FLIPADST
  };
  const int n = 8;
  const int n2 = 16;

  int i, j;
  tran_low_t out[16][8], outtmp[16];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_16x8[tx_type].rows(input, outtmp);
    for (j = 0; j < n2; ++j)
      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
494
    input += n2;
495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) {
    IHT_16x8[tx_type].cols(out[i], out[i]);
  }

  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
514 515
void av1_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                            int tx_type) {
516
  static const transform_2d IHT_16x32[] = {
clang-format's avatar
clang-format committed
517 518 519
    { idct32_c, idct16_c },         // DCT_DCT
    { ihalfright32_c, idct16_c },   // ADST_DCT
    { idct32_c, iadst16_c },        // DCT_ADST
520
    { ihalfright32_c, iadst16_c },  // ADST_ADST
clang-format's avatar
clang-format committed
521 522
    { ihalfright32_c, idct16_c },   // FLIPADST_DCT
    { idct32_c, iadst16_c },        // DCT_FLIPADST
523 524 525
    { ihalfright32_c, iadst16_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, iadst16_c },  // ADST_FLIPADST
    { ihalfright32_c, iadst16_c },  // FLIPADST_ADST
clang-format's avatar
clang-format committed
526 527 528
    { iidtx32_c, iidtx16_c },       // IDTX
    { idct32_c, iidtx16_c },        // V_DCT
    { iidtx32_c, idct16_c },        // H_DCT
529
    { ihalfright32_c, iidtx16_c },  // V_ADST
clang-format's avatar
clang-format committed
530
    { iidtx32_c, iadst16_c },       // H_ADST
531
    { ihalfright32_c, iidtx16_c },  // V_FLIPADST
clang-format's avatar
clang-format committed
532
    { iidtx32_c, iadst16_c },       // H_FLIPADST
533 534 535 536 537 538 539 540 541 542 543 544 545 546
  };

  const int n = 16;
  const int n2 = 32;
  int i, j;
  tran_low_t out[16][32], outtmp[16];
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
    IHT_16x32[tx_type].rows(input, outtmp);
    for (j = 0; j < n; ++j)
      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
547
    input += n;
548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
    IHT_16x32[tx_type].cols(out[i], out[i]);
  }

  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
567 568
void av1_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                            int tx_type) {
569
  static const transform_2d IHT_32x16[] = {
clang-format's avatar
clang-format committed
570 571 572
    { idct16_c, idct32_c },         // DCT_DCT
    { iadst16_c, idct32_c },        // ADST_DCT
    { idct16_c, ihalfright32_c },   // DCT_ADST
573
    { iadst16_c, ihalfright32_c },  // ADST_ADST
clang-format's avatar
clang-format committed
574 575
    { iadst16_c, idct32_c },        // FLIPADST_DCT
    { idct16_c, ihalfright32_c },   // DCT_FLIPADST
576 577 578
    { iadst16_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { iadst16_c, ihalfright32_c },  // ADST_FLIPADST
    { iadst16_c, ihalfright32_c },  // FLIPADST_ADST
clang-format's avatar
clang-format committed
579 580 581 582
    { iidtx16_c, iidtx32_c },       // IDTX
    { idct16_c, iidtx32_c },        // V_DCT
    { iidtx16_c, idct32_c },        // H_DCT
    { iadst16_c, iidtx32_c },       // V_ADST
583
    { iidtx16_c, ihalfright32_c },  // H_ADST
clang-format's avatar
clang-format committed
584
    { iadst16_c, iidtx32_c },       // V_FLIPADST
585 586 587 588 589 590 591 592 593 594 595 596 597 598 599
    { iidtx16_c, ihalfright32_c },  // H_FLIPADST
  };
  const int n = 16;
  const int n2 = 32;

  int i, j;
  tran_low_t out[32][16], outtmp[32];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_32x16[tx_type].rows(input, outtmp);
    for (j = 0; j < n2; ++j)
      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
600
    input += n2;
601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) {
    IHT_32x16[tx_type].cols(out[i], out[i]);
  }

  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}
#endif  // CONFIG_EXT_TX

Yaowu Xu's avatar
Yaowu Xu committed
621 622
void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         int tx_type) {
623
  static const transform_2d IHT_8[] = {
clang-format's avatar
clang-format committed
624 625 626
    { idct8_c, idct8_c },    // DCT_DCT
    { iadst8_c, idct8_c },   // ADST_DCT
    { idct8_c, iadst8_c },   // DCT_ADST
627
    { iadst8_c, iadst8_c },  // ADST_ADST
628
#if CONFIG_EXT_TX
clang-format's avatar
clang-format committed
629 630
    { iadst8_c, idct8_c },   // FLIPADST_DCT
    { idct8_c, iadst8_c },   // DCT_FLIPADST
631 632 633 634
    { iadst8_c, iadst8_c },  // FLIPADST_FLIPADST
    { iadst8_c, iadst8_c },  // ADST_FLIPADST
    { iadst8_c, iadst8_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx8_c },  // IDTX
clang-format's avatar
clang-format committed
635 636
    { idct8_c, iidtx8_c },   // V_DCT
    { iidtx8_c, idct8_c },   // H_DCT
637 638 639 640
    { iadst8_c, iidtx8_c },  // V_ADST
    { iidtx8_c, iadst8_c },  // H_ADST
    { iadst8_c, iidtx8_c },  // V_FLIPADST
    { iidtx8_c, iadst8_c },  // H_FLIPADST
clang-format's avatar
clang-format committed
641
#endif                       // CONFIG_EXT_TX
642 643
  };

Jingning Han's avatar
Jingning Han committed
644
  int i, j;
645 646 647 648
  tran_low_t tmp;
  tran_low_t out[8][8];
  tran_low_t *outp = &out[0][0];
  int outstride = 8;
Jingning Han's avatar
Jingning Han committed
649 650 651

  // inverse transform row vectors
  for (i = 0; i < 8; ++i) {
652
    IHT_8[tx_type].rows(input, out[i]);
clang-format's avatar
clang-format committed
653
    input += 8;
654 655 656
  }

  // transpose
clang-format's avatar
clang-format committed
657
  for (i = 1; i < 8; i++) {
658
    for (j = 0; j < i; j++) {
clang-format's avatar
clang-format committed
659
      tmp = out[i][j];
660 661 662
      out[i][j] = out[j][i];
      out[j][i] = tmp;
    }
Jingning Han's avatar
Jingning Han committed
663 664 665 666
  }

  // inverse transform column vectors
  for (i = 0; i < 8; ++i) {
667 668 669 670
    IHT_8[tx_type].cols(out[i], out[i]);
  }

#if CONFIG_EXT_TX
671
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 8, 8);
672 673 674 675
#endif

  // Sum with the destination
  for (i = 0; i < 8; ++i) {
Jingning Han's avatar
Jingning Han committed
676
    for (j = 0; j < 8; ++j) {
677 678 679
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
Jingning Han's avatar
Jingning Han committed
680 681 682 683
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
684 685
void av1_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                            int tx_type) {
686
  static const transform_2d IHT_16[] = {
clang-format's avatar
clang-format committed
687 688 689
    { idct16_c, idct16_c },    // DCT_DCT
    { iadst16_c, idct16_c },   // ADST_DCT
    { idct16_c, iadst16_c },   // DCT_ADST
690
    { iadst16_c, iadst16_c },  // ADST_ADST
691
#if CONFIG_EXT_TX
clang-format's avatar
clang-format committed
692 693
    { iadst16_c, idct16_c },   // FLIPADST_DCT
    { idct16_c, iadst16_c },   // DCT_FLIPADST
694 695 696 697
    { iadst16_c, iadst16_c },  // FLIPADST_FLIPADST
    { iadst16_c, iadst16_c },  // ADST_FLIPADST
    { iadst16_c, iadst16_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx16_c },  // IDTX
clang-format's avatar
clang-format committed
698 699
    { idct16_c, iidtx16_c },   // V_DCT
    { iidtx16_c, idct16_c },   // H_DCT
700 701 702 703
    { iadst16_c, iidtx16_c },  // V_ADST
    { iidtx16_c, iadst16_c },  // H_ADST
    { iadst16_c, iidtx16_c },  // V_FLIPADST
    { iidtx16_c, iadst16_c },  // H_FLIPADST
clang-format's avatar
clang-format committed
704
#endif                         // CONFIG_EXT_TX
705
  };
706

Jingning Han's avatar
Jingning Han committed
707
  int i, j;
708 709 710 711 712 713 714 715
  tran_low_t tmp;
  tran_low_t out[16][16];
  tran_low_t *outp = &out[0][0];
  int outstride = 16;

  // inverse transform row vectors
  for (i = 0; i < 16; ++i) {
    IHT_16[tx_type].rows(input, out[i]);
clang-format's avatar
clang-format committed
716
    input += 16;
717 718 719
  }

  // transpose
clang-format's avatar
clang-format committed
720
  for (i = 1; i < 16; i++) {
721
    for (j = 0; j < i; j++) {
clang-format's avatar
clang-format committed
722
      tmp = out[i][j];
723 724 725 726
      out[i][j] = out[j][i];
      out[j][i] = tmp;
    }
  }
Jingning Han's avatar
Jingning Han committed
727

728
  // inverse transform column vectors
Jingning Han's avatar
Jingning Han committed
729
  for (i = 0; i < 16; ++i) {
730
    IHT_16[tx_type].cols(out[i], out[i]);
Jingning Han's avatar
Jingning Han committed
731 732
  }

733
#if CONFIG_EXT_TX
734
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 16, 16);
735 736 737
#endif

  // Sum with the destination
Jingning Han's avatar
Jingning Han committed
738 739
  for (i = 0; i < 16; ++i) {
    for (j = 0; j < 16; ++j) {
740 741 742
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
Jingning Han's avatar
Jingning Han committed
743 744 745 746
    }
  }
}

747
#if CONFIG_EXT_TX
Yaowu Xu's avatar
Yaowu Xu committed
748 749
void av1_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                             int tx_type) {
750
  static const transform_2d IHT_32[] = {
clang-format's avatar
clang-format committed
751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766
    { idct32_c, idct32_c },              // DCT_DCT
    { ihalfright32_c, idct32_c },        // ADST_DCT
    { idct32_c, ihalfright32_c },        // DCT_ADST
    { ihalfright32_c, ihalfright32_c },  // ADST_ADST
    { ihalfright32_c, idct32_c },        // FLIPADST_DCT
    { idct32_c, ihalfright32_c },        // DCT_FLIPADST
    { ihalfright32_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, ihalfright32_c },  // ADST_FLIPADST
    { ihalfright32_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx32_c },            // IDTX
    { idct32_c, iidtx32_c },             // V_DCT
    { iidtx32_c, idct32_c },             // H_DCT
    { ihalfright32_c, iidtx16_c },       // V_ADST
    { iidtx16_c, ihalfright32_c },       // H_ADST
    { ihalfright32_c, iidtx16_c },       // V_FLIPADST
    { iidtx16_c, ihalfright32_c },       // H_FLIPADST
767 768 769 770 771 772 773 774 775 776 777
  };

  int i, j;
  tran_low_t tmp;
  tran_low_t out[32][32];
  tran_low_t *outp = &out[0][0];
  int outstride = 32;

  // inverse transform row vectors
  for (i = 0; i < 32; ++i) {
    IHT_32[tx_type].rows(input, out[i]);
clang-format's avatar
clang-format committed
778
    input += 32;
779 780 781
  }

  // transpose
clang-format's avatar
clang-format committed
782
  for (i = 1; i < 32; i++) {
783
    for (j = 0; j < i; j++) {
clang-format's avatar
clang-format committed
784
      tmp = out[i][j];
785 786 787 788 789 790 791 792 793 794
      out[i][j] = out[j][i];
      out[j][i] = tmp;
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 32; ++i) {
    IHT_32[tx_type].cols(out[i], out[i]);
  }

795
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 32, 32);
796 797 798 799 800 801 802 803 804 805 806 807

  // Sum with the destination
  for (i = 0; i < 32; ++i) {
    for (j = 0; j < 32; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}
#endif  // CONFIG_EXT_TX

Jingning Han's avatar
Jingning Han committed
808
// idct
Yaowu Xu's avatar
Yaowu Xu committed
809 810
// Inverse 4x4 DCT added into `dest`, dispatched on the end-of-block count:
// eob <= 1 takes aom_idct4x4_1_add, anything larger takes
// aom_idct4x4_16_add.
void av1_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                     int eob) {
  if (eob <= 1) {
    aom_idct4x4_1_add(input, dest, stride);
    return;
  }
  aom_idct4x4_16_add(input, dest, stride);
}

Yaowu Xu's avatar
Yaowu Xu committed
817 818
// Inverse 4x4 WHT added into `dest`, dispatched on the end-of-block count:
// eob <= 1 takes aom_iwht4x4_1_add, anything larger takes
// aom_iwht4x4_16_add.
void av1_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                     int eob) {
  if (eob <= 1) {
    aom_iwht4x4_1_add(input, dest, stride);
    return;
  }
  aom_iwht4x4_16_add(input, dest, stride);
}

Yaowu Xu's avatar
Yaowu Xu committed
825 826
// Inverse 8x8 DCT added into `dest`. The work shrinks when only a few
// coefficients are non-zero, so eob picks the variant to run.
//
// When dc is 1, input[0] is already the reconstructed value and needs no
// dequantization; dc is also counted in eob in that case, so eob >= 1.
//
// TODO(yunqingwang): the "eob == 1" case is also handled in
// av1_short_idct8x8_c. Combine that with the code here.
void av1_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
                     int eob) {
  if (eob == 1) {
    // DC only DCT coefficient
    aom_idct8x8_1_add(input, dest, stride);
    return;
  }
  if (eob <= 12) {
    aom_idct8x8_12_add(input, dest, stride);
    return;
  }
  aom_idct8x8_64_add(input, dest, stride);
}

Yaowu Xu's avatar
Yaowu Xu committed
843 844
// Inverse 16x16 DCT added into `dest`. The work shrinks when only a few
// coefficients are non-zero, so eob picks the variant to run.
void av1_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
                       int eob) {
  if (eob == 1) {
    // DC only DCT coefficient.
    aom_idct16x16_1_add(input, dest, stride);
    return;
  }
  if (eob <= 10) {
    aom_idct16x16_10_add(input, dest, stride);
    return;
  }
  aom_idct16x16_256_add(input, dest, stride);
}

Yaowu Xu's avatar
Yaowu Xu committed
855 856
// Inverse 32x32 DCT added into `dest`, with the variant chosen by eob.
void av1_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
                       int eob) {
  if (eob == 1) {
    aom_idct32x32_1_add(input, dest, stride);
    return;
  }
  if (eob <= 34) {
    // non-zero coeff only in upper-left 8x8
    aom_idct32x32_34_add(input, dest, stride);
    return;
  }
  aom_idct32x32_1024_add(input, dest, stride);
}

Yaowu Xu's avatar
Yaowu Xu committed
866 867
// Top-level 4x4 inverse transform + add, dispatched on `lossless` and
// `tx_type`.
//
//   input    - transform coefficients.
//   dest     - destination pixels, updated in place.
//   stride   - row stride of `dest`.
//   eob      - end-of-block count, forwarded to the DCT/WHT dispatchers.
//   tx_type  - transform type; asserted to be DCT_DCT when `lossless` is set.
//   lossless - non-zero selects the WHT path (av1_iwht4x4_add).
void av1_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, int stride,
                          int eob, TX_TYPE tx_type, int lossless) {
  if (lossless) {
    assert(tx_type == DCT_DCT);
    av1_iwht4x4_add(input, dest, stride, eob);
    return;
  }

  switch (tx_type) {
    case DCT_DCT:
      av1_idct4x4_add(input, dest, stride, eob);
      break;

    // All DCT/ADST hybrids (and, with EXT_TX, their flipped variants) share
    // the same hybrid-transform entry point.
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
#if CONFIG_EXT_TX
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
#endif  // CONFIG_EXT_TX
      av1_iht4x4_16_add(input, dest, stride, tx_type);
      break;

#if CONFIG_EXT_TX
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      // Use C version since DST only exists in C code
      av1_iht4x4_16_add_c(input, dest, stride, tx_type);
      break;

    case IDTX:
      inv_idtx_add_c(input, dest, stride, 4, tx_type);
      break;
#endif  // CONFIG_EXT_TX

    default:
      assert(0);
      break;
  }
}