/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <math.h>

Yaowu Xu's avatar
Yaowu Xu committed
14
#include "./aom_dsp_rtcd.h"
Geza Lore's avatar
Geza Lore committed
15 16 17
#include "./av1_rtcd.h"
#include "aom_dsp/inv_txfm.h"
#include "aom_ports/mem.h"
18
#include "av1/common/av1_inv_txfm1d_cfg.h"
19 20 21
#include "av1/common/blockd.h"
#include "av1/common/enums.h"
#include "av1/common/idct.h"
Jingning Han's avatar
Jingning Han committed
22

23
int av1_get_tx_scale(const TX_SIZE tx_size) {
24 25 26 27 28 29 30
  if (txsize_sqr_up_map[tx_size] == TX_32X32) return 1;
#if CONFIG_TX64X64
  else if (txsize_sqr_up_map[tx_size] == TX_64X64)
    return 2;
#endif  // CONFIG_TX64X64
  else
    return 0;
31 32
}

33 34 35
// NOTE: The implementations of all inverse transforms need to be aware of the
// fact that input and output could be the same buffer.

36
#if CONFIG_EXT_TX
Debargha Mukherjee's avatar
Debargha Mukherjee committed
37 38 39 40 41 42 43 44
// 4-point inverse identity transform: each coefficient is multiplied by Sqrt2
// and then reduced with dct_const_round_shift(). Element k of the output
// depends only on element k of the input, so in-place use is safe.
static void iidtx4_c(const tran_low_t *input, tran_low_t *output) {
  for (int k = 0; k < 4; ++k) {
    output[k] = (tran_low_t)dct_const_round_shift(input[k] * Sqrt2);
  }
}

// 8-point inverse identity transform: each coefficient is multiplied by 2.
// Element i of the output depends only on element i of the input, so in-place
// use is safe.
static void iidtx8_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  for (i = 0; i < 8; ++i) output[i] = input[i] * 2;
}

// 16-point inverse identity transform: each coefficient is multiplied by
// 2 * Sqrt2 and then reduced with dct_const_round_shift(). Element k of the
// output depends only on element k of the input, so in-place use is safe.
static void iidtx16_c(const tran_low_t *input, tran_low_t *output) {
  for (int k = 0; k < 16; ++k) {
    output[k] = (tran_low_t)dct_const_round_shift(input[k] * 2 * Sqrt2);
  }
}

// 32-point inverse identity transform: each coefficient is multiplied by 4.
// Element i of the output depends only on element i of the input, so in-place
// use is safe.
static void iidtx32_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  for (i = 0; i < 32; ++i) output[i] = input[i] * 4;
}
58 59 60 61 62 63 64 65

#if CONFIG_TX64X64
// 64-point inverse identity transform: each coefficient is multiplied by
// 4 * Sqrt2 and then reduced with dct_const_round_shift(). Element k of the
// output depends only on element k of the input, so in-place use is safe.
static void iidtx64_c(const tran_low_t *input, tran_low_t *output) {
  for (int k = 0; k < 64; ++k) {
    output[k] = (tran_low_t)dct_const_round_shift(input[k] * 4 * Sqrt2);
  }
}
#endif  // CONFIG_TX64X64
66
#endif  // CONFIG_EXT_TX
Debargha Mukherjee's avatar
Debargha Mukherjee committed
67

68
// For use in lieu of ADST
69 70 71 72 73 74 75
// 32-point "half-right" inverse transform.
//   output[0..15]  = input[16..31] * 4
//   output[16..31] = aom_idct16_c applied to input[0..15] scaled by Sqrt2
// The first half of the input is copied (scaled) into a local buffer before
// any element of output is written, and input[16..31] is consumed before
// output[16..31] is produced, so the routine works when input == output.
static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  tran_low_t inputhalf[16];
  // Multiply input by sqrt(2)
  for (i = 0; i < 16; ++i) {
    inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
  }
  for (i = 0; i < 16; ++i) {
    output[i] = input[16 + i] * 4;
  }
  aom_idct16_c(inputhalf, output + 16);
  // Note overall scaling factor is 4 times orthogonal
}

83 84 85 86 87
#if CONFIG_TX64X64
// 64-point inverse DCT for the column pass. Adapts the tran_low_t interface
// to av1_idct64_new(), which works on int32_t arrays: the input is copied to
// a local int32_t buffer, transformed with the column cos-bit / stage-range
// tables, and the result is copied back out. Because local buffers are used,
// input and output may be the same array.
static void idct64_col_c(const tran_low_t *input, tran_low_t *output) {
  int32_t in[64], out[64];
  int i;
  for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
  av1_idct64_new(in, out, inv_cos_bit_col_dct_64, inv_stage_range_col_dct_64);
  for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}

// 64-point inverse DCT for the row pass. Adapts the tran_low_t interface to
// av1_idct64_new(), which works on int32_t arrays: the input is copied to a
// local int32_t buffer, transformed with the row cos-bit / stage-range
// tables, and the result is copied back out. Because local buffers are used,
// input and output may be the same array.
static void idct64_row_c(const tran_low_t *input, tran_low_t *output) {
  int32_t in[64], out[64];
  int i;
  for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
  av1_idct64_new(in, out, inv_cos_bit_row_dct_64, inv_stage_range_row_dct_64);
  for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}

// For use in lieu of ADST
// 64-point "half-right" inverse transform.
//   output[0..31]  = round_shift(input[32..63] * 4 * Sqrt2)
//   output[32..63] = aom_idct32_c applied to input[0..31] scaled by Sqrt2
// The scaled first half is staged in a local buffer before output is touched.
static void ihalfright64_c(const tran_low_t *input, tran_low_t *output) {
  tran_low_t inputhalf[32];

  // Stage sqrt(2) * (first half of the input).
  for (int k = 0; k < 32; ++k) {
    inputhalf[k] = (tran_low_t)dct_const_round_shift(input[k] * Sqrt2);
  }

  // The second half of the input is passed straight through, scaled.
  for (int k = 0; k < 32; ++k) {
    output[k] = (tran_low_t)dct_const_round_shift(input[32 + k] * 4 * Sqrt2);
  }

  aom_idct32_c(inputhalf, output + 32);
  // Note overall scaling factor is 4 * sqrt(2)  times orthogonal
}
#endif  // CONFIG_TX64X64

Jingning Han's avatar
Jingning Han committed
116
// Inverse identity transform and add.
117
#if CONFIG_EXT_TX
118
static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
Jingning Han's avatar
Jingning Han committed
119
                           int bs, int tx_type) {
120
  int r, c;
121
  const int shift = bs < 32 ? 3 : (bs < 64 ? 2 : 1);
Debargha Mukherjee's avatar
Debargha Mukherjee committed
122
  if (tx_type == IDTX) {
Jingning Han's avatar
Jingning Han committed
123 124
    for (r = 0; r < bs; ++r) {
      for (c = 0; c < bs; ++c)
Debargha Mukherjee's avatar
Debargha Mukherjee committed
125 126 127
        dest[c] = clip_pixel_add(dest[c], input[c] >> shift);
      dest += stride;
      input += bs;
Jingning Han's avatar
Jingning Han committed
128
    }
129 130
  }
}
131
#endif  // CONFIG_EXT_TX
132

clang-format's avatar
clang-format committed
133 134 135 136 137
// Re-targets a (pointer, stride) pair so that walking it forwards visits the
// rows of a `size`-row buffer in reverse order: the pointer is moved to the
// last row and the stride is negated. Both `dest` and `stride` must be
// modifiable lvalues; they are updated in place.
#define FLIPUD_PTR(dest, stride, size)    \
  do {                                    \
    (dest) += ((size)-1) * (stride);      \
    (stride) = -(stride);                 \
  } while (0)
138

139
#if CONFIG_EXT_TX
clang-format's avatar
clang-format committed
140 141 142
// Adjusts the destination and/or source (pointer, stride) pairs so that the
// generic "sum with the destination" loop of the callers realises the
// flipping required by the FLIPADST transform types.
//   dst, dstride  destination pointer and row stride (updated in place)
//   src, sstride  transform-output pointer and stride (updated in place)
//   tx_type       transform type selecting which flips to apply
//   sizey         row count used when flipping dst
//   sizex         row count used when flipping src
// Transform types without a flip leave everything untouched; an unknown
// tx_type trips an assert.
static void maybe_flip_strides(uint8_t **dst, int *dstride, tran_low_t **src,
                               int *sstride, int tx_type, int sizey,
                               int sizex) {
  // Note that the transpose of src will be added to dst. In order to LR
  // flip the addends (in dst coordinates), we UD flip the src. To UD flip
  // the addends, we UD flip the dst.
  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case IDTX:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST: break;
    case FLIPADST_DCT:
    case FLIPADST_ADST:
    case V_FLIPADST:
      // flip UD
      FLIPUD_PTR(*dst, *dstride, sizey);
      break;
    case DCT_FLIPADST:
    case ADST_FLIPADST:
    case H_FLIPADST:
      // flip LR
      FLIPUD_PTR(*src, *sstride, sizex);
      break;
    case FLIPADST_FLIPADST:
      // flip UD
      FLIPUD_PTR(*dst, *dstride, sizey);
      // flip LR
      FLIPUD_PTR(*src, *sstride, sizex);
      break;
    default: assert(0); break;
  }
}
177
#endif  // CONFIG_EXT_TX
178

179
#if CONFIG_HIGHBITDEPTH
180
#if CONFIG_EXT_TX && CONFIG_TX64X64
181
static void highbd_inv_idtx_add_c(const tran_low_t *input, uint8_t *dest8,
182
                                  int stride, int bs, int tx_type, int bd) {
183 184 185
  int r, c;
  const int shift = bs < 32 ? 3 : 2;
  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
186

Debargha Mukherjee's avatar
Debargha Mukherjee committed
187
  if (tx_type == IDTX) {
188 189
    for (r = 0; r < bs; ++r) {
      for (c = 0; c < bs; ++c)
Debargha Mukherjee's avatar
Debargha Mukherjee committed
190 191 192
        dest[c] = highbd_clip_pixel_add(dest[c], input[c] >> shift, bd);
      dest += stride;
      input += bs;
193
    }
194 195
  }
}
196
#endif  // CONFIG_EXT_TX && CONFIG_TX64X64
197
#endif  // CONFIG_HIGHBITDEPTH
198

Lester Lu's avatar
Lester Lu committed
199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248
#if CONFIG_LGT
// 4-point inverse LGT: computes output = round_shift(lgtmtx^T * input), where
// lgtmtx points at a row-major 4x4 matrix. All sums are accumulated locally
// before output is written, so input and output may be the same array.
void ilgt4(const tran_low_t *input, tran_low_t *output,
           const tran_high_t *lgtmtx) {
  // An all-zero input produces an all-zero output; skip the matrix product.
  if ((input[0] | input[1] | input[2] | input[3]) == 0) {
    for (int k = 0; k < 4; ++k) output[k] = 0;
    return;
  }

  // acc[col] = sum over row = 0..3 of lgtmtx[row][col] * input[row]
  tran_high_t acc[4] = { 0 };
  for (int row = 0; row < 4; ++row) {
    for (int col = 0; col < 4; ++col) {
      acc[col] += lgtmtx[row * 4 + col] * input[row];
    }
  }

  for (int k = 0; k < 4; ++k) {
    output[k] = WRAPLOW(dct_const_round_shift(acc[k]));
  }
}

// 8-point inverse LGT: computes output = round_shift(lgtmtx^T * input), where
// lgtmtx points at a row-major 8x8 matrix. All sums are accumulated locally
// before output is written, so input and output may be the same array.
void ilgt8(const tran_low_t *input, tran_low_t *output,
           const tran_high_t *lgtmtx) {
  // acc[col] = sum over row = 0..7 of lgtmtx[row][col] * input[row]
  tran_high_t acc[8] = { 0 };
  for (int row = 0; row < 8; ++row) {
    for (int col = 0; col < 8; ++col) {
      acc[col] += lgtmtx[row * 8 + col] * input[row];
    }
  }

  for (int k = 0; k < 8; ++k) {
    output[k] = WRAPLOW(dct_const_round_shift(acc[k]));
  }
}

// The get_inv_lgt functions return 1 if LGT is chosen to apply, and 0 otherwise
// Decides whether the 4-point LGT replaces the 1-D transform tx_orig.
// LGT is used only when tx_orig is aom_iadst4_c; in that case the first ntx
// entries of lgtmtx are pointed at lgt4_170 (inter) or lgt4_140 (otherwise),
// chosen by inv_txfm_param->is_inter, and 1 is returned. Otherwise lgtmtx is
// left untouched and 0 is returned.
int get_inv_lgt4(transform_1d tx_orig, const INV_TXFM_PARAM *inv_txfm_param,
                 const tran_high_t *lgtmtx[], int ntx) {
  if (tx_orig != &aom_iadst4_c) return 0;

  // inter/intra split
  for (int k = 0; k < ntx; ++k) {
    if (inv_txfm_param->is_inter)
      lgtmtx[k] = &lgt4_170[0][0];
    else
      lgtmtx[k] = &lgt4_140[0][0];
  }
  return 1;
}

// Decides whether the 8-point LGT replaces the 1-D transform tx_orig.
// LGT is used only when tx_orig is aom_iadst8_c; in that case the first ntx
// entries of lgtmtx are pointed at lgt8_170 (inter) or lgt8_150 (otherwise),
// chosen by inv_txfm_param->is_inter, and 1 is returned. Otherwise lgtmtx is
// left untouched and 0 is returned.
int get_inv_lgt8(transform_1d tx_orig, const INV_TXFM_PARAM *inv_txfm_param,
                 const tran_high_t *lgtmtx[], int ntx) {
  if (tx_orig != &aom_iadst8_c) return 0;

  // inter/intra split
  for (int k = 0; k < ntx; ++k) {
    if (inv_txfm_param->is_inter)
      lgtmtx[k] = &lgt8_170[0][0];
    else
      lgtmtx[k] = &lgt8_150[0][0];
  }
  return 1;
}
#endif  // CONFIG_LGT

Yaowu Xu's avatar
Yaowu Xu committed
249
void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
250 251
                         const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
252
  static const transform_2d IHT_4[] = {
Luca Barbato's avatar
Luca Barbato committed
253 254 255 256
    { aom_idct4_c, aom_idct4_c },    // DCT_DCT  = 0
    { aom_iadst4_c, aom_idct4_c },   // ADST_DCT = 1
    { aom_idct4_c, aom_iadst4_c },   // DCT_ADST = 2
    { aom_iadst4_c, aom_iadst4_c },  // ADST_ADST = 3
257
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
258 259 260 261 262 263 264 265 266 267 268 269
    { aom_iadst4_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx4_c },          // IDTX
    { aom_idct4_c, iidtx4_c },       // V_DCT
    { iidtx4_c, aom_idct4_c },       // H_DCT
    { aom_iadst4_c, iidtx4_c },      // V_ADST
    { iidtx4_c, aom_iadst4_c },      // H_ADST
    { aom_iadst4_c, iidtx4_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst4_c },      // H_FLIPADST
270
#endif
271 272 273
  };

  int i, j;
274
  tran_low_t tmp[4][4];
275 276 277
  tran_low_t out[4][4];
  tran_low_t *outp = &out[0][0];
  int outstride = 4;
278

279 280 281 282
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif

Lester Lu's avatar
Lester Lu committed
283 284 285 286 287 288 289
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[4];
  const tran_high_t *lgtmtx_row[4];
  int use_lgt_col = get_inv_lgt4(IHT_4[tx_type].cols, param, lgtmtx_col, 4);
  int use_lgt_row = get_inv_lgt4(IHT_4[tx_type].rows, param, lgtmtx_row, 4);
#endif

290 291
  // inverse transform row vectors
  for (i = 0; i < 4; ++i) {
Lester Lu's avatar
Lester Lu committed
292 293 294 295 296 297
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt4(input, out[i], lgtmtx_row[i]);
    else
#endif
      IHT_4[tx_type].rows(input, out[i]);
clang-format's avatar
clang-format committed
298
    input += 4;
299 300 301
  }

  // transpose
302 303 304
  for (i = 0; i < 4; i++) {
    for (j = 0; j < 4; j++) {
      tmp[j][i] = out[i][j];
305
    }
306 307 308 309
  }

  // inverse transform column vectors
  for (i = 0; i < 4; ++i) {
Lester Lu's avatar
Lester Lu committed
310 311 312 313 314 315
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt4(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_4[tx_type].cols(tmp[i], out[i]);
316 317 318
  }

#if CONFIG_EXT_TX
319
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4, 4);
320 321 322 323
#endif

  // Sum with the destination
  for (i = 0; i < 4; ++i) {
324
    for (j = 0; j < 4; ++j) {
325 326 327
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
328 329 330 331
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
332
void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
333 334
                         const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
335 336 337
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
338
  static const transform_2d IHT_4x8[] = {
Luca Barbato's avatar
Luca Barbato committed
339 340 341 342
    { aom_idct8_c, aom_idct4_c },    // DCT_DCT
    { aom_iadst8_c, aom_idct4_c },   // ADST_DCT
    { aom_idct8_c, aom_iadst4_c },   // DCT_ADST
    { aom_iadst8_c, aom_iadst4_c },  // ADST_ADST
343
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
344 345 346 347 348 349 350 351 352 353 354 355
    { aom_iadst8_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx4_c },          // IDTX
    { aom_idct8_c, iidtx4_c },       // V_DCT
    { iidtx8_c, aom_idct4_c },       // H_DCT
    { aom_iadst8_c, iidtx4_c },      // V_ADST
    { iidtx8_c, aom_iadst4_c },      // H_ADST
    { aom_iadst8_c, iidtx4_c },      // V_FLIPADST
    { iidtx8_c, aom_iadst4_c },      // H_FLIPADST
356
#endif
357 358
  };

359 360
  const int n = 4;
  const int n2 = 8;
361
  int i, j;
362
  tran_low_t out[4][8], tmp[4][8], outtmp[4];
363
  tran_low_t *outp = &out[0][0];
364
  int outstride = n2;
365

Lester Lu's avatar
Lester Lu committed
366 367 368 369 370 371 372
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[4];
  const tran_high_t *lgtmtx_row[8];
  int use_lgt_col = get_inv_lgt8(IHT_4x8[tx_type].cols, param, lgtmtx_col, 4);
  int use_lgt_row = get_inv_lgt4(IHT_4x8[tx_type].rows, param, lgtmtx_row, 8);
#endif

373
  // inverse transform row vectors and transpose
374
  for (i = 0; i < n2; ++i) {
Lester Lu's avatar
Lester Lu committed
375 376 377 378 379 380
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt4(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_4x8[tx_type].rows(input, outtmp);
381
    for (j = 0; j < n; ++j)
382
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
383
    input += n;
384 385 386
  }

  // inverse transform column vectors
387
  for (i = 0; i < n; ++i) {
Lester Lu's avatar
Lester Lu committed
388 389 390 391 392 393
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt8(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_4x8[tx_type].cols(tmp[i], out[i]);
394 395
  }

396
#if CONFIG_EXT_TX
397
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
398
#endif
399 400

  // Sum with the destination
401 402
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
403 404 405 406 407 408 409
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
410
void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
411 412
                         const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
413 414 415
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
416
  static const transform_2d IHT_8x4[] = {
Luca Barbato's avatar
Luca Barbato committed
417 418 419 420
    { aom_idct4_c, aom_idct8_c },    // DCT_DCT
    { aom_iadst4_c, aom_idct8_c },   // ADST_DCT
    { aom_idct4_c, aom_iadst8_c },   // DCT_ADST
    { aom_iadst4_c, aom_iadst8_c },  // ADST_ADST
421
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
422 423 424 425 426 427 428 429 430 431 432 433
    { aom_iadst4_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx8_c },          // IDTX
    { aom_idct4_c, iidtx8_c },       // V_DCT
    { iidtx4_c, aom_idct8_c },       // H_DCT
    { aom_iadst4_c, iidtx8_c },      // V_ADST
    { iidtx4_c, aom_iadst8_c },      // H_ADST
    { aom_iadst4_c, iidtx8_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst8_c },      // H_FLIPADST
434
#endif
435
  };
436

437 438
  const int n = 4;
  const int n2 = 8;
439 440

  int i, j;
441
  tran_low_t out[8][4], tmp[8][4], outtmp[8];
442
  tran_low_t *outp = &out[0][0];
443
  int outstride = n;
444

Lester Lu's avatar
Lester Lu committed
445 446 447 448 449 450 451
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[8];
  const tran_high_t *lgtmtx_row[4];
  int use_lgt_col = get_inv_lgt4(IHT_8x4[tx_type].cols, param, lgtmtx_col, 8);
  int use_lgt_row = get_inv_lgt8(IHT_8x4[tx_type].rows, param, lgtmtx_row, 4);
#endif

452
  // inverse transform row vectors and transpose
453
  for (i = 0; i < n; ++i) {
Lester Lu's avatar
Lester Lu committed
454 455 456 457 458 459
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt8(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_8x4[tx_type].rows(input, outtmp);
460
    for (j = 0; j < n2; ++j)
461
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
462
    input += n2;
463 464 465
  }

  // inverse transform column vectors
466
  for (i = 0; i < n2; ++i) {
Lester Lu's avatar
Lester Lu committed
467 468 469 470 471 472
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt4(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_8x4[tx_type].cols(tmp[i], out[i]);
473 474
  }

475
#if CONFIG_EXT_TX
476
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
477
#endif
478 479

  // Sum with the destination
480 481
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
482 483 484 485 486 487 488
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

489
void av1_iht4x16_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
490 491
                          const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
492 493 494
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518
  static const transform_2d IHT_4x16[] = {
    { aom_idct16_c, aom_idct4_c },    // DCT_DCT
    { aom_iadst16_c, aom_idct4_c },   // ADST_DCT
    { aom_idct16_c, aom_iadst4_c },   // DCT_ADST
    { aom_iadst16_c, aom_iadst4_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst16_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct16_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx4_c },          // IDTX
    { aom_idct16_c, iidtx4_c },       // V_DCT
    { iidtx16_c, aom_idct4_c },       // H_DCT
    { aom_iadst16_c, iidtx4_c },      // V_ADST
    { iidtx16_c, aom_iadst4_c },      // H_ADST
    { aom_iadst16_c, iidtx4_c },      // V_FLIPADST
    { iidtx16_c, aom_iadst4_c },      // H_FLIPADST
#endif
  };

  const int n = 4;
  const int n4 = 16;
  int i, j;
519
  tran_low_t out[4][16], tmp[4][16], outtmp[4];
520 521 522
  tran_low_t *outp = &out[0][0];
  int outstride = n4;

Lester Lu's avatar
Lester Lu committed
523 524 525 526 527
#if CONFIG_LGT
  const tran_high_t *lgtmtx_row[16];
  int use_lgt_row = get_inv_lgt4(IHT_4x16[tx_type].rows, param, lgtmtx_row, 16);
#endif

528 529
  // inverse transform row vectors and transpose
  for (i = 0; i < n4; ++i) {
Lester Lu's avatar
Lester Lu committed
530 531 532 533 534 535
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt4(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_4x16[tx_type].rows(input, outtmp);
536
    for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
537 538 539 540
    input += n;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
541 542 543
  for (i = 0; i < n; ++i) {
    IHT_4x16[tx_type].cols(tmp[i], out[i]);
  }
544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n4, n);
#endif

  // Sum with the destination
  for (i = 0; i < n4; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

void av1_iht16x4_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
560 561
                          const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
562 563 564
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584
  static const transform_2d IHT_16x4[] = {
    { aom_idct4_c, aom_idct16_c },    // DCT_DCT
    { aom_iadst4_c, aom_idct16_c },   // ADST_DCT
    { aom_idct4_c, aom_iadst16_c },   // DCT_ADST
    { aom_iadst4_c, aom_iadst16_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst4_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst16_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx16_c },          // IDTX
    { aom_idct4_c, iidtx16_c },       // V_DCT
    { iidtx4_c, aom_idct16_c },       // H_DCT
    { aom_iadst4_c, iidtx16_c },      // V_ADST
    { iidtx4_c, aom_iadst16_c },      // H_ADST
    { aom_iadst4_c, iidtx16_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst16_c },      // H_FLIPADST
#endif
  };
585

586 587 588 589
  const int n = 4;
  const int n4 = 16;

  int i, j;
590
  tran_low_t out[16][4], tmp[16][4], outtmp[16];
591 592 593
  tran_low_t *outp = &out[0][0];
  int outstride = n;

Lester Lu's avatar
Lester Lu committed
594 595 596 597 598
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[16];
  int use_lgt_col = get_inv_lgt4(IHT_16x4[tx_type].cols, param, lgtmtx_col, 16);
#endif

599 600 601
  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_16x4[tx_type].rows(input, outtmp);
602
    for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j];
603 604 605 606
    input += n4;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
607 608 609 610 611 612 613 614
  for (i = 0; i < n4; ++i) {
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt4(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_16x4[tx_type].cols(tmp[i], out[i]);
  }
615 616 617 618 619 620 621 622 623 624 625 626 627 628 629

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n4);
#endif

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n4; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
630
void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
631 632
                           const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
633 634 635
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
636
  static const transform_2d IHT_8x16[] = {
Luca Barbato's avatar
Luca Barbato committed
637 638 639 640
    { aom_idct16_c, aom_idct8_c },    // DCT_DCT
    { aom_iadst16_c, aom_idct8_c },   // ADST_DCT
    { aom_idct16_c, aom_iadst8_c },   // DCT_ADST
    { aom_iadst16_c, aom_iadst8_c },  // ADST_ADST
641
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
642 643 644 645 646 647 648 649 650 651 652 653
    { aom_iadst16_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct16_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx8_c },          // IDTX
    { aom_idct16_c, iidtx8_c },       // V_DCT
    { iidtx16_c, aom_idct8_c },       // H_DCT
    { aom_iadst16_c, iidtx8_c },      // V_ADST
    { iidtx16_c, aom_iadst8_c },      // H_ADST
    { aom_iadst16_c, iidtx8_c },      // V_FLIPADST
    { iidtx16_c, aom_iadst8_c },      // H_FLIPADST
654
#endif
655 656 657 658 659
  };

  const int n = 8;
  const int n2 = 16;
  int i, j;
660
  tran_low_t out[8][16], tmp[8][16], outtmp[8];
661 662 663
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

Lester Lu's avatar
Lester Lu committed
664 665 666 667 668
#if CONFIG_LGT
  const tran_high_t *lgtmtx_row[16];
  int use_lgt_row = get_inv_lgt8(IHT_8x16[tx_type].rows, param, lgtmtx_row, 16);
#endif

669 670
  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
Lester Lu's avatar
Lester Lu committed
671 672 673 674 675 676
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt8(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_8x16[tx_type].rows(input, outtmp);
677
    for (j = 0; j < n; ++j)
678
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
679
    input += n;
680 681 682 683
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
684
    IHT_8x16[tx_type].cols(tmp[i], out[i]);
685 686
  }

687
#if CONFIG_EXT_TX
688
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
689
#endif
690 691 692 693 694 695 696 697 698 699 700

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
701
void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
702 703
                           const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
704 705 706
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
707
  static const transform_2d IHT_16x8[] = {
Luca Barbato's avatar
Luca Barbato committed
708 709 710 711
    { aom_idct8_c, aom_idct16_c },    // DCT_DCT
    { aom_iadst8_c, aom_idct16_c },   // ADST_DCT
    { aom_idct8_c, aom_iadst16_c },   // DCT_ADST
    { aom_iadst8_c, aom_iadst16_c },  // ADST_ADST
712
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
713 714 715 716 717 718 719 720 721 722 723 724
    { aom_iadst8_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst16_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // ADST_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx16_c },          // IDTX
    { aom_idct8_c, iidtx16_c },       // V_DCT
    { iidtx8_c, aom_idct16_c },       // H_DCT
    { aom_iadst8_c, iidtx16_c },      // V_ADST
    { iidtx8_c, aom_iadst16_c },      // H_ADST
    { aom_iadst8_c, iidtx16_c },      // V_FLIPADST
    { iidtx8_c, aom_iadst16_c },      // H_FLIPADST
725
#endif
726
  };
727

728 729 730 731
  const int n = 8;
  const int n2 = 16;

  int i, j;
732
  tran_low_t out[16][8], tmp[16][8], outtmp[16];
733 734 735
  tran_low_t *outp = &out[0][0];
  int outstride = n;

Lester Lu's avatar
Lester Lu committed
736 737 738 739 740
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[16];
  int use_lgt_col = get_inv_lgt8(IHT_16x8[tx_type].cols, param, lgtmtx_col, 16);
#endif

741 742 743 744
  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_16x8[tx_type].rows(input, outtmp);
    for (j = 0; j < n2; ++j)
745
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
746
    input += n2;
747 748 749 750
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) {
Lester Lu's avatar
Lester Lu committed
751 752 753 754 755 756
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt8(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_16x8[tx_type].cols(tmp[i], out[i]);
757 758
  }

759
#if CONFIG_EXT_TX
760
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
761
#endif
762 763 764 765 766 767 768 769 770 771 772

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

773
void av1_iht8x32_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
774 775
                           const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
776 777 778
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802
  static const transform_2d IHT_8x32[] = {
    { aom_idct32_c, aom_idct8_c },     // DCT_DCT
    { ihalfright32_c, aom_idct8_c },   // ADST_DCT
    { aom_idct32_c, aom_iadst8_c },    // DCT_ADST
    { ihalfright32_c, aom_iadst8_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { ihalfright32_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct32_c, aom_iadst8_c },    // DCT_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // ADST_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx8_c },           // IDTX
    { aom_idct32_c, iidtx8_c },        // V_DCT
    { iidtx32_c, aom_idct8_c },        // H_DCT
    { ihalfright32_c, iidtx8_c },      // V_ADST
    { iidtx32_c, aom_iadst8_c },       // H_ADST
    { ihalfright32_c, iidtx8_c },      // V_FLIPADST
    { iidtx32_c, aom_iadst8_c },       // H_FLIPADST
#endif
  };

  const int n = 8;
  const int n4 = 32;
  int i, j;
803
  tran_low_t out[8][32], tmp[8][32], outtmp[8];
804 805 806
  tran_low_t *outp = &out[0][0];
  int outstride = n4;

Lester Lu's avatar
Lester Lu committed
807 808 809 810 811
#if CONFIG_LGT
  const tran_high_t *lgtmtx_row[32];
  int use_lgt_row = get_inv_lgt8(IHT_8x32[tx_type].rows, param, lgtmtx_row, 32);
#endif

812 813
  // inverse transform row vectors and transpose
  for (i = 0; i < n4; ++i) {
Lester Lu's avatar
Lester Lu committed
814 815 816 817 818 819
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt8(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_8x32[tx_type].rows(input, outtmp);
820
    for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
821 822 823 824
    input += n;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
825 826 827
  for (i = 0; i < n; ++i) {
    IHT_8x32[tx_type].cols(tmp[i], out[i]);
  }
828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n4, n);
#endif

  // Sum with the destination
  for (i = 0; i < n4; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

void av1_iht32x8_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
844 845
                           const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
846 847 848
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868
  static const transform_2d IHT_32x8[] = {
    { aom_idct8_c, aom_idct32_c },     // DCT_DCT
    { aom_iadst8_c, aom_idct32_c },    // ADST_DCT
    { aom_idct8_c, ihalfright32_c },   // DCT_ADST
    { aom_iadst8_c, ihalfright32_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst8_c, aom_idct32_c },    // FLIPADST_DCT
    { aom_idct8_c, ihalfright32_c },   // DCT_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // ADST_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx32_c },           // IDTX
    { aom_idct8_c, iidtx32_c },        // V_DCT
    { iidtx8_c, aom_idct32_c },        // H_DCT
    { aom_iadst8_c, iidtx32_c },       // V_ADST
    { iidtx8_c, ihalfright32_c },      // H_ADST
    { aom_iadst8_c, iidtx32_c },       // V_FLIPADST
    { iidtx8_c, ihalfright32_c },      // H_FLIPADST
#endif
  };
869

870 871 872 873
  const int n = 8;
  const int n4 = 32;

  int i, j;
874
  tran_low_t out[32][8], tmp[32][8], outtmp[32];
875 876 877
  tran_low_t *outp = &out[0][0];
  int outstride = n;

Lester Lu's avatar
Lester Lu committed
878 879 880 881 882
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[32];
  int use_lgt_col = get_inv_lgt4(IHT_32x8[tx_type].cols, param, lgtmtx_col, 32);
#endif

883 884 885
  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_32x8[tx_type].rows(input, outtmp);
886
    for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j];
887 888 889 890
    input += n4;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
891 892 893 894 895 896 897 898
  for (i = 0; i < n4; ++i) {
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt8(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_32x8[tx_type].cols(tmp[i], out[i]);
  }
899 900 901 902 903 904 905 906 907 908 909 910 911 912 913

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n4);
#endif

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n4; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
914
void av1_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
915 916
                            const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
917 918 919
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
920
  static const transform_2d IHT_16x32[] = {
Luca Barbato's avatar
Luca Barbato committed
921 922 923 924
    { aom_idct32_c, aom_idct16_c },     // DCT_DCT
    { ihalfright32_c, aom_idct16_c },   // ADST_DCT
    { aom_idct32_c, aom_iadst16_c },    // DCT_ADST
    { ihalfright32_c, aom_iadst16_c },  // ADST_ADST
925
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
926 927 928 929 930 931 932 933 934 935 936 937
    { ihalfright32_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct32_c, aom_iadst16_c },    // DCT_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // ADST_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx16_c },           // IDTX
    { aom_idct32_c, iidtx16_c },        // V_DCT
    { iidtx32_c, aom_idct16_c },        // H_DCT
    { ihalfright32_c, iidtx16_c },      // V_ADST
    { iidtx32_c, aom_iadst16_c },       // H_ADST
    { ihalfright32_c, iidtx16_c },      // V_FLIPADST
    { iidtx32_c, aom_iadst16_c },       // H_FLIPADST
938
#endif
939 940 941 942 943
  };

  const int n = 16;
  const int n2 = 32;
  int i, j;
944
  tran_low_t out[16][32], tmp[16][32], outtmp[16];
945 946 947 948 949 950 951
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
    IHT_16x32[tx_type].rows(input, outtmp);
    for (j = 0; j < n; ++j)
952
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
953
    input += n;
954 955 956
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
957
  for (i = 0; i < n; ++i) IHT_16x32[tx_type].cols(tmp[i], out[i]);
958

959
#if CONFIG_EXT_TX
960
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
961
#endif
962 963 964 965 966 967 968 969 970 971 972

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
973
void av1_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
974 975
                            const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
976 977 978
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
979
  static const transform_2d IHT_32x16[] = {
Luca Barbato's avatar
Luca Barbato committed
980 981 982 983
    { aom_idct16_c, aom_idct32_c },     // DCT_DCT
    { aom_iadst16_c, aom_idct32_c },    // ADST_DCT
    { aom_idct16_c, ihalfright32_c },   // DCT_ADST
    { aom_iadst16_c, ihalfright32_c },  // ADST_ADST
984
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
985 986 987 988 989 990 991 992 993 994 995 996
    { aom_iadst16_c, aom_idct32_c },    // FLIPADST_DCT
    { aom_idct16_c, ihalfright32_c },   // DCT_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // ADST_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx32_c },           // IDTX
    { aom_idct16_c, iidtx32_c },        // V_DCT
    { iidtx16_c, aom_idct32_c },        // H_DCT
    { aom_iadst16_c, iidtx32_c },       // V_ADST
    { iidtx16_c, ihalfright32_c },      // H_ADST
    { aom_iadst16_c, iidtx32_c },       // V_FLIPADST
    { iidtx16_c, ihalfright32_c },      // H_FLIPADST
997
#endif
998 999 1000 1001 1002
  };
  const int n = 16;
  const int n2 = 32;

  int i, j;
1003
  tran_low_t out[32][16], tmp[32][16], outtmp[32];
1004 1005 1006 1007 1008 1009 1010
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_32x16[tx_type].rows(input, outtmp);
    for (j = 0; j < n2; ++j)
1011
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
1012
    input += n2;
1013 1014 1015
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
1016
  for (i = 0; i < n2; ++i) IHT_32x16[tx_type].cols(tmp[i], out[i]);
1017

1018
#if CONFIG_EXT_TX
1019
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);