/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <math.h>

Yaowu Xu's avatar
Yaowu Xu committed
14
#include "./aom_dsp_rtcd.h"
Geza Lore's avatar
Geza Lore committed
15 16 17
#include "./av1_rtcd.h"
#include "aom_dsp/inv_txfm.h"
#include "aom_ports/mem.h"
18
#include "av1/common/av1_inv_txfm1d_cfg.h"
19 20 21
#include "av1/common/blockd.h"
#include "av1/common/enums.h"
#include "av1/common/idct.h"
Jingning Han's avatar
Jingning Han committed
22

23
int av1_get_tx_scale(const TX_SIZE tx_size) {
24 25 26 27 28 29 30
  if (txsize_sqr_up_map[tx_size] == TX_32X32) return 1;
#if CONFIG_TX64X64
  else if (txsize_sqr_up_map[tx_size] == TX_64X64)
    return 2;
#endif  // CONFIG_TX64X64
  else
    return 0;
31 32
}

33 34 35
// NOTE: The implementation of all inverses need to be aware of the fact
// that input and output could be the same buffer.

36
#if CONFIG_EXT_TX
Debargha Mukherjee's avatar
Debargha Mukherjee committed
37 38 39 40 41 42 43 44
// 4-point inverse identity transform: each coefficient is multiplied by Sqrt2
// and reduced with dct_const_round_shift(). Elements are processed one at a
// time, so input and output may be the same buffer.
static void iidtx4_c(const tran_low_t *input, tran_low_t *output) {
  for (int k = 0; k < 4; ++k) {
    output[k] = (tran_low_t)dct_const_round_shift(input[k] * Sqrt2);
  }
}

// 8-point inverse identity transform: each coefficient is multiplied by 2.
// Elements are processed one at a time, so input and output may be the same
// buffer.
static void iidtx8_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  for (i = 0; i < 8; ++i) output[i] = input[i] * 2;
}

// 16-point inverse identity transform: each coefficient is multiplied by
// 2 * Sqrt2 and reduced with dct_const_round_shift(). Elements are processed
// one at a time, so input and output may be the same buffer.
static void iidtx16_c(const tran_low_t *input, tran_low_t *output) {
  for (int k = 0; k < 16; ++k) {
    output[k] = (tran_low_t)dct_const_round_shift(input[k] * 2 * Sqrt2);
  }
}

// 32-point inverse identity transform: each coefficient is multiplied by 4.
// Elements are processed one at a time, so input and output may be the same
// buffer.
static void iidtx32_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  for (i = 0; i < 32; ++i) output[i] = input[i] * 4;
}
58 59 60 61 62 63 64 65

#if CONFIG_TX64X64
// 64-point inverse identity transform: each coefficient is multiplied by
// 4 * Sqrt2 and reduced with dct_const_round_shift(). Elements are processed
// one at a time, so input and output may be the same buffer.
static void iidtx64_c(const tran_low_t *input, tran_low_t *output) {
  for (int k = 0; k < 64; ++k) {
    output[k] = (tran_low_t)dct_const_round_shift(input[k] * 4 * Sqrt2);
  }
}
#endif  // CONFIG_TX64X64
66
#endif  // CONFIG_EXT_TX
Debargha Mukherjee's avatar
Debargha Mukherjee committed
67

68
// For use in lieu of ADST
69 70 71 72 73 74 75
static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  tran_low_t inputhalf[16];
  // Multiply input by sqrt(2)
  for (i = 0; i < 16; ++i) {
    inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
  }
76 77 78
  for (i = 0; i < 16; ++i) {
    output[i] = input[16 + i] * 4;
  }
Luca Barbato's avatar
Luca Barbato committed
79
  aom_idct16_c(inputhalf, output + 16);
80 81 82
  // Note overall scaling factor is 4 times orthogonal
}

83 84 85 86 87
#if CONFIG_TX64X64
// 64-point inverse DCT used for the column pass. The coefficients are copied
// into an int32_t scratch array, transformed by av1_idct64_new() with the
// column cos-bit and stage-range tables, and copied back as tran_low_t.
// All input is read before any output is written, so the two may alias.
static void idct64_col_c(const tran_low_t *input, tran_low_t *output) {
  int32_t in[64], out[64];
  int i;
  for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
  av1_idct64_new(in, out, inv_cos_bit_col_dct_64, inv_stage_range_col_dct_64);
  for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}

// 64-point inverse DCT used for the row pass. The coefficients are copied
// into an int32_t scratch array, transformed by av1_idct64_new() with the
// row cos-bit and stage-range tables, and copied back as tran_low_t.
// All input is read before any output is written, so the two may alias.
static void idct64_row_c(const tran_low_t *input, tran_low_t *output) {
  int32_t in[64], out[64];
  int i;
  for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
  av1_idct64_new(in, out, inv_cos_bit_row_dct_64, inv_stage_range_row_dct_64);
  for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}

// For use in lieu of ADST.
// 64-point "half right" inverse transform:
//   - output[0..31]  = input[32..63] scaled by 4 * Sqrt2 (round-shifted)
//   - output[32..63] = aom_idct32_c() of input[0..31] pre-scaled by Sqrt2
// The overall scaling factor is 4 * sqrt(2) times orthogonal.
static void ihalfright64_c(const tran_low_t *input, tran_low_t *output) {
  tran_low_t scaled_low[32];
  const tran_low_t *const upper_in = input + 32;
  int k;

  // Stash the sqrt(2)-scaled lower half first: output may alias input.
  for (k = 0; k < 32; ++k)
    scaled_low[k] = (tran_low_t)dct_const_round_shift(input[k] * Sqrt2);

  // Upper half of the input passes straight through with a gain only.
  for (k = 0; k < 32; ++k)
    output[k] = (tran_low_t)dct_const_round_shift(upper_in[k] * 4 * Sqrt2);

  aom_idct32_c(scaled_low, output + 32);
}
#endif  // CONFIG_TX64X64

Jingning Han's avatar
Jingning Han committed
116
// Inverse identity transform and add.
117
#if CONFIG_EXT_TX
118
static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
Jingning Han's avatar
Jingning Han committed
119
                           int bs, int tx_type) {
120
  int r, c;
121
  const int shift = bs < 32 ? 3 : (bs < 64 ? 2 : 1);
Debargha Mukherjee's avatar
Debargha Mukherjee committed
122
  if (tx_type == IDTX) {
Jingning Han's avatar
Jingning Han committed
123 124
    for (r = 0; r < bs; ++r) {
      for (c = 0; c < bs; ++c)
Debargha Mukherjee's avatar
Debargha Mukherjee committed
125 126 127
        dest[c] = clip_pixel_add(dest[c], input[c] >> shift);
      dest += stride;
      input += bs;
Jingning Han's avatar
Jingning Han committed
128
    }
129 130
  }
}
131
#endif  // CONFIG_EXT_TX
132

clang-format's avatar
clang-format committed
133 134 135 136 137
// Turns a top-to-bottom walk over `size` rows into a bottom-to-top walk:
// `dest` is advanced to the start of row (size - 1) and `stride` is negated.
// Both `dest` and `stride` are modified in place and appear more than once in
// the expansion, so pass plain lvalues without side effects.
#define FLIPUD_PTR(dest, stride, size) \
  do {                                 \
    (dest) += ((size)-1) * (stride);   \
    (stride) = -(stride);              \
  } while (0)
138

139
#if CONFIG_EXT_TX
clang-format's avatar
clang-format committed
140 141 142
// Adjusts the destination and/or source pointer and stride so that the
// subsequent "add transposed src to dst" loop applies the flips required by
// the FLIPADST transform types.
//   dst, dstride  destination pointer and stride (updated in place)
//   src, sstride  source pointer and stride (updated in place)
//   tx_type       transform type selecting which flips apply
//   sizey         row count used when flipping dst
//   sizex         row count used when flipping src
// Transform types without a FLIPADST component leave everything untouched;
// an unknown tx_type trips assert(0).
static void maybe_flip_strides(uint8_t **dst, int *dstride, tran_low_t **src,
                               int *sstride, int tx_type, int sizey,
                               int sizex) {
  // Note that the transpose of src will be added to dst. In order to LR
  // flip the addends (in dst coordinates), we UD flip the src. To UD flip
  // the addends, we UD flip the dst.
  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case IDTX:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST: break;
    case FLIPADST_DCT:
    case FLIPADST_ADST:
    case V_FLIPADST:
      // flip UD
      FLIPUD_PTR(*dst, *dstride, sizey);
      break;
    case DCT_FLIPADST:
    case ADST_FLIPADST:
    case H_FLIPADST:
      // flip LR
      FLIPUD_PTR(*src, *sstride, sizex);
      break;
    case FLIPADST_FLIPADST:
      // flip UD
      FLIPUD_PTR(*dst, *dstride, sizey);
      // flip LR
      FLIPUD_PTR(*src, *sstride, sizex);
      break;
    default: assert(0); break;
  }
}
177
#endif  // CONFIG_EXT_TX
178

179
#if CONFIG_HIGHBITDEPTH
180
#if CONFIG_EXT_TX && CONFIG_TX64X64
181
static void highbd_inv_idtx_add_c(const tran_low_t *input, uint8_t *dest8,
182
                                  int stride, int bs, int tx_type, int bd) {
183 184 185
  int r, c;
  const int shift = bs < 32 ? 3 : 2;
  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
186

Debargha Mukherjee's avatar
Debargha Mukherjee committed
187
  if (tx_type == IDTX) {
188 189
    for (r = 0; r < bs; ++r) {
      for (c = 0; c < bs; ++c)
Debargha Mukherjee's avatar
Debargha Mukherjee committed
190 191 192
        dest[c] = highbd_clip_pixel_add(dest[c], input[c] >> shift, bd);
      dest += stride;
      input += bs;
193
    }
194 195
  }
}
196
#endif  // CONFIG_EXT_TX && CONFIG_TX64X64
197
#endif  // CONFIG_HIGHBITDEPTH
198

Lester Lu's avatar
Lester Lu committed
199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248
#if CONFIG_LGT
// 4-point inverse LGT: multiplies the input vector by the 4x4 matrix lgtmtx
// (stored flat, row-major) and round-shifts each sum:
//   output[j] = WRAPLOW(dct_const_round_shift(sum_i lgtmtx[i*4 + j] * input[i]))
// All sums are finished before any output is written, so input and output
// may be the same buffer.
void ilgt4(const tran_low_t *input, tran_low_t *output,
           const tran_high_t *lgtmtx) {
  // An all-zero input yields an all-zero output; skip the matrix product.
  if (input[0] == 0 && input[1] == 0 && input[2] == 0 && input[3] == 0) {
    for (int k = 0; k < 4; ++k) output[k] = 0;
    return;
  }

  tran_high_t acc[4];
  for (int col = 0; col < 4; ++col) {
    acc[col] = 0;
    for (int row = 0; row < 4; ++row)
      acc[col] += lgtmtx[row * 4 + col] * input[row];
  }

  for (int col = 0; col < 4; ++col)
    output[col] = WRAPLOW(dct_const_round_shift(acc[col]));
}

// 8-point inverse LGT: multiplies the input vector by the 8x8 matrix lgtmtx
// (stored flat, row-major) and round-shifts each sum:
//   output[j] = WRAPLOW(dct_const_round_shift(sum_i lgtmtx[i*8 + j] * input[i]))
// All sums are finished before any output is written, so input and output
// may be the same buffer.
void ilgt8(const tran_low_t *input, tran_low_t *output,
           const tran_high_t *lgtmtx) {
  tran_high_t acc[8];
  for (int col = 0; col < 8; ++col) {
    acc[col] = 0;
    for (int row = 0; row < 8; ++row)
      acc[col] += lgtmtx[row * 8 + col] * input[row];
  }

  for (int col = 0; col < 8; ++col)
    output[col] = WRAPLOW(dct_const_round_shift(acc[col]));
}

// The get_inv_lgt functions return 1 if LGT is chosen to apply, and 0 otherwise
// Decides whether the 4-point LGT replaces tx_orig. LGT applies only when
// tx_orig is aom_iadst4_c; in that case the first ntx entries of lgtmtx are
// pointed at lgt4_170 (inter blocks) or lgt4_140 (otherwise) and 1 is
// returned. For any other tx_orig, lgtmtx is left untouched and 0 is returned.
int get_inv_lgt4(transform_1d tx_orig, const INV_TXFM_PARAM *inv_txfm_param,
                 const tran_high_t *lgtmtx[], int ntx) {
  if (tx_orig != &aom_iadst4_c) return 0;

  // inter/intra split
  for (int k = 0; k < ntx; ++k) {
    if (inv_txfm_param->is_inter)
      lgtmtx[k] = &lgt4_170[0][0];
    else
      lgtmtx[k] = &lgt4_140[0][0];
  }
  return 1;
}

// Decides whether the 8-point LGT replaces tx_orig. LGT applies only when
// tx_orig is aom_iadst8_c; in that case the first ntx entries of lgtmtx are
// pointed at lgt8_170 (inter blocks) or lgt8_150 (otherwise) and 1 is
// returned. For any other tx_orig, lgtmtx is left untouched and 0 is returned.
int get_inv_lgt8(transform_1d tx_orig, const INV_TXFM_PARAM *inv_txfm_param,
                 const tran_high_t *lgtmtx[], int ntx) {
  if (tx_orig != &aom_iadst8_c) return 0;

  // inter/intra split
  for (int k = 0; k < ntx; ++k) {
    if (inv_txfm_param->is_inter)
      lgtmtx[k] = &lgt8_170[0][0];
    else
      lgtmtx[k] = &lgt8_150[0][0];
  }
  return 1;
}
#endif  // CONFIG_LGT

Yaowu Xu's avatar
Yaowu Xu committed
249
void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
250 251
                         const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
252 253 254 255
  if (tx_type == DCT_DCT) {
    aom_idct4x4_16_add(input, dest, stride);
    return;
  }
256
  static const transform_2d IHT_4[] = {
Luca Barbato's avatar
Luca Barbato committed
257 258 259 260
    { aom_idct4_c, aom_idct4_c },    // DCT_DCT  = 0
    { aom_iadst4_c, aom_idct4_c },   // ADST_DCT = 1
    { aom_idct4_c, aom_iadst4_c },   // DCT_ADST = 2
    { aom_iadst4_c, aom_iadst4_c },  // ADST_ADST = 3
261
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
262 263 264 265 266 267 268 269 270 271 272 273
    { aom_iadst4_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx4_c },          // IDTX
    { aom_idct4_c, iidtx4_c },       // V_DCT
    { iidtx4_c, aom_idct4_c },       // H_DCT
    { aom_iadst4_c, iidtx4_c },      // V_ADST
    { iidtx4_c, aom_iadst4_c },      // H_ADST
    { aom_iadst4_c, iidtx4_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst4_c },      // H_FLIPADST
274
#endif
275 276 277
  };

  int i, j;
278
  tran_low_t tmp[4][4];
279 280 281
  tran_low_t out[4][4];
  tran_low_t *outp = &out[0][0];
  int outstride = 4;
282

283 284 285 286
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif

Lester Lu's avatar
Lester Lu committed
287 288 289 290 291 292 293
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[4];
  const tran_high_t *lgtmtx_row[4];
  int use_lgt_col = get_inv_lgt4(IHT_4[tx_type].cols, param, lgtmtx_col, 4);
  int use_lgt_row = get_inv_lgt4(IHT_4[tx_type].rows, param, lgtmtx_row, 4);
#endif

294 295
  // inverse transform row vectors
  for (i = 0; i < 4; ++i) {
Lester Lu's avatar
Lester Lu committed
296 297 298 299 300 301
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt4(input, out[i], lgtmtx_row[i]);
    else
#endif
      IHT_4[tx_type].rows(input, out[i]);
clang-format's avatar
clang-format committed
302
    input += 4;
303 304 305
  }

  // transpose
306 307 308
  for (i = 0; i < 4; i++) {
    for (j = 0; j < 4; j++) {
      tmp[j][i] = out[i][j];
309
    }
310 311 312 313
  }

  // inverse transform column vectors
  for (i = 0; i < 4; ++i) {
Lester Lu's avatar
Lester Lu committed
314 315 316 317 318 319
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt4(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_4[tx_type].cols(tmp[i], out[i]);
320 321 322
  }

#if CONFIG_EXT_TX
323
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4, 4);
324 325 326 327
#endif

  // Sum with the destination
  for (i = 0; i < 4; ++i) {
328
    for (j = 0; j < 4; ++j) {
329 330 331
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
332 333 334 335
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
336
void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
337 338
                         const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
339 340 341
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
342
  static const transform_2d IHT_4x8[] = {
Luca Barbato's avatar
Luca Barbato committed
343 344 345 346
    { aom_idct8_c, aom_idct4_c },    // DCT_DCT
    { aom_iadst8_c, aom_idct4_c },   // ADST_DCT
    { aom_idct8_c, aom_iadst4_c },   // DCT_ADST
    { aom_iadst8_c, aom_iadst4_c },  // ADST_ADST
347
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
348 349 350 351 352 353 354 355 356 357 358 359
    { aom_iadst8_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx4_c },          // IDTX
    { aom_idct8_c, iidtx4_c },       // V_DCT
    { iidtx8_c, aom_idct4_c },       // H_DCT
    { aom_iadst8_c, iidtx4_c },      // V_ADST
    { iidtx8_c, aom_iadst4_c },      // H_ADST
    { aom_iadst8_c, iidtx4_c },      // V_FLIPADST
    { iidtx8_c, aom_iadst4_c },      // H_FLIPADST
360
#endif
361 362
  };

363 364
  const int n = 4;
  const int n2 = 8;
365
  int i, j;
366
  tran_low_t out[4][8], tmp[4][8], outtmp[4];
367
  tran_low_t *outp = &out[0][0];
368
  int outstride = n2;
369

Lester Lu's avatar
Lester Lu committed
370 371 372 373 374 375 376
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[4];
  const tran_high_t *lgtmtx_row[8];
  int use_lgt_col = get_inv_lgt8(IHT_4x8[tx_type].cols, param, lgtmtx_col, 4);
  int use_lgt_row = get_inv_lgt4(IHT_4x8[tx_type].rows, param, lgtmtx_row, 8);
#endif

377
  // inverse transform row vectors and transpose
378
  for (i = 0; i < n2; ++i) {
Lester Lu's avatar
Lester Lu committed
379 380 381 382 383 384
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt4(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_4x8[tx_type].rows(input, outtmp);
385
    for (j = 0; j < n; ++j)
386
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
387
    input += n;
388 389 390
  }

  // inverse transform column vectors
391
  for (i = 0; i < n; ++i) {
Lester Lu's avatar
Lester Lu committed
392 393 394 395 396 397
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt8(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_4x8[tx_type].cols(tmp[i], out[i]);
398 399
  }

400
#if CONFIG_EXT_TX
401
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
402
#endif
403 404

  // Sum with the destination
405 406
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
407 408 409 410 411 412 413
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
414
void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
415 416
                         const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
417 418 419
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
420
  static const transform_2d IHT_8x4[] = {
Luca Barbato's avatar
Luca Barbato committed
421 422 423 424
    { aom_idct4_c, aom_idct8_c },    // DCT_DCT
    { aom_iadst4_c, aom_idct8_c },   // ADST_DCT
    { aom_idct4_c, aom_iadst8_c },   // DCT_ADST
    { aom_iadst4_c, aom_iadst8_c },  // ADST_ADST
425
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
426 427 428 429 430 431 432 433 434 435 436 437
    { aom_iadst4_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx8_c },          // IDTX
    { aom_idct4_c, iidtx8_c },       // V_DCT
    { iidtx4_c, aom_idct8_c },       // H_DCT
    { aom_iadst4_c, iidtx8_c },      // V_ADST
    { iidtx4_c, aom_iadst8_c },      // H_ADST
    { aom_iadst4_c, iidtx8_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst8_c },      // H_FLIPADST
438
#endif
439
  };
440

441 442
  const int n = 4;
  const int n2 = 8;
443 444

  int i, j;
445
  tran_low_t out[8][4], tmp[8][4], outtmp[8];
446
  tran_low_t *outp = &out[0][0];
447
  int outstride = n;
448

Lester Lu's avatar
Lester Lu committed
449 450 451 452 453 454 455
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[8];
  const tran_high_t *lgtmtx_row[4];
  int use_lgt_col = get_inv_lgt4(IHT_8x4[tx_type].cols, param, lgtmtx_col, 8);
  int use_lgt_row = get_inv_lgt8(IHT_8x4[tx_type].rows, param, lgtmtx_row, 4);
#endif

456
  // inverse transform row vectors and transpose
457
  for (i = 0; i < n; ++i) {
Lester Lu's avatar
Lester Lu committed
458 459 460 461 462 463
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt8(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_8x4[tx_type].rows(input, outtmp);
464
    for (j = 0; j < n2; ++j)
465
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
466
    input += n2;
467 468 469
  }

  // inverse transform column vectors
470
  for (i = 0; i < n2; ++i) {
Lester Lu's avatar
Lester Lu committed
471 472 473 474 475 476
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt4(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_8x4[tx_type].cols(tmp[i], out[i]);
477 478
  }

479
#if CONFIG_EXT_TX
480
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
481
#endif
482 483

  // Sum with the destination
484 485
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
486 487 488 489 490 491 492
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

493
void av1_iht4x16_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
494 495
                          const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
496 497 498
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522
  static const transform_2d IHT_4x16[] = {
    { aom_idct16_c, aom_idct4_c },    // DCT_DCT
    { aom_iadst16_c, aom_idct4_c },   // ADST_DCT
    { aom_idct16_c, aom_iadst4_c },   // DCT_ADST
    { aom_iadst16_c, aom_iadst4_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst16_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct16_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx4_c },          // IDTX
    { aom_idct16_c, iidtx4_c },       // V_DCT
    { iidtx16_c, aom_idct4_c },       // H_DCT
    { aom_iadst16_c, iidtx4_c },      // V_ADST
    { iidtx16_c, aom_iadst4_c },      // H_ADST
    { aom_iadst16_c, iidtx4_c },      // V_FLIPADST
    { iidtx16_c, aom_iadst4_c },      // H_FLIPADST
#endif
  };

  const int n = 4;
  const int n4 = 16;
  int i, j;
523
  tran_low_t out[4][16], tmp[4][16], outtmp[4];
524 525 526
  tran_low_t *outp = &out[0][0];
  int outstride = n4;

Lester Lu's avatar
Lester Lu committed
527 528 529 530 531
#if CONFIG_LGT
  const tran_high_t *lgtmtx_row[16];
  int use_lgt_row = get_inv_lgt4(IHT_4x16[tx_type].rows, param, lgtmtx_row, 16);
#endif

532 533
  // inverse transform row vectors and transpose
  for (i = 0; i < n4; ++i) {
Lester Lu's avatar
Lester Lu committed
534 535 536 537 538 539
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt4(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_4x16[tx_type].rows(input, outtmp);
540
    for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
541 542 543 544
    input += n;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
545 546 547
  for (i = 0; i < n; ++i) {
    IHT_4x16[tx_type].cols(tmp[i], out[i]);
  }
548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n4, n);
#endif

  // Sum with the destination
  for (i = 0; i < n4; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

void av1_iht16x4_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
564 565
                          const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
566 567 568
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588
  static const transform_2d IHT_16x4[] = {
    { aom_idct4_c, aom_idct16_c },    // DCT_DCT
    { aom_iadst4_c, aom_idct16_c },   // ADST_DCT
    { aom_idct4_c, aom_iadst16_c },   // DCT_ADST
    { aom_iadst4_c, aom_iadst16_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst4_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst16_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx16_c },          // IDTX
    { aom_idct4_c, iidtx16_c },       // V_DCT
    { iidtx4_c, aom_idct16_c },       // H_DCT
    { aom_iadst4_c, iidtx16_c },      // V_ADST
    { iidtx4_c, aom_iadst16_c },      // H_ADST
    { aom_iadst4_c, iidtx16_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst16_c },      // H_FLIPADST
#endif
  };
589

590 591 592 593
  const int n = 4;
  const int n4 = 16;

  int i, j;
594
  tran_low_t out[16][4], tmp[16][4], outtmp[16];
595 596 597
  tran_low_t *outp = &out[0][0];
  int outstride = n;

Lester Lu's avatar
Lester Lu committed
598 599 600 601 602
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[16];
  int use_lgt_col = get_inv_lgt4(IHT_16x4[tx_type].cols, param, lgtmtx_col, 16);
#endif

603 604 605
  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_16x4[tx_type].rows(input, outtmp);
606
    for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j];
607 608 609 610
    input += n4;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
611 612 613 614 615 616 617 618
  for (i = 0; i < n4; ++i) {
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt4(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_16x4[tx_type].cols(tmp[i], out[i]);
  }
619 620 621 622 623 624 625 626 627 628 629 630 631 632 633

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n4);
#endif

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n4; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
634
void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
635 636
                           const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
637 638 639
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
640
  static const transform_2d IHT_8x16[] = {
Luca Barbato's avatar
Luca Barbato committed
641 642 643 644
    { aom_idct16_c, aom_idct8_c },    // DCT_DCT
    { aom_iadst16_c, aom_idct8_c },   // ADST_DCT
    { aom_idct16_c, aom_iadst8_c },   // DCT_ADST
    { aom_iadst16_c, aom_iadst8_c },  // ADST_ADST
645
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
646 647 648 649 650 651 652 653 654 655 656 657
    { aom_iadst16_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct16_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx8_c },          // IDTX
    { aom_idct16_c, iidtx8_c },       // V_DCT
    { iidtx16_c, aom_idct8_c },       // H_DCT
    { aom_iadst16_c, iidtx8_c },      // V_ADST
    { iidtx16_c, aom_iadst8_c },      // H_ADST
    { aom_iadst16_c, iidtx8_c },      // V_FLIPADST
    { iidtx16_c, aom_iadst8_c },      // H_FLIPADST
658
#endif
659 660 661 662 663
  };

  const int n = 8;
  const int n2 = 16;
  int i, j;
664
  tran_low_t out[8][16], tmp[8][16], outtmp[8];
665 666 667
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

Lester Lu's avatar
Lester Lu committed
668 669 670 671 672
#if CONFIG_LGT
  const tran_high_t *lgtmtx_row[16];
  int use_lgt_row = get_inv_lgt8(IHT_8x16[tx_type].rows, param, lgtmtx_row, 16);
#endif

673 674
  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
Lester Lu's avatar
Lester Lu committed
675 676 677 678 679 680
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt8(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_8x16[tx_type].rows(input, outtmp);
681
    for (j = 0; j < n; ++j)
682
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
683
    input += n;
684 685 686 687
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
688
    IHT_8x16[tx_type].cols(tmp[i], out[i]);
689 690
  }

691
#if CONFIG_EXT_TX
692
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
693
#endif
694 695 696 697 698 699 700 701 702 703 704

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
705
void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
706 707
                           const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
708 709 710
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
711
  static const transform_2d IHT_16x8[] = {
Luca Barbato's avatar
Luca Barbato committed
712 713 714 715
    { aom_idct8_c, aom_idct16_c },    // DCT_DCT
    { aom_iadst8_c, aom_idct16_c },   // ADST_DCT
    { aom_idct8_c, aom_iadst16_c },   // DCT_ADST
    { aom_iadst8_c, aom_iadst16_c },  // ADST_ADST
716
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
717 718 719 720 721 722 723 724 725 726 727 728
    { aom_iadst8_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst16_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // ADST_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx16_c },          // IDTX
    { aom_idct8_c, iidtx16_c },       // V_DCT
    { iidtx8_c, aom_idct16_c },       // H_DCT
    { aom_iadst8_c, iidtx16_c },      // V_ADST
    { iidtx8_c, aom_iadst16_c },      // H_ADST
    { aom_iadst8_c, iidtx16_c },      // V_FLIPADST
    { iidtx8_c, aom_iadst16_c },      // H_FLIPADST
729
#endif
730
  };
731

732 733 734 735
  const int n = 8;
  const int n2 = 16;

  int i, j;
736
  tran_low_t out[16][8], tmp[16][8], outtmp[16];
737 738 739
  tran_low_t *outp = &out[0][0];
  int outstride = n;

Lester Lu's avatar
Lester Lu committed
740 741 742 743 744
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[16];
  int use_lgt_col = get_inv_lgt8(IHT_16x8[tx_type].cols, param, lgtmtx_col, 16);
#endif

745 746 747 748
  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_16x8[tx_type].rows(input, outtmp);
    for (j = 0; j < n2; ++j)
749
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
750
    input += n2;
751 752 753 754
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) {
Lester Lu's avatar
Lester Lu committed
755 756 757 758 759 760
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt8(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_16x8[tx_type].cols(tmp[i], out[i]);
761 762
  }

763
#if CONFIG_EXT_TX
764
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
765
#endif
766 767 768 769 770 771 772 773 774 775 776

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

777
void av1_iht8x32_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
778 779
                           const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
780 781 782
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806
  static const transform_2d IHT_8x32[] = {
    { aom_idct32_c, aom_idct8_c },     // DCT_DCT
    { ihalfright32_c, aom_idct8_c },   // ADST_DCT
    { aom_idct32_c, aom_iadst8_c },    // DCT_ADST
    { ihalfright32_c, aom_iadst8_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { ihalfright32_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct32_c, aom_iadst8_c },    // DCT_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // ADST_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx8_c },           // IDTX
    { aom_idct32_c, iidtx8_c },        // V_DCT
    { iidtx32_c, aom_idct8_c },        // H_DCT
    { ihalfright32_c, iidtx8_c },      // V_ADST
    { iidtx32_c, aom_iadst8_c },       // H_ADST
    { ihalfright32_c, iidtx8_c },      // V_FLIPADST
    { iidtx32_c, aom_iadst8_c },       // H_FLIPADST
#endif
  };

  const int n = 8;
  const int n4 = 32;
  int i, j;
807
  tran_low_t out[8][32], tmp[8][32], outtmp[8];
808 809 810
  tran_low_t *outp = &out[0][0];
  int outstride = n4;

Lester Lu's avatar
Lester Lu committed
811 812 813 814 815
#if CONFIG_LGT
  const tran_high_t *lgtmtx_row[32];
  int use_lgt_row = get_inv_lgt8(IHT_8x32[tx_type].rows, param, lgtmtx_row, 32);
#endif

816 817
  // inverse transform row vectors and transpose
  for (i = 0; i < n4; ++i) {
Lester Lu's avatar
Lester Lu committed
818 819 820 821 822 823
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt8(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_8x32[tx_type].rows(input, outtmp);
824
    for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
825 826 827 828
    input += n;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
829 830 831
  for (i = 0; i < n; ++i) {
    IHT_8x32[tx_type].cols(tmp[i], out[i]);
  }
832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n4, n);
#endif

  // Sum with the destination
  for (i = 0; i < n4; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

void av1_iht32x8_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
848 849
                           const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
850 851 852
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872
  static const transform_2d IHT_32x8[] = {
    { aom_idct8_c, aom_idct32_c },     // DCT_DCT
    { aom_iadst8_c, aom_idct32_c },    // ADST_DCT
    { aom_idct8_c, ihalfright32_c },   // DCT_ADST
    { aom_iadst8_c, ihalfright32_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst8_c, aom_idct32_c },    // FLIPADST_DCT
    { aom_idct8_c, ihalfright32_c },   // DCT_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // ADST_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx32_c },           // IDTX
    { aom_idct8_c, iidtx32_c },        // V_DCT
    { iidtx8_c, aom_idct32_c },        // H_DCT
    { aom_iadst8_c, iidtx32_c },       // V_ADST
    { iidtx8_c, ihalfright32_c },      // H_ADST
    { aom_iadst8_c, iidtx32_c },       // V_FLIPADST
    { iidtx8_c, ihalfright32_c },      // H_FLIPADST
#endif
  };
873

874 875 876 877
  const int n = 8;
  const int n4 = 32;

  int i, j;
878
  tran_low_t out[32][8], tmp[32][8], outtmp[32];
879 880 881
  tran_low_t *outp = &out[0][0];
  int outstride = n;

Lester Lu's avatar
Lester Lu committed
882 883 884 885 886
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[32];
  int use_lgt_col = get_inv_lgt4(IHT_32x8[tx_type].cols, param, lgtmtx_col, 32);
#endif

887 888 889
  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_32x8[tx_type].rows(input, outtmp);
890
    for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j];
891 892 893 894
    input += n4;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
895 896 897 898 899 900 901 902
  for (i = 0; i < n4; ++i) {
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt8(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_32x8[tx_type].cols(tmp[i], out[i]);
  }
903 904 905 906 907 908 909 910 911 912 913 914 915 916 917

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n4);
#endif

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n4; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

void av1_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
919 920
                            const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
921 922 923
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
924
  static const transform_2d IHT_16x32[] = {
Luca Barbato's avatar
Luca Barbato committed
925 926 927 928
    { aom_idct32_c, aom_idct16_c },     // DCT_DCT
    { ihalfright32_c, aom_idct16_c },   // ADST_DCT
    { aom_idct32_c, aom_iadst16_c },    // DCT_ADST
    { ihalfright32_c, aom_iadst16_c },  // ADST_ADST
929
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
930 931 932 933 934 935 936 937 938 939 940 941
    { ihalfright32_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct32_c, aom_iadst16_c },    // DCT_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // ADST_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx16_c },           // IDTX
    { aom_idct32_c, iidtx16_c },        // V_DCT
    { iidtx32_c, aom_idct16_c },        // H_DCT
    { ihalfright32_c, iidtx16_c },      // V_ADST
    { iidtx32_c, aom_iadst16_c },       // H_ADST
    { ihalfright32_c, iidtx16_c },      // V_FLIPADST
    { iidtx32_c, aom_iadst16_c },       // H_FLIPADST
942
#endif
943 944 945 946 947
  };

  const int n = 16;
  const int n2 = 32;
  int i, j;
948
  tran_low_t out[16][32], tmp[16][32], outtmp[16];
949 950 951 952 953 954 955
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
    IHT_16x32[tx_type].rows(input, outtmp);
    for (j = 0; j < n; ++j)
956
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
957
    input += n;
958 959 960
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
961
  for (i = 0; i < n; ++i) IHT_16x32[tx_type].cols(tmp[i], out[i]);
962

963
#if CONFIG_EXT_TX
964
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
965
#endif
966 967 968 969 970 971 972 973 974 975 976

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

void av1_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
978 979
                            const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
980 981 982
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
983
  static const transform_2d IHT_32x16[] = {
Luca Barbato's avatar
Luca Barbato committed
984 985 986 987
    { aom_idct16_c, aom_idct32_c },     // DCT_DCT
    { aom_iadst16_c, aom_idct32_c },    // ADST_DCT
    { aom_idct16_c, ihalfright32_c },   // DCT_ADST
    { aom_iadst16_c, ihalfright32_c },  // ADST_ADST
988
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
989 990 991 992 993 994 995 996 997 998 999 1000
    { aom_iadst16_c, aom_idct32_c },    // FLIPADST_DCT
    { aom_idct16_c, ihalfright32_c },   // DCT_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // ADST_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx32_c },           // IDTX
    { aom_idct16_c, iidtx32_c },        // V_DCT
    { iidtx16_c, aom_idct32_c },        // H_DCT
    { aom_iadst16_c, iidtx32_c },       // V_ADST
    { iidtx16_c, ihalfright32_c },      // H_ADST
    { aom_iadst16_c, iidtx32_c },       // V_FLIPADST
    { iidtx16_c, ihalfright32_c },      // H_FLIPADST
1001
#endif
1002 1003 1004 1005 1006
  };
  const int n = 16;
  const int n2 = 32;

  int i, j;
1007
  tran_low_t out[32][16], tmp[32][16], outtmp[32];
1008 1009 1010 1011 1012 1013 1014
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_32x16[tx_type].rows(input, outtmp);
    for (j = 0; j < n2; ++j)
1015
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
1016
    input += n2;
1017 1018 1019
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
1020
  for (i = 0; i < n2; ++i) IHT_32x16[tx_type].cols(tmp[i], out[i]);
1021