idct.c 80.7 KB
Newer Older
Jingning Han's avatar
Jingning Han committed
1
/*
Yaowu Xu's avatar
Yaowu Xu committed
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
Jingning Han's avatar
Jingning Han committed
3
 *
Yaowu Xu's avatar
Yaowu Xu committed
4 5 6 7 8 9
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
Jingning Han's avatar
Jingning Han committed
10 11 12 13
 */

#include <math.h>

Yaowu Xu's avatar
Yaowu Xu committed
14
#include "./aom_dsp_rtcd.h"
Geza Lore's avatar
Geza Lore committed
15 16 17
#include "./av1_rtcd.h"
#include "aom_dsp/inv_txfm.h"
#include "aom_ports/mem.h"
18
#include "av1/common/av1_inv_txfm1d_cfg.h"
19 20 21
#include "av1/common/blockd.h"
#include "av1/common/enums.h"
#include "av1/common/idct.h"
Jingning Han's avatar
Jingning Han committed
22

23
int av1_get_tx_scale(const TX_SIZE tx_size) {
24 25 26 27 28 29 30
  if (txsize_sqr_up_map[tx_size] == TX_32X32) return 1;
#if CONFIG_TX64X64
  else if (txsize_sqr_up_map[tx_size] == TX_64X64)
    return 2;
#endif  // CONFIG_TX64X64
  else
    return 0;
31 32
}

33 34 35
// NOTE: The implementations of all inverse transforms need to be aware of the
// fact that input and output could be the same buffer.

36
#if CONFIG_EXT_TX
Debargha Mukherjee's avatar
Debargha Mukherjee committed
37 38
// 4-point inverse identity transform. With CONFIG_DAALA_DCT4 the samples are
// copied unchanged; otherwise each sample is multiplied by Sqrt2 and passed
// through dct_const_round_shift.
static void iidtx4_c(const tran_low_t *input, tran_low_t *output) {
  for (int k = 0; k < 4; ++k) {
#if CONFIG_DAALA_DCT4
    output[k] = input[k];
#else
    output[k] = (tran_low_t)dct_const_round_shift(input[k] * Sqrt2);
#endif
  }
}

// 8-point inverse identity transform. With CONFIG_DAALA_DCT8 the samples are
// copied unchanged; otherwise each sample is doubled.
static void iidtx8_c(const tran_low_t *input, tran_low_t *output) {
  for (int k = 0; k < 8; ++k) {
#if CONFIG_DAALA_DCT8
    output[k] = input[k];
#else
    output[k] = input[k] * 2;
#endif
  }
}

// 16-point inverse identity transform. With CONFIG_DAALA_DCT16 the samples
// are copied unchanged; otherwise each sample is multiplied by 2 * Sqrt2 and
// passed through dct_const_round_shift.
static void iidtx16_c(const tran_low_t *input, tran_low_t *output) {
  for (int k = 0; k < 16; ++k) {
#if CONFIG_DAALA_DCT16
    output[k] = input[k];
#else
    output[k] = (tran_low_t)dct_const_round_shift(input[k] * 2 * Sqrt2);
#endif
  }
}

// 32-point inverse identity transform. With CONFIG_DAALA_DCT32 the samples
// are copied unchanged; otherwise each sample is multiplied by 4.
static void iidtx32_c(const tran_low_t *input, tran_low_t *output) {
  for (int k = 0; k < 32; ++k) {
#if CONFIG_DAALA_DCT32
    output[k] = input[k];
#else
    output[k] = input[k] * 4;
#endif
  }
}
80 81 82 83 84 85 86 87

#if CONFIG_TX64X64
// 64-point inverse identity transform: each sample is multiplied by
// 4 * Sqrt2 and passed through dct_const_round_shift.
static void iidtx64_c(const tran_low_t *input, tran_low_t *output) {
  for (int k = 0; k < 64; ++k) {
    output[k] = (tran_low_t)dct_const_round_shift(input[k] * 4 * Sqrt2);
  }
}
#endif  // CONFIG_TX64X64
88
#endif  // CONFIG_EXT_TX
Debargha Mukherjee's avatar
Debargha Mukherjee committed
89

90
// For use in lieu of ADST
91 92 93 94 95 96 97 98 99 100 101 102 103 104
#if CONFIG_DAALA_DCT32
// 32-point "half right" inverse used in lieu of ADST (Daala build).
// The upper 16 inputs are copied straight to output[0..15]; the lower 16
// inputs are run through aom_idct16_c into output[16..31].
static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
  tran_low_t lower_half[16];
  // No scaling within; Daala transforms are all orthonormal.
  // input[k] is saved before output[k] is written, so the routine remains
  // correct when input and output are the same buffer.
  for (int k = 0; k < 16; ++k) {
    lower_half[k] = input[k];
    output[k] = input[16 + k];
  }
  aom_idct16_c(lower_half, output + 16);
}
#else
105 106 107 108 109 110 111
// 32-point "half right" inverse used in lieu of ADST.
// The upper 16 inputs are multiplied by 4 and stored in output[0..15]; the
// lower 16 inputs are multiplied by Sqrt2 (with dct_const_round_shift) and
// then run through aom_idct16_c into output[16..31].
static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
  tran_low_t lower_half[16];
  // input[k] is consumed before output[k] is written, so the routine remains
  // correct when input and output are the same buffer.
  for (int k = 0; k < 16; ++k) {
    // Multiply input by sqrt(2)
    lower_half[k] = (tran_low_t)dct_const_round_shift(input[k] * Sqrt2);
    output[k] = input[16 + k] * 4;
  }
  aom_idct16_c(lower_half, output + 16);
  // Note overall scaling factor is 4 times orthogonal
}
118
#endif
119

120 121 122 123 124
#if CONFIG_TX64X64
// 64-point inverse DCT for the column pass: widens the samples to int32_t,
// calls av1_idct64_new with the column cos-bit / stage-range tables, and
// narrows the result back to tran_low_t.
static void idct64_col_c(const tran_low_t *input, tran_low_t *output) {
  int32_t src32[64];
  int32_t dst32[64];
  for (int k = 0; k < 64; ++k) {
    src32[k] = (int32_t)input[k];
  }
  av1_idct64_new(src32, dst32, inv_cos_bit_col_dct_64,
                 inv_stage_range_col_dct_64);
  for (int k = 0; k < 64; ++k) {
    output[k] = (tran_low_t)dst32[k];
  }
}

// 64-point inverse DCT for the row pass: widens the samples to int32_t,
// calls av1_idct64_new with the row cos-bit / stage-range tables, and
// narrows the result back to tran_low_t.
static void idct64_row_c(const tran_low_t *input, tran_low_t *output) {
  int32_t src32[64];
  int32_t dst32[64];
  for (int k = 0; k < 64; ++k) {
    src32[k] = (int32_t)input[k];
  }
  av1_idct64_new(src32, dst32, inv_cos_bit_row_dct_64,
                 inv_stage_range_row_dct_64);
  for (int k = 0; k < 64; ++k) {
    output[k] = (tran_low_t)dst32[k];
  }
}

// For use in lieu of ADST
// 64-point "half right" inverse. The upper 32 inputs are multiplied by
// 4 * Sqrt2 (with dct_const_round_shift) and stored in output[0..31]; the
// lower 32 inputs are multiplied by Sqrt2 (with dct_const_round_shift) and
// then run through aom_idct32_c into output[32..63].
static void ihalfright64_c(const tran_low_t *input, tran_low_t *output) {
  tran_low_t lower_half[32];
  // input[k] is consumed before output[k] is written, so the routine remains
  // correct when input and output are the same buffer.
  for (int k = 0; k < 32; ++k) {
    // Multiply input by sqrt(2)
    lower_half[k] = (tran_low_t)dct_const_round_shift(input[k] * Sqrt2);
    output[k] = (tran_low_t)dct_const_round_shift(input[32 + k] * 4 * Sqrt2);
  }
  aom_idct32_c(lower_half, output + 32);
  // Note overall scaling factor is 4 * sqrt(2)  times orthogonal
}
#endif  // CONFIG_TX64X64

Jingning Han's avatar
Jingning Han committed
153
// Inverse identity transform and add.
154
#if CONFIG_EXT_TX
155
static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
Jingning Han's avatar
Jingning Han committed
156
                           int bs, int tx_type) {
157
  int r, c;
158
  const int shift = bs < 32 ? 3 : (bs < 64 ? 2 : 1);
Debargha Mukherjee's avatar
Debargha Mukherjee committed
159
  if (tx_type == IDTX) {
Jingning Han's avatar
Jingning Han committed
160 161
    for (r = 0; r < bs; ++r) {
      for (c = 0; c < bs; ++c)
Debargha Mukherjee's avatar
Debargha Mukherjee committed
162 163 164
        dest[c] = clip_pixel_add(dest[c], input[c] >> shift);
      dest += stride;
      input += bs;
Jingning Han's avatar
Jingning Han committed
165
    }
166 167
  }
}
168
#endif  // CONFIG_EXT_TX
169

clang-format's avatar
clang-format committed
170 171 172 173 174
// Flips a 2-D buffer view upside down in place: `dest` is advanced to the
// start of row (size - 1) and `stride` is negated, so that subsequently
// stepping by `stride` visits the rows in reverse order.
// `dest` and `stride` are both assigned to, so they must be modifiable
// lvalues; each macro argument is evaluated more than once.
#define FLIPUD_PTR(dest, stride, size)       \
  do {                                       \
    (dest) = (dest) + ((size)-1) * (stride); \
    (stride) = -(stride);                    \
  } while (0)
175

176
#if CONFIG_EXT_TX
clang-format's avatar
clang-format committed
177 178 179
// Adjusts the destination and/or source pointer + stride pair so that the
// final "add to destination" loop produces the flips required by tx_type.
//
// Note that the transpose of src will be added to dst. In order to LR flip
// the addends (in dst coordinates), we UD flip the src. To UD flip the
// addends, we UD flip the dst.
//
//   dst/dstride - destination pointer and stride, flipped over sizey rows.
//   src/sstride - source pointer and stride, flipped over sizex rows.
// Unknown tx_type values trip assert(0) and leave everything unchanged.
static void maybe_flip_strides(uint8_t **dst, int *dstride, tran_low_t **src,
                               int *sstride, int tx_type, int sizey,
                               int sizex) {
  int flip_ud = 0;  // UD flip of dst
  int flip_lr = 0;  // UD flip of src, i.e. LR flip in dst coordinates

  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case IDTX:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST: break;
    case FLIPADST_DCT:
    case FLIPADST_ADST:
    case V_FLIPADST: flip_ud = 1; break;
    case DCT_FLIPADST:
    case ADST_FLIPADST:
    case H_FLIPADST: flip_lr = 1; break;
    case FLIPADST_FLIPADST:
      flip_ud = 1;
      flip_lr = 1;
      break;
    default: assert(0); break;
  }

  if (flip_ud) {
    FLIPUD_PTR(*dst, *dstride, sizey);
  }
  if (flip_lr) {
    FLIPUD_PTR(*src, *sstride, sizex);
  }
}
214
#endif  // CONFIG_EXT_TX
215

216
#if CONFIG_HIGHBITDEPTH
217
#if CONFIG_EXT_TX && CONFIG_TX64X64
218
static void highbd_inv_idtx_add_c(const tran_low_t *input, uint8_t *dest8,
219
                                  int stride, int bs, int tx_type, int bd) {
220 221 222
  int r, c;
  const int shift = bs < 32 ? 3 : 2;
  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
223

Debargha Mukherjee's avatar
Debargha Mukherjee committed
224
  if (tx_type == IDTX) {
225 226
    for (r = 0; r < bs; ++r) {
      for (c = 0; c < bs; ++c)
Debargha Mukherjee's avatar
Debargha Mukherjee committed
227 228 229
        dest[c] = highbd_clip_pixel_add(dest[c], input[c] >> shift, bd);
      dest += stride;
      input += bs;
230
    }
231 232
  }
}
233
#endif  // CONFIG_EXT_TX && CONFIG_TX64X64
234
#endif  // CONFIG_HIGHBITDEPTH
235

Lester Lu's avatar
Lester Lu committed
236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262
#if CONFIG_LGT
// 4-point inverse LGT: output[j] = round_shift(sum_i lgtmtx[i][j] * input[i])
// where lgtmtx is a 4x4 matrix stored row-major. All-zero input yields
// all-zero output without touching the matrix.
void ilgt4(const tran_low_t *input, tran_low_t *output,
           const tran_high_t *lgtmtx) {
  if ((input[0] | input[1] | input[2] | input[3]) == 0) {
    output[0] = output[1] = output[2] = output[3] = 0;
    return;
  }

  // Accumulate every column sum before writing anything, so that input and
  // output may be the same buffer.
  tran_high_t acc[4] = { 0 };
  for (int col = 0; col < 4; ++col) {
    for (int row = 0; row < 4; ++row) {
      acc[col] += lgtmtx[row * 4 + col] * input[row];
    }
  }

  for (int col = 0; col < 4; ++col) {
    output[col] = WRAPLOW(dct_const_round_shift(acc[col]));
  }
}

// 8-point inverse LGT: output[j] = round_shift(sum_i lgtmtx[i][j] * input[i])
// where lgtmtx is an 8x8 matrix stored row-major.
void ilgt8(const tran_low_t *input, tran_low_t *output,
           const tran_high_t *lgtmtx) {
  // Accumulate every column sum before writing anything, so that input and
  // output may be the same buffer.
  tran_high_t acc[8] = { 0 };
  for (int col = 0; col < 8; ++col) {
    for (int row = 0; row < 8; ++row) {
      acc[col] += lgtmtx[row * 8 + col] * input[row];
    }
  }

  for (int col = 0; col < 8; ++col) {
    output[col] = WRAPLOW(dct_const_round_shift(acc[col]));
  }
}

// The get_inv_lgt functions return 1 if LGT is chosen to apply, and 0 otherwise
263
int get_inv_lgt4(transform_1d tx_orig, const TxfmParam *txfm_param,
Lester Lu's avatar
Lester Lu committed
264 265 266 267
                 const tran_high_t *lgtmtx[], int ntx) {
  // inter/intra split
  if (tx_orig == &aom_iadst4_c) {
    for (int i = 0; i < ntx; ++i)
268
      lgtmtx[i] = txfm_param->is_inter ? &lgt4_170[0][0] : &lgt4_140[0][0];
Lester Lu's avatar
Lester Lu committed
269 270 271 272 273
    return 1;
  }
  return 0;
}

274
int get_inv_lgt8(transform_1d tx_orig, const TxfmParam *txfm_param,
Lester Lu's avatar
Lester Lu committed
275 276 277 278
                 const tran_high_t *lgtmtx[], int ntx) {
  // inter/intra split
  if (tx_orig == &aom_iadst8_c) {
    for (int i = 0; i < ntx; ++i)
279
      lgtmtx[i] = txfm_param->is_inter ? &lgt8_170[0][0] : &lgt8_150[0][0];
Lester Lu's avatar
Lester Lu committed
280 281 282 283 284 285
    return 1;
  }
  return 0;
}
#endif  // CONFIG_LGT

Yaowu Xu's avatar
Yaowu Xu committed
286
void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
287 288
                         const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
289 290 291
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
292
#if !CONFIG_DAALA_DCT4
293 294 295 296
  if (tx_type == DCT_DCT) {
    aom_idct4x4_16_add(input, dest, stride);
    return;
  }
297
#endif
298
  static const transform_2d IHT_4[] = {
Luca Barbato's avatar
Luca Barbato committed
299 300 301 302
    { aom_idct4_c, aom_idct4_c },    // DCT_DCT  = 0
    { aom_iadst4_c, aom_idct4_c },   // ADST_DCT = 1
    { aom_idct4_c, aom_iadst4_c },   // DCT_ADST = 2
    { aom_iadst4_c, aom_iadst4_c },  // ADST_ADST = 3
303
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
304 305 306 307 308 309 310 311 312 313 314 315
    { aom_iadst4_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx4_c },          // IDTX
    { aom_idct4_c, iidtx4_c },       // V_DCT
    { iidtx4_c, aom_idct4_c },       // H_DCT
    { aom_iadst4_c, iidtx4_c },      // V_ADST
    { iidtx4_c, aom_iadst4_c },      // H_ADST
    { aom_iadst4_c, iidtx4_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst4_c },      // H_FLIPADST
316
#endif
317 318 319
  };

  int i, j;
320
  tran_low_t tmp[4][4];
321 322 323
  tran_low_t out[4][4];
  tran_low_t *outp = &out[0][0];
  int outstride = 4;
324

325 326 327 328
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif

Lester Lu's avatar
Lester Lu committed
329 330 331
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[4];
  const tran_high_t *lgtmtx_row[4];
332 333 334 335
  int use_lgt_col =
      get_inv_lgt4(IHT_4[tx_type].cols, txfm_param, lgtmtx_col, 4);
  int use_lgt_row =
      get_inv_lgt4(IHT_4[tx_type].rows, txfm_param, lgtmtx_row, 4);
Lester Lu's avatar
Lester Lu committed
336 337
#endif

338 339
  // inverse transform row vectors
  for (i = 0; i < 4; ++i) {
340 341 342 343 344
#if CONFIG_DAALA_DCT4
    tran_low_t temp_in[4];
    for (j = 0; j < 4; j++) temp_in[j] = input[j] << 1;
    IHT_4[tx_type].rows(temp_in, out[i]);
#else
Lester Lu's avatar
Lester Lu committed
345 346 347 348 349 350
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt4(input, out[i], lgtmtx_row[i]);
    else
#endif
      IHT_4[tx_type].rows(input, out[i]);
351
#endif
clang-format's avatar
clang-format committed
352
    input += 4;
353 354 355
  }

  // transpose
356 357 358
  for (i = 0; i < 4; i++) {
    for (j = 0; j < 4; j++) {
      tmp[j][i] = out[i][j];
359
    }
360 361 362 363
  }

  // inverse transform column vectors
  for (i = 0; i < 4; ++i) {
Lester Lu's avatar
Lester Lu committed
364 365 366 367 368 369
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt4(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_4[tx_type].cols(tmp[i], out[i]);
370 371 372
  }

#if CONFIG_EXT_TX
373
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4, 4);
374 375 376 377
#endif

  // Sum with the destination
  for (i = 0; i < 4; ++i) {
378
    for (j = 0; j < 4; ++j) {
379 380
      int d = i * stride + j;
      int s = j * outstride + i;
381
#if CONFIG_DAALA_DCT4
382
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
383 384 385
#else
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
#endif
386 387 388 389
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
390
void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
391 392
                         const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
393 394 395
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
396 397 398
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
399
  static const transform_2d IHT_4x8[] = {
Luca Barbato's avatar
Luca Barbato committed
400 401 402 403
    { aom_idct8_c, aom_idct4_c },    // DCT_DCT
    { aom_iadst8_c, aom_idct4_c },   // ADST_DCT
    { aom_idct8_c, aom_iadst4_c },   // DCT_ADST
    { aom_iadst8_c, aom_iadst4_c },  // ADST_ADST
404
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
405 406 407 408 409 410 411 412 413 414 415 416
    { aom_iadst8_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx4_c },          // IDTX
    { aom_idct8_c, iidtx4_c },       // V_DCT
    { iidtx8_c, aom_idct4_c },       // H_DCT
    { aom_iadst8_c, iidtx4_c },      // V_ADST
    { iidtx8_c, aom_iadst4_c },      // H_ADST
    { aom_iadst8_c, iidtx4_c },      // V_FLIPADST
    { iidtx8_c, aom_iadst4_c },      // H_FLIPADST
417
#endif
418 419
  };

420 421
  const int n = 4;
  const int n2 = 8;
422
  int i, j;
423
  tran_low_t out[4][8], tmp[4][8], outtmp[4];
424
  tran_low_t *outp = &out[0][0];
425
  int outstride = n2;
426

Lester Lu's avatar
Lester Lu committed
427 428 429
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[4];
  const tran_high_t *lgtmtx_row[8];
430 431 432 433
  int use_lgt_col =
      get_inv_lgt8(IHT_4x8[tx_type].cols, txfm_param, lgtmtx_col, 4);
  int use_lgt_row =
      get_inv_lgt4(IHT_4x8[tx_type].rows, txfm_param, lgtmtx_row, 8);
Lester Lu's avatar
Lester Lu committed
434 435
#endif

436
  // inverse transform row vectors and transpose
437
  for (i = 0; i < n2; ++i) {
Lester Lu's avatar
Lester Lu committed
438 439 440 441 442 443
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt4(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_4x8[tx_type].rows(input, outtmp);
444
    for (j = 0; j < n; ++j)
445
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
446
    input += n;
447 448 449
  }

  // inverse transform column vectors
450
  for (i = 0; i < n; ++i) {
Lester Lu's avatar
Lester Lu committed
451 452 453 454 455 456
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt8(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_4x8[tx_type].cols(tmp[i], out[i]);
457 458
  }

459
#if CONFIG_EXT_TX
460
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
461
#endif
462 463

  // Sum with the destination
464 465
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
466 467 468 469 470 471 472
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
473
void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
474 475
                         const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
476 477 478
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
479 480 481
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
482
  static const transform_2d IHT_8x4[] = {
Luca Barbato's avatar
Luca Barbato committed
483 484 485 486
    { aom_idct4_c, aom_idct8_c },    // DCT_DCT
    { aom_iadst4_c, aom_idct8_c },   // ADST_DCT
    { aom_idct4_c, aom_iadst8_c },   // DCT_ADST
    { aom_iadst4_c, aom_iadst8_c },  // ADST_ADST
487
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
488 489 490 491 492 493 494 495 496 497 498 499
    { aom_iadst4_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx8_c },          // IDTX
    { aom_idct4_c, iidtx8_c },       // V_DCT
    { iidtx4_c, aom_idct8_c },       // H_DCT
    { aom_iadst4_c, iidtx8_c },      // V_ADST
    { iidtx4_c, aom_iadst8_c },      // H_ADST
    { aom_iadst4_c, iidtx8_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst8_c },      // H_FLIPADST
500
#endif
501
  };
502

503 504
  const int n = 4;
  const int n2 = 8;
505 506

  int i, j;
507
  tran_low_t out[8][4], tmp[8][4], outtmp[8];
508
  tran_low_t *outp = &out[0][0];
509
  int outstride = n;
510

Lester Lu's avatar
Lester Lu committed
511 512 513
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[8];
  const tran_high_t *lgtmtx_row[4];
514 515 516 517
  int use_lgt_col =
      get_inv_lgt4(IHT_8x4[tx_type].cols, txfm_param, lgtmtx_col, 8);
  int use_lgt_row =
      get_inv_lgt8(IHT_8x4[tx_type].rows, txfm_param, lgtmtx_row, 4);
Lester Lu's avatar
Lester Lu committed
518 519
#endif

520
  // inverse transform row vectors and transpose
521
  for (i = 0; i < n; ++i) {
Lester Lu's avatar
Lester Lu committed
522 523 524 525 526 527
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt8(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_8x4[tx_type].rows(input, outtmp);
528
    for (j = 0; j < n2; ++j)
529
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
530
    input += n2;
531 532 533
  }

  // inverse transform column vectors
534
  for (i = 0; i < n2; ++i) {
Lester Lu's avatar
Lester Lu committed
535 536 537 538 539 540
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt4(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_8x4[tx_type].cols(tmp[i], out[i]);
541 542
  }

543
#if CONFIG_EXT_TX
544
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
545
#endif
546 547

  // Sum with the destination
548 549
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
550 551 552 553 554 555 556
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

557
void av1_iht4x16_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
558 559
                          const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
560 561 562
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
563 564 565
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589
  static const transform_2d IHT_4x16[] = {
    { aom_idct16_c, aom_idct4_c },    // DCT_DCT
    { aom_iadst16_c, aom_idct4_c },   // ADST_DCT
    { aom_idct16_c, aom_iadst4_c },   // DCT_ADST
    { aom_iadst16_c, aom_iadst4_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst16_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct16_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx4_c },          // IDTX
    { aom_idct16_c, iidtx4_c },       // V_DCT
    { iidtx16_c, aom_idct4_c },       // H_DCT
    { aom_iadst16_c, iidtx4_c },      // V_ADST
    { iidtx16_c, aom_iadst4_c },      // H_ADST
    { aom_iadst16_c, iidtx4_c },      // V_FLIPADST
    { iidtx16_c, aom_iadst4_c },      // H_FLIPADST
#endif
  };

  const int n = 4;
  const int n4 = 16;
  int i, j;
590
  tran_low_t out[4][16], tmp[4][16], outtmp[4];
591 592 593
  tran_low_t *outp = &out[0][0];
  int outstride = n4;

Lester Lu's avatar
Lester Lu committed
594 595
#if CONFIG_LGT
  const tran_high_t *lgtmtx_row[16];
596 597
  int use_lgt_row =
      get_inv_lgt4(IHT_4x16[tx_type].rows, txfm_param, lgtmtx_row, 16);
Lester Lu's avatar
Lester Lu committed
598 599
#endif

600 601
  // inverse transform row vectors and transpose
  for (i = 0; i < n4; ++i) {
Lester Lu's avatar
Lester Lu committed
602 603 604 605 606 607
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt4(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_4x16[tx_type].rows(input, outtmp);
608
    for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
609 610 611 612
    input += n;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
613 614 615
  for (i = 0; i < n; ++i) {
    IHT_4x16[tx_type].cols(tmp[i], out[i]);
  }
616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n4, n);
#endif

  // Sum with the destination
  for (i = 0; i < n4; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

void av1_iht16x4_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
632 633
                          const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
634 635 636
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
637 638 639
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659
  static const transform_2d IHT_16x4[] = {
    { aom_idct4_c, aom_idct16_c },    // DCT_DCT
    { aom_iadst4_c, aom_idct16_c },   // ADST_DCT
    { aom_idct4_c, aom_iadst16_c },   // DCT_ADST
    { aom_iadst4_c, aom_iadst16_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst4_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst16_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx16_c },          // IDTX
    { aom_idct4_c, iidtx16_c },       // V_DCT
    { iidtx4_c, aom_idct16_c },       // H_DCT
    { aom_iadst4_c, iidtx16_c },      // V_ADST
    { iidtx4_c, aom_iadst16_c },      // H_ADST
    { aom_iadst4_c, iidtx16_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst16_c },      // H_FLIPADST
#endif
  };
660

661 662 663 664
  const int n = 4;
  const int n4 = 16;

  int i, j;
665
  tran_low_t out[16][4], tmp[16][4], outtmp[16];
666 667 668
  tran_low_t *outp = &out[0][0];
  int outstride = n;

Lester Lu's avatar
Lester Lu committed
669 670
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[16];
671 672
  int use_lgt_col =
      get_inv_lgt4(IHT_16x4[tx_type].cols, txfm_param, lgtmtx_col, 16);
Lester Lu's avatar
Lester Lu committed
673 674
#endif

675 676 677
  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_16x4[tx_type].rows(input, outtmp);
678
    for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j];
679 680 681 682
    input += n4;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
683 684 685 686 687 688 689 690
  for (i = 0; i < n4; ++i) {
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt4(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_16x4[tx_type].cols(tmp[i], out[i]);
  }
691 692 693 694 695 696 697 698 699 700 701 702 703 704 705

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n4);
#endif

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n4; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
706
void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
707 708
                           const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
709 710 711
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
712 713 714
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
715
  static const transform_2d IHT_8x16[] = {
Luca Barbato's avatar
Luca Barbato committed
716 717 718 719
    { aom_idct16_c, aom_idct8_c },    // DCT_DCT
    { aom_iadst16_c, aom_idct8_c },   // ADST_DCT
    { aom_idct16_c, aom_iadst8_c },   // DCT_ADST
    { aom_iadst16_c, aom_iadst8_c },  // ADST_ADST
720
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
721 722 723 724 725 726 727 728 729 730 731 732
    { aom_iadst16_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct16_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx8_c },          // IDTX
    { aom_idct16_c, iidtx8_c },       // V_DCT
    { iidtx16_c, aom_idct8_c },       // H_DCT
    { aom_iadst16_c, iidtx8_c },      // V_ADST
    { iidtx16_c, aom_iadst8_c },      // H_ADST
    { aom_iadst16_c, iidtx8_c },      // V_FLIPADST
    { iidtx16_c, aom_iadst8_c },      // H_FLIPADST
733
#endif
734 735 736 737 738
  };

  const int n = 8;
  const int n2 = 16;
  int i, j;
739
  tran_low_t out[8][16], tmp[8][16], outtmp[8];
740 741 742
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

Lester Lu's avatar
Lester Lu committed
743 744
#if CONFIG_LGT
  const tran_high_t *lgtmtx_row[16];
745 746
  int use_lgt_row =
      get_inv_lgt8(IHT_8x16[tx_type].rows, txfm_param, lgtmtx_row, 16);
Lester Lu's avatar
Lester Lu committed
747 748
#endif

749 750
  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
Lester Lu's avatar
Lester Lu committed
751 752 753 754 755 756
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt8(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_8x16[tx_type].rows(input, outtmp);
757
    for (j = 0; j < n; ++j)
758
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
759
    input += n;
760 761 762 763
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
764
    IHT_8x16[tx_type].cols(tmp[i], out[i]);
765 766
  }

767
#if CONFIG_EXT_TX
768
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
769
#endif
770 771 772 773 774 775 776 777 778 779 780

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
781
void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
782 783
                           const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
784 785 786
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
787 788 789
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
790
  static const transform_2d IHT_16x8[] = {
Luca Barbato's avatar
Luca Barbato committed
791 792 793 794
    { aom_idct8_c, aom_idct16_c },    // DCT_DCT
    { aom_iadst8_c, aom_idct16_c },   // ADST_DCT
    { aom_idct8_c, aom_iadst16_c },   // DCT_ADST
    { aom_iadst8_c, aom_iadst16_c },  // ADST_ADST
795
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
796 797 798 799 800 801 802 803 804 805 806 807
    { aom_iadst8_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst16_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // ADST_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx16_c },          // IDTX
    { aom_idct8_c, iidtx16_c },       // V_DCT
    { iidtx8_c, aom_idct16_c },       // H_DCT
    { aom_iadst8_c, iidtx16_c },      // V_ADST
    { iidtx8_c, aom_iadst16_c },      // H_ADST
    { aom_iadst8_c, iidtx16_c },      // V_FLIPADST
    { iidtx8_c, aom_iadst16_c },      // H_FLIPADST
808
#endif
809
  };
810

811 812 813 814
  const int n = 8;
  const int n2 = 16;

  int i, j;
815
  tran_low_t out[16][8], tmp[16][8], outtmp[16];
816 817 818
  tran_low_t *outp = &out[0][0];
  int outstride = n;

Lester Lu's avatar
Lester Lu committed
819 820
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[16];
821 822
  int use_lgt_col =
      get_inv_lgt8(IHT_16x8[tx_type].cols, txfm_param, lgtmtx_col, 16);
Lester Lu's avatar
Lester Lu committed
823 824
#endif

825 826 827 828
  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_16x8[tx_type].rows(input, outtmp);
    for (j = 0; j < n2; ++j)
829
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
830
    input += n2;
831 832 833 834
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) {
Lester Lu's avatar
Lester Lu committed
835 836 837 838 839 840
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt8(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_16x8[tx_type].cols(tmp[i], out[i]);
841 842
  }

843
#if CONFIG_EXT_TX
844
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
845
#endif
846 847 848 849 850 851 852 853 854 855 856

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

857
void av1_iht8x32_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
858 859
                           const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
860 861 862
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
863 864 865
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889
  static const transform_2d IHT_8x32[] = {
    { aom_idct32_c, aom_idct8_c },     // DCT_DCT
    { ihalfright32_c, aom_idct8_c },   // ADST_DCT
    { aom_idct32_c, aom_iadst8_c },    // DCT_ADST
    { ihalfright32_c, aom_iadst8_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { ihalfright32_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct32_c, aom_iadst8_c },    // DCT_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // ADST_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx8_c },           // IDTX
    { aom_idct32_c, iidtx8_c },        // V_DCT
    { iidtx32_c, aom_idct8_c },        // H_DCT
    { ihalfright32_c, iidtx8_c },      // V_ADST
    { iidtx32_c, aom_iadst8_c },       // H_ADST
    { ihalfright32_c, iidtx8_c },      // V_FLIPADST
    { iidtx32_c, aom_iadst8_c },       // H_FLIPADST
#endif
  };

  const int n = 8;
  const int n4 = 32;
  int i, j;
890
  tran_low_t out[8][32], tmp[8][32], outtmp[8];
891 892 893
  tran_low_t *outp = &out[0][0];
  int outstride = n4;

Lester Lu's avatar
Lester Lu committed
894 895
#if CONFIG_LGT
  const tran_high_t *lgtmtx_row[32];
896 897
  int use_lgt_row =
      get_inv_lgt8(IHT_8x32[tx_type].rows, txfm_param, lgtmtx_row, 32);
Lester Lu's avatar
Lester Lu committed
898 899
#endif

900 901
  // inverse transform row vectors and transpose
  for (i = 0; i < n4; ++i) {
Lester Lu's avatar
Lester Lu committed
902 903 904 905 906 907
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt8(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_8x32[tx_type].rows(input, outtmp);
908
    for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
909 910 911 912
    input += n;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
913 914 915
  for (i = 0; i < n; ++i) {
    IHT_8x32[tx_type].cols(tmp[i], out[i]);
  }
916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n4, n);
#endif

  // Sum with the destination
  for (i = 0; i < n4; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

void av1_iht32x8_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
932 933
                           const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
934 935 936
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
937 938 939
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959
  static const transform_2d IHT_32x8[] = {
    { aom_idct8_c, aom_idct32_c },     // DCT_DCT
    { aom_iadst8_c, aom_idct32_c },    // ADST_DCT
    { aom_idct8_c, ihalfright32_c },   // DCT_ADST
    { aom_iadst8_c, ihalfright32_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst8_c, aom_idct32_c },    // FLIPADST_DCT
    { aom_idct8_c, ihalfright32_c },   // DCT_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // ADST_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx32_c },           // IDTX
    { aom_idct8_c, iidtx32_c },        // V_DCT
    { iidtx8_c, aom_idct32_c },        // H_DCT
    { aom_iadst8_c, iidtx32_c },       // V_ADST
    { iidtx8_c, ihalfright32_c },      // H_ADST
    { aom_iadst8_c, iidtx32_c },       // V_FLIPADST
    { iidtx8_c, ihalfright32_c },      // H_FLIPADST
#endif
  };
960

961 962 963 964
  const int n = 8;
  const int n4 = 32;

  int i, j;
965
  tran_low_t out[32][8], tmp[32][8], outtmp[32];
966 967 968
  tran_low_t *outp = &out[0][0];
  int outstride = n;

Lester Lu's avatar
Lester Lu committed
969 970
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[32];
971 972
  int use_lgt_col =
      get_inv_lgt4(IHT_32x8[tx_type].cols, txfm_param, lgtmtx_col, 32);
Lester Lu's avatar
Lester Lu committed
973 974
#endif

975 976 977
  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_32x8[tx_type].rows(input, outtmp);
978
    for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j];
979 980 981 982
    input += n4;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
983 984 985 986 987 988 989 990
  for (i = 0; i < n4; ++i) {
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt8(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_32x8[tx_type].cols(tmp[i], out[i]);
  }
991