idct.c 75.3 KB
Newer Older
Jingning Han's avatar
Jingning Han committed
1
/*
Yaowu Xu's avatar
Yaowu Xu committed
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
Jingning Han's avatar
Jingning Han committed
3
 *
Yaowu Xu's avatar
Yaowu Xu committed
4 5 6 7 8 9
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
Jingning Han's avatar
Jingning Han committed
10 11 12 13
 */

#include <math.h>

Yaowu Xu's avatar
Yaowu Xu committed
14
#include "./aom_dsp_rtcd.h"
Geza Lore's avatar
Geza Lore committed
15 16 17
#include "./av1_rtcd.h"
#include "aom_dsp/inv_txfm.h"
#include "aom_ports/mem.h"
18
#include "av1/common/av1_inv_txfm1d_cfg.h"
19 20 21
#include "av1/common/blockd.h"
#include "av1/common/enums.h"
#include "av1/common/idct.h"
Jingning Han's avatar
Jingning Han committed
22

23
int av1_get_tx_scale(const TX_SIZE tx_size) {
24 25 26 27 28 29 30
  if (txsize_sqr_up_map[tx_size] == TX_32X32) return 1;
#if CONFIG_TX64X64
  else if (txsize_sqr_up_map[tx_size] == TX_64X64)
    return 2;
#endif  // CONFIG_TX64X64
  else
    return 0;
31 32
}

33 34 35
// NOTE: The implementation of all inverses need to be aware of the fact
// that input and output could be the same buffer.

36
#if CONFIG_EXT_TX
Debargha Mukherjee's avatar
Debargha Mukherjee committed
37 38
static void iidtx4_c(const tran_low_t *input, tran_low_t *output) {
  int i;
39 40 41 42
  for (i = 0; i < 4; ++i) {
#if CONFIG_DAALA_DCT4
    output[i] = input[i];
#else
Debargha Mukherjee's avatar
Debargha Mukherjee committed
43
    output[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
44 45
#endif
  }
Debargha Mukherjee's avatar
Debargha Mukherjee committed
46 47 48 49
}

static void iidtx8_c(const tran_low_t *input, tran_low_t *output) {
  int i;
clang-format's avatar
clang-format committed
50
  for (i = 0; i < 8; ++i) output[i] = input[i] * 2;
Debargha Mukherjee's avatar
Debargha Mukherjee committed
51 52 53 54 55 56 57 58 59 60
}

static void iidtx16_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  for (i = 0; i < 16; ++i)
    output[i] = (tran_low_t)dct_const_round_shift(input[i] * 2 * Sqrt2);
}

static void iidtx32_c(const tran_low_t *input, tran_low_t *output) {
  int i;
clang-format's avatar
clang-format committed
61
  for (i = 0; i < 32; ++i) output[i] = input[i] * 4;
Debargha Mukherjee's avatar
Debargha Mukherjee committed
62
}
63 64 65 66 67 68 69 70

#if CONFIG_TX64X64
static void iidtx64_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  for (i = 0; i < 64; ++i)
    output[i] = (tran_low_t)dct_const_round_shift(input[i] * 4 * Sqrt2);
}
#endif  // CONFIG_TX64X64
71
#endif  // CONFIG_EXT_TX
Debargha Mukherjee's avatar
Debargha Mukherjee committed
72

73
// For use in lieu of ADST
74 75 76 77 78 79 80
static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  tran_low_t inputhalf[16];
  // Multiply input by sqrt(2)
  for (i = 0; i < 16; ++i) {
    inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
  }
81 82 83
  for (i = 0; i < 16; ++i) {
    output[i] = input[16 + i] * 4;
  }
Luca Barbato's avatar
Luca Barbato committed
84
  aom_idct16_c(inputhalf, output + 16);
85 86 87
  // Note overall scaling factor is 4 times orthogonal
}

88 89 90 91 92
#if CONFIG_TX64X64
static void idct64_col_c(const tran_low_t *input, tran_low_t *output) {
  int32_t in[64], out[64];
  int i;
  for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
93
  av1_idct64_new(in, out, inv_cos_bit_col_dct_64, inv_stage_range_col_dct_64);
94 95 96 97 98 99 100
  for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}

static void idct64_row_c(const tran_low_t *input, tran_low_t *output) {
  int32_t in[64], out[64];
  int i;
  for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
101
  av1_idct64_new(in, out, inv_cos_bit_row_dct_64, inv_stage_range_row_dct_64);
102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
  for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}

// For use in lieu of ADST
static void ihalfright64_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  tran_low_t inputhalf[32];
  // Multiply input by sqrt(2)
  for (i = 0; i < 32; ++i) {
    inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
  }
  for (i = 0; i < 32; ++i) {
    output[i] = (tran_low_t)dct_const_round_shift(input[32 + i] * 4 * Sqrt2);
  }
  aom_idct32_c(inputhalf, output + 32);
  // Note overall scaling factor is 4 * sqrt(2)  times orthogonal
}
#endif  // CONFIG_TX64X64

Jingning Han's avatar
Jingning Han committed
121
// Inverse identity transform and add.
122
#if CONFIG_EXT_TX
123
static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
Jingning Han's avatar
Jingning Han committed
124
                           int bs, int tx_type) {
125
  int r, c;
126
  const int shift = bs < 32 ? 3 : (bs < 64 ? 2 : 1);
Debargha Mukherjee's avatar
Debargha Mukherjee committed
127
  if (tx_type == IDTX) {
Jingning Han's avatar
Jingning Han committed
128 129
    for (r = 0; r < bs; ++r) {
      for (c = 0; c < bs; ++c)
Debargha Mukherjee's avatar
Debargha Mukherjee committed
130 131 132
        dest[c] = clip_pixel_add(dest[c], input[c] >> shift);
      dest += stride;
      input += bs;
Jingning Han's avatar
Jingning Han committed
133
    }
134 135
  }
}
136
#endif  // CONFIG_EXT_TX
137

clang-format's avatar
clang-format committed
138 139 140 141 142
#define FLIPUD_PTR(dest, stride, size)       \
  do {                                       \
    (dest) = (dest) + ((size)-1) * (stride); \
    (stride) = -(stride);                    \
  } while (0)
143

144
#if CONFIG_EXT_TX
clang-format's avatar
clang-format committed
145 146 147
static void maybe_flip_strides(uint8_t **dst, int *dstride, tran_low_t **src,
                               int *sstride, int tx_type, int sizey,
                               int sizex) {
148 149 150 151 152 153 154 155
  // Note that the transpose of src will be added to dst. In order to LR
  // flip the addends (in dst coordinates), we UD flip the src. To UD flip
  // the addends, we UD flip the dst.
  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
Debargha Mukherjee's avatar
Debargha Mukherjee committed
156
    case IDTX:
Jingning Han's avatar
Jingning Han committed
157 158
    case V_DCT:
    case H_DCT:
159
    case V_ADST:
clang-format's avatar
clang-format committed
160
    case H_ADST: break;
161 162
    case FLIPADST_DCT:
    case FLIPADST_ADST:
163
    case V_FLIPADST:
164
      // flip UD
165
      FLIPUD_PTR(*dst, *dstride, sizey);
166 167 168
      break;
    case DCT_FLIPADST:
    case ADST_FLIPADST:
169
    case H_FLIPADST:
170
      // flip LR
171
      FLIPUD_PTR(*src, *sstride, sizex);
172 173 174
      break;
    case FLIPADST_FLIPADST:
      // flip UD
175
      FLIPUD_PTR(*dst, *dstride, sizey);
176
      // flip LR
177
      FLIPUD_PTR(*src, *sstride, sizex);
178
      break;
clang-format's avatar
clang-format committed
179
    default: assert(0); break;
180 181
  }
}
182
#endif  // CONFIG_EXT_TX
183

184
#if CONFIG_HIGHBITDEPTH
185
#if CONFIG_EXT_TX && CONFIG_TX64X64
186
static void highbd_inv_idtx_add_c(const tran_low_t *input, uint8_t *dest8,
187
                                  int stride, int bs, int tx_type, int bd) {
188 189 190
  int r, c;
  const int shift = bs < 32 ? 3 : 2;
  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
191

Debargha Mukherjee's avatar
Debargha Mukherjee committed
192
  if (tx_type == IDTX) {
193 194
    for (r = 0; r < bs; ++r) {
      for (c = 0; c < bs; ++c)
Debargha Mukherjee's avatar
Debargha Mukherjee committed
195 196 197
        dest[c] = highbd_clip_pixel_add(dest[c], input[c] >> shift, bd);
      dest += stride;
      input += bs;
198
    }
199 200
  }
}
201
#endif  // CONFIG_EXT_TX && CONFIG_TX64X64
202
#endif  // CONFIG_HIGHBITDEPTH
203

Lester Lu's avatar
Lester Lu committed
204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230
#if CONFIG_LGT
void ilgt4(const tran_low_t *input, tran_low_t *output,
           const tran_high_t *lgtmtx) {
  if (!(input[0] | input[1] | input[2] | input[3])) {
    output[0] = output[1] = output[2] = output[3] = 0;
    return;
  }

  // evaluate s[j] = sum of all lgtmtx[i][j]*input[i] over i=1,...,4
  tran_high_t s[4] = { 0 };
  for (int i = 0; i < 4; ++i)
    for (int j = 0; j < 4; ++j) s[j] += lgtmtx[i * 4 + j] * input[i];

  for (int i = 0; i < 4; ++i) output[i] = WRAPLOW(dct_const_round_shift(s[i]));
}

void ilgt8(const tran_low_t *input, tran_low_t *output,
           const tran_high_t *lgtmtx) {
  // evaluate s[j] = sum of all lgtmtx[i][j]*input[i] over i=1,...,8
  tran_high_t s[8] = { 0 };
  for (int i = 0; i < 8; ++i)
    for (int j = 0; j < 8; ++j) s[j] += lgtmtx[i * 8 + j] * input[i];

  for (int i = 0; i < 8; ++i) output[i] = WRAPLOW(dct_const_round_shift(s[i]));
}

// The get_inv_lgt functions return 1 if LGT is chosen to apply, and 0 otherwise
231
int get_inv_lgt4(transform_1d tx_orig, const TxfmParam *txfm_param,
Lester Lu's avatar
Lester Lu committed
232 233 234 235
                 const tran_high_t *lgtmtx[], int ntx) {
  // inter/intra split
  if (tx_orig == &aom_iadst4_c) {
    for (int i = 0; i < ntx; ++i)
236
      lgtmtx[i] = txfm_param->is_inter ? &lgt4_170[0][0] : &lgt4_140[0][0];
Lester Lu's avatar
Lester Lu committed
237 238 239 240 241
    return 1;
  }
  return 0;
}

242
int get_inv_lgt8(transform_1d tx_orig, const TxfmParam *txfm_param,
Lester Lu's avatar
Lester Lu committed
243 244 245 246
                 const tran_high_t *lgtmtx[], int ntx) {
  // inter/intra split
  if (tx_orig == &aom_iadst8_c) {
    for (int i = 0; i < ntx; ++i)
247
      lgtmtx[i] = txfm_param->is_inter ? &lgt8_170[0][0] : &lgt8_150[0][0];
Lester Lu's avatar
Lester Lu committed
248 249 250 251 252 253
    return 1;
  }
  return 0;
}
#endif  // CONFIG_LGT

Yaowu Xu's avatar
Yaowu Xu committed
254
void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
255 256
                         const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
257
#if !CONFIG_DAALA_DCT4
258 259 260 261
  if (tx_type == DCT_DCT) {
    aom_idct4x4_16_add(input, dest, stride);
    return;
  }
262
#endif
263
  static const transform_2d IHT_4[] = {
Luca Barbato's avatar
Luca Barbato committed
264 265 266 267
    { aom_idct4_c, aom_idct4_c },    // DCT_DCT  = 0
    { aom_iadst4_c, aom_idct4_c },   // ADST_DCT = 1
    { aom_idct4_c, aom_iadst4_c },   // DCT_ADST = 2
    { aom_iadst4_c, aom_iadst4_c },  // ADST_ADST = 3
268
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
269 270 271 272 273 274 275 276 277 278 279 280
    { aom_iadst4_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx4_c },          // IDTX
    { aom_idct4_c, iidtx4_c },       // V_DCT
    { iidtx4_c, aom_idct4_c },       // H_DCT
    { aom_iadst4_c, iidtx4_c },      // V_ADST
    { iidtx4_c, aom_iadst4_c },      // H_ADST
    { aom_iadst4_c, iidtx4_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst4_c },      // H_FLIPADST
281
#endif
282 283 284
  };

  int i, j;
285
  tran_low_t tmp[4][4];
286 287 288
  tran_low_t out[4][4];
  tran_low_t *outp = &out[0][0];
  int outstride = 4;
289

290 291 292 293
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif

Lester Lu's avatar
Lester Lu committed
294 295 296
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[4];
  const tran_high_t *lgtmtx_row[4];
297 298 299 300
  int use_lgt_col =
      get_inv_lgt4(IHT_4[tx_type].cols, txfm_param, lgtmtx_col, 4);
  int use_lgt_row =
      get_inv_lgt4(IHT_4[tx_type].rows, txfm_param, lgtmtx_row, 4);
Lester Lu's avatar
Lester Lu committed
301 302
#endif

303 304
  // inverse transform row vectors
  for (i = 0; i < 4; ++i) {
305 306 307 308 309
#if CONFIG_DAALA_DCT4
    tran_low_t temp_in[4];
    for (j = 0; j < 4; j++) temp_in[j] = input[j] << 1;
    IHT_4[tx_type].rows(temp_in, out[i]);
#else
Lester Lu's avatar
Lester Lu committed
310 311 312 313 314 315
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt4(input, out[i], lgtmtx_row[i]);
    else
#endif
      IHT_4[tx_type].rows(input, out[i]);
316
#endif
clang-format's avatar
clang-format committed
317
    input += 4;
318 319 320
  }

  // transpose
321 322 323
  for (i = 0; i < 4; i++) {
    for (j = 0; j < 4; j++) {
      tmp[j][i] = out[i][j];
324
    }
325 326 327 328
  }

  // inverse transform column vectors
  for (i = 0; i < 4; ++i) {
Lester Lu's avatar
Lester Lu committed
329 330 331 332 333 334
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt4(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_4[tx_type].cols(tmp[i], out[i]);
335 336 337
  }

#if CONFIG_EXT_TX
338
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4, 4);
339 340 341 342
#endif

  // Sum with the destination
  for (i = 0; i < 4; ++i) {
343
    for (j = 0; j < 4; ++j) {
344 345
      int d = i * stride + j;
      int s = j * outstride + i;
346
#if CONFIG_DAALA_DCT4
347
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
348 349 350
#else
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
#endif
351 352 353 354
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
355
void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
356 357
                         const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
358 359 360
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
361
  static const transform_2d IHT_4x8[] = {
Luca Barbato's avatar
Luca Barbato committed
362 363 364 365
    { aom_idct8_c, aom_idct4_c },    // DCT_DCT
    { aom_iadst8_c, aom_idct4_c },   // ADST_DCT
    { aom_idct8_c, aom_iadst4_c },   // DCT_ADST
    { aom_iadst8_c, aom_iadst4_c },  // ADST_ADST
366
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
367 368 369 370 371 372 373 374 375 376 377 378
    { aom_iadst8_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx4_c },          // IDTX
    { aom_idct8_c, iidtx4_c },       // V_DCT
    { iidtx8_c, aom_idct4_c },       // H_DCT
    { aom_iadst8_c, iidtx4_c },      // V_ADST
    { iidtx8_c, aom_iadst4_c },      // H_ADST
    { aom_iadst8_c, iidtx4_c },      // V_FLIPADST
    { iidtx8_c, aom_iadst4_c },      // H_FLIPADST
379
#endif
380 381
  };

382 383
  const int n = 4;
  const int n2 = 8;
384
  int i, j;
385
  tran_low_t out[4][8], tmp[4][8], outtmp[4];
386
  tran_low_t *outp = &out[0][0];
387
  int outstride = n2;
388

Lester Lu's avatar
Lester Lu committed
389 390 391
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[4];
  const tran_high_t *lgtmtx_row[8];
392 393 394 395
  int use_lgt_col =
      get_inv_lgt8(IHT_4x8[tx_type].cols, txfm_param, lgtmtx_col, 4);
  int use_lgt_row =
      get_inv_lgt4(IHT_4x8[tx_type].rows, txfm_param, lgtmtx_row, 8);
Lester Lu's avatar
Lester Lu committed
396 397
#endif

398
  // inverse transform row vectors and transpose
399
  for (i = 0; i < n2; ++i) {
Lester Lu's avatar
Lester Lu committed
400 401 402 403 404 405
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt4(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_4x8[tx_type].rows(input, outtmp);
406
    for (j = 0; j < n; ++j)
407
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
408
    input += n;
409 410 411
  }

  // inverse transform column vectors
412
  for (i = 0; i < n; ++i) {
Lester Lu's avatar
Lester Lu committed
413 414 415 416 417 418
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt8(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_4x8[tx_type].cols(tmp[i], out[i]);
419 420
  }

421
#if CONFIG_EXT_TX
422
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
423
#endif
424 425

  // Sum with the destination
426 427
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
428 429 430 431 432 433 434
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
435
void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
436 437
                         const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
438 439 440
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
441
  static const transform_2d IHT_8x4[] = {
Luca Barbato's avatar
Luca Barbato committed
442 443 444 445
    { aom_idct4_c, aom_idct8_c },    // DCT_DCT
    { aom_iadst4_c, aom_idct8_c },   // ADST_DCT
    { aom_idct4_c, aom_iadst8_c },   // DCT_ADST
    { aom_iadst4_c, aom_iadst8_c },  // ADST_ADST
446
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
447 448 449 450 451 452 453 454 455 456 457 458
    { aom_iadst4_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx8_c },          // IDTX
    { aom_idct4_c, iidtx8_c },       // V_DCT
    { iidtx4_c, aom_idct8_c },       // H_DCT
    { aom_iadst4_c, iidtx8_c },      // V_ADST
    { iidtx4_c, aom_iadst8_c },      // H_ADST
    { aom_iadst4_c, iidtx8_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst8_c },      // H_FLIPADST
459
#endif
460
  };
461

462 463
  const int n = 4;
  const int n2 = 8;
464 465

  int i, j;
466
  tran_low_t out[8][4], tmp[8][4], outtmp[8];
467
  tran_low_t *outp = &out[0][0];
468
  int outstride = n;
469

Lester Lu's avatar
Lester Lu committed
470 471 472
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[8];
  const tran_high_t *lgtmtx_row[4];
473 474 475 476
  int use_lgt_col =
      get_inv_lgt4(IHT_8x4[tx_type].cols, txfm_param, lgtmtx_col, 8);
  int use_lgt_row =
      get_inv_lgt8(IHT_8x4[tx_type].rows, txfm_param, lgtmtx_row, 4);
Lester Lu's avatar
Lester Lu committed
477 478
#endif

479
  // inverse transform row vectors and transpose
480
  for (i = 0; i < n; ++i) {
Lester Lu's avatar
Lester Lu committed
481 482 483 484 485 486
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt8(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_8x4[tx_type].rows(input, outtmp);
487
    for (j = 0; j < n2; ++j)
488
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
489
    input += n2;
490 491 492
  }

  // inverse transform column vectors
493
  for (i = 0; i < n2; ++i) {
Lester Lu's avatar
Lester Lu committed
494 495 496 497 498 499
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt4(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_8x4[tx_type].cols(tmp[i], out[i]);
500 501
  }

502
#if CONFIG_EXT_TX
503
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
504
#endif
505 506

  // Sum with the destination
507 508
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
509 510 511 512 513 514 515
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

516
void av1_iht4x16_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
517 518
                          const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
519 520 521
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545
  static const transform_2d IHT_4x16[] = {
    { aom_idct16_c, aom_idct4_c },    // DCT_DCT
    { aom_iadst16_c, aom_idct4_c },   // ADST_DCT
    { aom_idct16_c, aom_iadst4_c },   // DCT_ADST
    { aom_iadst16_c, aom_iadst4_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst16_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct16_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx4_c },          // IDTX
    { aom_idct16_c, iidtx4_c },       // V_DCT
    { iidtx16_c, aom_idct4_c },       // H_DCT
    { aom_iadst16_c, iidtx4_c },      // V_ADST
    { iidtx16_c, aom_iadst4_c },      // H_ADST
    { aom_iadst16_c, iidtx4_c },      // V_FLIPADST
    { iidtx16_c, aom_iadst4_c },      // H_FLIPADST
#endif
  };

  const int n = 4;
  const int n4 = 16;
  int i, j;
546
  tran_low_t out[4][16], tmp[4][16], outtmp[4];
547 548 549
  tran_low_t *outp = &out[0][0];
  int outstride = n4;

Lester Lu's avatar
Lester Lu committed
550 551
#if CONFIG_LGT
  const tran_high_t *lgtmtx_row[16];
552 553
  int use_lgt_row =
      get_inv_lgt4(IHT_4x16[tx_type].rows, txfm_param, lgtmtx_row, 16);
Lester Lu's avatar
Lester Lu committed
554 555
#endif

556 557
  // inverse transform row vectors and transpose
  for (i = 0; i < n4; ++i) {
Lester Lu's avatar
Lester Lu committed
558 559 560 561 562 563
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt4(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_4x16[tx_type].rows(input, outtmp);
564
    for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
565 566 567 568
    input += n;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
569 570 571
  for (i = 0; i < n; ++i) {
    IHT_4x16[tx_type].cols(tmp[i], out[i]);
  }
572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n4, n);
#endif

  // Sum with the destination
  for (i = 0; i < n4; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

void av1_iht16x4_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
588 589
                          const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
590 591 592
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612
  static const transform_2d IHT_16x4[] = {
    { aom_idct4_c, aom_idct16_c },    // DCT_DCT
    { aom_iadst4_c, aom_idct16_c },   // ADST_DCT
    { aom_idct4_c, aom_iadst16_c },   // DCT_ADST
    { aom_iadst4_c, aom_iadst16_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst4_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst16_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx16_c },          // IDTX
    { aom_idct4_c, iidtx16_c },       // V_DCT
    { iidtx4_c, aom_idct16_c },       // H_DCT
    { aom_iadst4_c, iidtx16_c },      // V_ADST
    { iidtx4_c, aom_iadst16_c },      // H_ADST
    { aom_iadst4_c, iidtx16_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst16_c },      // H_FLIPADST
#endif
  };
613

614 615 616 617
  const int n = 4;
  const int n4 = 16;

  int i, j;
618
  tran_low_t out[16][4], tmp[16][4], outtmp[16];
619 620 621
  tran_low_t *outp = &out[0][0];
  int outstride = n;

Lester Lu's avatar
Lester Lu committed
622 623
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[16];
624 625
  int use_lgt_col =
      get_inv_lgt4(IHT_16x4[tx_type].cols, txfm_param, lgtmtx_col, 16);
Lester Lu's avatar
Lester Lu committed
626 627
#endif

628 629 630
  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_16x4[tx_type].rows(input, outtmp);
631
    for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j];
632 633 634 635
    input += n4;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
636 637 638 639 640 641 642 643
  for (i = 0; i < n4; ++i) {
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt4(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_16x4[tx_type].cols(tmp[i], out[i]);
  }
644 645 646 647 648 649 650 651 652 653 654 655 656 657 658

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n4);
#endif

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n4; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
659
void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
660 661
                           const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
662 663 664
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
665
  static const transform_2d IHT_8x16[] = {
Luca Barbato's avatar
Luca Barbato committed
666 667 668 669
    { aom_idct16_c, aom_idct8_c },    // DCT_DCT
    { aom_iadst16_c, aom_idct8_c },   // ADST_DCT
    { aom_idct16_c, aom_iadst8_c },   // DCT_ADST
    { aom_iadst16_c, aom_iadst8_c },  // ADST_ADST
670
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
671 672 673 674 675 676 677 678 679 680 681 682
    { aom_iadst16_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct16_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx8_c },          // IDTX
    { aom_idct16_c, iidtx8_c },       // V_DCT
    { iidtx16_c, aom_idct8_c },       // H_DCT
    { aom_iadst16_c, iidtx8_c },      // V_ADST
    { iidtx16_c, aom_iadst8_c },      // H_ADST
    { aom_iadst16_c, iidtx8_c },      // V_FLIPADST
    { iidtx16_c, aom_iadst8_c },      // H_FLIPADST
683
#endif
684 685 686 687 688
  };

  const int n = 8;
  const int n2 = 16;
  int i, j;
689
  tran_low_t out[8][16], tmp[8][16], outtmp[8];
690 691 692
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

Lester Lu's avatar
Lester Lu committed
693 694
#if CONFIG_LGT
  const tran_high_t *lgtmtx_row[16];
695 696
  int use_lgt_row =
      get_inv_lgt8(IHT_8x16[tx_type].rows, txfm_param, lgtmtx_row, 16);
Lester Lu's avatar
Lester Lu committed
697 698
#endif

699 700
  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
Lester Lu's avatar
Lester Lu committed
701 702 703 704 705 706
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt8(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_8x16[tx_type].rows(input, outtmp);
707
    for (j = 0; j < n; ++j)
708
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
709
    input += n;
710 711 712 713
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
714
    IHT_8x16[tx_type].cols(tmp[i], out[i]);
715 716
  }

717
#if CONFIG_EXT_TX
718
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
719
#endif
720 721 722 723 724 725 726 727 728 729 730

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
731
void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
732 733
                           const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
734 735 736
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
737
  static const transform_2d IHT_16x8[] = {
Luca Barbato's avatar
Luca Barbato committed
738 739 740 741
    { aom_idct8_c, aom_idct16_c },    // DCT_DCT
    { aom_iadst8_c, aom_idct16_c },   // ADST_DCT
    { aom_idct8_c, aom_iadst16_c },   // DCT_ADST
    { aom_iadst8_c, aom_iadst16_c },  // ADST_ADST
742
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
743 744 745 746 747 748 749 750 751 752 753 754
    { aom_iadst8_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst16_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // ADST_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx16_c },          // IDTX
    { aom_idct8_c, iidtx16_c },       // V_DCT
    { iidtx8_c, aom_idct16_c },       // H_DCT
    { aom_iadst8_c, iidtx16_c },      // V_ADST
    { iidtx8_c, aom_iadst16_c },      // H_ADST
    { aom_iadst8_c, iidtx16_c },      // V_FLIPADST
    { iidtx8_c, aom_iadst16_c },      // H_FLIPADST
755
#endif
756
  };
757

758 759 760 761
  const int n = 8;
  const int n2 = 16;

  int i, j;
762
  tran_low_t out[16][8], tmp[16][8], outtmp[16];
763 764 765
  tran_low_t *outp = &out[0][0];
  int outstride = n;

Lester Lu's avatar
Lester Lu committed
766 767
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[16];
768 769
  int use_lgt_col =
      get_inv_lgt8(IHT_16x8[tx_type].cols, txfm_param, lgtmtx_col, 16);
Lester Lu's avatar
Lester Lu committed
770 771
#endif

772 773 774 775
  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_16x8[tx_type].rows(input, outtmp);
    for (j = 0; j < n2; ++j)
776
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
777
    input += n2;
778 779 780 781
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) {
Lester Lu's avatar
Lester Lu committed
782 783 784 785 786 787
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt8(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_16x8[tx_type].cols(tmp[i], out[i]);
788 789
  }

790
#if CONFIG_EXT_TX
791
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
792
#endif
793 794 795 796 797 798 799 800 801 802 803

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

804
void av1_iht8x32_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
805 806
                           const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
807 808 809
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833
  static const transform_2d IHT_8x32[] = {
    { aom_idct32_c, aom_idct8_c },     // DCT_DCT
    { ihalfright32_c, aom_idct8_c },   // ADST_DCT
    { aom_idct32_c, aom_iadst8_c },    // DCT_ADST
    { ihalfright32_c, aom_iadst8_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { ihalfright32_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct32_c, aom_iadst8_c },    // DCT_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // ADST_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx8_c },           // IDTX
    { aom_idct32_c, iidtx8_c },        // V_DCT
    { iidtx32_c, aom_idct8_c },        // H_DCT
    { ihalfright32_c, iidtx8_c },      // V_ADST
    { iidtx32_c, aom_iadst8_c },       // H_ADST
    { ihalfright32_c, iidtx8_c },      // V_FLIPADST
    { iidtx32_c, aom_iadst8_c },       // H_FLIPADST
#endif
  };

  const int n = 8;
  const int n4 = 32;
  int i, j;
834
  tran_low_t out[8][32], tmp[8][32], outtmp[8];
835 836 837
  tran_low_t *outp = &out[0][0];
  int outstride = n4;

Lester Lu's avatar
Lester Lu committed
838 839
#if CONFIG_LGT
  const tran_high_t *lgtmtx_row[32];
840 841
  int use_lgt_row =
      get_inv_lgt8(IHT_8x32[tx_type].rows, txfm_param, lgtmtx_row, 32);
Lester Lu's avatar
Lester Lu committed
842 843
#endif

844 845
  // inverse transform row vectors and transpose
  for (i = 0; i < n4; ++i) {
Lester Lu's avatar
Lester Lu committed
846 847 848 849 850 851
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt8(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_8x32[tx_type].rows(input, outtmp);
852
    for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
853 854 855 856
    input += n;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
857 858 859
  for (i = 0; i < n; ++i) {
    IHT_8x32[tx_type].cols(tmp[i], out[i]);
  }
860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n4, n);
#endif

  // Sum with the destination
  for (i = 0; i < n4; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

void av1_iht32x8_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
876 877
                           const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
878 879 880
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900
  static const transform_2d IHT_32x8[] = {
    { aom_idct8_c, aom_idct32_c },     // DCT_DCT
    { aom_iadst8_c, aom_idct32_c },    // ADST_DCT
    { aom_idct8_c, ihalfright32_c },   // DCT_ADST
    { aom_iadst8_c, ihalfright32_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst8_c, aom_idct32_c },    // FLIPADST_DCT
    { aom_idct8_c, ihalfright32_c },   // DCT_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // ADST_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx32_c },           // IDTX
    { aom_idct8_c, iidtx32_c },        // V_DCT
    { iidtx8_c, aom_idct32_c },        // H_DCT
    { aom_iadst8_c, iidtx32_c },       // V_ADST
    { iidtx8_c, ihalfright32_c },      // H_ADST
    { aom_iadst8_c, iidtx32_c },       // V_FLIPADST
    { iidtx8_c, ihalfright32_c },      // H_FLIPADST
#endif
  };
901

902 903 904 905
  const int n = 8;
  const int n4 = 32;

  int i, j;
906
  tran_low_t out[32][8], tmp[32][8], outtmp[32];
907 908 909
  tran_low_t *outp = &out[0][0];
  int outstride = n;

Lester Lu's avatar
Lester Lu committed
910 911
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[32];
912 913
  int use_lgt_col =
      get_inv_lgt4(IHT_32x8[tx_type].cols, txfm_param, lgtmtx_col, 32);
Lester Lu's avatar
Lester Lu committed
914 915
#endif

916 917 918
  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_32x8[tx_type].rows(input, outtmp);
919
    for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j];
920 921 922 923
    input += n4;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
924 925 926 927 928 929 930 931
  for (i = 0; i < n4; ++i) {
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt8(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_32x8[tx_type].cols(tmp[i], out[i]);
  }
932 933 934 935 936 937 938 939 940 941 942 943 944 945 946

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n4);
#endif

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n4; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
947
void av1_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
948 949
                            const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
950 951 952
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
953
  static const transform_2d IHT_16x32[] = {
Luca Barbato's avatar
Luca Barbato committed
954 955 956 957
    { aom_idct32_c, aom_idct16_c },     // DCT_DCT
    { ihalfright32_c, aom_idct16_c },   // ADST_DCT
    { aom_idct32_c, aom_iadst16_c },    // DCT_ADST
    { ihalfright32_c, aom_iadst16_c },  // ADST_ADST
958
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
959 960 961 962 963 964 965 966 967 968 969 970
    { ihalfright32_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct32_c, aom_iadst16_c },    // DCT_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // ADST_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx16_c },           // IDTX
    { aom_idct32_c, iidtx16_c },        // V_DCT
    { iidtx32_c, aom_idct16_c },        // H_DCT
    { ihalfright32_c, iidtx16_c },      // V_ADST
    { iidtx32_c, aom_iadst16_c },       // H_ADST
    { ihalfright32_c, iidtx16_c },      // V_FLIPADST
    { iidtx32_c, aom_iadst16_c },       // H_FLIPADST
971
#endif
972 973 974 975 976
  };

  const int n = 16;
  const int n2 = 32;
  int i, j;
977
  tran_low_t out[16][32], tmp[16][32], outtmp[16];
978 979 980 981 982 983 984
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
    IHT_16x32[tx_type].rows(input, outtmp);
    for (j = 0; j < n; ++j)
985
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
986
    input += n;
987 988 989
  }

  // inverse transform column vectors