idct.c 73 KB
Newer Older
Jingning Han's avatar
Jingning Han committed
1
/*
Yaowu Xu's avatar
Yaowu Xu committed
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
Jingning Han's avatar
Jingning Han committed
3
 *
Yaowu Xu's avatar
Yaowu Xu committed
4 5 6 7 8 9
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
Jingning Han's avatar
Jingning Han committed
10 11 12 13
 */

#include <math.h>

Yaowu Xu's avatar
Yaowu Xu committed
14
#include "./aom_dsp_rtcd.h"
Geza Lore's avatar
Geza Lore committed
15 16 17
#include "./av1_rtcd.h"
#include "aom_dsp/inv_txfm.h"
#include "aom_ports/mem.h"
18
#include "av1/common/av1_inv_txfm1d_cfg.h"
19 20 21
#include "av1/common/blockd.h"
#include "av1/common/enums.h"
#include "av1/common/idct.h"
Jingning Han's avatar
Jingning Han committed
22

23
int av1_get_tx_scale(const TX_SIZE tx_size) {
24 25 26 27 28 29 30
  if (txsize_sqr_up_map[tx_size] == TX_32X32) return 1;
#if CONFIG_TX64X64
  else if (txsize_sqr_up_map[tx_size] == TX_64X64)
    return 2;
#endif  // CONFIG_TX64X64
  else
    return 0;
31 32
}

33 34 35
// NOTE: The implementation of all inverses need to be aware of the fact
// that input and output could be the same buffer.

36
#if CONFIG_EXT_TX
Debargha Mukherjee's avatar
Debargha Mukherjee committed
37 38 39 40 41 42 43 44
static void iidtx4_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  for (i = 0; i < 4; ++i)
    output[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
}

static void iidtx8_c(const tran_low_t *input, tran_low_t *output) {
  int i;
clang-format's avatar
clang-format committed
45
  for (i = 0; i < 8; ++i) output[i] = input[i] * 2;
Debargha Mukherjee's avatar
Debargha Mukherjee committed
46 47 48 49 50 51 52 53 54 55
}

static void iidtx16_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  for (i = 0; i < 16; ++i)
    output[i] = (tran_low_t)dct_const_round_shift(input[i] * 2 * Sqrt2);
}

static void iidtx32_c(const tran_low_t *input, tran_low_t *output) {
  int i;
clang-format's avatar
clang-format committed
56
  for (i = 0; i < 32; ++i) output[i] = input[i] * 4;
Debargha Mukherjee's avatar
Debargha Mukherjee committed
57
}
58 59 60 61 62 63 64 65

#if CONFIG_TX64X64
static void iidtx64_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  for (i = 0; i < 64; ++i)
    output[i] = (tran_low_t)dct_const_round_shift(input[i] * 4 * Sqrt2);
}
#endif  // CONFIG_TX64X64
66
#endif  // CONFIG_EXT_TX
Debargha Mukherjee's avatar
Debargha Mukherjee committed
67

68
// For use in lieu of ADST
69 70 71 72 73 74 75
static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  tran_low_t inputhalf[16];
  // Multiply input by sqrt(2)
  for (i = 0; i < 16; ++i) {
    inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
  }
76 77 78
  for (i = 0; i < 16; ++i) {
    output[i] = input[16 + i] * 4;
  }
Luca Barbato's avatar
Luca Barbato committed
79
  aom_idct16_c(inputhalf, output + 16);
80 81 82
  // Note overall scaling factor is 4 times orthogonal
}

83 84 85 86 87
#if CONFIG_TX64X64
static void idct64_col_c(const tran_low_t *input, tran_low_t *output) {
  int32_t in[64], out[64];
  int i;
  for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
88
  av1_idct64_new(in, out, inv_cos_bit_col_dct_64, inv_stage_range_col_dct_64);
89 90 91 92 93 94 95
  for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}

static void idct64_row_c(const tran_low_t *input, tran_low_t *output) {
  int32_t in[64], out[64];
  int i;
  for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
96
  av1_idct64_new(in, out, inv_cos_bit_row_dct_64, inv_stage_range_row_dct_64);
97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
  for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}

// For use in lieu of ADST
static void ihalfright64_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  tran_low_t inputhalf[32];
  // Multiply input by sqrt(2)
  for (i = 0; i < 32; ++i) {
    inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
  }
  for (i = 0; i < 32; ++i) {
    output[i] = (tran_low_t)dct_const_round_shift(input[32 + i] * 4 * Sqrt2);
  }
  aom_idct32_c(inputhalf, output + 32);
  // Note overall scaling factor is 4 * sqrt(2)  times orthogonal
}
#endif  // CONFIG_TX64X64

Jingning Han's avatar
Jingning Han committed
116
// Inverse identity transform and add.
117
#if CONFIG_EXT_TX
118
static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
Jingning Han's avatar
Jingning Han committed
119
                           int bs, int tx_type) {
120
  int r, c;
121
  const int shift = bs < 32 ? 3 : (bs < 64 ? 2 : 1);
Debargha Mukherjee's avatar
Debargha Mukherjee committed
122
  if (tx_type == IDTX) {
Jingning Han's avatar
Jingning Han committed
123 124
    for (r = 0; r < bs; ++r) {
      for (c = 0; c < bs; ++c)
Debargha Mukherjee's avatar
Debargha Mukherjee committed
125 126 127
        dest[c] = clip_pixel_add(dest[c], input[c] >> shift);
      dest += stride;
      input += bs;
Jingning Han's avatar
Jingning Han committed
128
    }
129 130
  }
}
131
#endif  // CONFIG_EXT_TX
132

clang-format's avatar
clang-format committed
133 134 135 136 137
#define FLIPUD_PTR(dest, stride, size)       \
  do {                                       \
    (dest) = (dest) + ((size)-1) * (stride); \
    (stride) = -(stride);                    \
  } while (0)
138

139
#if CONFIG_EXT_TX
clang-format's avatar
clang-format committed
140 141 142
static void maybe_flip_strides(uint8_t **dst, int *dstride, tran_low_t **src,
                               int *sstride, int tx_type, int sizey,
                               int sizex) {
143 144 145 146 147 148 149 150
  // Note that the transpose of src will be added to dst. In order to LR
  // flip the addends (in dst coordinates), we UD flip the src. To UD flip
  // the addends, we UD flip the dst.
  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
Debargha Mukherjee's avatar
Debargha Mukherjee committed
151
    case IDTX:
Jingning Han's avatar
Jingning Han committed
152 153
    case V_DCT:
    case H_DCT:
154
    case V_ADST:
clang-format's avatar
clang-format committed
155
    case H_ADST: break;
156 157
    case FLIPADST_DCT:
    case FLIPADST_ADST:
158
    case V_FLIPADST:
159
      // flip UD
160
      FLIPUD_PTR(*dst, *dstride, sizey);
161 162 163
      break;
    case DCT_FLIPADST:
    case ADST_FLIPADST:
164
    case H_FLIPADST:
165
      // flip LR
166
      FLIPUD_PTR(*src, *sstride, sizex);
167 168 169
      break;
    case FLIPADST_FLIPADST:
      // flip UD
170
      FLIPUD_PTR(*dst, *dstride, sizey);
171
      // flip LR
172
      FLIPUD_PTR(*src, *sstride, sizex);
173
      break;
clang-format's avatar
clang-format committed
174
    default: assert(0); break;
175 176
  }
}
177
#endif  // CONFIG_EXT_TX
178

179
#if CONFIG_HIGHBITDEPTH
180
#if CONFIG_EXT_TX && CONFIG_TX64X64
181
static void highbd_inv_idtx_add_c(const tran_low_t *input, uint8_t *dest8,
182
                                  int stride, int bs, int tx_type, int bd) {
183 184 185
  int r, c;
  const int shift = bs < 32 ? 3 : 2;
  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
186

Debargha Mukherjee's avatar
Debargha Mukherjee committed
187
  if (tx_type == IDTX) {
188 189
    for (r = 0; r < bs; ++r) {
      for (c = 0; c < bs; ++c)
Debargha Mukherjee's avatar
Debargha Mukherjee committed
190 191 192
        dest[c] = highbd_clip_pixel_add(dest[c], input[c] >> shift, bd);
      dest += stride;
      input += bs;
193
    }
194 195
  }
}
196
#endif  // CONFIG_EXT_TX && CONFIG_TX64X64
197
#endif  // CONFIG_HIGHBITDEPTH
198

Lester Lu's avatar
Lester Lu committed
199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248
#if CONFIG_LGT
void ilgt4(const tran_low_t *input, tran_low_t *output,
           const tran_high_t *lgtmtx) {
  if (!(input[0] | input[1] | input[2] | input[3])) {
    output[0] = output[1] = output[2] = output[3] = 0;
    return;
  }

  // evaluate s[j] = sum of all lgtmtx[i][j]*input[i] over i=1,...,4
  tran_high_t s[4] = { 0 };
  for (int i = 0; i < 4; ++i)
    for (int j = 0; j < 4; ++j) s[j] += lgtmtx[i * 4 + j] * input[i];

  for (int i = 0; i < 4; ++i) output[i] = WRAPLOW(dct_const_round_shift(s[i]));
}

void ilgt8(const tran_low_t *input, tran_low_t *output,
           const tran_high_t *lgtmtx) {
  // evaluate s[j] = sum of all lgtmtx[i][j]*input[i] over i=1,...,8
  tran_high_t s[8] = { 0 };
  for (int i = 0; i < 8; ++i)
    for (int j = 0; j < 8; ++j) s[j] += lgtmtx[i * 8 + j] * input[i];

  for (int i = 0; i < 8; ++i) output[i] = WRAPLOW(dct_const_round_shift(s[i]));
}

// The get_inv_lgt functions return 1 if LGT is chosen to apply, and 0 otherwise
int get_inv_lgt4(transform_1d tx_orig, const INV_TXFM_PARAM *inv_txfm_param,
                 const tran_high_t *lgtmtx[], int ntx) {
  // inter/intra split
  if (tx_orig == &aom_iadst4_c) {
    for (int i = 0; i < ntx; ++i)
      lgtmtx[i] = inv_txfm_param->is_inter ? &lgt4_170[0][0] : &lgt4_140[0][0];
    return 1;
  }
  return 0;
}

int get_inv_lgt8(transform_1d tx_orig, const INV_TXFM_PARAM *inv_txfm_param,
                 const tran_high_t *lgtmtx[], int ntx) {
  // inter/intra split
  if (tx_orig == &aom_iadst8_c) {
    for (int i = 0; i < ntx; ++i)
      lgtmtx[i] = inv_txfm_param->is_inter ? &lgt8_170[0][0] : &lgt8_150[0][0];
    return 1;
  }
  return 0;
}
#endif  // CONFIG_LGT

Yaowu Xu's avatar
Yaowu Xu committed
249
void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
250 251
                         const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
252
  static const transform_2d IHT_4[] = {
Luca Barbato's avatar
Luca Barbato committed
253 254 255 256
    { aom_idct4_c, aom_idct4_c },    // DCT_DCT  = 0
    { aom_iadst4_c, aom_idct4_c },   // ADST_DCT = 1
    { aom_idct4_c, aom_iadst4_c },   // DCT_ADST = 2
    { aom_iadst4_c, aom_iadst4_c },  // ADST_ADST = 3
257
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
258 259 260 261 262 263 264 265 266 267 268 269
    { aom_iadst4_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx4_c },          // IDTX
    { aom_idct4_c, iidtx4_c },       // V_DCT
    { iidtx4_c, aom_idct4_c },       // H_DCT
    { aom_iadst4_c, iidtx4_c },      // V_ADST
    { iidtx4_c, aom_iadst4_c },      // H_ADST
    { aom_iadst4_c, iidtx4_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst4_c },      // H_FLIPADST
270
#endif
271 272 273
  };

  int i, j;
274
  tran_low_t tmp[4][4];
275 276 277
  tran_low_t out[4][4];
  tran_low_t *outp = &out[0][0];
  int outstride = 4;
278

Lester Lu's avatar
Lester Lu committed
279 280 281 282 283 284 285
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[4];
  const tran_high_t *lgtmtx_row[4];
  int use_lgt_col = get_inv_lgt4(IHT_4[tx_type].cols, param, lgtmtx_col, 4);
  int use_lgt_row = get_inv_lgt4(IHT_4[tx_type].rows, param, lgtmtx_row, 4);
#endif

286 287
  // inverse transform row vectors
  for (i = 0; i < 4; ++i) {
Lester Lu's avatar
Lester Lu committed
288 289 290 291 292 293
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt4(input, out[i], lgtmtx_row[i]);
    else
#endif
      IHT_4[tx_type].rows(input, out[i]);
clang-format's avatar
clang-format committed
294
    input += 4;
295 296 297
  }

  // transpose
298 299 300
  for (i = 0; i < 4; i++) {
    for (j = 0; j < 4; j++) {
      tmp[j][i] = out[i][j];
301
    }
302 303 304 305
  }

  // inverse transform column vectors
  for (i = 0; i < 4; ++i) {
Lester Lu's avatar
Lester Lu committed
306 307 308 309 310 311
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt4(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_4[tx_type].cols(tmp[i], out[i]);
312 313 314
  }

#if CONFIG_EXT_TX
315
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4, 4);
316 317 318 319
#endif

  // Sum with the destination
  for (i = 0; i < 4; ++i) {
320
    for (j = 0; j < 4; ++j) {
321 322 323
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
324 325 326 327
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
328
void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
329 330
                         const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
331
  static const transform_2d IHT_4x8[] = {
Luca Barbato's avatar
Luca Barbato committed
332 333 334 335
    { aom_idct8_c, aom_idct4_c },    // DCT_DCT
    { aom_iadst8_c, aom_idct4_c },   // ADST_DCT
    { aom_idct8_c, aom_iadst4_c },   // DCT_ADST
    { aom_iadst8_c, aom_iadst4_c },  // ADST_ADST
336
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
337 338 339 340 341 342 343 344 345 346 347 348
    { aom_iadst8_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx4_c },          // IDTX
    { aom_idct8_c, iidtx4_c },       // V_DCT
    { iidtx8_c, aom_idct4_c },       // H_DCT
    { aom_iadst8_c, iidtx4_c },      // V_ADST
    { iidtx8_c, aom_iadst4_c },      // H_ADST
    { aom_iadst8_c, iidtx4_c },      // V_FLIPADST
    { iidtx8_c, aom_iadst4_c },      // H_FLIPADST
349
#endif
350 351
  };

352 353
  const int n = 4;
  const int n2 = 8;
354
  int i, j;
355
  tran_low_t out[4][8], tmp[4][8], outtmp[4];
356
  tran_low_t *outp = &out[0][0];
357
  int outstride = n2;
358

Lester Lu's avatar
Lester Lu committed
359 360 361 362 363 364 365
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[4];
  const tran_high_t *lgtmtx_row[8];
  int use_lgt_col = get_inv_lgt8(IHT_4x8[tx_type].cols, param, lgtmtx_col, 4);
  int use_lgt_row = get_inv_lgt4(IHT_4x8[tx_type].rows, param, lgtmtx_row, 8);
#endif

366
  // inverse transform row vectors and transpose
367
  for (i = 0; i < n2; ++i) {
Lester Lu's avatar
Lester Lu committed
368 369 370 371 372 373
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt4(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_4x8[tx_type].rows(input, outtmp);
374
    for (j = 0; j < n; ++j)
375
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
376
    input += n;
377 378 379
  }

  // inverse transform column vectors
380
  for (i = 0; i < n; ++i) {
Lester Lu's avatar
Lester Lu committed
381 382 383 384 385 386
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt8(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_4x8[tx_type].cols(tmp[i], out[i]);
387 388
  }

389
#if CONFIG_EXT_TX
390
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
391
#endif
392 393

  // Sum with the destination
394 395
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
396 397 398 399 400 401 402
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
403
void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
404 405
                         const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
406
  static const transform_2d IHT_8x4[] = {
Luca Barbato's avatar
Luca Barbato committed
407 408 409 410
    { aom_idct4_c, aom_idct8_c },    // DCT_DCT
    { aom_iadst4_c, aom_idct8_c },   // ADST_DCT
    { aom_idct4_c, aom_iadst8_c },   // DCT_ADST
    { aom_iadst4_c, aom_iadst8_c },  // ADST_ADST
411
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
412 413 414 415 416 417 418 419 420 421 422 423
    { aom_iadst4_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx8_c },          // IDTX
    { aom_idct4_c, iidtx8_c },       // V_DCT
    { iidtx4_c, aom_idct8_c },       // H_DCT
    { aom_iadst4_c, iidtx8_c },      // V_ADST
    { iidtx4_c, aom_iadst8_c },      // H_ADST
    { aom_iadst4_c, iidtx8_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst8_c },      // H_FLIPADST
424
#endif
425
  };
426

427 428
  const int n = 4;
  const int n2 = 8;
429 430

  int i, j;
431
  tran_low_t out[8][4], tmp[8][4], outtmp[8];
432
  tran_low_t *outp = &out[0][0];
433
  int outstride = n;
434

Lester Lu's avatar
Lester Lu committed
435 436 437 438 439 440 441
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[8];
  const tran_high_t *lgtmtx_row[4];
  int use_lgt_col = get_inv_lgt4(IHT_8x4[tx_type].cols, param, lgtmtx_col, 8);
  int use_lgt_row = get_inv_lgt8(IHT_8x4[tx_type].rows, param, lgtmtx_row, 4);
#endif

442
  // inverse transform row vectors and transpose
443
  for (i = 0; i < n; ++i) {
Lester Lu's avatar
Lester Lu committed
444 445 446 447 448 449
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt8(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_8x4[tx_type].rows(input, outtmp);
450
    for (j = 0; j < n2; ++j)
451
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
452
    input += n2;
453 454 455
  }

  // inverse transform column vectors
456
  for (i = 0; i < n2; ++i) {
Lester Lu's avatar
Lester Lu committed
457 458 459 460 461 462
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt4(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_8x4[tx_type].cols(tmp[i], out[i]);
463 464
  }

465
#if CONFIG_EXT_TX
466
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
467
#endif
468 469

  // Sum with the destination
470 471
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
472 473 474 475 476 477 478
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

479
void av1_iht4x16_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
480 481
                          const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505
  static const transform_2d IHT_4x16[] = {
    { aom_idct16_c, aom_idct4_c },    // DCT_DCT
    { aom_iadst16_c, aom_idct4_c },   // ADST_DCT
    { aom_idct16_c, aom_iadst4_c },   // DCT_ADST
    { aom_iadst16_c, aom_iadst4_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst16_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct16_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx4_c },          // IDTX
    { aom_idct16_c, iidtx4_c },       // V_DCT
    { iidtx16_c, aom_idct4_c },       // H_DCT
    { aom_iadst16_c, iidtx4_c },      // V_ADST
    { iidtx16_c, aom_iadst4_c },      // H_ADST
    { aom_iadst16_c, iidtx4_c },      // V_FLIPADST
    { iidtx16_c, aom_iadst4_c },      // H_FLIPADST
#endif
  };

  const int n = 4;
  const int n4 = 16;
  int i, j;
506
  tran_low_t out[4][16], tmp[4][16], outtmp[4];
507 508 509
  tran_low_t *outp = &out[0][0];
  int outstride = n4;

Lester Lu's avatar
Lester Lu committed
510 511 512 513 514
#if CONFIG_LGT
  const tran_high_t *lgtmtx_row[16];
  int use_lgt_row = get_inv_lgt4(IHT_4x16[tx_type].rows, param, lgtmtx_row, 16);
#endif

515 516
  // inverse transform row vectors and transpose
  for (i = 0; i < n4; ++i) {
Lester Lu's avatar
Lester Lu committed
517 518 519 520 521 522
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt4(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_4x16[tx_type].rows(input, outtmp);
523
    for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
524 525 526 527
    input += n;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
528 529 530
  for (i = 0; i < n; ++i) {
    IHT_4x16[tx_type].cols(tmp[i], out[i]);
  }
531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n4, n);
#endif

  // Sum with the destination
  for (i = 0; i < n4; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

void av1_iht16x4_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
547 548
                          const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568
  static const transform_2d IHT_16x4[] = {
    { aom_idct4_c, aom_idct16_c },    // DCT_DCT
    { aom_iadst4_c, aom_idct16_c },   // ADST_DCT
    { aom_idct4_c, aom_iadst16_c },   // DCT_ADST
    { aom_iadst4_c, aom_iadst16_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst4_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst16_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx16_c },          // IDTX
    { aom_idct4_c, iidtx16_c },       // V_DCT
    { iidtx4_c, aom_idct16_c },       // H_DCT
    { aom_iadst4_c, iidtx16_c },      // V_ADST
    { iidtx4_c, aom_iadst16_c },      // H_ADST
    { aom_iadst4_c, iidtx16_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst16_c },      // H_FLIPADST
#endif
  };
569

570 571 572 573
  const int n = 4;
  const int n4 = 16;

  int i, j;
574
  tran_low_t out[16][4], tmp[16][4], outtmp[16];
575 576 577
  tran_low_t *outp = &out[0][0];
  int outstride = n;

Lester Lu's avatar
Lester Lu committed
578 579 580 581 582
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[16];
  int use_lgt_col = get_inv_lgt4(IHT_16x4[tx_type].cols, param, lgtmtx_col, 16);
#endif

583 584 585
  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_16x4[tx_type].rows(input, outtmp);
586
    for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j];
587 588 589 590
    input += n4;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
591 592 593 594 595 596 597 598
  for (i = 0; i < n4; ++i) {
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt4(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_16x4[tx_type].cols(tmp[i], out[i]);
  }
599 600 601 602 603 604 605 606 607 608 609 610 611 612 613

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n4);
#endif

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n4; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
614
void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
615 616
                           const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
617
  static const transform_2d IHT_8x16[] = {
Luca Barbato's avatar
Luca Barbato committed
618 619 620 621
    { aom_idct16_c, aom_idct8_c },    // DCT_DCT
    { aom_iadst16_c, aom_idct8_c },   // ADST_DCT
    { aom_idct16_c, aom_iadst8_c },   // DCT_ADST
    { aom_iadst16_c, aom_iadst8_c },  // ADST_ADST
622
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
623 624 625 626 627 628 629 630 631 632 633 634
    { aom_iadst16_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct16_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx8_c },          // IDTX
    { aom_idct16_c, iidtx8_c },       // V_DCT
    { iidtx16_c, aom_idct8_c },       // H_DCT
    { aom_iadst16_c, iidtx8_c },      // V_ADST
    { iidtx16_c, aom_iadst8_c },      // H_ADST
    { aom_iadst16_c, iidtx8_c },      // V_FLIPADST
    { iidtx16_c, aom_iadst8_c },      // H_FLIPADST
635
#endif
636 637 638 639 640
  };

  const int n = 8;
  const int n2 = 16;
  int i, j;
641
  tran_low_t out[8][16], tmp[8][16], outtmp[8];
642 643 644
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

Lester Lu's avatar
Lester Lu committed
645 646 647 648 649
#if CONFIG_LGT
  const tran_high_t *lgtmtx_row[16];
  int use_lgt_row = get_inv_lgt8(IHT_8x16[tx_type].rows, param, lgtmtx_row, 16);
#endif

650 651
  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
Lester Lu's avatar
Lester Lu committed
652 653 654 655 656 657
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt8(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_8x16[tx_type].rows(input, outtmp);
658
    for (j = 0; j < n; ++j)
659
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
660
    input += n;
661 662 663 664
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
665
    IHT_8x16[tx_type].cols(tmp[i], out[i]);
666 667
  }

668
#if CONFIG_EXT_TX
669
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
670
#endif
671 672 673 674 675 676 677 678 679 680 681

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
682
void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
683 684
                           const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
685
  static const transform_2d IHT_16x8[] = {
Luca Barbato's avatar
Luca Barbato committed
686 687 688 689
    { aom_idct8_c, aom_idct16_c },    // DCT_DCT
    { aom_iadst8_c, aom_idct16_c },   // ADST_DCT
    { aom_idct8_c, aom_iadst16_c },   // DCT_ADST
    { aom_iadst8_c, aom_iadst16_c },  // ADST_ADST
690
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
691 692 693 694 695 696 697 698 699 700 701 702
    { aom_iadst8_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst16_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // ADST_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx16_c },          // IDTX
    { aom_idct8_c, iidtx16_c },       // V_DCT
    { iidtx8_c, aom_idct16_c },       // H_DCT
    { aom_iadst8_c, iidtx16_c },      // V_ADST
    { iidtx8_c, aom_iadst16_c },      // H_ADST
    { aom_iadst8_c, iidtx16_c },      // V_FLIPADST
    { iidtx8_c, aom_iadst16_c },      // H_FLIPADST
703
#endif
704
  };
705

706 707 708 709
  const int n = 8;
  const int n2 = 16;

  int i, j;
710
  tran_low_t out[16][8], tmp[16][8], outtmp[16];
711 712 713
  tran_low_t *outp = &out[0][0];
  int outstride = n;

Lester Lu's avatar
Lester Lu committed
714 715 716 717 718
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[16];
  int use_lgt_col = get_inv_lgt8(IHT_16x8[tx_type].cols, param, lgtmtx_col, 16);
#endif

719 720 721 722
  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_16x8[tx_type].rows(input, outtmp);
    for (j = 0; j < n2; ++j)
723
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
724
    input += n2;
725 726 727 728
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) {
Lester Lu's avatar
Lester Lu committed
729 730 731 732 733 734
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt8(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_16x8[tx_type].cols(tmp[i], out[i]);
735 736
  }

737
#if CONFIG_EXT_TX
738
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
739
#endif
740 741 742 743 744 745 746 747 748 749 750

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

751
void av1_iht8x32_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
752 753
                           const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777
  static const transform_2d IHT_8x32[] = {
    { aom_idct32_c, aom_idct8_c },     // DCT_DCT
    { ihalfright32_c, aom_idct8_c },   // ADST_DCT
    { aom_idct32_c, aom_iadst8_c },    // DCT_ADST
    { ihalfright32_c, aom_iadst8_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { ihalfright32_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct32_c, aom_iadst8_c },    // DCT_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // ADST_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx8_c },           // IDTX
    { aom_idct32_c, iidtx8_c },        // V_DCT
    { iidtx32_c, aom_idct8_c },        // H_DCT
    { ihalfright32_c, iidtx8_c },      // V_ADST
    { iidtx32_c, aom_iadst8_c },       // H_ADST
    { ihalfright32_c, iidtx8_c },      // V_FLIPADST
    { iidtx32_c, aom_iadst8_c },       // H_FLIPADST
#endif
  };

  const int n = 8;
  const int n4 = 32;
  int i, j;
778
  tran_low_t out[8][32], tmp[8][32], outtmp[8];
779 780 781
  tran_low_t *outp = &out[0][0];
  int outstride = n4;

Lester Lu's avatar
Lester Lu committed
782 783 784 785 786
#if CONFIG_LGT
  const tran_high_t *lgtmtx_row[32];
  int use_lgt_row = get_inv_lgt8(IHT_8x32[tx_type].rows, param, lgtmtx_row, 32);
#endif

787 788
  // inverse transform row vectors and transpose
  for (i = 0; i < n4; ++i) {
Lester Lu's avatar
Lester Lu committed
789 790 791 792 793 794
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt8(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_8x32[tx_type].rows(input, outtmp);
795
    for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
796 797 798 799
    input += n;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
800 801 802
  for (i = 0; i < n; ++i) {
    IHT_8x32[tx_type].cols(tmp[i], out[i]);
  }
803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n4, n);
#endif

  // Sum with the destination
  for (i = 0; i < n4; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

void av1_iht32x8_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
819 820
                           const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840
  static const transform_2d IHT_32x8[] = {
    { aom_idct8_c, aom_idct32_c },     // DCT_DCT
    { aom_iadst8_c, aom_idct32_c },    // ADST_DCT
    { aom_idct8_c, ihalfright32_c },   // DCT_ADST
    { aom_iadst8_c, ihalfright32_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst8_c, aom_idct32_c },    // FLIPADST_DCT
    { aom_idct8_c, ihalfright32_c },   // DCT_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // ADST_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx32_c },           // IDTX
    { aom_idct8_c, iidtx32_c },        // V_DCT
    { iidtx8_c, aom_idct32_c },        // H_DCT
    { aom_iadst8_c, iidtx32_c },       // V_ADST
    { iidtx8_c, ihalfright32_c },      // H_ADST
    { aom_iadst8_c, iidtx32_c },       // V_FLIPADST
    { iidtx8_c, ihalfright32_c },      // H_FLIPADST
#endif
  };
841

842 843 844 845
  const int n = 8;
  const int n4 = 32;

  int i, j;
846
  tran_low_t out[32][8], tmp[32][8], outtmp[32];
847 848 849
  tran_low_t *outp = &out[0][0];
  int outstride = n;

Lester Lu's avatar
Lester Lu committed
850 851 852 853 854
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[32];
  int use_lgt_col = get_inv_lgt4(IHT_32x8[tx_type].cols, param, lgtmtx_col, 32);
#endif

855 856 857
  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_32x8[tx_type].rows(input, outtmp);
858
    for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j];
859 860 861 862
    input += n4;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
863 864 865 866 867 868 869 870
  for (i = 0; i < n4; ++i) {
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt8(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_32x8[tx_type].cols(tmp[i], out[i]);
  }
871 872 873 874 875 876 877 878 879 880 881 882 883 884 885

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n4);
#endif

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n4; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
886
void av1_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
887 888
                            const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
889
  static const transform_2d IHT_16x32[] = {
Luca Barbato's avatar
Luca Barbato committed
890 891 892 893
    { aom_idct32_c, aom_idct16_c },     // DCT_DCT
    { ihalfright32_c, aom_idct16_c },   // ADST_DCT
    { aom_idct32_c, aom_iadst16_c },    // DCT_ADST
    { ihalfright32_c, aom_iadst16_c },  // ADST_ADST
894
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
895 896 897 898 899 900 901 902 903 904 905 906
    { ihalfright32_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct32_c, aom_iadst16_c },    // DCT_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // ADST_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx16_c },           // IDTX
    { aom_idct32_c, iidtx16_c },        // V_DCT
    { iidtx32_c, aom_idct16_c },        // H_DCT
    { ihalfright32_c, iidtx16_c },      // V_ADST
    { iidtx32_c, aom_iadst16_c },       // H_ADST
    { ihalfright32_c, iidtx16_c },      // V_FLIPADST
    { iidtx32_c, aom_iadst16_c },       // H_FLIPADST
907
#endif
908 909 910 911 912
  };

  const int n = 16;
  const int n2 = 32;
  int i, j;
913
  tran_low_t out[16][32], tmp[16][32], outtmp[16];
914 915 916 917 918 919 920
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
    IHT_16x32[tx_type].rows(input, outtmp);
    for (j = 0; j < n; ++j)
921
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
922
    input += n;
923 924 925
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
926
  for (i = 0; i < n; ++i) IHT_16x32[tx_type].cols(tmp[i], out[i]);
927

928
#if CONFIG_EXT_TX
929
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
930
#endif
931 932 933 934 935 936 937 938 939 940 941

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
942
void av1_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
943 944
                            const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
945
  static const transform_2d IHT_32x16[] = {
Luca Barbato's avatar
Luca Barbato committed
946 947 948 949
    { aom_idct16_c, aom_idct32_c },     // DCT_DCT
    { aom_iadst16_c, aom_idct32_c },    // ADST_DCT
    { aom_idct16_c, ihalfright32_c },   // DCT_ADST
    { aom_iadst16_c, ihalfright32_c },  // ADST_ADST
950
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
951 952 953 954 955 956 957 958 959 960 961 962
    { aom_iadst16_c, aom_idct32_c },    // FLIPADST_DCT
    { aom_idct16_c, ihalfright32_c },   // DCT_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // ADST_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx32_c },           // IDTX
    { aom_idct16_c, iidtx32_c },        // V_DCT
    { iidtx16_c, aom_idct32_c },        // H_DCT
    { aom_iadst16_c, iidtx32_c },       // V_ADST
    { iidtx16_c, ihalfright32_c },      // H_ADST
    { aom_iadst16_c, iidtx32_c },       // V_FLIPADST
    { iidtx16_c, ihalfright32_c },      // H_FLIPADST
963
#endif
964 965 966 967 968
  };
  const int n = 16;
  const int n2 = 32;

  int i, j;
969
  tran_low_t out[32][16], tmp[32][16], outtmp[32];
970 971 972 973 974 975 976
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_32x16[tx_type].rows(input, outtmp);
    for (j = 0; j < n2; ++j)
977
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
978
    input += n2;
979 980 981
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
982
  for (i = 0; i < n2; ++i) IHT_32x16[tx_type].cols(tmp[i], out[i]);
983

984
#if CONFIG_EXT_TX
985
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
986
#endif
987 988 989 990 991 992 993 994 995 996 997

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
998
void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
999 1000
                         const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
1001
  static const transform_2d IHT_8[] = {
Luca Barbato's avatar
Luca Barbato committed
1002 1003 1004 1005
    { aom_idct8_c, aom_idct8_c },    // DCT_DCT  = 0
    { aom_iadst8_c, aom_idct8_c },   // ADST_DCT = 1
    { aom_idct8_c, aom_iadst8_c },   // DCT_ADST = 2
    { aom_iadst8_c, aom_iadst8_c },  // ADST_ADST = 3
1006
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018
    { aom_iadst8_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst8_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx8_c },          // IDTX
    { aom_idct8_c, iidtx8_c },       // V_DCT
    { iidtx8_c, aom_idct8_c },       // H_DCT
    { aom_iadst8_c, iidtx8_c },      // V_ADST
    { iidtx8_c, aom_iadst8_c },      // H_ADST
    { aom_iadst8_c, iidtx8_c },      // V_FLIPADST
    { iidtx8_c, aom_iadst8_c },      // H_FLIPADST
Lester Lu's avatar
Lester Lu committed
1019
#endif
Debargha Mukherjee's avatar