idct.c 81.7 KB
Newer Older
Jingning Han's avatar
Jingning Han committed
1
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <math.h>

Yaowu Xu's avatar
Yaowu Xu committed
14
#include "./aom_dsp_rtcd.h"
Geza Lore's avatar
Geza Lore committed
15
16
17
#include "./av1_rtcd.h"
#include "aom_dsp/inv_txfm.h"
#include "aom_ports/mem.h"
18
#include "av1/common/av1_inv_txfm1d_cfg.h"
19
20
21
#include "av1/common/blockd.h"
#include "av1/common/enums.h"
#include "av1/common/idct.h"
Jingning Han's avatar
Jingning Han committed
22

23
int av1_get_tx_scale(const TX_SIZE tx_size) {
24
25
26
27
28
29
30
  if (txsize_sqr_up_map[tx_size] == TX_32X32) return 1;
#if CONFIG_TX64X64
  else if (txsize_sqr_up_map[tx_size] == TX_64X64)
    return 2;
#endif  // CONFIG_TX64X64
  else
    return 0;
31
32
}

33
34
35
// NOTE: The implementation of all inverses need to be aware of the fact
// that input and output could be the same buffer.

36
#if CONFIG_EXT_TX
Debargha Mukherjee's avatar
Debargha Mukherjee committed
37
38
// 4-point inverse identity transform.
// With CONFIG_DAALA_DCT4 the coefficients are copied unchanged; otherwise each
// one is multiplied by Sqrt2 and rounded through dct_const_round_shift().
// Each element is read before it is written, so input == output is safe.
static void iidtx4_c(const tran_low_t *input, tran_low_t *output) {
  for (int i = 0; i < 4; ++i) {
#if CONFIG_DAALA_DCT4
    output[i] = input[i];
#else
    output[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
#endif
  }
}

// 8-point inverse identity transform.
// With CONFIG_DAALA_DCT8 the coefficients are copied unchanged; otherwise each
// one is doubled. Each element is read before it is written, so
// input == output is safe.
static void iidtx8_c(const tran_low_t *input, tran_low_t *output) {
  for (int i = 0; i < 8; ++i) {
#if CONFIG_DAALA_DCT8
    output[i] = input[i];
#else
    output[i] = input[i] * 2;
#endif
  }
}

// 16-point inverse identity transform.
// With CONFIG_DAALA_DCT16 the coefficients are copied unchanged; otherwise
// each one is multiplied by 2 * Sqrt2 and rounded through
// dct_const_round_shift(). Each element is read before it is written, so
// input == output is safe.
static void iidtx16_c(const tran_low_t *input, tran_low_t *output) {
  for (int i = 0; i < 16; ++i) {
#if CONFIG_DAALA_DCT16
    output[i] = input[i];
#else
    output[i] = (tran_low_t)dct_const_round_shift(input[i] * 2 * Sqrt2);
#endif
  }
}

// 32-point inverse identity transform.
// With CONFIG_DAALA_DCT32 the coefficients are copied unchanged; otherwise
// each one is multiplied by 4. Each element is read before it is written, so
// input == output is safe.
static void iidtx32_c(const tran_low_t *input, tran_low_t *output) {
  for (int i = 0; i < 32; ++i) {
#if CONFIG_DAALA_DCT32
    output[i] = input[i];
#else
    output[i] = input[i] * 4;
#endif
  }
}
80
81
82
83

#if CONFIG_TX64X64
// 64-point inverse identity transform.
// With CONFIG_DAALA_DCT64 the coefficients are copied unchanged; otherwise
// each one is multiplied by 4 * Sqrt2 and rounded through
// dct_const_round_shift(). Each element is read before it is written, so
// input == output is safe.
static void iidtx64_c(const tran_low_t *input, tran_low_t *output) {
  for (int i = 0; i < 64; ++i) {
#if CONFIG_DAALA_DCT64
    output[i] = input[i];
#else
    output[i] = (tran_low_t)dct_const_round_shift(input[i] * 4 * Sqrt2);
#endif
  }
}
#endif  // CONFIG_TX64X64
93
#endif  // CONFIG_EXT_TX
Debargha Mukherjee's avatar
Debargha Mukherjee committed
94

95
// For use in lieu of ADST
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#if CONFIG_DAALA_DCT32
// 32-point "half right" inverse transform (Daala configuration).
// The upper 16 inputs are copied straight into the lower 16 outputs, and a
// 16-point inverse DCT of the lower 16 inputs fills the upper 16 outputs.
// No scaling is applied here; Daala transforms are all orthonormal.
static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
  tran_low_t lower_half[16];
  int k;
  // Save the lower half first: output may be the same buffer as input and
  // output[0..15] is overwritten by the next loop.
  for (k = 0; k < 16; ++k) lower_half[k] = input[k];
  for (k = 0; k < 16; ++k) output[k] = input[k + 16];
  aom_idct16_c(lower_half, output + 16);
}
#else
110
111
112
113
114
115
116
// 32-point "half right" inverse transform.
// The upper 16 inputs, multiplied by 4, become the lower 16 outputs. The lower
// 16 inputs are multiplied by sqrt(2) and run through a 16-point inverse DCT
// into the upper 16 outputs.
static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  tran_low_t inputhalf[16];
  // Multiply input by sqrt(2). This is staged in a local buffer because
  // output may alias input and output[0..15] is overwritten below.
  for (i = 0; i < 16; ++i) {
    inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
  }
  for (i = 0; i < 16; ++i) {
    output[i] = input[16 + i] * 4;
  }
  aom_idct16_c(inputhalf, output + 16);
  // Note overall scaling factor is 4 times orthogonal
}
123
#endif
124

125
#if CONFIG_TX64X64
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#if CONFIG_DAALA_DCT64
// 64-point inverse DCT, column variant (Daala configuration).
// Forwards directly to aom_idct64_c() with no extra scaling.
static void idct64_col_c(const tran_low_t *input, tran_low_t *output) {
  aom_idct64_c(input, output);
}

// 64-point inverse DCT, row variant (Daala configuration).
// Forwards directly to aom_idct64_c() with no extra scaling.
static void idct64_row_c(const tran_low_t *input, tran_low_t *output) {
  aom_idct64_c(input, output);
}

// 64-point "half right" inverse transform (Daala configuration).
// The upper 32 inputs are copied straight into the lower 32 outputs, and a
// 32-point inverse DCT of the lower 32 inputs fills the upper 32 outputs.
// No scaling is applied here; Daala transforms are all orthonormal.
static void ihalfright64_c(const tran_low_t *input, tran_low_t *output) {
  tran_low_t lower_half[32];
  int k;
  // Save the lower half first: output may be the same buffer as input and
  // output[0..31] is overwritten by the next loop.
  for (k = 0; k < 32; ++k) lower_half[k] = input[k];
  for (k = 0; k < 32; ++k) output[k] = input[k + 32];
  aom_idct32_c(lower_half, output + 32);
}

#else
149
150
151
152
// 64-point inverse DCT, column variant.
// Converts the coefficients to int32_t, runs av1_idct64_new() with the column
// cos-bit / stage-range tables, and converts the result back to tran_low_t.
// Local staging buffers are used, so input and output may alias.
static void idct64_col_c(const tran_low_t *input, tran_low_t *output) {
  int32_t in[64], out[64];
  int i;
  for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
  av1_idct64_new(in, out, inv_cos_bit_col_dct_64, inv_stage_range_col_dct_64);
  for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}

// 64-point inverse DCT, row variant.
// Converts the coefficients to int32_t, runs av1_idct64_new() with the row
// cos-bit / stage-range tables, and converts the result back to tran_low_t.
// Local staging buffers are used, so input and output may alias.
static void idct64_row_c(const tran_low_t *input, tran_low_t *output) {
  int32_t in[64], out[64];
  int i;
  for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
  av1_idct64_new(in, out, inv_cos_bit_row_dct_64, inv_stage_range_row_dct_64);
  for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}

// Used in place of a 64-point ADST.
// The upper 32 inputs, scaled by 4 * sqrt(2), become the lower 32 outputs. The
// lower 32 inputs are scaled by sqrt(2) and run through a 32-point inverse DCT
// into the upper 32 outputs. The overall scaling factor is 4 * sqrt(2) times
// orthogonal.
static void ihalfright64_c(const tran_low_t *input, tran_low_t *output) {
  tran_low_t scaled_lower[32];
  int k;
  // Scale the lower half by sqrt(2) into a local buffer before output is
  // touched, since output may be the same buffer as input.
  for (k = 0; k < 32; ++k)
    scaled_lower[k] = (tran_low_t)dct_const_round_shift(input[k] * Sqrt2);
  for (k = 0; k < 32; ++k)
    output[k] = (tran_low_t)dct_const_round_shift(input[k + 32] * 4 * Sqrt2);
  aom_idct32_c(scaled_lower, output + 32);
}
179
#endif  // CONFIG_DAALA_DCT64
180
181
#endif  // CONFIG_TX64X64

Jingning Han's avatar
Jingning Han committed
182
// Inverse identity transform and add.
183
#if CONFIG_EXT_TX
184
static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
Jingning Han's avatar
Jingning Han committed
185
                           int bs, int tx_type) {
186
  int r, c;
187
  const int shift = bs < 32 ? 3 : (bs < 64 ? 2 : 1);
Debargha Mukherjee's avatar
Debargha Mukherjee committed
188
  if (tx_type == IDTX) {
Jingning Han's avatar
Jingning Han committed
189
190
    for (r = 0; r < bs; ++r) {
      for (c = 0; c < bs; ++c)
Debargha Mukherjee's avatar
Debargha Mukherjee committed
191
192
193
        dest[c] = clip_pixel_add(dest[c], input[c] >> shift);
      dest += stride;
      input += bs;
Jingning Han's avatar
Jingning Han committed
194
    }
195
196
  }
}
197
#endif  // CONFIG_EXT_TX
198

clang-format's avatar
clang-format committed
199
200
201
202
203
// Flips a strided 2-D view upside down in place: moves `dest` to the start of
// its last row (row size-1) and negates `stride`, so that stepping by the new
// stride walks the rows in reverse order. `dest` and `stride` must be
// modifiable lvalues.
#define FLIPUD_PTR(dest, stride, size) \
  do {                                 \
    (dest) += ((size)-1) * (stride);   \
    (stride) = -(stride);              \
  } while (0)
204

205
#if CONFIG_EXT_TX
clang-format's avatar
clang-format committed
206
207
208
// Adjusts the destination and/or source pointer and stride so that the
// caller's "sum with the destination" loop applies the flips required by the
// FLIPADST transform types.
//   dst, dstride: destination pointer and stride, updated in place for an
//                 up/down flip (uses sizey rows).
//   src, sstride: source pointer and stride, updated in place for a
//                 left/right flip (uses sizex rows of the transposed source).
//   tx_type:      transform type; types without a flip leave everything
//                 unchanged. Unknown types trigger assert(0).
static void maybe_flip_strides(uint8_t **dst, int *dstride, tran_low_t **src,
                               int *sstride, int tx_type, int sizey,
                               int sizex) {
  // Note that the transpose of src will be added to dst. In order to LR
  // flip the addends (in dst coordinates), we UD flip the src. To UD flip
  // the addends, we UD flip the dst.
  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case IDTX:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST: break;
    case FLIPADST_DCT:
    case FLIPADST_ADST:
    case V_FLIPADST:
      // flip UD
      FLIPUD_PTR(*dst, *dstride, sizey);
      break;
    case DCT_FLIPADST:
    case ADST_FLIPADST:
    case H_FLIPADST:
      // flip LR
      FLIPUD_PTR(*src, *sstride, sizex);
      break;
    case FLIPADST_FLIPADST:
      // flip UD
      FLIPUD_PTR(*dst, *dstride, sizey);
      // flip LR
      FLIPUD_PTR(*src, *sstride, sizex);
      break;
    default: assert(0); break;
  }
}
243
#endif  // CONFIG_EXT_TX
244

245
#if CONFIG_HIGHBITDEPTH
246
#if CONFIG_EXT_TX && CONFIG_TX64X64
247
static void highbd_inv_idtx_add_c(const tran_low_t *input, uint8_t *dest8,
248
                                  int stride, int bs, int tx_type, int bd) {
249
250
251
  int r, c;
  const int shift = bs < 32 ? 3 : 2;
  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
252

Debargha Mukherjee's avatar
Debargha Mukherjee committed
253
  if (tx_type == IDTX) {
254
255
    for (r = 0; r < bs; ++r) {
      for (c = 0; c < bs; ++c)
Debargha Mukherjee's avatar
Debargha Mukherjee committed
256
257
258
        dest[c] = highbd_clip_pixel_add(dest[c], input[c] >> shift, bd);
      dest += stride;
      input += bs;
259
    }
260
261
  }
}
262
#endif  // CONFIG_EXT_TX && CONFIG_TX64X64
263
#endif  // CONFIG_HIGHBITDEPTH
264

Lester Lu's avatar
Lester Lu committed
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
#if CONFIG_LGT
// 4-point inverse LGT: output[j] is the rounded sum over i of
// lgtmtx[i][j] * input[i], with lgtmtx a row-major 4x4 matrix.
// An all-zero input produces an all-zero output without any multiplications.
// All sums are accumulated before output is written, so input and output may
// alias.
void ilgt4(const tran_low_t *input, tran_low_t *output,
           const tran_high_t *lgtmtx) {
  const int all_zero = !(input[0] | input[1] | input[2] | input[3]);
  if (all_zero) {
    for (int k = 0; k < 4; ++k) output[k] = 0;
    return;
  }

  tran_high_t acc[4] = { 0 };
  for (int row = 0; row < 4; ++row) {
    const tran_high_t *basis = lgtmtx + row * 4;
    for (int col = 0; col < 4; ++col) acc[col] += basis[col] * input[row];
  }

  for (int col = 0; col < 4; ++col)
    output[col] = WRAPLOW(dct_const_round_shift(acc[col]));
}

// 8-point inverse LGT: output[j] is the rounded sum over i of
// lgtmtx[i][j] * input[i], with lgtmtx a row-major 8x8 matrix.
// All sums are accumulated before output is written, so input and output may
// alias.
void ilgt8(const tran_low_t *input, tran_low_t *output,
           const tran_high_t *lgtmtx) {
  tran_high_t acc[8] = { 0 };
  for (int row = 0; row < 8; ++row) {
    const tran_high_t *basis = lgtmtx + row * 8;
    for (int col = 0; col < 8; ++col) acc[col] += basis[col] * input[row];
  }

  for (int col = 0; col < 8; ++col)
    output[col] = WRAPLOW(dct_const_round_shift(acc[col]));
}

// The get_inv_lgt functions return 1 if LGT is chosen to apply, and 0 otherwise
292
int get_inv_lgt4(transform_1d tx_orig, const TxfmParam *txfm_param,
Lester Lu's avatar
Lester Lu committed
293
294
295
296
                 const tran_high_t *lgtmtx[], int ntx) {
  // inter/intra split
  if (tx_orig == &aom_iadst4_c) {
    for (int i = 0; i < ntx; ++i)
297
      lgtmtx[i] = txfm_param->is_inter ? &lgt4_170[0][0] : &lgt4_140[0][0];
Lester Lu's avatar
Lester Lu committed
298
299
300
301
302
    return 1;
  }
  return 0;
}

303
int get_inv_lgt8(transform_1d tx_orig, const TxfmParam *txfm_param,
Lester Lu's avatar
Lester Lu committed
304
305
306
307
                 const tran_high_t *lgtmtx[], int ntx) {
  // inter/intra split
  if (tx_orig == &aom_iadst8_c) {
    for (int i = 0; i < ntx; ++i)
308
      lgtmtx[i] = txfm_param->is_inter ? &lgt8_170[0][0] : &lgt8_150[0][0];
Lester Lu's avatar
Lester Lu committed
309
310
311
312
313
314
    return 1;
  }
  return 0;
}
#endif  // CONFIG_LGT

Yaowu Xu's avatar
Yaowu Xu committed
315
void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
316
317
                         const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
318
319
320
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
321
#if !CONFIG_DAALA_DCT4
322
323
324
325
  if (tx_type == DCT_DCT) {
    aom_idct4x4_16_add(input, dest, stride);
    return;
  }
326
#endif
327
  static const transform_2d IHT_4[] = {
Luca Barbato's avatar
Luca Barbato committed
328
329
330
331
    { aom_idct4_c, aom_idct4_c },    // DCT_DCT  = 0
    { aom_iadst4_c, aom_idct4_c },   // ADST_DCT = 1
    { aom_idct4_c, aom_iadst4_c },   // DCT_ADST = 2
    { aom_iadst4_c, aom_iadst4_c },  // ADST_ADST = 3
332
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
333
334
335
336
337
338
339
340
341
342
343
344
    { aom_iadst4_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx4_c },          // IDTX
    { aom_idct4_c, iidtx4_c },       // V_DCT
    { iidtx4_c, aom_idct4_c },       // H_DCT
    { aom_iadst4_c, iidtx4_c },      // V_ADST
    { iidtx4_c, aom_iadst4_c },      // H_ADST
    { aom_iadst4_c, iidtx4_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst4_c },      // H_FLIPADST
345
#endif
346
347
348
  };

  int i, j;
349
  tran_low_t tmp[4][4];
350
351
352
  tran_low_t out[4][4];
  tran_low_t *outp = &out[0][0];
  int outstride = 4;
353

354
355
356
357
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif

Lester Lu's avatar
Lester Lu committed
358
359
360
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[4];
  const tran_high_t *lgtmtx_row[4];
361
362
363
364
  int use_lgt_col =
      get_inv_lgt4(IHT_4[tx_type].cols, txfm_param, lgtmtx_col, 4);
  int use_lgt_row =
      get_inv_lgt4(IHT_4[tx_type].rows, txfm_param, lgtmtx_row, 4);
Lester Lu's avatar
Lester Lu committed
365
366
#endif

367
368
  // inverse transform row vectors
  for (i = 0; i < 4; ++i) {
369
370
371
372
373
#if CONFIG_DAALA_DCT4
    tran_low_t temp_in[4];
    for (j = 0; j < 4; j++) temp_in[j] = input[j] << 1;
    IHT_4[tx_type].rows(temp_in, out[i]);
#else
Lester Lu's avatar
Lester Lu committed
374
375
376
377
378
379
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt4(input, out[i], lgtmtx_row[i]);
    else
#endif
      IHT_4[tx_type].rows(input, out[i]);
380
#endif
clang-format's avatar
clang-format committed
381
    input += 4;
382
383
384
  }

  // transpose
385
386
387
  for (i = 0; i < 4; i++) {
    for (j = 0; j < 4; j++) {
      tmp[j][i] = out[i][j];
388
    }
389
390
391
392
  }

  // inverse transform column vectors
  for (i = 0; i < 4; ++i) {
Lester Lu's avatar
Lester Lu committed
393
394
395
396
397
398
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt4(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_4[tx_type].cols(tmp[i], out[i]);
399
400
401
  }

#if CONFIG_EXT_TX
402
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4, 4);
403
404
405
406
#endif

  // Sum with the destination
  for (i = 0; i < 4; ++i) {
407
    for (j = 0; j < 4; ++j) {
408
409
      int d = i * stride + j;
      int s = j * outstride + i;
410
#if CONFIG_DAALA_DCT4
411
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
412
413
414
#else
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
#endif
415
416
417
418
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
419
void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
420
421
                         const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
422
423
424
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
425
426
427
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
428
  static const transform_2d IHT_4x8[] = {
Luca Barbato's avatar
Luca Barbato committed
429
430
431
432
    { aom_idct8_c, aom_idct4_c },    // DCT_DCT
    { aom_iadst8_c, aom_idct4_c },   // ADST_DCT
    { aom_idct8_c, aom_iadst4_c },   // DCT_ADST
    { aom_iadst8_c, aom_iadst4_c },  // ADST_ADST
433
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
434
435
436
437
438
439
440
441
442
443
444
445
    { aom_iadst8_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx4_c },          // IDTX
    { aom_idct8_c, iidtx4_c },       // V_DCT
    { iidtx8_c, aom_idct4_c },       // H_DCT
    { aom_iadst8_c, iidtx4_c },      // V_ADST
    { iidtx8_c, aom_iadst4_c },      // H_ADST
    { aom_iadst8_c, iidtx4_c },      // V_FLIPADST
    { iidtx8_c, aom_iadst4_c },      // H_FLIPADST
446
#endif
447
448
  };

449
450
  const int n = 4;
  const int n2 = 8;
451
  int i, j;
452
  tran_low_t out[4][8], tmp[4][8], outtmp[4];
453
  tran_low_t *outp = &out[0][0];
454
  int outstride = n2;
455

Lester Lu's avatar
Lester Lu committed
456
457
458
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[4];
  const tran_high_t *lgtmtx_row[8];
459
460
461
462
  int use_lgt_col =
      get_inv_lgt8(IHT_4x8[tx_type].cols, txfm_param, lgtmtx_col, 4);
  int use_lgt_row =
      get_inv_lgt4(IHT_4x8[tx_type].rows, txfm_param, lgtmtx_row, 8);
Lester Lu's avatar
Lester Lu committed
463
464
#endif

465
  // inverse transform row vectors and transpose
466
  for (i = 0; i < n2; ++i) {
Lester Lu's avatar
Lester Lu committed
467
468
469
470
471
472
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt4(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_4x8[tx_type].rows(input, outtmp);
473
    for (j = 0; j < n; ++j)
474
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
475
    input += n;
476
477
478
  }

  // inverse transform column vectors
479
  for (i = 0; i < n; ++i) {
Lester Lu's avatar
Lester Lu committed
480
481
482
483
484
485
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt8(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_4x8[tx_type].cols(tmp[i], out[i]);
486
487
  }

488
#if CONFIG_EXT_TX
489
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
490
#endif
491
492

  // Sum with the destination
493
494
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
495
496
497
498
499
500
501
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
502
void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
503
504
                         const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
505
506
507
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
508
509
510
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
511
  static const transform_2d IHT_8x4[] = {
Luca Barbato's avatar
Luca Barbato committed
512
513
514
515
    { aom_idct4_c, aom_idct8_c },    // DCT_DCT
    { aom_iadst4_c, aom_idct8_c },   // ADST_DCT
    { aom_idct4_c, aom_iadst8_c },   // DCT_ADST
    { aom_iadst4_c, aom_iadst8_c },  // ADST_ADST
516
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
517
518
519
520
521
522
523
524
525
526
527
528
    { aom_iadst4_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx8_c },          // IDTX
    { aom_idct4_c, iidtx8_c },       // V_DCT
    { iidtx4_c, aom_idct8_c },       // H_DCT
    { aom_iadst4_c, iidtx8_c },      // V_ADST
    { iidtx4_c, aom_iadst8_c },      // H_ADST
    { aom_iadst4_c, iidtx8_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst8_c },      // H_FLIPADST
529
#endif
530
  };
531

532
533
  const int n = 4;
  const int n2 = 8;
534
535

  int i, j;
536
  tran_low_t out[8][4], tmp[8][4], outtmp[8];
537
  tran_low_t *outp = &out[0][0];
538
  int outstride = n;
539

Lester Lu's avatar
Lester Lu committed
540
541
542
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[8];
  const tran_high_t *lgtmtx_row[4];
543
544
545
546
  int use_lgt_col =
      get_inv_lgt4(IHT_8x4[tx_type].cols, txfm_param, lgtmtx_col, 8);
  int use_lgt_row =
      get_inv_lgt8(IHT_8x4[tx_type].rows, txfm_param, lgtmtx_row, 4);
Lester Lu's avatar
Lester Lu committed
547
548
#endif

549
  // inverse transform row vectors and transpose
550
  for (i = 0; i < n; ++i) {
Lester Lu's avatar
Lester Lu committed
551
552
553
554
555
556
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt8(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_8x4[tx_type].rows(input, outtmp);
557
    for (j = 0; j < n2; ++j)
558
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
559
    input += n2;
560
561
562
  }

  // inverse transform column vectors
563
  for (i = 0; i < n2; ++i) {
Lester Lu's avatar
Lester Lu committed
564
565
566
567
568
569
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt4(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_8x4[tx_type].cols(tmp[i], out[i]);
570
571
  }

572
#if CONFIG_EXT_TX
573
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
574
#endif
575
576

  // Sum with the destination
577
578
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
579
580
581
582
583
584
585
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

586
void av1_iht4x16_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
587
588
                          const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
589
590
591
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
592
593
594
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
  static const transform_2d IHT_4x16[] = {
    { aom_idct16_c, aom_idct4_c },    // DCT_DCT
    { aom_iadst16_c, aom_idct4_c },   // ADST_DCT
    { aom_idct16_c, aom_iadst4_c },   // DCT_ADST
    { aom_iadst16_c, aom_iadst4_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst16_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct16_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx4_c },          // IDTX
    { aom_idct16_c, iidtx4_c },       // V_DCT
    { iidtx16_c, aom_idct4_c },       // H_DCT
    { aom_iadst16_c, iidtx4_c },      // V_ADST
    { iidtx16_c, aom_iadst4_c },      // H_ADST
    { aom_iadst16_c, iidtx4_c },      // V_FLIPADST
    { iidtx16_c, aom_iadst4_c },      // H_FLIPADST
#endif
  };

  const int n = 4;
  const int n4 = 16;
  int i, j;
619
  tran_low_t out[4][16], tmp[4][16], outtmp[4];
620
621
622
  tran_low_t *outp = &out[0][0];
  int outstride = n4;

Lester Lu's avatar
Lester Lu committed
623
624
#if CONFIG_LGT
  const tran_high_t *lgtmtx_row[16];
625
626
  int use_lgt_row =
      get_inv_lgt4(IHT_4x16[tx_type].rows, txfm_param, lgtmtx_row, 16);
Lester Lu's avatar
Lester Lu committed
627
628
#endif

629
630
  // inverse transform row vectors and transpose
  for (i = 0; i < n4; ++i) {
Lester Lu's avatar
Lester Lu committed
631
632
633
634
635
636
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt4(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_4x16[tx_type].rows(input, outtmp);
637
    for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
638
639
640
641
    input += n;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
642
643
644
  for (i = 0; i < n; ++i) {
    IHT_4x16[tx_type].cols(tmp[i], out[i]);
  }
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n4, n);
#endif

  // Sum with the destination
  for (i = 0; i < n4; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

void av1_iht16x4_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
661
662
                          const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
663
664
665
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
666
667
668
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
  static const transform_2d IHT_16x4[] = {
    { aom_idct4_c, aom_idct16_c },    // DCT_DCT
    { aom_iadst4_c, aom_idct16_c },   // ADST_DCT
    { aom_idct4_c, aom_iadst16_c },   // DCT_ADST
    { aom_iadst4_c, aom_iadst16_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst4_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst16_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx16_c },          // IDTX
    { aom_idct4_c, iidtx16_c },       // V_DCT
    { iidtx4_c, aom_idct16_c },       // H_DCT
    { aom_iadst4_c, iidtx16_c },      // V_ADST
    { iidtx4_c, aom_iadst16_c },      // H_ADST
    { aom_iadst4_c, iidtx16_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst16_c },      // H_FLIPADST
#endif
  };
689

690
691
692
693
  const int n = 4;
  const int n4 = 16;

  int i, j;
694
  tran_low_t out[16][4], tmp[16][4], outtmp[16];
695
696
697
  tran_low_t *outp = &out[0][0];
  int outstride = n;

Lester Lu's avatar
Lester Lu committed
698
699
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[16];
700
701
  int use_lgt_col =
      get_inv_lgt4(IHT_16x4[tx_type].cols, txfm_param, lgtmtx_col, 16);
Lester Lu's avatar
Lester Lu committed
702
703
#endif

704
705
706
  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_16x4[tx_type].rows(input, outtmp);
707
    for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j];
708
709
710
711
    input += n4;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
712
713
714
715
716
717
718
719
  for (i = 0; i < n4; ++i) {
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt4(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_16x4[tx_type].cols(tmp[i], out[i]);
  }
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n4);
#endif

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n4; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
735
void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
736
737
                           const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
738
739
740
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
741
742
743
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
744
  static const transform_2d IHT_8x16[] = {
Luca Barbato's avatar
Luca Barbato committed
745
746
747
748
    { aom_idct16_c, aom_idct8_c },    // DCT_DCT
    { aom_iadst16_c, aom_idct8_c },   // ADST_DCT
    { aom_idct16_c, aom_iadst8_c },   // DCT_ADST
    { aom_iadst16_c, aom_iadst8_c },  // ADST_ADST
749
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
750
751
752
753
754
755
756
757
758
759
760
761
    { aom_iadst16_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct16_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx8_c },          // IDTX
    { aom_idct16_c, iidtx8_c },       // V_DCT
    { iidtx16_c, aom_idct8_c },       // H_DCT
    { aom_iadst16_c, iidtx8_c },      // V_ADST
    { iidtx16_c, aom_iadst8_c },      // H_ADST
    { aom_iadst16_c, iidtx8_c },      // V_FLIPADST
    { iidtx16_c, aom_iadst8_c },      // H_FLIPADST
762
#endif
763
764
765
766
767
  };

  const int n = 8;
  const int n2 = 16;
  int i, j;
768
  tran_low_t out[8][16], tmp[8][16], outtmp[8];
769
770
771
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

Lester Lu's avatar
Lester Lu committed
772
773
#if CONFIG_LGT
  const tran_high_t *lgtmtx_row[16];
774
775
  int use_lgt_row =
      get_inv_lgt8(IHT_8x16[tx_type].rows, txfm_param, lgtmtx_row, 16);
Lester Lu's avatar
Lester Lu committed
776
777
#endif

778
779
  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
Lester Lu's avatar
Lester Lu committed
780
781
782
783
784
785
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt8(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_8x16[tx_type].rows(input, outtmp);
786
    for (j = 0; j < n; ++j)
787
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
788
    input += n;
789
790
791
792
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
793
    IHT_8x16[tx_type].cols(tmp[i], out[i]);
794
795
  }

796
#if CONFIG_EXT_TX
797
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
798
#endif
799
800
801
802
803
804
805
806
807
808
809

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
810
void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
811
812
                           const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
813
814
815
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
816
817
818
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
819
  static const transform_2d IHT_16x8[] = {
Luca Barbato's avatar
Luca Barbato committed
820
821
822
823
    { aom_idct8_c, aom_idct16_c },    // DCT_DCT
    { aom_iadst8_c, aom_idct16_c },   // ADST_DCT
    { aom_idct8_c, aom_iadst16_c },   // DCT_ADST
    { aom_iadst8_c, aom_iadst16_c },  // ADST_ADST
824
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
825
826
827
828
829
830
831
832
833
834
835
836
    { aom_iadst8_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst16_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // ADST_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx16_c },          // IDTX
    { aom_idct8_c, iidtx16_c },       // V_DCT
    { iidtx8_c, aom_idct16_c },       // H_DCT
    { aom_iadst8_c, iidtx16_c },      // V_ADST
    { iidtx8_c, aom_iadst16_c },      // H_ADST
    { aom_iadst8_c, iidtx16_c },      // V_FLIPADST
    { iidtx8_c, aom_iadst16_c },      // H_FLIPADST
837
#endif
838
  };
839

840
841
842
843
  const int n = 8;
  const int n2 = 16;

  int i, j;
844
  tran_low_t out[16][8], tmp[16][8], outtmp[16];
845
846
847
  tran_low_t *outp = &out[0][0];
  int outstride = n;

Lester Lu's avatar
Lester Lu committed
848
849
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[16];
850
851
  int use_lgt_col =
      get_inv_lgt8(IHT_16x8[tx_type].cols, txfm_param, lgtmtx_col, 16);
Lester Lu's avatar
Lester Lu committed
852
853
#endif

854
855
856
857
  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_16x8[tx_type].rows(input, outtmp);
    for (j = 0; j < n2; ++j)
858
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
859
    input += n2;
860
861
862
863
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) {
Lester Lu's avatar
Lester Lu committed
864
865
866
867
868
869
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt8(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_16x8[tx_type].cols(tmp[i], out[i]);
870
871
  }

872
#if CONFIG_EXT_TX
873
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
874
#endif
875
876
877
878
879
880
881
882
883
884
885

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

886
void av1_iht8x32_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
887
888
                           const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
889
890
891
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
892
893
894
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
  static const transform_2d IHT_8x32[] = {
    { aom_idct32_c, aom_idct8_c },     // DCT_DCT
    { ihalfright32_c, aom_idct8_c },   // ADST_DCT
    { aom_idct32_c, aom_iadst8_c },    // DCT_ADST
    { ihalfright32_c, aom_iadst8_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { ihalfright32_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct32_c, aom_iadst8_c },    // DCT_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // ADST_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx8_c },           // IDTX
    { aom_idct32_c, iidtx8_c },        // V_DCT
    { iidtx32_c, aom_idct8_c },        // H_DCT
    { ihalfright32_c, iidtx8_c },      // V_ADST
    { iidtx32_c, aom_iadst8_c },       // H_ADST
    { ihalfright32_c, iidtx8_c },      // V_FLIPADST
    { iidtx32_c, aom_iadst8_c },       // H_FLIPADST
#endif
  };

  const int n = 8;
  const int n4 = 32;
  int i, j;
919
  tran_low_t out[8][32], tmp[8][32], outtmp[8];
920
921
922
  tran_low_t *outp = &out[0][0];
  int outstride = n4;

Lester Lu's avatar
Lester Lu committed
923
924
#if CONFIG_LGT
  const tran_high_t *lgtmtx_row[32];
925
926
  int use_lgt_row =
      get_inv_lgt8(IHT_8x32[tx_type].rows, txfm_param, lgtmtx_row, 32);
Lester Lu's avatar
Lester Lu committed
927
928
#endif

929
930
  // inverse transform row vectors and transpose
  for (i = 0; i < n4; ++i) {
Lester Lu's avatar
Lester Lu committed
931
932
933
934
935
936
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt8(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_8x32[tx_type].rows(input, outtmp);
937
    for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
938
939
940
941
    input += n;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
942
943
944
  for (i = 0; i < n; ++i) {
    IHT_8x32[tx_type].cols(tmp[i], out[i]);
  }
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n4, n);
#endif

  // Sum with the destination
  for (i = 0; i < n4; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

void av1_iht32x8_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
961
962
                           const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
963
964
965
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
966
967
968
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
  static const transform_2d IHT_32x8[] = {
    { aom_idct8_c, aom_idct32_c },     // DCT_DCT
    { aom_iadst8_c, aom_idct32_c },    // ADST_DCT
    { aom_idct8_c, ihalfright32_c },   // DCT_ADST
    { aom_iadst8_c, ihalfright32_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst8_c, aom_idct32_c },    // FLIPADST_DCT
    { aom_idct8_c, ihalfright32_c },   // DCT_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // ADST_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx32_c },           // IDTX
    { aom_idct8_c, iidtx32_c },        // V_DCT
    { iidtx8_c, aom_idct32_c },        // H_DCT
    { aom_iadst8_c, iidtx32_c },       // V_ADST
    { iidtx8_c, ihalfright32_c },      // H_ADST
    { aom_iadst8_c, iidtx32_c },       // V_FLIPADST
    { iidtx8_c, ihalfright32_c },      // H_FLIPADST
#endif
  };
989

990
991
992
993
  const int n = 8;
  const int n4 = 32;

  int i, j;
994
  tran_low_t out[32][8], tmp[32][8], outtmp[32];
995
996
997
  tran_low_t *outp = &out[0][0];
  int outstride = n;

Lester Lu's avatar
Lester Lu committed
998
999
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[32];
1000
1001
  int use_lgt_col =
      get_inv_lgt4(IHT_32x8[tx_type].cols, txfm_param, lgtmtx_col, 32);
Lester Lu's avatar
Lester Lu committed
1002
1003
#endif

1004
1005
1006
  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_32x8[tx_type].rows(input, outtmp);
1007
    for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j];
1008
1009
1010
1011
    input += n4;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
1012
1013
1014
1015
1016
1017
1018
1019
  for (i = 0; i < n4; ++i) {
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt8(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_32x8[tx_type].cols(tmp[i], out[i]);
  }
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n4);
#endif

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n4; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
1035
void av1_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
1036
1037
                            const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
1038
1039
1040
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
1041
1042
1043
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
1044
  static const transform_2d IHT_16x32[] = {
Luca Barbato's avatar
Luca Barbato committed
1045
1046
1047
1048
    { aom_idct32_c, aom_idct16_c },     // DCT_DCT
    { ihalfright32_c, aom_idct16_c },   // ADST_DCT
    { aom_idct32_c, aom_iadst16_c },    // DCT_ADST
    { ihalfright32_c, aom_iadst16_c },  // ADST_ADST
1049
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
    { ihalfright32_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct32_c, aom_iadst16_c },    // DCT_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // ADST_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx16_c },           // IDTX
    { aom_idct32_c, iidtx16_c },        // V_DCT
    { iidtx32_c, aom_idct16_c },        // H_DCT
    { ihalfright32_c, iidtx16_c },      // V_ADST
    { iidtx32_c, aom_iadst16_c },       // H_ADST
    { ihalfright32_c, iidtx16_c },      // V_FLIPADST
    { iidtx32_c, aom_iadst16_c },       // H_FLIPADST
1062
#endif
1063
1064
1065
1066
1067
  };

  const int n = 16;
  const int n2 = 32;
  int i, j;
1068
  tran_low_t out[16][32], tmp[16][32], outtmp[16];
1069
1070
1071
1072
1073
1074
1075
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
    IHT_16x32[tx_type].rows(input, outtmp);
    for (j = 0; j < n; ++j)
1076
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
1077
    input += n;
1078
1079
1080
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
1081
  for (i = 0; i < n; ++i) IHT_16x32[tx_type].cols(tmp[i], out[i]);
1082

1083
#if CONFIG_EXT_TX
1084
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
1085
#endif
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
1097
void av1_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
1098
1099
                            const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
1100
1101
1102
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
1103
1104
1105
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
1106
  static const transform_2d IHT_32x16[] = {
Luca Barbato's avatar
Luca Barbato committed
1107
1108
1109
1110
    { aom_idct16_c, aom_idct32_c },     // DCT_DCT
    { aom_iadst16_c, aom_idct32_c },    // ADST_DCT
    { aom_idct16_c, ihalfright32_c },   // DCT_ADST
    { aom_iadst16_c, ihalfright32_c },  // ADST_ADST
1111
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
    { aom_iadst16_c, aom_idct32_c },    // FLIPADST_DCT
    { aom_idct16_c, ihalfright32_c },   // DCT_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // ADST_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx32_c },           // IDTX
    { aom_idct16_c, iidtx32_c },        // V_DCT
    { iidtx16_c, aom_idct32_c },        // H_DCT
    { aom_iadst16_c, iidtx32_c },       // V_ADST
    { iidtx16_c, ihalfright32_c },      // H_ADST
    { aom_iadst16_c, iidtx32_c },       // V_FLIPADST
    { iidtx16_c, ihalfright32_c },      // H_FLIPADST
1124
#endif
1125
1126
1127
1128
1129
  };
  const int n = 16;
  const int n2 = 32;

  int i, j;
1130
  tran_low_t out[32][16], tmp[32][16], outtmp[32];
1131
1132
1133
1134
1135
1136
1137
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_32x16[tx_type].rows(input, outtmp);
    for (j = 0; j < n2; ++j)
1138
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
1139
    input += n2;
1140
1141
1142
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
1143
  for (i = 0; i < n2; ++i) IHT_32x16[tx_type].cols(tmp[i], out[i]);
1144

1145
#if CONFIG_EXT_TX
1146
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
1147
#endif
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
1159
void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
1160
1161
                         const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
1162
1163
1164
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
1165
1166
1167
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
1168
  static const transform_2d IHT_8[] = {
Luca Barbato's avatar
Luca Barbato committed