/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <math.h>

Yaowu Xu's avatar
Yaowu Xu committed
14
#include "./aom_dsp_rtcd.h"
Geza Lore's avatar
Geza Lore committed
15
16
17
#include "./av1_rtcd.h"
#include "aom_dsp/inv_txfm.h"
#include "aom_ports/mem.h"
18
#include "av1/common/av1_inv_txfm1d_cfg.h"
19
20
21
#include "av1/common/blockd.h"
#include "av1/common/enums.h"
#include "av1/common/idct.h"
Jingning Han's avatar
Jingning Han committed
22

23
int av1_get_tx_scale(const TX_SIZE tx_size) {
24
25
26
27
28
29
30
  if (txsize_sqr_up_map[tx_size] == TX_32X32) return 1;
#if CONFIG_TX64X64
  else if (txsize_sqr_up_map[tx_size] == TX_64X64)
    return 2;
#endif  // CONFIG_TX64X64
  else
    return 0;
31
32
}

33
34
35
// NOTE: The implementation of all inverses need to be aware of the fact
// that input and output could be the same buffer.

36
#if CONFIG_EXT_TX
Debargha Mukherjee's avatar
Debargha Mukherjee committed
37
38
// 4-point inverse identity transform. With CONFIG_DAALA_DCT4 the input is
// copied unchanged; otherwise each coefficient is multiplied by Sqrt2 and
// passed through dct_const_round_shift().
static void iidtx4_c(const tran_low_t *input, tran_low_t *output) {
#if CONFIG_DAALA_DCT4
  for (int k = 0; k < 4; ++k) output[k] = input[k];
#else
  for (int k = 0; k < 4; ++k)
    output[k] = (tran_low_t)dct_const_round_shift(input[k] * Sqrt2);
#endif
}

// 8-point inverse identity transform. With CONFIG_DAALA_DCT8 the input is
// copied unchanged; otherwise each coefficient is doubled.
static void iidtx8_c(const tran_low_t *input, tran_low_t *output) {
#if CONFIG_DAALA_DCT8
  for (int k = 0; k < 8; ++k) output[k] = input[k];
#else
  for (int k = 0; k < 8; ++k) output[k] = input[k] * 2;
#endif
}

// 16-point inverse identity transform. With CONFIG_DAALA_DCT16 the input is
// copied unchanged; otherwise each coefficient is multiplied by 2 * Sqrt2 and
// passed through dct_const_round_shift().
static void iidtx16_c(const tran_low_t *input, tran_low_t *output) {
#if CONFIG_DAALA_DCT16
  for (int k = 0; k < 16; ++k) output[k] = input[k];
#else
  for (int k = 0; k < 16; ++k)
    output[k] = (tran_low_t)dct_const_round_shift(input[k] * 2 * Sqrt2);
#endif
}

// 32-point inverse identity transform: every coefficient is multiplied by 4.
static void iidtx32_c(const tran_low_t *input, tran_low_t *output) {
  for (int k = 0; k < 32; ++k) {
    output[k] = input[k] * 4;
  }
}
74
75
76
77
78
79
80
81

#if CONFIG_TX64X64
// 64-point inverse identity transform: every coefficient is multiplied by
// 4 * Sqrt2 and passed through dct_const_round_shift().
static void iidtx64_c(const tran_low_t *input, tran_low_t *output) {
  for (int k = 0; k < 64; ++k) {
    output[k] = (tran_low_t)dct_const_round_shift(input[k] * 4 * Sqrt2);
  }
}
#endif  // CONFIG_TX64X64
82
#endif  // CONFIG_EXT_TX
Debargha Mukherjee's avatar
Debargha Mukherjee committed
83

84
// For use in lieu of ADST
85
86
87
88
89
90
91
static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  tran_low_t inputhalf[16];
  // Multiply input by sqrt(2)
  for (i = 0; i < 16; ++i) {
    inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
  }
92
93
94
  for (i = 0; i < 16; ++i) {
    output[i] = input[16 + i] * 4;
  }
Luca Barbato's avatar
Luca Barbato committed
95
  aom_idct16_c(inputhalf, output + 16);
96
97
98
  // Note overall scaling factor is 4 times orthogonal
}

99
100
101
102
103
#if CONFIG_TX64X64
// 64-point inverse DCT using the column cos-bit / stage-range tables.
// Coefficients are copied into int32_t scratch buffers for av1_idct64_new()
// and the result is converted back to tran_low_t.
static void idct64_col_c(const tran_low_t *input, tran_low_t *output) {
  int32_t src32[64];
  int32_t dst32[64];

  for (int k = 0; k < 64; ++k) {
    src32[k] = (int32_t)input[k];
  }
  av1_idct64_new(src32, dst32, inv_cos_bit_col_dct_64,
                 inv_stage_range_col_dct_64);
  for (int k = 0; k < 64; ++k) {
    output[k] = (tran_low_t)dst32[k];
  }
}

// 64-point inverse DCT using the row cos-bit / stage-range tables.
// Coefficients are copied into int32_t scratch buffers for av1_idct64_new()
// and the result is converted back to tran_low_t.
static void idct64_row_c(const tran_low_t *input, tran_low_t *output) {
  int32_t src32[64];
  int32_t dst32[64];

  for (int k = 0; k < 64; ++k) {
    src32[k] = (int32_t)input[k];
  }
  av1_idct64_new(src32, dst32, inv_cos_bit_row_dct_64,
                 inv_stage_range_row_dct_64);
  for (int k = 0; k < 64; ++k) {
    output[k] = (tran_low_t)dst32[k];
  }
}

// For use in lieu of ADST
// The upper 32 inputs, scaled by 4 * sqrt(2), become the lower 32 outputs; the
// upper 32 outputs are the 32-point inverse DCT of the lower 32 inputs scaled
// by sqrt(2).
static void ihalfright64_c(const tran_low_t *input, tran_low_t *output) {
  tran_low_t scaled_low[32];

  // Stage the sqrt(2)-scaled lower half in a local buffer before any output
  // element is written.
  for (int k = 0; k < 32; ++k) {
    scaled_low[k] = (tran_low_t)dct_const_round_shift(input[k] * Sqrt2);
  }
  for (int k = 0; k < 32; ++k) {
    output[k] = (tran_low_t)dct_const_round_shift(input[k + 32] * 4 * Sqrt2);
  }
  aom_idct32_c(scaled_low, output + 32);
  // Note overall scaling factor is 4 * sqrt(2)  times orthogonal
}
#endif  // CONFIG_TX64X64

Jingning Han's avatar
Jingning Han committed
132
// Inverse identity transform and add.
133
#if CONFIG_EXT_TX
134
static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
Jingning Han's avatar
Jingning Han committed
135
                           int bs, int tx_type) {
136
  int r, c;
137
  const int shift = bs < 32 ? 3 : (bs < 64 ? 2 : 1);
Debargha Mukherjee's avatar
Debargha Mukherjee committed
138
  if (tx_type == IDTX) {
Jingning Han's avatar
Jingning Han committed
139
140
    for (r = 0; r < bs; ++r) {
      for (c = 0; c < bs; ++c)
Debargha Mukherjee's avatar
Debargha Mukherjee committed
141
142
143
        dest[c] = clip_pixel_add(dest[c], input[c] >> shift);
      dest += stride;
      input += bs;
Jingning Han's avatar
Jingning Han committed
144
    }
145
146
  }
}
147
#endif  // CONFIG_EXT_TX
148

clang-format's avatar
clang-format committed
149
150
151
152
153
// Re-points (dest) at the start of the last of (size) rows and negates
// (stride), so that stepping by the new stride visits the rows in reverse
// order. Both (dest) and (stride) are assigned to, so they must be lvalues,
// and each is evaluated more than once.
#define FLIPUD_PTR(dest, stride, size)       \
  do {                                       \
    (dest) = (dest) + ((size)-1) * (stride); \
    (stride) = -(stride);                    \
  } while (0)
154

155
#if CONFIG_EXT_TX
clang-format's avatar
clang-format committed
156
157
158
// Applies the up/down and/or left/right flip implied by tx_type by rewriting
// the destination and source pointers and strides in place (via FLIPUD_PTR).
// Transform types without a FLIPADST component leave everything untouched;
// an unknown tx_type trips assert(0).
static void maybe_flip_strides(uint8_t **dst, int *dstride, tran_low_t **src,
                               int *sstride, int tx_type, int sizey,
                               int sizex) {
  // Note that the transpose of src will be added to dst. In order to LR
  // flip the addends (in dst coordinates), we UD flip the src. To UD flip
  // the addends, we UD flip the dst.
  int flip_ud = 0;
  int flip_lr = 0;

  switch (tx_type) {
    case FLIPADST_DCT:
    case FLIPADST_ADST:
    case V_FLIPADST: flip_ud = 1; break;
    case DCT_FLIPADST:
    case ADST_FLIPADST:
    case H_FLIPADST: flip_lr = 1; break;
    case FLIPADST_FLIPADST:
      flip_ud = 1;
      flip_lr = 1;
      break;
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case IDTX:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST: break;
    default: assert(0); break;
  }

  if (flip_ud) {
    // flip UD
    FLIPUD_PTR(*dst, *dstride, sizey);
  }
  if (flip_lr) {
    // flip LR
    FLIPUD_PTR(*src, *sstride, sizex);
  }
}
193
#endif  // CONFIG_EXT_TX
194

195
#if CONFIG_HIGHBITDEPTH
196
#if CONFIG_EXT_TX && CONFIG_TX64X64
197
static void highbd_inv_idtx_add_c(const tran_low_t *input, uint8_t *dest8,
198
                                  int stride, int bs, int tx_type, int bd) {
199
200
201
  int r, c;
  const int shift = bs < 32 ? 3 : 2;
  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
202

Debargha Mukherjee's avatar
Debargha Mukherjee committed
203
  if (tx_type == IDTX) {
204
205
    for (r = 0; r < bs; ++r) {
      for (c = 0; c < bs; ++c)
Debargha Mukherjee's avatar
Debargha Mukherjee committed
206
207
208
        dest[c] = highbd_clip_pixel_add(dest[c], input[c] >> shift, bd);
      dest += stride;
      input += bs;
209
    }
210
211
  }
}
212
#endif  // CONFIG_EXT_TX && CONFIG_TX64X64
213
#endif  // CONFIG_HIGHBITDEPTH
214

Lester Lu's avatar
Lester Lu committed
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
#if CONFIG_LGT
// 4-point inverse LGT: output[j] is the rounded sum over i of
// lgtmtx[i * 4 + j] * input[i]. An all-zero input short-circuits to an
// all-zero output. All inputs are consumed before any output is written.
void ilgt4(const tran_low_t *input, tran_low_t *output,
           const tran_high_t *lgtmtx) {
  if ((input[0] | input[1] | input[2] | input[3]) == 0) {
    for (int k = 0; k < 4; ++k) output[k] = 0;
    return;
  }

  tran_high_t acc[4] = { 0 };
  for (int row = 0; row < 4; ++row) {
    const tran_high_t *basis = lgtmtx + row * 4;
    for (int col = 0; col < 4; ++col) {
      acc[col] += basis[col] * input[row];
    }
  }

  for (int k = 0; k < 4; ++k) {
    output[k] = WRAPLOW(dct_const_round_shift(acc[k]));
  }
}

// 8-point inverse LGT: output[j] is the rounded sum over i of
// lgtmtx[i * 8 + j] * input[i]. All inputs are consumed before any output is
// written.
void ilgt8(const tran_low_t *input, tran_low_t *output,
           const tran_high_t *lgtmtx) {
  tran_high_t acc[8] = { 0 };

  for (int row = 0; row < 8; ++row) {
    const tran_high_t *basis = lgtmtx + row * 8;
    for (int col = 0; col < 8; ++col) {
      acc[col] += basis[col] * input[row];
    }
  }

  for (int k = 0; k < 8; ++k) {
    output[k] = WRAPLOW(dct_const_round_shift(acc[k]));
  }
}

// The get_inv_lgt functions return 1 if LGT is chosen to apply, and 0 otherwise
242
int get_inv_lgt4(transform_1d tx_orig, const TxfmParam *txfm_param,
Lester Lu's avatar
Lester Lu committed
243
244
245
246
                 const tran_high_t *lgtmtx[], int ntx) {
  // inter/intra split
  if (tx_orig == &aom_iadst4_c) {
    for (int i = 0; i < ntx; ++i)
247
      lgtmtx[i] = txfm_param->is_inter ? &lgt4_170[0][0] : &lgt4_140[0][0];
Lester Lu's avatar
Lester Lu committed
248
249
250
251
252
    return 1;
  }
  return 0;
}

253
int get_inv_lgt8(transform_1d tx_orig, const TxfmParam *txfm_param,
Lester Lu's avatar
Lester Lu committed
254
255
256
257
                 const tran_high_t *lgtmtx[], int ntx) {
  // inter/intra split
  if (tx_orig == &aom_iadst8_c) {
    for (int i = 0; i < ntx; ++i)
258
      lgtmtx[i] = txfm_param->is_inter ? &lgt8_170[0][0] : &lgt8_150[0][0];
Lester Lu's avatar
Lester Lu committed
259
260
261
262
263
264
    return 1;
  }
  return 0;
}
#endif  // CONFIG_LGT

Yaowu Xu's avatar
Yaowu Xu committed
265
void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
266
267
                         const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
268
269
270
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
271
#if !CONFIG_DAALA_DCT4
272
273
274
275
  if (tx_type == DCT_DCT) {
    aom_idct4x4_16_add(input, dest, stride);
    return;
  }
276
#endif
277
  static const transform_2d IHT_4[] = {
Luca Barbato's avatar
Luca Barbato committed
278
279
280
281
    { aom_idct4_c, aom_idct4_c },    // DCT_DCT  = 0
    { aom_iadst4_c, aom_idct4_c },   // ADST_DCT = 1
    { aom_idct4_c, aom_iadst4_c },   // DCT_ADST = 2
    { aom_iadst4_c, aom_iadst4_c },  // ADST_ADST = 3
282
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
283
284
285
286
287
288
289
290
291
292
293
294
    { aom_iadst4_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx4_c },          // IDTX
    { aom_idct4_c, iidtx4_c },       // V_DCT
    { iidtx4_c, aom_idct4_c },       // H_DCT
    { aom_iadst4_c, iidtx4_c },      // V_ADST
    { iidtx4_c, aom_iadst4_c },      // H_ADST
    { aom_iadst4_c, iidtx4_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst4_c },      // H_FLIPADST
295
#endif
296
297
298
  };

  int i, j;
299
  tran_low_t tmp[4][4];
300
301
302
  tran_low_t out[4][4];
  tran_low_t *outp = &out[0][0];
  int outstride = 4;
303

304
305
306
307
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif

Lester Lu's avatar
Lester Lu committed
308
309
310
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[4];
  const tran_high_t *lgtmtx_row[4];
311
312
313
314
  int use_lgt_col =
      get_inv_lgt4(IHT_4[tx_type].cols, txfm_param, lgtmtx_col, 4);
  int use_lgt_row =
      get_inv_lgt4(IHT_4[tx_type].rows, txfm_param, lgtmtx_row, 4);
Lester Lu's avatar
Lester Lu committed
315
316
#endif

317
318
  // inverse transform row vectors
  for (i = 0; i < 4; ++i) {
319
320
321
322
323
#if CONFIG_DAALA_DCT4
    tran_low_t temp_in[4];
    for (j = 0; j < 4; j++) temp_in[j] = input[j] << 1;
    IHT_4[tx_type].rows(temp_in, out[i]);
#else
Lester Lu's avatar
Lester Lu committed
324
325
326
327
328
329
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt4(input, out[i], lgtmtx_row[i]);
    else
#endif
      IHT_4[tx_type].rows(input, out[i]);
330
#endif
clang-format's avatar
clang-format committed
331
    input += 4;
332
333
334
  }

  // transpose
335
336
337
  for (i = 0; i < 4; i++) {
    for (j = 0; j < 4; j++) {
      tmp[j][i] = out[i][j];
338
    }
339
340
341
342
  }

  // inverse transform column vectors
  for (i = 0; i < 4; ++i) {
Lester Lu's avatar
Lester Lu committed
343
344
345
346
347
348
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt4(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_4[tx_type].cols(tmp[i], out[i]);
349
350
351
  }

#if CONFIG_EXT_TX
352
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4, 4);
353
354
355
356
#endif

  // Sum with the destination
  for (i = 0; i < 4; ++i) {
357
    for (j = 0; j < 4; ++j) {
358
359
      int d = i * stride + j;
      int s = j * outstride + i;
360
#if CONFIG_DAALA_DCT4
361
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
362
363
364
#else
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
#endif
365
366
367
368
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
369
void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
370
371
                         const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
372
373
374
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
375
376
377
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
378
  static const transform_2d IHT_4x8[] = {
Luca Barbato's avatar
Luca Barbato committed
379
380
381
382
    { aom_idct8_c, aom_idct4_c },    // DCT_DCT
    { aom_iadst8_c, aom_idct4_c },   // ADST_DCT
    { aom_idct8_c, aom_iadst4_c },   // DCT_ADST
    { aom_iadst8_c, aom_iadst4_c },  // ADST_ADST
383
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
384
385
386
387
388
389
390
391
392
393
394
395
    { aom_iadst8_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx4_c },          // IDTX
    { aom_idct8_c, iidtx4_c },       // V_DCT
    { iidtx8_c, aom_idct4_c },       // H_DCT
    { aom_iadst8_c, iidtx4_c },      // V_ADST
    { iidtx8_c, aom_iadst4_c },      // H_ADST
    { aom_iadst8_c, iidtx4_c },      // V_FLIPADST
    { iidtx8_c, aom_iadst4_c },      // H_FLIPADST
396
#endif
397
398
  };

399
400
  const int n = 4;
  const int n2 = 8;
401
  int i, j;
402
  tran_low_t out[4][8], tmp[4][8], outtmp[4];
403
  tran_low_t *outp = &out[0][0];
404
  int outstride = n2;
405

Lester Lu's avatar
Lester Lu committed
406
407
408
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[4];
  const tran_high_t *lgtmtx_row[8];
409
410
411
412
  int use_lgt_col =
      get_inv_lgt8(IHT_4x8[tx_type].cols, txfm_param, lgtmtx_col, 4);
  int use_lgt_row =
      get_inv_lgt4(IHT_4x8[tx_type].rows, txfm_param, lgtmtx_row, 8);
Lester Lu's avatar
Lester Lu committed
413
414
#endif

415
  // inverse transform row vectors and transpose
416
  for (i = 0; i < n2; ++i) {
Lester Lu's avatar
Lester Lu committed
417
418
419
420
421
422
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt4(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_4x8[tx_type].rows(input, outtmp);
423
    for (j = 0; j < n; ++j)
424
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
425
    input += n;
426
427
428
  }

  // inverse transform column vectors
429
  for (i = 0; i < n; ++i) {
Lester Lu's avatar
Lester Lu committed
430
431
432
433
434
435
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt8(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_4x8[tx_type].cols(tmp[i], out[i]);
436
437
  }

438
#if CONFIG_EXT_TX
439
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
440
#endif
441
442

  // Sum with the destination
443
444
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
445
446
447
448
449
450
451
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
452
void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
453
454
                         const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
455
456
457
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
458
459
460
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
461
  static const transform_2d IHT_8x4[] = {
Luca Barbato's avatar
Luca Barbato committed
462
463
464
465
    { aom_idct4_c, aom_idct8_c },    // DCT_DCT
    { aom_iadst4_c, aom_idct8_c },   // ADST_DCT
    { aom_idct4_c, aom_iadst8_c },   // DCT_ADST
    { aom_iadst4_c, aom_iadst8_c },  // ADST_ADST
466
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
467
468
469
470
471
472
473
474
475
476
477
478
    { aom_iadst4_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx8_c },          // IDTX
    { aom_idct4_c, iidtx8_c },       // V_DCT
    { iidtx4_c, aom_idct8_c },       // H_DCT
    { aom_iadst4_c, iidtx8_c },      // V_ADST
    { iidtx4_c, aom_iadst8_c },      // H_ADST
    { aom_iadst4_c, iidtx8_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst8_c },      // H_FLIPADST
479
#endif
480
  };
481

482
483
  const int n = 4;
  const int n2 = 8;
484
485

  int i, j;
486
  tran_low_t out[8][4], tmp[8][4], outtmp[8];
487
  tran_low_t *outp = &out[0][0];
488
  int outstride = n;
489

Lester Lu's avatar
Lester Lu committed
490
491
492
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[8];
  const tran_high_t *lgtmtx_row[4];
493
494
495
496
  int use_lgt_col =
      get_inv_lgt4(IHT_8x4[tx_type].cols, txfm_param, lgtmtx_col, 8);
  int use_lgt_row =
      get_inv_lgt8(IHT_8x4[tx_type].rows, txfm_param, lgtmtx_row, 4);
Lester Lu's avatar
Lester Lu committed
497
498
#endif

499
  // inverse transform row vectors and transpose
500
  for (i = 0; i < n; ++i) {
Lester Lu's avatar
Lester Lu committed
501
502
503
504
505
506
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt8(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_8x4[tx_type].rows(input, outtmp);
507
    for (j = 0; j < n2; ++j)
508
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
509
    input += n2;
510
511
512
  }

  // inverse transform column vectors
513
  for (i = 0; i < n2; ++i) {
Lester Lu's avatar
Lester Lu committed
514
515
516
517
518
519
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt4(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_8x4[tx_type].cols(tmp[i], out[i]);
520
521
  }

522
#if CONFIG_EXT_TX
523
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
524
#endif
525
526

  // Sum with the destination
527
528
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
529
530
531
532
533
534
535
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

536
void av1_iht4x16_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
537
538
                          const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
539
540
541
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
542
543
544
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
  static const transform_2d IHT_4x16[] = {
    { aom_idct16_c, aom_idct4_c },    // DCT_DCT
    { aom_iadst16_c, aom_idct4_c },   // ADST_DCT
    { aom_idct16_c, aom_iadst4_c },   // DCT_ADST
    { aom_iadst16_c, aom_iadst4_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst16_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct16_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx4_c },          // IDTX
    { aom_idct16_c, iidtx4_c },       // V_DCT
    { iidtx16_c, aom_idct4_c },       // H_DCT
    { aom_iadst16_c, iidtx4_c },      // V_ADST
    { iidtx16_c, aom_iadst4_c },      // H_ADST
    { aom_iadst16_c, iidtx4_c },      // V_FLIPADST
    { iidtx16_c, aom_iadst4_c },      // H_FLIPADST
#endif
  };

  const int n = 4;
  const int n4 = 16;
  int i, j;
569
  tran_low_t out[4][16], tmp[4][16], outtmp[4];
570
571
572
  tran_low_t *outp = &out[0][0];
  int outstride = n4;

Lester Lu's avatar
Lester Lu committed
573
574
#if CONFIG_LGT
  const tran_high_t *lgtmtx_row[16];
575
576
  int use_lgt_row =
      get_inv_lgt4(IHT_4x16[tx_type].rows, txfm_param, lgtmtx_row, 16);
Lester Lu's avatar
Lester Lu committed
577
578
#endif

579
580
  // inverse transform row vectors and transpose
  for (i = 0; i < n4; ++i) {
Lester Lu's avatar
Lester Lu committed
581
582
583
584
585
586
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt4(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_4x16[tx_type].rows(input, outtmp);
587
    for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
588
589
590
591
    input += n;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
592
593
594
  for (i = 0; i < n; ++i) {
    IHT_4x16[tx_type].cols(tmp[i], out[i]);
  }
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n4, n);
#endif

  // Sum with the destination
  for (i = 0; i < n4; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

void av1_iht16x4_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
611
612
                          const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
613
614
615
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
616
617
618
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
  static const transform_2d IHT_16x4[] = {
    { aom_idct4_c, aom_idct16_c },    // DCT_DCT
    { aom_iadst4_c, aom_idct16_c },   // ADST_DCT
    { aom_idct4_c, aom_iadst16_c },   // DCT_ADST
    { aom_iadst4_c, aom_iadst16_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst4_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst16_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx16_c },          // IDTX
    { aom_idct4_c, iidtx16_c },       // V_DCT
    { iidtx4_c, aom_idct16_c },       // H_DCT
    { aom_iadst4_c, iidtx16_c },      // V_ADST
    { iidtx4_c, aom_iadst16_c },      // H_ADST
    { aom_iadst4_c, iidtx16_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst16_c },      // H_FLIPADST
#endif
  };
639

640
641
642
643
  const int n = 4;
  const int n4 = 16;

  int i, j;
644
  tran_low_t out[16][4], tmp[16][4], outtmp[16];
645
646
647
  tran_low_t *outp = &out[0][0];
  int outstride = n;

Lester Lu's avatar
Lester Lu committed
648
649
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[16];
650
651
  int use_lgt_col =
      get_inv_lgt4(IHT_16x4[tx_type].cols, txfm_param, lgtmtx_col, 16);
Lester Lu's avatar
Lester Lu committed
652
653
#endif

654
655
656
  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_16x4[tx_type].rows(input, outtmp);
657
    for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j];
658
659
660
661
    input += n4;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
662
663
664
665
666
667
668
669
  for (i = 0; i < n4; ++i) {
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt4(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_16x4[tx_type].cols(tmp[i], out[i]);
  }
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n4);
#endif

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n4; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
685
void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
686
687
                           const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
688
689
690
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
691
692
693
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
694
  static const transform_2d IHT_8x16[] = {
Luca Barbato's avatar
Luca Barbato committed
695
696
697
698
    { aom_idct16_c, aom_idct8_c },    // DCT_DCT
    { aom_iadst16_c, aom_idct8_c },   // ADST_DCT
    { aom_idct16_c, aom_iadst8_c },   // DCT_ADST
    { aom_iadst16_c, aom_iadst8_c },  // ADST_ADST
699
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
700
701
702
703
704
705
706
707
708
709
710
711
    { aom_iadst16_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct16_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx8_c },          // IDTX
    { aom_idct16_c, iidtx8_c },       // V_DCT
    { iidtx16_c, aom_idct8_c },       // H_DCT
    { aom_iadst16_c, iidtx8_c },      // V_ADST
    { iidtx16_c, aom_iadst8_c },      // H_ADST
    { aom_iadst16_c, iidtx8_c },      // V_FLIPADST
    { iidtx16_c, aom_iadst8_c },      // H_FLIPADST
712
#endif
713
714
715
716
717
  };

  const int n = 8;
  const int n2 = 16;
  int i, j;
718
  tran_low_t out[8][16], tmp[8][16], outtmp[8];
719
720
721
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

Lester Lu's avatar
Lester Lu committed
722
723
#if CONFIG_LGT
  const tran_high_t *lgtmtx_row[16];
724
725
  int use_lgt_row =
      get_inv_lgt8(IHT_8x16[tx_type].rows, txfm_param, lgtmtx_row, 16);
Lester Lu's avatar
Lester Lu committed
726
727
#endif

728
729
  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
Lester Lu's avatar
Lester Lu committed
730
731
732
733
734
735
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt8(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_8x16[tx_type].rows(input, outtmp);
736
    for (j = 0; j < n; ++j)
737
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
738
    input += n;
739
740
741
742
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
743
    IHT_8x16[tx_type].cols(tmp[i], out[i]);
744
745
  }

746
#if CONFIG_EXT_TX
747
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
748
#endif
749
750
751
752
753
754
755
756
757
758
759

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
760
void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
761
762
                           const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
763
764
765
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
766
767
768
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
769
  static const transform_2d IHT_16x8[] = {
Luca Barbato's avatar
Luca Barbato committed
770
771
772
773
    { aom_idct8_c, aom_idct16_c },    // DCT_DCT
    { aom_iadst8_c, aom_idct16_c },   // ADST_DCT
    { aom_idct8_c, aom_iadst16_c },   // DCT_ADST
    { aom_iadst8_c, aom_iadst16_c },  // ADST_ADST
774
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
775
776
777
778
779
780
781
782
783
784
785
786
    { aom_iadst8_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst16_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // ADST_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx16_c },          // IDTX
    { aom_idct8_c, iidtx16_c },       // V_DCT
    { iidtx8_c, aom_idct16_c },       // H_DCT
    { aom_iadst8_c, iidtx16_c },      // V_ADST
    { iidtx8_c, aom_iadst16_c },      // H_ADST
    { aom_iadst8_c, iidtx16_c },      // V_FLIPADST
    { iidtx8_c, aom_iadst16_c },      // H_FLIPADST
787
#endif
788
  };
789

790
791
792
793
  const int n = 8;
  const int n2 = 16;

  int i, j;
794
  tran_low_t out[16][8], tmp[16][8], outtmp[16];
795
796
797
  tran_low_t *outp = &out[0][0];
  int outstride = n;

Lester Lu's avatar
Lester Lu committed
798
799
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[16];
800
801
  int use_lgt_col =
      get_inv_lgt8(IHT_16x8[tx_type].cols, txfm_param, lgtmtx_col, 16);
Lester Lu's avatar
Lester Lu committed
802
803
#endif

804
805
806
807
  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_16x8[tx_type].rows(input, outtmp);
    for (j = 0; j < n2; ++j)
808
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
809
    input += n2;
810
811
812
813
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) {
Lester Lu's avatar
Lester Lu committed
814
815
816
817
818
819
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt8(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_16x8[tx_type].cols(tmp[i], out[i]);
820
821
  }

822
#if CONFIG_EXT_TX
823
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
824
#endif
825
826
827
828
829
830
831
832
833
834
835

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

836
void av1_iht8x32_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
837
838
                           const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
839
840
841
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
842
843
844
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
  static const transform_2d IHT_8x32[] = {
    { aom_idct32_c, aom_idct8_c },     // DCT_DCT
    { ihalfright32_c, aom_idct8_c },   // ADST_DCT
    { aom_idct32_c, aom_iadst8_c },    // DCT_ADST
    { ihalfright32_c, aom_iadst8_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { ihalfright32_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct32_c, aom_iadst8_c },    // DCT_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // ADST_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx8_c },           // IDTX
    { aom_idct32_c, iidtx8_c },        // V_DCT
    { iidtx32_c, aom_idct8_c },        // H_DCT
    { ihalfright32_c, iidtx8_c },      // V_ADST
    { iidtx32_c, aom_iadst8_c },       // H_ADST
    { ihalfright32_c, iidtx8_c },      // V_FLIPADST
    { iidtx32_c, aom_iadst8_c },       // H_FLIPADST
#endif
  };

  const int n = 8;
  const int n4 = 32;
  int i, j;
869
  tran_low_t out[8][32], tmp[8][32], outtmp[8];
870
871
872
  tran_low_t *outp = &out[0][0];
  int outstride = n4;

Lester Lu's avatar
Lester Lu committed
873
874
#if CONFIG_LGT
  const tran_high_t *lgtmtx_row[32];
875
876
  int use_lgt_row =
      get_inv_lgt8(IHT_8x32[tx_type].rows, txfm_param, lgtmtx_row, 32);
Lester Lu's avatar
Lester Lu committed
877
878
#endif

879
880
  // inverse transform row vectors and transpose
  for (i = 0; i < n4; ++i) {
Lester Lu's avatar
Lester Lu committed
881
882
883
884
885
886
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt8(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_8x32[tx_type].rows(input, outtmp);
887
    for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
888
889
890
891
    input += n;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
892
893
894
  for (i = 0; i < n; ++i) {
    IHT_8x32[tx_type].cols(tmp[i], out[i]);
  }
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n4, n);
#endif

  // Sum with the destination
  for (i = 0; i < n4; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

void av1_iht32x8_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
911
912
                           const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
913
914
915
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
916
917
918
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
  static const transform_2d IHT_32x8[] = {
    { aom_idct8_c, aom_idct32_c },     // DCT_DCT
    { aom_iadst8_c, aom_idct32_c },    // ADST_DCT
    { aom_idct8_c, ihalfright32_c },   // DCT_ADST
    { aom_iadst8_c, ihalfright32_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst8_c, aom_idct32_c },    // FLIPADST_DCT
    { aom_idct8_c, ihalfright32_c },   // DCT_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // ADST_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx32_c },           // IDTX
    { aom_idct8_c, iidtx32_c },        // V_DCT
    { iidtx8_c, aom_idct32_c },        // H_DCT
    { aom_iadst8_c, iidtx32_c },       // V_ADST
    { iidtx8_c, ihalfright32_c },      // H_ADST
    { aom_iadst8_c, iidtx32_c },       // V_FLIPADST
    { iidtx8_c, ihalfright32_c },      // H_FLIPADST
#endif
  };
939

940
941
942
943
  const int n = 8;
  const int n4 = 32;

  int i, j;
944
  tran_low_t out[32][8], tmp[32][8], outtmp[32];
945
946
947
  tran_low_t *outp = &out[0][0];
  int outstride = n;

Lester Lu's avatar
Lester Lu committed
948
949
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[32];
950
951
  int use_lgt_col =
      get_inv_lgt4(IHT_32x8[tx_type].cols, txfm_param, lgtmtx_col, 32);
Lester Lu's avatar
Lester Lu committed
952
953
#endif

954
955
956
  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_32x8[tx_type].rows(input, outtmp);
957
    for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j];
958
959
960
961
    input += n4;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
962
963
964
965
966
967
968
969
  for (i = 0; i < n4; ++i) {
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt8(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_32x8[tx_type].cols(tmp[i], out[i]);
  }
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n4);
#endif

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n4; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
985
void av1_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
986
987
                            const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
988
989
990
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
991
992
993
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
994
  static const transform_2d IHT_16x32[] = {
Luca Barbato's avatar
Luca Barbato committed
995
996
997
998
    { aom_idct32_c, aom_idct16_c },     // DCT_DCT
    { ihalfright32_c, aom_idct16_c },   // ADST_DCT
    { aom_idct32_c, aom_iadst16_c },    // DCT_ADST
    { ihalfright32_c, aom_iadst16_c },  // ADST_ADST
999
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
    { ihalfright32_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct32_c, aom_iadst16_c },    // DCT_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // ADST_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx16_c },           // IDTX
    { aom_idct32_c, iidtx16_c },        // V_DCT
    { iidtx32_c, aom_idct16_c },        // H_DCT
    { ihalfright32_c, iidtx16_c },      // V_ADST
    { iidtx32_c, aom_iadst16_c },       // H_ADST
    { ihalfright32_c, iidtx16_c },      // V_FLIPADST
    { iidtx32_c, aom_iadst16_c },       // H_FLIPADST
1012
#endif
1013
1014
1015
1016
1017
  };

  const int n = 16;
  const int n2 = 32;
  int i, j;
1018
  tran_low_t out[16][32], tmp[16][32], outtmp[16];
1019
1020
1021
1022
1023
1024
1025
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
    IHT_16x32[tx_type].rows(input, outtmp);
    for (j = 0; j < n; ++j)
1026
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
1027
    input += n;
1028
1029
1030
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
1031
  for (i = 0; i < n; ++i) IHT_16x32[tx_type].cols(tmp[i], out[i]);
1032

1033
#if CONFIG_EXT_TX
1034
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
1035
#endif
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
1047
void av1_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
1048
1049
                            const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
1050
1051
1052
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
1053
1054
1055
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
1056
  static const transform_2d IHT_32x16[] = {
Luca Barbato's avatar
Luca Barbato committed
1057
1058
1059
1060
    { aom_idct16_c, aom_idct32_c },     // DCT_DCT
    { aom_iadst16_c, aom_idct32_c },    // ADST_DCT
    { aom_idct16_c, ihalfright32_c },   // DCT_ADST
    { aom_iadst16_c, ihalfright32_c },  // ADST_ADST
1061
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
    { aom_iadst16_c, aom_idct32_c },    // FLIPADST_DCT
    { aom_idct16_c, ihalfright32_c },   // DCT_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // ADST_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx32_c },           // IDTX
    { aom_idct16_c, iidtx32_c },        // V_DCT
    { iidtx16_c, aom_idct32_c },        // H_DCT
    { aom_iadst16_c, iidtx32_c },       // V_ADST
    { iidtx16_c, ihalfright32_c },      // H_ADST
    { aom_iadst16_c, iidtx32_c },       // V_FLIPADST
    { iidtx16_c, ihalfright32_c },      // H_FLIPADST
1074
#endif
1075
1076
1077
1078
1079
  };
  const int n = 16;
  const int n2 = 32;

  int i, j;
1080
  tran_low_t out[32][16], tmp[32][16], outtmp[32];
1081
1082
1083
1084
1085
1086
1087
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_32x16[tx_type].rows(input, outtmp);
    for (j = 0; j < n2; ++j)
1088
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
1089
    input += n2;
1090
1091
1092
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
1093
  for (i = 0; i < n2; ++i) IHT_32x16[tx_type].cols(tmp[i], out[i]);
1094

1095
#if CONFIG_EXT_TX
1096
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
1097
#endif
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
1109
void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
1110
1111
                         const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
1112
1113
1114
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
1115
1116
1117
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
1118
  static const transform_2d IHT_8[] = {
Luca Barbato's avatar
Luca Barbato committed
1119
1120
1121
1122
    { aom_idct8_c, aom_idct8_c },    // DCT_DCT  = 0
    { aom_iadst8_c, aom_idct8_c },   // ADST_DCT = 1
    { aom_idct8_c, aom_iadst8_c },   // DCT_ADST = 2
    { aom_iadst8_c, aom_iadst8_c },  // ADST_ADST = 3
1123
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
    { aom_iadst8_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst8_c },  // FLIPADST_FLIPADST