idct.c 87.4 KB
Newer Older
Jingning Han's avatar
Jingning Han committed
1
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <math.h>

Yaowu Xu's avatar
Yaowu Xu committed
14
#include "./aom_dsp_rtcd.h"
Geza Lore's avatar
Geza Lore committed
15
16
17
#include "./av1_rtcd.h"
#include "aom_dsp/inv_txfm.h"
#include "aom_ports/mem.h"
18
#include "av1/common/av1_inv_txfm1d_cfg.h"
19
20
21
#include "av1/common/blockd.h"
#include "av1/common/enums.h"
#include "av1/common/idct.h"
22
23
#if CONFIG_DAALA_TX4 || CONFIG_DAALA_TX8 || CONFIG_DAALA_TX16 || \
    CONFIG_DAALA_TX32 || CONFIG_DAALA_TX64
24
25
#include "av1/common/daala_tx.h"
#endif
Jingning Han's avatar
Jingning Han committed
26

27
int av1_get_tx_scale(const TX_SIZE tx_size) {
28
29
  const int pels = tx_size_2d[tx_size];
  return (pels > 256) + (pels > 1024) + (pels > 4096);
30
31
}

32
33
34
// NOTE: The implementation of all inverses need to be aware of the fact
// that input and output could be the same buffer.

Debargha Mukherjee's avatar
Debargha Mukherjee committed
35
36
// 4-point inverse identity transform: every coefficient is multiplied by
// Sqrt2 and passed through dct_const_round_shift(). Each output element
// depends only on the input element at the same index, so input and output
// may be the same buffer.
static void iidtx4_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  for (i = 0; i < 4; ++i) {
    output[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
  }
}

// 8-point inverse identity transform: every coefficient is multiplied by 2.
// Each output element depends only on the input element at the same index,
// so input and output may be the same buffer.
static void iidtx8_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  for (i = 0; i < 8; ++i) {
    output[i] = input[i] * 2;
  }
}

// 16-point inverse identity transform: every coefficient is multiplied by
// 2 * Sqrt2 and passed through dct_const_round_shift(). Each output element
// depends only on the input element at the same index, so input and output
// may be the same buffer.
static void iidtx16_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  for (i = 0; i < 16; ++i) {
    output[i] = (tran_low_t)dct_const_round_shift(input[i] * 2 * Sqrt2);
  }
}

// 32-point inverse identity transform: every coefficient is multiplied by 4.
// Each output element depends only on the input element at the same index,
// so input and output may be the same buffer.
static void iidtx32_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  for (i = 0; i < 32; ++i) {
    output[i] = input[i] * 4;
  }
}
62

63
#if CONFIG_TX64X64
64
65
// 64-point inverse identity transform: every coefficient is multiplied by
// 4 * Sqrt2 and passed through dct_const_round_shift(). Each output element
// depends only on the input element at the same index, so input and output
// may be the same buffer.
static void iidtx64_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  for (i = 0; i < 64; ++i) {
    output[i] = (tran_low_t)dct_const_round_shift(input[i] * 4 * Sqrt2);
  }
}
#endif  // CONFIG_TX64X64
Debargha Mukherjee's avatar
Debargha Mukherjee committed
71

72
// For use in lieu of ADST
73
74
75
76
77
78
79
static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  tran_low_t inputhalf[16];
  // Multiply input by sqrt(2)
  for (i = 0; i < 16; ++i) {
    inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
  }
80
81
82
  for (i = 0; i < 16; ++i) {
    output[i] = input[16 + i] * 4;
  }
Luca Barbato's avatar
Luca Barbato committed
83
  aom_idct16_c(inputhalf, output + 16);
84
85
86
  // Note overall scaling factor is 4 times orthogonal
}

87
#if CONFIG_TX64X64
88
89
90
91
// 64-point inverse DCT used for the column pass. Copies the coefficients
// into int32_t scratch arrays, runs av1_idct64_new() with the column
// cos-bit / stage-range configuration, and copies the result back as
// tran_low_t. All 64 inputs are read before any output is written, so input
// and output may be the same buffer.
static void idct64_col_c(const tran_low_t *input, tran_low_t *output) {
  int32_t in[64], out[64];
  int i;
  for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
  av1_idct64_new(in, out, inv_cos_bit_col_dct_64, inv_stage_range_col_dct_64);
  for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}

// 64-point inverse DCT used for the row pass. Copies the coefficients into
// int32_t scratch arrays, runs av1_idct64_new() with the row cos-bit /
// stage-range configuration, and copies the result back as tran_low_t.
// All 64 inputs are read before any output is written, so input and output
// may be the same buffer.
static void idct64_row_c(const tran_low_t *input, tran_low_t *output) {
  int32_t in[64], out[64];
  int i;
  for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
  av1_idct64_new(in, out, inv_cos_bit_row_dct_64, inv_stage_range_row_dct_64);
  for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}

// Stand-in for a 64-point ADST ("half right" inverse transform):
//   output[0..31]  = input[32..63] scaled by 4 * Sqrt2 (round-shifted)
//   output[32..63] = aom_idct32_c() of input[0..31] scaled by Sqrt2
// The lower half of the input is staged in a local buffer before output is
// touched.
static void ihalfright64_c(const tran_low_t *input, tran_low_t *output) {
  tran_low_t scaled_lo[32];
  int k;

  // Stage sqrt(2) * input[0..31].
  for (k = 0; k < 32; ++k) {
    const tran_low_t coeff = input[k];
    scaled_lo[k] = (tran_low_t)dct_const_round_shift(coeff * Sqrt2);
  }

  // Upper input half goes straight to the lower output half.
  for (k = 0; k < 32; ++k) {
    const tran_low_t coeff = input[32 + k];
    output[k] = (tran_low_t)dct_const_round_shift(coeff * 4 * Sqrt2);
  }

  // Overall scaling factor is 4 * sqrt(2) times orthogonal.
  aom_idct32_c(scaled_lo, output + 32);
}
#endif  // CONFIG_TX64X64

Jingning Han's avatar
Jingning Han committed
120
// Inverse identity transform and add.
121
static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
122
                           int bsx, int bsy, TX_TYPE tx_type) {
123
  int r, c;
124
125
  const int pels = bsx * bsy;
  const int shift = 3 - ((pels > 256) + (pels > 1024));
Debargha Mukherjee's avatar
Debargha Mukherjee committed
126
  if (tx_type == IDTX) {
127
128
    for (r = 0; r < bsy; ++r) {
      for (c = 0; c < bsx; ++c)
Debargha Mukherjee's avatar
Debargha Mukherjee committed
129
130
        dest[c] = clip_pixel_add(dest[c], input[c] >> shift);
      dest += stride;
131
      input += bsx;
Jingning Han's avatar
Jingning Han committed
132
    }
133
134
135
  }
}

clang-format's avatar
clang-format committed
136
137
138
139
140
/* Re-point `dest` at the last of `size` rows and negate `stride`, so that
 * walking forward with the new stride visits the rows in reverse order.
 * Both `dest` and `stride` must be modifiable lvalues. */
#define FLIPUD_PTR(dest, stride, size)  \
  do {                                  \
    (dest) += ((size)-1) * (stride);    \
    (stride) = -(stride);               \
  } while (0)
141

clang-format's avatar
clang-format committed
142
// Adjusts the destination and/or source pointer and stride so that the
// subsequent "sum with the destination" loop applies the flip implied by
// tx_type:
//   - FLIPADST_DCT, FLIPADST_ADST, V_FLIPADST: flip *dst / *dstride (sizey).
//   - DCT_FLIPADST, ADST_FLIPADST, H_FLIPADST: flip *src / *sstride (sizex).
//   - FLIPADST_FLIPADST: both of the above.
//   - all other listed types: no change.
// An unlisted tx_type triggers assert(0).
static void maybe_flip_strides(uint8_t **dst, int *dstride, tran_low_t **src,
                               int *sstride, TX_TYPE tx_type, int sizey,
                               int sizex) {
  // Note that the transpose of src will be added to dst. In order to LR
  // flip the addends (in dst coordinates), we UD flip the src. To UD flip
  // the addends, we UD flip the dst.
  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case IDTX:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST: break;
    case FLIPADST_DCT:
    case FLIPADST_ADST:
    case V_FLIPADST:
      // flip UD
      FLIPUD_PTR(*dst, *dstride, sizey);
      break;
    case DCT_FLIPADST:
    case ADST_FLIPADST:
    case H_FLIPADST:
      // flip LR
      FLIPUD_PTR(*src, *sstride, sizex);
      break;
    case FLIPADST_FLIPADST:
      // flip UD
      FLIPUD_PTR(*dst, *dstride, sizey);
      // flip LR
      FLIPUD_PTR(*src, *sstride, sizex);
      break;
    default: assert(0); break;
  }
}

180
#if CONFIG_HIGHBITDEPTH
181
#if CONFIG_TX64X64
182
// High-bitdepth inverse identity transform and add.
// For tx_type == IDTX, adds (input >> shift) to a bsx-wide, bsy-tall region
// of the 16-bit destination obtained via CONVERT_TO_SHORTPTR(dest8), using
// highbd_clip_pixel_add() with bit depth bd. input is read as bsy
// consecutive rows of bsx coefficients; dest advances by stride per row.
// shift is 3, reduced by one for each of the thresholds 256 and 1024 that
// bsx * bsy exceeds. Any other tx_type leaves the destination untouched.
static void highbd_inv_idtx_add_c(const tran_low_t *input, uint8_t *dest8,
                                  int stride, int bsx, int bsy, TX_TYPE tx_type,
                                  int bd) {
  int r, c;
  const int pels = bsx * bsy;
  const int shift = 3 - ((pels > 256) + (pels > 1024));
  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  if (tx_type == IDTX) {
    for (r = 0; r < bsy; ++r) {
      for (c = 0; c < bsx; ++c)
        dest[c] = highbd_clip_pixel_add(dest[c], input[c] >> shift, bd);
      dest += stride;
      input += bsx;
    }
  }
}
199
#endif  // CONFIG_TX64X64
200
#endif  // CONFIG_HIGHBITDEPTH
201

Yaowu Xu's avatar
Yaowu Xu committed
202
void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
203
                         const TxfmParam *txfm_param) {
204
  const TX_TYPE tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
205
206
207
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
208
#if !CONFIG_DAALA_TX4
209
210
211
212
  if (tx_type == DCT_DCT) {
    aom_idct4x4_16_add(input, dest, stride);
    return;
  }
213
#endif
214
  static const transform_2d IHT_4[] = {
215
#if CONFIG_DAALA_TX4
216
217
218
219
220
221
222
223
224
    { daala_idct4, daala_idct4 },  // DCT_DCT  = 0
    { daala_idst4, daala_idct4 },  // ADST_DCT = 1
    { daala_idct4, daala_idst4 },  // DCT_ADST = 2
    { daala_idst4, daala_idst4 },  // ADST_ADST = 3
    { daala_idst4, daala_idct4 },  // FLIPADST_DCT
    { daala_idct4, daala_idst4 },  // DCT_FLIPADST
    { daala_idst4, daala_idst4 },  // FLIPADST_FLIPADST
    { daala_idst4, daala_idst4 },  // ADST_FLIPADST
    { daala_idst4, daala_idst4 },  // FLIPADST_ADST
225
226
227
228
229
230
231
    { daala_idtx4, daala_idtx4 },  // IDTX
    { daala_idct4, daala_idtx4 },  // V_DCT
    { daala_idtx4, daala_idct4 },  // H_DCT
    { daala_idst4, daala_idtx4 },  // V_ADST
    { daala_idtx4, daala_idst4 },  // H_ADST
    { daala_idst4, daala_idtx4 },  // V_FLIPADST
    { daala_idtx4, daala_idst4 },  // H_FLIPADST
232
#else
Luca Barbato's avatar
Luca Barbato committed
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
    { aom_idct4_c, aom_idct4_c },    // DCT_DCT  = 0
    { aom_iadst4_c, aom_idct4_c },   // ADST_DCT = 1
    { aom_idct4_c, aom_iadst4_c },   // DCT_ADST = 2
    { aom_iadst4_c, aom_iadst4_c },  // ADST_ADST = 3
    { aom_iadst4_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx4_c },          // IDTX
    { aom_idct4_c, iidtx4_c },       // V_DCT
    { iidtx4_c, aom_idct4_c },       // H_DCT
    { aom_iadst4_c, iidtx4_c },      // V_ADST
    { iidtx4_c, aom_iadst4_c },      // H_ADST
    { aom_iadst4_c, iidtx4_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst4_c },      // H_FLIPADST
249
#endif
250
251
252
  };

  int i, j;
253
  tran_low_t tmp[4][4];
254
255
256
  tran_low_t out[4][4];
  tran_low_t *outp = &out[0][0];
  int outstride = 4;
257

258
259
260
261
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif

262
263
  // inverse transform row vectors
  for (i = 0; i < 4; ++i) {
264
#if CONFIG_DAALA_TX4
265
    tran_low_t temp_in[4];
266
    for (j = 0; j < 4; j++) temp_in[j] = input[j] * 2;
267
268
    IHT_4[tx_type].rows(temp_in, out[i]);
#else
Sebastien Alaiwan's avatar
Sebastien Alaiwan committed
269
    IHT_4[tx_type].rows(input, out[i]);
270
#endif
clang-format's avatar
clang-format committed
271
    input += 4;
272
273
274
  }

  // transpose
275
276
277
  for (i = 0; i < 4; i++) {
    for (j = 0; j < 4; j++) {
      tmp[j][i] = out[i][j];
278
    }
279
280
281
282
  }

  // inverse transform column vectors
  for (i = 0; i < 4; ++i) {
Sebastien Alaiwan's avatar
Sebastien Alaiwan committed
283
    IHT_4[tx_type].cols(tmp[i], out[i]);
284
285
  }

286
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4, 4);
287
288
289

  // Sum with the destination
  for (i = 0; i < 4; ++i) {
290
    for (j = 0; j < 4; ++j) {
291
292
      int d = i * stride + j;
      int s = j * outstride + i;
293
#if CONFIG_DAALA_TX4
294
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
295
296
297
#else
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
#endif
298
299
300
301
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
302
void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
303
                         const TxfmParam *txfm_param) {
304
  const TX_TYPE tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
305
306
307
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
308
309
310
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
311
  static const transform_2d IHT_4x8[] = {
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
#if CONFIG_DAALA_TX4 && CONFIG_DAALA_TX8
    { daala_idct8, daala_idct4 },  // DCT_DCT  = 0
    { daala_idst8, daala_idct4 },  // ADST_DCT = 1
    { daala_idct8, daala_idst4 },  // DCT_ADST = 2
    { daala_idst8, daala_idst4 },  // ADST_ADST = 3
    { daala_idst8, daala_idct4 },  // FLIPADST_DCT
    { daala_idct8, daala_idst4 },  // DCT_FLIPADST
    { daala_idst8, daala_idst4 },  // FLIPADST_FLIPADST
    { daala_idst8, daala_idst4 },  // ADST_FLIPADST
    { daala_idst8, daala_idst4 },  // FLIPADST_ADST
    { daala_idtx8, daala_idtx4 },  // IDTX
    { daala_idct8, daala_idtx4 },  // V_DCT
    { daala_idtx8, daala_idct4 },  // H_DCT
    { daala_idst8, daala_idtx4 },  // V_ADST
    { daala_idtx8, daala_idst4 },  // H_ADST
    { daala_idst8, daala_idtx4 },  // V_FLIPADST
    { daala_idtx8, daala_idst4 },  // H_FLIPADST
#else
Luca Barbato's avatar
Luca Barbato committed
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
    { aom_idct8_c, aom_idct4_c },    // DCT_DCT
    { aom_iadst8_c, aom_idct4_c },   // ADST_DCT
    { aom_idct8_c, aom_iadst4_c },   // DCT_ADST
    { aom_iadst8_c, aom_iadst4_c },  // ADST_ADST
    { aom_iadst8_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx4_c },          // IDTX
    { aom_idct8_c, iidtx4_c },       // V_DCT
    { iidtx8_c, aom_idct4_c },       // H_DCT
    { aom_iadst8_c, iidtx4_c },      // V_ADST
    { iidtx8_c, aom_iadst4_c },      // H_ADST
    { aom_iadst8_c, iidtx4_c },      // V_FLIPADST
    { iidtx8_c, aom_iadst4_c },      // H_FLIPADST
346
#endif
347
348
  };

349
350
  const int n = 4;
  const int n2 = 8;
351
  int i, j;
352
  tran_low_t out[4][8], tmp[4][8], outtmp[4];
353
  tran_low_t *outp = &out[0][0];
354
  int outstride = n2;
355

356
357
358
359
360
361
  // Multi-way scaling matrix (bits):
  // LGT/AV1 row,col     input+0, rowTX+.5, mid+.5, colTX+1, out-5 == -3
  // LGT row, Daala col  input+0, rowTX+.5, mid+.5, colTX+0, out-4 == -3
  // Daala row, LGT col  input+1, rowTX+0,  mid+0,  colTX+1, out-5 == -3
  // Daala row,col       input+1, rowTX+0,  mid+0,  colTX+0, out-4 == -3

362
  // inverse transform row vectors and transpose
363
  for (i = 0; i < n2; ++i) {
364
#if CONFIG_DAALA_TX4 && CONFIG_DAALA_TX8
Sebastien Alaiwan's avatar
Sebastien Alaiwan committed
365
366
367
368
369
370
371
372
    // Daala row transform; Scaling cases 3 and 4 above
    tran_low_t temp_in[4];
    // Input scaling up by 1 bit
    for (j = 0; j < n; j++) temp_in[j] = input[j] * 2;
    // Row transform; Daala does not scale
    IHT_4x8[tx_type].rows(temp_in, outtmp);
    // Transpose; no mid scaling
    for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
373
374
375
376
377
#else
    // AV1 row transform; Scaling case 1 only
    // Row transform (AV1 scales up .5 bits)
    IHT_4x8[tx_type].rows(input, outtmp);
    // Transpose and mid scaling up by .5 bit
378
    for (j = 0; j < n; ++j)
379
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
380
#endif
381
    input += n;
382
383
384
  }

  // inverse transform column vectors
385
  // AV1/LGT column TX scales up by 1 bit, Daala does not scale
386
  for (i = 0; i < n; ++i) {
Sebastien Alaiwan's avatar
Sebastien Alaiwan committed
387
    IHT_4x8[tx_type].cols(tmp[i], out[i]);
388
389
  }

390
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
391
392

  // Sum with the destination
393
394
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
395
396
      int d = i * stride + j;
      int s = j * outstride + i;
397
#if CONFIG_DAALA_TX4 && CONFIG_DAALA_TX8
Sebastien Alaiwan's avatar
Sebastien Alaiwan committed
398
399
      // Output scaling cases 2, 4
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
400
401
#else
      // Output scaling case 1 only
402
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
403
#endif
404
405
406
407
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
408
void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
409
                         const TxfmParam *txfm_param) {
410
  const TX_TYPE tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
411
412
413
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
414
415
416
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
417
  static const transform_2d IHT_8x4[] = {
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
#if CONFIG_DAALA_TX4 && CONFIG_DAALA_TX8
    { daala_idct4, daala_idct8 },  // DCT_DCT  = 0
    { daala_idst4, daala_idct8 },  // ADST_DCT = 1
    { daala_idct4, daala_idst8 },  // DCT_ADST = 2
    { daala_idst4, daala_idst8 },  // ADST_ADST = 3
    { daala_idst4, daala_idct8 },  // FLIPADST_DCT
    { daala_idct4, daala_idst8 },  // DCT_FLIPADST
    { daala_idst4, daala_idst8 },  // FLIPADST_FLIPADST
    { daala_idst4, daala_idst8 },  // ADST_FLIPADST
    { daala_idst4, daala_idst8 },  // FLIPADST_ADST
    { daala_idtx4, daala_idtx8 },  // IDTX
    { daala_idct4, daala_idtx8 },  // V_DCT
    { daala_idtx4, daala_idct8 },  // H_DCT
    { daala_idst4, daala_idtx8 },  // V_ADST
    { daala_idtx4, daala_idst8 },  // H_ADST
    { daala_idst4, daala_idtx8 },  // V_FLIPADST
    { daala_idtx4, daala_idst8 },  // H_FLIPADST
#else
Luca Barbato's avatar
Luca Barbato committed
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
    { aom_idct4_c, aom_idct8_c },    // DCT_DCT
    { aom_iadst4_c, aom_idct8_c },   // ADST_DCT
    { aom_idct4_c, aom_iadst8_c },   // DCT_ADST
    { aom_iadst4_c, aom_iadst8_c },  // ADST_ADST
    { aom_iadst4_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx8_c },          // IDTX
    { aom_idct4_c, iidtx8_c },       // V_DCT
    { iidtx4_c, aom_idct8_c },       // H_DCT
    { aom_iadst4_c, iidtx8_c },      // V_ADST
    { iidtx4_c, aom_iadst8_c },      // H_ADST
    { aom_iadst4_c, iidtx8_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst8_c },      // H_FLIPADST
452
#endif
453
  };
454

455
456
  const int n = 4;
  const int n2 = 8;
457
458

  int i, j;
459
  tran_low_t out[8][4], tmp[8][4], outtmp[8];
460
  tran_low_t *outp = &out[0][0];
461
  int outstride = n;
462

463
464
465
466
467
468
  // Multi-way scaling matrix (bits):
  // LGT/AV1 row,col     input+0, rowTX+1, mid+.5, colTX+.5, out-5 == -3
  // LGT row, Daala col  input+0, rowTX+1, mid+.5, colTX+.5, out-4 == -3
  // Daala row, LGT col  input+1, rowTX+0, mid+0,  colTX+1,  out-5 == -3
  // Daala row,col       input+1, rowTX+0, mid+0,  colTX+0,  out-4 == -3

469
  // inverse transform row vectors and transpose
470
  for (i = 0; i < n; ++i) {
471
#if CONFIG_DAALA_TX4 && CONFIG_DAALA_TX8
Sebastien Alaiwan's avatar
Sebastien Alaiwan committed
472
473
474
475
476
477
478
479
    // Daala row transform; Scaling cases 3 and 4 above
    tran_low_t temp_in[8];
    // Input scaling up by 1 bit
    for (j = 0; j < n2; j++) temp_in[j] = input[j] * 2;
    // Row transform; Daala does not scale
    IHT_8x4[tx_type].rows(temp_in, outtmp);
    // Transpose; no mid scaling
    for (j = 0; j < n2; ++j) tmp[j][i] = outtmp[j];
480
481
482
483
484
#else
    // AV1 row transform; Scaling case 1 only
    // Row transform (AV1 scales up 1 bit)
    IHT_8x4[tx_type].rows(input, outtmp);
    // Transpose and mid scaling up by .5 bit
485
    for (j = 0; j < n2; ++j)
486
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
487
#endif
488
    input += n2;
489
490
491
  }

  // inverse transform column vectors
492
  // AV1 and LGT scale up by .5 bits; Daala does not scale
493
  for (i = 0; i < n2; ++i) {
Sebastien Alaiwan's avatar
Sebastien Alaiwan committed
494
    IHT_8x4[tx_type].cols(tmp[i], out[i]);
495
496
  }

497
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
498
499

  // Sum with the destination
500
501
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
502
503
      int d = i * stride + j;
      int s = j * outstride + i;
504
#if CONFIG_DAALA_TX4 && CONFIG_DAALA_TX8
Sebastien Alaiwan's avatar
Sebastien Alaiwan committed
505
506
      // Output scaling cases 2, 4
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
507
508
#else
      // Output scaling case 1
509
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
510
#endif
511
512
513
514
    }
  }
}

515
void av1_iht4x16_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
516
                          const TxfmParam *txfm_param) {
517
  const TX_TYPE tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
518
519
520
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
521
522
523
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
  static const transform_2d IHT_4x16[] = {
    { aom_idct16_c, aom_idct4_c },    // DCT_DCT
    { aom_iadst16_c, aom_idct4_c },   // ADST_DCT
    { aom_idct16_c, aom_iadst4_c },   // DCT_ADST
    { aom_iadst16_c, aom_iadst4_c },  // ADST_ADST
    { aom_iadst16_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct16_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx4_c },          // IDTX
    { aom_idct16_c, iidtx4_c },       // V_DCT
    { iidtx16_c, aom_idct4_c },       // H_DCT
    { aom_iadst16_c, iidtx4_c },      // V_ADST
    { iidtx16_c, aom_iadst4_c },      // H_ADST
    { aom_iadst16_c, iidtx4_c },      // V_FLIPADST
    { iidtx16_c, aom_iadst4_c },      // H_FLIPADST
  };

  const int n = 4;
  const int n4 = 16;
  int i, j;
546
  tran_low_t out[4][16], tmp[4][16], outtmp[4];
547
548
549
550
551
  tran_low_t *outp = &out[0][0];
  int outstride = n4;

  // inverse transform row vectors and transpose
  for (i = 0; i < n4; ++i) {
Sebastien Alaiwan's avatar
Sebastien Alaiwan committed
552
    IHT_4x16[tx_type].rows(input, outtmp);
553
    for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
554
555
556
557
    input += n;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
558
559
560
  for (i = 0; i < n; ++i) {
    IHT_4x16[tx_type].cols(tmp[i], out[i]);
  }
561
562
563
564
565
566
567
568
569
570
571
572
573
574

  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n4, n);

  // Sum with the destination
  for (i = 0; i < n4; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

void av1_iht16x4_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
575
                          const TxfmParam *txfm_param) {
576
  const TX_TYPE tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
577
578
579
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
580
581
582
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
  static const transform_2d IHT_16x4[] = {
    { aom_idct4_c, aom_idct16_c },    // DCT_DCT
    { aom_iadst4_c, aom_idct16_c },   // ADST_DCT
    { aom_idct4_c, aom_iadst16_c },   // DCT_ADST
    { aom_iadst4_c, aom_iadst16_c },  // ADST_ADST
    { aom_iadst4_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst16_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx16_c },          // IDTX
    { aom_idct4_c, iidtx16_c },       // V_DCT
    { iidtx4_c, aom_idct16_c },       // H_DCT
    { aom_iadst4_c, iidtx16_c },      // V_ADST
    { iidtx4_c, aom_iadst16_c },      // H_ADST
    { aom_iadst4_c, iidtx16_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst16_c },      // H_FLIPADST
  };
601

602
603
604
605
  const int n = 4;
  const int n4 = 16;

  int i, j;
606
  tran_low_t out[16][4], tmp[16][4], outtmp[16];
607
608
609
610
611
612
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_16x4[tx_type].rows(input, outtmp);
613
    for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j];
614
615
616
617
    input += n4;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
618
  for (i = 0; i < n4; ++i) {
Sebastien Alaiwan's avatar
Sebastien Alaiwan committed
619
    IHT_16x4[tx_type].cols(tmp[i], out[i]);
Lester Lu's avatar
Lester Lu committed
620
  }
621
622
623
624
625
626
627
628
629
630
631
632
633

  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n4);

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n4; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
634
void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
635
                           const TxfmParam *txfm_param) {
636
  const TX_TYPE tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
637
638
639
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
640
641
642
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
643
  static const transform_2d IHT_8x16[] = {
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
#if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16
    { daala_idct16, daala_idct8 },  // DCT_DCT  = 0
    { daala_idst16, daala_idct8 },  // ADST_DCT = 1
    { daala_idct16, daala_idst8 },  // DCT_ADST = 2
    { daala_idst16, daala_idst8 },  // ADST_ADST = 3
    { daala_idst16, daala_idct8 },  // FLIPADST_DCT
    { daala_idct16, daala_idst8 },  // DCT_FLIPADST
    { daala_idst16, daala_idst8 },  // FLIPADST_FLIPADST
    { daala_idst16, daala_idst8 },  // ADST_FLIPADST
    { daala_idst16, daala_idst8 },  // FLIPADST_ADST
    { daala_idtx16, daala_idtx8 },  // IDTX
    { daala_idct16, daala_idtx8 },  // V_DCT
    { daala_idtx16, daala_idct8 },  // H_DCT
    { daala_idst16, daala_idtx8 },  // V_ADST
    { daala_idtx16, daala_idst8 },  // H_ADST
    { daala_idst16, daala_idtx8 },  // V_FLIPADST
    { daala_idtx16, daala_idst8 },  // H_FLIPADST
#else
Luca Barbato's avatar
Luca Barbato committed
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
    { aom_idct16_c, aom_idct8_c },    // DCT_DCT
    { aom_iadst16_c, aom_idct8_c },   // ADST_DCT
    { aom_idct16_c, aom_iadst8_c },   // DCT_ADST
    { aom_iadst16_c, aom_iadst8_c },  // ADST_ADST
    { aom_iadst16_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct16_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx8_c },          // IDTX
    { aom_idct16_c, iidtx8_c },       // V_DCT
    { iidtx16_c, aom_idct8_c },       // H_DCT
    { aom_iadst16_c, iidtx8_c },      // V_ADST
    { iidtx16_c, aom_iadst8_c },      // H_ADST
    { aom_iadst16_c, iidtx8_c },      // V_FLIPADST
    { iidtx16_c, aom_iadst8_c },      // H_FLIPADST
678
#endif
679
680
681
682
683
  };

  const int n = 8;
  const int n2 = 16;
  int i, j;
684
  tran_low_t out[8][16], tmp[8][16], outtmp[8];
685
686
687
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

688
689
690
691
692
693
  // Multi-way scaling matrix (bits):
  // LGT/AV1 row, AV1 col  input+0, rowTX+1, mid+.5, colTX+1.5, out-6 == -3
  // LGT row, Daala col    input+0, rowTX+1, mid+0,  colTX+0,   out-4 == -3
  // Daala row, LGT col    N/A (no 16-point LGT)
  // Daala row,col         input+1, rowTX+0, mid+0,  colTX+0,   out-4 == -3

694
695
  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
696
#if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16
Sebastien Alaiwan's avatar
Sebastien Alaiwan committed
697
698
699
700
701
702
703
    tran_low_t temp_in[8];
    // Input scaling case 4
    for (j = 0; j < n; j++) temp_in[j] = input[j] * 2;
    // Row transform (Daala does not scale)
    IHT_8x16[tx_type].rows(temp_in, outtmp);
    // Transpose (no mid scaling)
    for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
704
705
706
707
708
#else
    // Case 1; no input scaling
    // Row transform (AV1 scales up 1 bit)
    IHT_8x16[tx_type].rows(input, outtmp);
    // Transpose and mid scaling up .5 bits
709
    for (j = 0; j < n; ++j)
710
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
711
#endif
clang-format's avatar
clang-format committed
712
    input += n;
713
714
715
  }

  // inverse transform column vectors
716
  // AV1 column TX scales up by 1.5 bit, Daala does not scale
717
  for (i = 0; i < n; ++i) {
718
    IHT_8x16[tx_type].cols(tmp[i], out[i]);
719
720
721
722
723
724
725
726
727
  }

  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
728
729
730
731
732
#if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16
      // Output scaling cases 2 and 4
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
#else
      // Output scaling case 1
733
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
734
#endif
735
736
737
738
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
739
void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
740
                           const TxfmParam *txfm_param) {
741
  const TX_TYPE tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
742
743
744
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
745
746
747
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
748
  static const transform_2d IHT_16x8[] = {
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
#if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16
    { daala_idct8, daala_idct16 },  // DCT_DCT  = 0
    { daala_idst8, daala_idct16 },  // ADST_DCT = 1
    { daala_idct8, daala_idst16 },  // DCT_ADST = 2
    { daala_idst8, daala_idst16 },  // ADST_ADST = 3
    { daala_idst8, daala_idct16 },  // FLIPADST_DCT
    { daala_idct8, daala_idst16 },  // DCT_FLIPADST
    { daala_idst8, daala_idst16 },  // FLIPADST_FLIPADST
    { daala_idst8, daala_idst16 },  // ADST_FLIPADST
    { daala_idst8, daala_idst16 },  // FLIPADST_ADST
    { daala_idtx8, daala_idtx16 },  // IDTX
    { daala_idct8, daala_idtx16 },  // V_DCT
    { daala_idtx8, daala_idct16 },  // H_DCT
    { daala_idst8, daala_idtx16 },  // V_ADST
    { daala_idtx8, daala_idst16 },  // H_ADST
    { daala_idst8, daala_idtx16 },  // V_FLIPADST
    { daala_idtx8, daala_idst16 },  // H_FLIPADST
#else
Luca Barbato's avatar
Luca Barbato committed
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
    { aom_idct8_c, aom_idct16_c },    // DCT_DCT
    { aom_iadst8_c, aom_idct16_c },   // ADST_DCT
    { aom_idct8_c, aom_iadst16_c },   // DCT_ADST
    { aom_iadst8_c, aom_iadst16_c },  // ADST_ADST
    { aom_iadst8_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst16_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // ADST_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx16_c },          // IDTX
    { aom_idct8_c, iidtx16_c },       // V_DCT
    { iidtx8_c, aom_idct16_c },       // H_DCT
    { aom_iadst8_c, iidtx16_c },      // V_ADST
    { iidtx8_c, aom_iadst16_c },      // H_ADST
    { aom_iadst8_c, iidtx16_c },      // V_FLIPADST
    { iidtx8_c, aom_iadst16_c },      // H_FLIPADST
783
#endif
784
  };
785

786
787
788
789
  const int n = 8;
  const int n2 = 16;

  int i, j;
790
  tran_low_t out[16][8], tmp[16][8], outtmp[16];
791
792
793
  tran_low_t *outp = &out[0][0];
  int outstride = n;

794
795
796
797
798
799
  // Multi-way scaling matrix (bits):
  // AV1 row, LGT/AV1 col  input+0, rowTX+1.5, mid+.5, colTX+1, out-6 == -3
  // LGT row, Daala col    N/A (no 16-point LGT)
  // Daala row, LGT col    input+1, rowTX+0,   mid+1,  colTX+1, out-6 == -3
  // Daala row, col        input+1, rowTX+0,   mid+0,  colTX+0, out-4 == -3

800
801
  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
802
803
804
805
806
807
#if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16
    tran_low_t temp_in[16];
    // Input scaling cases 3 and 4
    for (j = 0; j < n2; j++) temp_in[j] = input[j] * 2;
    // Daala row TX, no scaling
    IHT_16x8[tx_type].rows(temp_in, outtmp);
Sebastien Alaiwan's avatar
Sebastien Alaiwan committed
808
809
810
    // Transpose and mid scaling
    // Case 4
    for (j = 0; j < n2; ++j) tmp[j][i] = outtmp[j];
811
812
813
814
#else
    // Case 1
    // No input scaling
    // Row transform, AV1 scales up by 1.5 bits
815
    IHT_16x8[tx_type].rows(input, outtmp);
816
    // Transpose and mid scaling up .5 bits
817
    for (j = 0; j < n2; ++j)
818
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
819
#endif
clang-format's avatar
clang-format committed
820
    input += n2;
821
822
823
  }

  // inverse transform column vectors
824
  // AV!/LGT scales up by 1 bit, Daala does not scale
825
  for (i = 0; i < n2; ++i) {
Sebastien Alaiwan's avatar
Sebastien Alaiwan committed
826
    IHT_16x8[tx_type].cols(tmp[i], out[i]);
827
828
829
830
831
832
833
834
835
  }

  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
836
837
// Output scaling
#if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16
Sebastien Alaiwan's avatar
Sebastien Alaiwan committed
838
839
      // case 4
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
840
841
#else
      // case 1
842
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
843
#endif
844
845
846
847
    }
  }
}

848
void av1_iht8x32_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
849
                           const TxfmParam *txfm_param) {
850
  const TX_TYPE tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
851
852
853
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
854
855
856
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
  static const transform_2d IHT_8x32[] = {
    { aom_idct32_c, aom_idct8_c },     // DCT_DCT
    { ihalfright32_c, aom_idct8_c },   // ADST_DCT
    { aom_idct32_c, aom_iadst8_c },    // DCT_ADST
    { ihalfright32_c, aom_iadst8_c },  // ADST_ADST
    { ihalfright32_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct32_c, aom_iadst8_c },    // DCT_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // ADST_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx8_c },           // IDTX
    { aom_idct32_c, iidtx8_c },        // V_DCT
    { iidtx32_c, aom_idct8_c },        // H_DCT
    { ihalfright32_c, iidtx8_c },      // V_ADST
    { iidtx32_c, aom_iadst8_c },       // H_ADST
    { ihalfright32_c, iidtx8_c },      // V_FLIPADST
    { iidtx32_c, aom_iadst8_c },       // H_FLIPADST
  };

  const int n = 8;
  const int n4 = 32;
  int i, j;
879
  tran_low_t out[8][32], tmp[8][32], outtmp[8];
880
881
882
883
884
  tran_low_t *outp = &out[0][0];
  int outstride = n4;

  // inverse transform row vectors and transpose
  for (i = 0; i < n4; ++i) {
Sebastien Alaiwan's avatar
Sebastien Alaiwan committed
885
    IHT_8x32[tx_type].rows(input, outtmp);
886
    for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
887
888
889
890
    input += n;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
891
892
893
  for (i = 0; i < n; ++i) {
    IHT_8x32[tx_type].cols(tmp[i], out[i]);
  }
894
895
896
897
898
899
900
901
902
903
904
905
906
907

  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n4, n);

  // Sum with the destination
  for (i = 0; i < n4; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

void av1_iht32x8_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
908
                           const TxfmParam *txfm_param) {
909
  const TX_TYPE tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
910
911
912
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
913
914
915
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
  static const transform_2d IHT_32x8[] = {
    { aom_idct8_c, aom_idct32_c },     // DCT_DCT
    { aom_iadst8_c, aom_idct32_c },    // ADST_DCT
    { aom_idct8_c, ihalfright32_c },   // DCT_ADST
    { aom_iadst8_c, ihalfright32_c },  // ADST_ADST
    { aom_iadst8_c, aom_idct32_c },    // FLIPADST_DCT
    { aom_idct8_c, ihalfright32_c },   // DCT_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // ADST_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx32_c },           // IDTX
    { aom_idct8_c, iidtx32_c },        // V_DCT
    { iidtx8_c, aom_idct32_c },        // H_DCT
    { aom_iadst8_c, iidtx32_c },       // V_ADST
    { iidtx8_c, ihalfright32_c },      // H_ADST
    { aom_iadst8_c, iidtx32_c },       // V_FLIPADST
    { iidtx8_c, ihalfright32_c },      // H_FLIPADST
  };
934

935
936
937
938
  const int n = 8;
  const int n4 = 32;

  int i, j;
939
  tran_low_t out[32][8], tmp[32][8], outtmp[32];
940
941
942
943
944
945
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_32x8[tx_type].rows(input, outtmp);
946
    for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j];
947
948
949
950
    input += n4;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
951
  for (i = 0; i < n4; ++i) {
Sebastien Alaiwan's avatar
Sebastien Alaiwan committed
952
    IHT_32x8[tx_type].cols(tmp[i], out[i]);
Lester Lu's avatar
Lester Lu committed
953
  }
954
955
956
957
958
959
960
961
962
963
964
965
966

  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n4);

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n4; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
967
void av1_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
968
                            const TxfmParam *txfm_param) {
969
  const TX_TYPE tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
970
971
972
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
973
974
975
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
976
  static const transform_2d IHT_16x32[] = {
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
    { daala_idct32, daala_idct16 },  // DCT_DCT  = 0
    { daala_idst32, daala_idct16 },  // ADST_DCT = 1
    { daala_idct32, daala_idst16 },  // DCT_ADST = 2
    { daala_idst32, daala_idst16 },  // ADST_ADST = 3
    { daala_idst32, daala_idct16 },  // FLIPADST_DCT
    { daala_idct32, daala_idst16 },  // DCT_FLIPADST
    { daala_idst32, daala_idst16 },  // FLIPADST_FLIPADST
    { daala_idst32, daala_idst16 },  // ADST_FLIPADST
    { daala_idst32, daala_idst16 },  // FLIPADST_ADST
    { daala_idtx32, daala_idtx16 },  // IDTX
    { daala_idct32, daala_idtx16 },  // V_DCT
    { daala_idtx32, daala_idct16 },  // H_DCT
    { daala_idst32, daala_idtx16 },  // V_ADST
    { daala_idtx32, daala_idst16 },  // H_ADST
    { daala_idst32, daala_idtx16 },  // V_FLIPADST
    { daala_idtx32, daala_idst16 },  // H_FLIPADST
#else
Luca Barbato's avatar
Luca Barbato committed
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
    { aom_idct32_c, aom_idct16_c },     // DCT_DCT
    { ihalfright32_c, aom_idct16_c },   // ADST_DCT
    { aom_idct32_c, aom_iadst16_c },    // DCT_ADST
    { ihalfright32_c, aom_iadst16_c },  // ADST_ADST
    { ihalfright32_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct32_c, aom_iadst16_c },    // DCT_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // ADST_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx16_c },           // IDTX
    { aom_idct32_c, iidtx16_c },        // V_DCT
    { iidtx32_c, aom_idct16_c },        // H_DCT
    { ihalfright32_c, iidtx16_c },      // V_ADST
    { iidtx32_c, aom_iadst16_c },       // H_ADST
    { ihalfright32_c, iidtx16_c },      // V_FLIPADST
    { iidtx32_c, aom_iadst16_c },       // H_FLIPADST
1011
#endif
1012
1013
1014
1015
1016
  };

  const int n = 16;
  const int n2 = 32;
  int i, j;
1017
  tran_low_t out[16][32], tmp[16][32], outtmp[16];
1018
1019
1020
1021
1022
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
1023
1024
1025
1026
1027
1028
#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
    tran_low_t temp_in[16];
    for (j = 0; j < n; j++) temp_in[j] = input[j] * 2;
    IHT_16x32[tx_type].rows(temp_in, outtmp);
    for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j] * 4;
#else
1029
1030
    IHT_16x32[tx_type].rows(input, outtmp);
    for (j = 0; j < n; ++j)
1031
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
1032
#endif
clang-format's avatar
clang-format committed
1033
    input += n;
1034
1035
1036
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
1037
  for (i = 0; i < n; ++i) IHT_16x32[tx_type].cols(tmp[i], out[i]);
1038
1039
1040
1041
1042
1043
1044
1045

  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
1046
1047
1048
#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
#else
1049
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
1050
#endif
1051
1052
1053
1054
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
1055
void av1_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
1056
                            const TxfmParam *txfm_param) {
1057
  const TX_TYPE tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
1058
1059
1060
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
1061
1062
1063
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
1064
  static const transform_2d IHT_32x16[] = {
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
    { daala_idct16, daala_idct32 },  // DCT_DCT  = 0
    { daala_idst16, daala_idct32 },  // ADST_DCT = 1
    { daala_idct16, daala_idst32 },  // DCT_ADST = 2
    { daala_idst16, daala_idst32 },  // ADST_ADST = 3
    { daala_idst16, daala_idct32 },  // FLIPADST_DCT
    { daala_idct16, daala_idst32 },  // DCT_FLIPADST
    { daala_idst16, daala_idst32 },  // FLIPADST_FLIPADST
    { daala_idst16, daala_idst32 },  // ADST_FLIPADST
    { daala_idst16, daala_idst32 },  // FLIPADST_ADST
    { daala_idtx16, daala_idtx32 },  // IDTX
    { daala_idct16, daala_idtx32 },  // V_DCT
    { daala_idtx16, daala_idct32 },  // H_DCT
    { daala_idst16, daala_idtx32 },  // V_ADST
    { daala_idtx16, daala_idst32 },  // H_ADST
    { daala_idst16, daala_idtx32 },  // V_FLIPADST
    { daala_idtx16, daala_idst32 },  // H_FLIPADST
#else
Luca Barbato's avatar
Luca Barbato committed
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
    { aom_idct16_c, aom_idct32_c },     // DCT_DCT
    { aom_iadst16_c, aom_idct32_c },    // ADST_DCT
    { aom_idct16_c, ihalfright32_c },   // DCT_ADST
    { aom_iadst16_c, ihalfright32_c },  // ADST_ADST
    { aom_iadst16_c, aom_idct32_c },    // FLIPADST_DCT
    { aom_idct16_c, ihalfright32_c },   // DCT_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // ADST_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx32_c },           // IDTX
    { aom_idct16_c, iidtx32_c },        // V_DCT
    { iidtx16_c, aom_idct32_c },        // H_DCT
    { aom_iadst16_c, iidtx32_c },       // V_ADST
    { iidtx16_c, ihalfright32_c },      // H_ADST
    { aom_iadst16_c, iidtx32_c },       // V_FLIPADST
    { iidtx16_c, ihalfright32_c },      // H_FLIPADST
1099
#endif
1100
1101
1102
1103
1104
  };
  const int n = 16;
  const int n2 = 32;

  int i, j;
1105
  tran_low_t out[32][16], tmp[32][16], outtmp[32];
1106
1107
1108
1109
1110
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
1111
1112
1113
1114
1115
1116
#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
    tran_low_t temp_in[32];
    for (j = 0; j < n2; j++) temp_in[j] = input[j] * 2;
    IHT_32x16[tx_type].rows(temp_in, outtmp);
    for (j = 0; j < n2; ++j) tmp[j][i] = outtmp[j] * 4;
#else
1117
1118
    IHT_32x16[tx_type].rows(input, outtmp);
    for (j = 0; j < n2; ++j)
1119
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
1120
#endif
clang-format's avatar
clang-format committed
1121
    input += n2;
1122
1123
1124
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
1125
  for (i = 0; i < n2; ++i) IHT_32x16[tx_type].cols(tmp[i], out[i]);
1126
1127
1128
1129
1130
1131