idct.c 92 KB
Newer Older
Jingning Han's avatar
Jingning Han committed
1
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <math.h>

Yaowu Xu's avatar
Yaowu Xu committed
14
#include "./aom_dsp_rtcd.h"
Geza Lore's avatar
Geza Lore committed
15
16
17
#include "./av1_rtcd.h"
#include "aom_dsp/inv_txfm.h"
#include "aom_ports/mem.h"
18
#include "av1/common/av1_inv_txfm1d_cfg.h"
19
20
21
#include "av1/common/blockd.h"
#include "av1/common/enums.h"
#include "av1/common/idct.h"
22
23
#if CONFIG_DAALA_TX4 || CONFIG_DAALA_TX8 || CONFIG_DAALA_TX16 || \
    CONFIG_DAALA_TX32 || CONFIG_DAALA_TX64
24
25
#include "av1/common/daala_tx.h"
#endif
Jingning Han's avatar
Jingning Han committed
26

27
int av1_get_tx_scale(const TX_SIZE tx_size) {
28
29
  const int pels = tx_size_2d[tx_size];
  return (pels > 256) + (pels > 1024) + (pels > 4096);
30
31
}

32
33
34
// NOTE: The implementations of all inverse transforms need to be aware of the
// fact that input and output could be the same buffer.

Debargha Mukherjee's avatar
Debargha Mukherjee committed
35
36
// 4-point inverse identity transform.
// Each of the 4 coefficients is multiplied by Sqrt2 and rounded through
// dct_const_round_shift(). Element k of the output depends only on element k
// of the input.
static void iidtx4_c(const tran_low_t *input, tran_low_t *output) {
  const int len = 4;
  for (int k = 0; k < len; ++k) {
    output[k] = (tran_low_t)dct_const_round_shift(input[k] * Sqrt2);
  }
}

// 8-point inverse identity transform: every coefficient is doubled.
// Element k of the output depends only on element k of the input.
static void iidtx8_c(const tran_low_t *input, tran_low_t *output) {
  const int len = 8;
  for (int k = 0; k < len; ++k) {
    output[k] = input[k] * 2;
  }
}

// 16-point inverse identity transform.
// Each coefficient is multiplied by 2 * Sqrt2 and rounded through
// dct_const_round_shift(). Element k of the output depends only on element k
// of the input.
static void iidtx16_c(const tran_low_t *input, tran_low_t *output) {
  const int len = 16;
  for (int k = 0; k < len; ++k) {
    output[k] = (tran_low_t)dct_const_round_shift(input[k] * 2 * Sqrt2);
  }
}

// 32-point inverse identity transform: every coefficient is multiplied by 4.
// Element k of the output depends only on element k of the input.
static void iidtx32_c(const tran_low_t *input, tran_low_t *output) {
  const int len = 32;
  for (int k = 0; k < len; ++k) {
    output[k] = input[k] * 4;
  }
}
62

63
#if CONFIG_TX64X64
64
65
// 64-point inverse identity transform.
// Each coefficient is multiplied by 4 * Sqrt2 and rounded through
// dct_const_round_shift(). Element k of the output depends only on element k
// of the input.
static void iidtx64_c(const tran_low_t *input, tran_low_t *output) {
  const int len = 64;
  for (int k = 0; k < len; ++k) {
    output[k] = (tran_low_t)dct_const_round_shift(input[k] * 4 * Sqrt2);
  }
}
#endif  // CONFIG_TX64X64
Debargha Mukherjee's avatar
Debargha Mukherjee committed
71

72
// For use in lieu of ADST
73
74
75
76
77
78
79
static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  tran_low_t inputhalf[16];
  // Multiply input by sqrt(2)
  for (i = 0; i < 16; ++i) {
    inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
  }
80
81
82
  for (i = 0; i < 16; ++i) {
    output[i] = input[16 + i] * 4;
  }
Luca Barbato's avatar
Luca Barbato committed
83
  aom_idct16_c(inputhalf, output + 16);
84
85
86
  // Note overall scaling factor is 4 times orthogonal
}

87
#if CONFIG_TX64X64
88
89
90
91
// 64-point inverse DCT for the column pass.
// Copies the coefficients into an int32_t work buffer, runs av1_idct64_new()
// with the column cos-bit / stage-range tables, and copies the result back as
// tran_low_t. Work buffers are local, so input and output may alias.
static void idct64_col_c(const tran_low_t *input, tran_low_t *output) {
  int32_t coeffs[64];
  int32_t result[64];
  int k;

  for (k = 0; k < 64; ++k) {
    coeffs[k] = (int32_t)input[k];
  }

  av1_idct64_new(coeffs, result, inv_cos_bit_col_dct_64,
                 inv_stage_range_col_dct_64);

  for (k = 0; k < 64; ++k) {
    output[k] = (tran_low_t)result[k];
  }
}

// 64-point inverse DCT for the row pass.
// Copies the coefficients into an int32_t work buffer, runs av1_idct64_new()
// with the row cos-bit / stage-range tables, and copies the result back as
// tran_low_t. Work buffers are local, so input and output may alias.
static void idct64_row_c(const tran_low_t *input, tran_low_t *output) {
  int32_t coeffs[64];
  int32_t result[64];
  int k;

  for (k = 0; k < 64; ++k) {
    coeffs[k] = (int32_t)input[k];
  }

  av1_idct64_new(coeffs, result, inv_cos_bit_row_dct_64,
                 inv_stage_range_row_dct_64);

  for (k = 0; k < 64; ++k) {
    output[k] = (tran_low_t)result[k];
  }
}

// 64-point "half right" inverse transform, for use in lieu of ADST.
//   - output[0..31]  = input[32..63] scaled by 4 * Sqrt2 (rounded)
//   - output[32..63] = aom_idct32_c() applied to input[0..31] scaled by Sqrt2
// The first half of the input is copied (scaled) into a local buffer before
// any output is written, so the result is the same when input and output are
// the same buffer.
// Per the original author's note, the overall scaling factor is 4 * sqrt(2)
// times orthogonal.
static void ihalfright64_c(const tran_low_t *input, tran_low_t *output) {
  tran_low_t scaled_lo[32];
  int k;

  // Low half of the input, multiplied by sqrt(2).
  for (k = 0; k < 32; ++k) {
    scaled_lo[k] = (tran_low_t)dct_const_round_shift(input[k] * Sqrt2);
  }

  // High half of the input, scaled, becomes the low half of the output.
  for (k = 0; k < 32; ++k) {
    output[k] = (tran_low_t)dct_const_round_shift(input[32 + k] * 4 * Sqrt2);
  }

  aom_idct32_c(scaled_lo, output + 32);
}
#endif  // CONFIG_TX64X64

Jingning Han's avatar
Jingning Han committed
120
// Inverse identity transform and add.
121
static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
122
                           int bsx, int bsy, TX_TYPE tx_type) {
123
  int r, c;
124
125
  const int pels = bsx * bsy;
  const int shift = 3 - ((pels > 256) + (pels > 1024));
Debargha Mukherjee's avatar
Debargha Mukherjee committed
126
  if (tx_type == IDTX) {
127
128
    for (r = 0; r < bsy; ++r) {
      for (c = 0; c < bsx; ++c)
Debargha Mukherjee's avatar
Debargha Mukherjee committed
129
130
        dest[c] = clip_pixel_add(dest[c], input[c] >> shift);
      dest += stride;
131
      input += bsx;
Jingning Han's avatar
Jingning Han committed
132
    }
133
134
135
  }
}

clang-format's avatar
clang-format committed
136
137
138
139
140
// Flips a strided buffer upside down in place of copying it: |dest| is moved
// to the start of the last of |size| rows and |stride| is negated, so walking
// forward from the new |dest| visits the rows in reverse order.
// Both |dest| and |stride| must be modifiable lvalues.
#define FLIPUD_PTR(dest, stride, size) \
  do {                                 \
    (dest) += ((size)-1) * (stride);   \
    (stride) = -(stride);              \
  } while (0)
141

clang-format's avatar
clang-format committed
142
// Adjusts the destination and/or source pointer+stride pairs so that the
// caller's "add transposed src to dst" loop produces the flipped result
// required by the FLIPADST transform types.
//
// The transpose of src is what gets added to dst. Hence:
//   - an up/down flip of the addends is obtained by UD-flipping dst
//     (|sizey| rows);
//   - a left/right flip of the addends (in dst coordinates) is obtained by
//     UD-flipping src (|sizex| rows).
// Transform types without a FLIPADST component leave everything unchanged.
// An unknown |tx_type| trips assert(0).
static void maybe_flip_strides(uint8_t **dst, int *dstride, tran_low_t **src,
                               int *sstride, TX_TYPE tx_type, int sizey,
                               int sizex) {
  int flip_ud = 0;  // flip the destination vertically
  int flip_lr = 0;  // flip the source vertically (== LR in dst coordinates)

  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case IDTX:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST: break;
    case FLIPADST_DCT:
    case FLIPADST_ADST:
    case V_FLIPADST: flip_ud = 1; break;
    case DCT_FLIPADST:
    case ADST_FLIPADST:
    case H_FLIPADST: flip_lr = 1; break;
    case FLIPADST_FLIPADST:
      flip_ud = 1;
      flip_lr = 1;
      break;
    default: assert(0); break;
  }

  if (flip_ud) {
    FLIPUD_PTR(*dst, *dstride, sizey);
  }
  if (flip_lr) {
    FLIPUD_PTR(*src, *sstride, sizex);
  }
}

180
#if CONFIG_HIGHBITDEPTH
181
#if CONFIG_TX64X64
182
// High-bitdepth inverse identity transform and add.
// When |tx_type| is IDTX, adds the right-shifted residual |input| (bsx values
// per row, bsy rows, tightly packed) to the 16-bit pixels obtained from
// |dest8| via CONVERT_TO_SHORTPTR (row pitch |stride|), clamping to bit depth
// |bd| through highbd_clip_pixel_add(). Any other |tx_type| is a no-op.
// The shift starts at 3 and drops by one for blocks with more than 256
// pixels and by one more for blocks with more than 1024 pixels.
static void highbd_inv_idtx_add_c(const tran_low_t *input, uint8_t *dest8,
                                  int stride, int bsx, int bsy, TX_TYPE tx_type,
                                  int bd) {
  const int area = bsx * bsy;
  const int shift = 3 - (area > 256) - (area > 1024);
  uint16_t *const dest = CONVERT_TO_SHORTPTR(dest8);

  if (tx_type != IDTX) return;

  for (int row = 0; row < bsy; ++row) {
    const tran_low_t *src_row = input + row * bsx;
    uint16_t *dst_row = dest + row * stride;
    for (int col = 0; col < bsx; ++col) {
      dst_row[col] =
          highbd_clip_pixel_add(dst_row[col], src_row[col] >> shift, bd);
    }
  }
}
199
#endif  // CONFIG_TX64X64
200
#endif  // CONFIG_HIGHBITDEPTH
201

202
#if CONFIG_LGT
Lester Lu's avatar
Lester Lu committed
203
204
// 4-point inverse LGT (matrix multiply).
// |lgtmtx| is read as a row-major 4x4 matrix. Computes
//   s[j] = sum over i in [0, 4) of lgtmtx[i * 4 + j] * input[i]
// and stores WRAPLOW(dct_const_round_shift(s[j])) in output[j].
// All sums are accumulated before any output element is written, so input and
// output may be the same buffer. A NULL |lgtmtx| trips assert(0).
void ilgt4(const tran_low_t *input, tran_low_t *output,
           const tran_high_t *lgtmtx) {
  if (lgtmtx == NULL) {
    assert(0);
  }

  tran_high_t acc[4] = { 0 };
  for (int row = 0; row < 4; ++row) {
    const tran_high_t *coeffs = lgtmtx + row * 4;
    for (int col = 0; col < 4; ++col) {
      acc[col] += coeffs[col] * input[row];
    }
  }

  for (int k = 0; k < 4; ++k) {
    output[k] = WRAPLOW(dct_const_round_shift(acc[k]));
  }
}

// 8-point inverse LGT (matrix multiply).
// |lgtmtx| is read as a row-major 8x8 matrix. Computes
//   s[j] = sum over i in [0, 8) of lgtmtx[i * 8 + j] * input[i]
// and stores WRAPLOW(dct_const_round_shift(s[j])) in output[j].
// All sums are accumulated before any output element is written, so input and
// output may be the same buffer. A NULL |lgtmtx| trips assert(0).
void ilgt8(const tran_low_t *input, tran_low_t *output,
           const tran_high_t *lgtmtx) {
  if (lgtmtx == NULL) {
    assert(0);
  }

  tran_high_t acc[8] = { 0 };
  for (int row = 0; row < 8; ++row) {
    const tran_high_t *coeffs = lgtmtx + row * 8;
    for (int col = 0; col < 8; ++col) {
      acc[col] += coeffs[col] * input[row];
    }
  }

  for (int k = 0; k < 8; ++k) {
    output[k] = WRAPLOW(dct_const_round_shift(acc[k]));
  }
}
226
#endif  // CONFIG_LGT
Lester Lu's avatar
Lester Lu committed
227

228
#if CONFIG_LGT
Lester Lu's avatar
Lester Lu committed
229
230
231
232
// get_lgt4 and get_lgt8 return 1 and pick a lgt matrix if LGT is chosen to
// apply. Otherwise they return 0
int get_lgt4(const TxfmParam *txfm_param, int is_col,
             const tran_high_t **lgtmtx) {
233
  assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
Lester Lu's avatar
Lester Lu committed
234
235
236
237
238
239
240
  if (is_col && (vtx_tab[txfm_param->tx_type] == ADST_1D ||
                 vtx_tab[txfm_param->tx_type] == FLIPADST_1D)) {
    lgtmtx[0] = txfm_param->is_inter ? &lgt4_170[0][0] : &lgt4_140[0][0];
    return 1;
  } else if (!is_col && (htx_tab[txfm_param->tx_type] == ADST_1D ||
                         htx_tab[txfm_param->tx_type] == FLIPADST_1D)) {
    lgtmtx[0] = txfm_param->is_inter ? &lgt4_170[0][0] : &lgt4_140[0][0];
Lester Lu's avatar
Lester Lu committed
241
242
    return 1;
  }
Lester Lu's avatar
Lester Lu committed
243
  lgtmtx[0] = NULL;
Lester Lu's avatar
Lester Lu committed
244
245
246
  return 0;
}

Lester Lu's avatar
Lester Lu committed
247
248
int get_lgt8(const TxfmParam *txfm_param, int is_col,
             const tran_high_t **lgtmtx) {
249
  assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
Lester Lu's avatar
Lester Lu committed
250
251
252
253
254
255
256
  if (is_col && (vtx_tab[txfm_param->tx_type] == ADST_1D ||
                 vtx_tab[txfm_param->tx_type] == FLIPADST_1D)) {
    lgtmtx[0] = txfm_param->is_inter ? &lgt8_170[0][0] : &lgt8_150[0][0];
    return 1;
  } else if (!is_col && (htx_tab[txfm_param->tx_type] == ADST_1D ||
                         htx_tab[txfm_param->tx_type] == FLIPADST_1D)) {
    lgtmtx[0] = txfm_param->is_inter ? &lgt8_170[0][0] : &lgt8_150[0][0];
Lester Lu's avatar
Lester Lu committed
257
258
    return 1;
  }
Lester Lu's avatar
Lester Lu committed
259
  lgtmtx[0] = NULL;
Lester Lu's avatar
Lester Lu committed
260
261
262
263
  return 0;
}
#endif  // CONFIG_LGT

Yaowu Xu's avatar
Yaowu Xu committed
264
void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
265
                         const TxfmParam *txfm_param) {
266
  const TX_TYPE tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
267
268
269
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
270
#if !CONFIG_DAALA_TX4
271
272
273
274
  if (tx_type == DCT_DCT) {
    aom_idct4x4_16_add(input, dest, stride);
    return;
  }
275
#endif
276
  static const transform_2d IHT_4[] = {
277
#if CONFIG_DAALA_TX4
278
279
280
281
282
283
284
285
286
    { daala_idct4, daala_idct4 },  // DCT_DCT  = 0
    { daala_idst4, daala_idct4 },  // ADST_DCT = 1
    { daala_idct4, daala_idst4 },  // DCT_ADST = 2
    { daala_idst4, daala_idst4 },  // ADST_ADST = 3
    { daala_idst4, daala_idct4 },  // FLIPADST_DCT
    { daala_idct4, daala_idst4 },  // DCT_FLIPADST
    { daala_idst4, daala_idst4 },  // FLIPADST_FLIPADST
    { daala_idst4, daala_idst4 },  // ADST_FLIPADST
    { daala_idst4, daala_idst4 },  // FLIPADST_ADST
287
288
289
290
291
292
293
    { daala_idtx4, daala_idtx4 },  // IDTX
    { daala_idct4, daala_idtx4 },  // V_DCT
    { daala_idtx4, daala_idct4 },  // H_DCT
    { daala_idst4, daala_idtx4 },  // V_ADST
    { daala_idtx4, daala_idst4 },  // H_ADST
    { daala_idst4, daala_idtx4 },  // V_FLIPADST
    { daala_idtx4, daala_idst4 },  // H_FLIPADST
294
#else
Luca Barbato's avatar
Luca Barbato committed
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
    { aom_idct4_c, aom_idct4_c },    // DCT_DCT  = 0
    { aom_iadst4_c, aom_idct4_c },   // ADST_DCT = 1
    { aom_idct4_c, aom_iadst4_c },   // DCT_ADST = 2
    { aom_iadst4_c, aom_iadst4_c },  // ADST_ADST = 3
    { aom_iadst4_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx4_c },          // IDTX
    { aom_idct4_c, iidtx4_c },       // V_DCT
    { iidtx4_c, aom_idct4_c },       // H_DCT
    { aom_iadst4_c, iidtx4_c },      // V_ADST
    { iidtx4_c, aom_iadst4_c },      // H_ADST
    { aom_iadst4_c, iidtx4_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst4_c },      // H_FLIPADST
311
#endif
312
313
314
  };

  int i, j;
315
  tran_low_t tmp[4][4];
316
317
318
  tran_low_t out[4][4];
  tran_low_t *outp = &out[0][0];
  int outstride = 4;
319

320
321
322
323
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif

Lester Lu's avatar
Lester Lu committed
324
#if CONFIG_LGT
Lester Lu's avatar
Lester Lu committed
325
326
327
328
  const tran_high_t *lgtmtx_col[1];
  const tran_high_t *lgtmtx_row[1];
  int use_lgt_col = get_lgt4(txfm_param, 1, lgtmtx_col);
  int use_lgt_row = get_lgt4(txfm_param, 0, lgtmtx_row);
Lester Lu's avatar
Lester Lu committed
329
330
#endif

331
332
  // inverse transform row vectors
  for (i = 0; i < 4; ++i) {
333
#if CONFIG_DAALA_TX4
334
    tran_low_t temp_in[4];
335
    for (j = 0; j < 4; j++) temp_in[j] = input[j] * 2;
336
337
    IHT_4[tx_type].rows(temp_in, out[i]);
#else
Lester Lu's avatar
Lester Lu committed
338
339
#if CONFIG_LGT
    if (use_lgt_row)
Lester Lu's avatar
Lester Lu committed
340
      ilgt4(input, out[i], lgtmtx_row[0]);
Lester Lu's avatar
Lester Lu committed
341
342
343
    else
#endif
      IHT_4[tx_type].rows(input, out[i]);
344
#endif
clang-format's avatar
clang-format committed
345
    input += 4;
346
347
348
  }

  // transpose
349
350
351
  for (i = 0; i < 4; i++) {
    for (j = 0; j < 4; j++) {
      tmp[j][i] = out[i][j];
352
    }
353
354
355
356
  }

  // inverse transform column vectors
  for (i = 0; i < 4; ++i) {
Lester Lu's avatar
Lester Lu committed
357
358
#if CONFIG_LGT
    if (use_lgt_col)
Lester Lu's avatar
Lester Lu committed
359
      ilgt4(tmp[i], out[i], lgtmtx_col[0]);
Lester Lu's avatar
Lester Lu committed
360
361
362
    else
#endif
      IHT_4[tx_type].cols(tmp[i], out[i]);
363
364
  }

365
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4, 4);
366
367
368

  // Sum with the destination
  for (i = 0; i < 4; ++i) {
369
    for (j = 0; j < 4; ++j) {
370
371
      int d = i * stride + j;
      int s = j * outstride + i;
372
#if CONFIG_DAALA_TX4
373
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
374
375
376
#else
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
#endif
377
378
379
380
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
381
void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
382
                         const TxfmParam *txfm_param) {
383
  const TX_TYPE tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
384
385
386
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
387
388
389
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
390
  static const transform_2d IHT_4x8[] = {
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
#if CONFIG_DAALA_TX4 && CONFIG_DAALA_TX8
    { daala_idct8, daala_idct4 },  // DCT_DCT  = 0
    { daala_idst8, daala_idct4 },  // ADST_DCT = 1
    { daala_idct8, daala_idst4 },  // DCT_ADST = 2
    { daala_idst8, daala_idst4 },  // ADST_ADST = 3
    { daala_idst8, daala_idct4 },  // FLIPADST_DCT
    { daala_idct8, daala_idst4 },  // DCT_FLIPADST
    { daala_idst8, daala_idst4 },  // FLIPADST_FLIPADST
    { daala_idst8, daala_idst4 },  // ADST_FLIPADST
    { daala_idst8, daala_idst4 },  // FLIPADST_ADST
    { daala_idtx8, daala_idtx4 },  // IDTX
    { daala_idct8, daala_idtx4 },  // V_DCT
    { daala_idtx8, daala_idct4 },  // H_DCT
    { daala_idst8, daala_idtx4 },  // V_ADST
    { daala_idtx8, daala_idst4 },  // H_ADST
    { daala_idst8, daala_idtx4 },  // V_FLIPADST
    { daala_idtx8, daala_idst4 },  // H_FLIPADST
#else
Luca Barbato's avatar
Luca Barbato committed
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
    { aom_idct8_c, aom_idct4_c },    // DCT_DCT
    { aom_iadst8_c, aom_idct4_c },   // ADST_DCT
    { aom_idct8_c, aom_iadst4_c },   // DCT_ADST
    { aom_iadst8_c, aom_iadst4_c },  // ADST_ADST
    { aom_iadst8_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx4_c },          // IDTX
    { aom_idct8_c, iidtx4_c },       // V_DCT
    { iidtx8_c, aom_idct4_c },       // H_DCT
    { aom_iadst8_c, iidtx4_c },      // V_ADST
    { iidtx8_c, aom_iadst4_c },      // H_ADST
    { aom_iadst8_c, iidtx4_c },      // V_FLIPADST
    { iidtx8_c, aom_iadst4_c },      // H_FLIPADST
425
#endif
426
427
  };

428
429
  const int n = 4;
  const int n2 = 8;
430
  int i, j;
431
  tran_low_t out[4][8], tmp[4][8], outtmp[4];
432
  tran_low_t *outp = &out[0][0];
433
  int outstride = n2;
434

Lester Lu's avatar
Lester Lu committed
435
#if CONFIG_LGT
Lester Lu's avatar
Lester Lu committed
436
437
438
439
  const tran_high_t *lgtmtx_col[1];
  const tran_high_t *lgtmtx_row[1];
  int use_lgt_col = get_lgt8(txfm_param, 1, lgtmtx_col);
  int use_lgt_row = get_lgt4(txfm_param, 0, lgtmtx_row);
Lester Lu's avatar
Lester Lu committed
440
441
#endif

442
443
444
445
446
447
  // Multi-way scaling matrix (bits):
  // LGT/AV1 row,col     input+0, rowTX+.5, mid+.5, colTX+1, out-5 == -3
  // LGT row, Daala col  input+0, rowTX+.5, mid+.5, colTX+0, out-4 == -3
  // Daala row, LGT col  input+1, rowTX+0,  mid+0,  colTX+1, out-5 == -3
  // Daala row,col       input+1, rowTX+0,  mid+0,  colTX+0, out-4 == -3

448
  // inverse transform row vectors and transpose
449
  for (i = 0; i < n2; ++i) {
Lester Lu's avatar
Lester Lu committed
450
#if CONFIG_LGT
451
452
453
454
    if (use_lgt_row) {
      // Scaling cases 1 and 2 above
      // No input scaling
      // Row transform (LGT; scales up .5 bits)
Lester Lu's avatar
Lester Lu committed
455
      ilgt4(input, outtmp, lgtmtx_row[0]);
456
457
458
459
      // Transpose and mid scaling up by .5 bit
      for (j = 0; j < n; ++j)
        tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
    } else {
Lester Lu's avatar
Lester Lu committed
460
#endif
461
462
463
464
465
466
467
468
469
470
471
472
473
474
#if CONFIG_DAALA_TX4 && CONFIG_DAALA_TX8
      // Daala row transform; Scaling cases 3 and 4 above
      tran_low_t temp_in[4];
      // Input scaling up by 1 bit
      for (j = 0; j < n; j++) temp_in[j] = input[j] * 2;
      // Row transform; Daala does not scale
      IHT_4x8[tx_type].rows(temp_in, outtmp);
      // Transpose; no mid scaling
      for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
#else
    // AV1 row transform; Scaling case 1 only
    // Row transform (AV1 scales up .5 bits)
    IHT_4x8[tx_type].rows(input, outtmp);
    // Transpose and mid scaling up by .5 bit
475
    for (j = 0; j < n; ++j)
476
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
477
478
479
480
#endif
#if CONFIG_LGT
    }
#endif
481
    input += n;
482
483
484
  }

  // inverse transform column vectors
485
  // AV1/LGT column TX scales up by 1 bit, Daala does not scale
486
  for (i = 0; i < n; ++i) {
Lester Lu's avatar
Lester Lu committed
487
488
#if CONFIG_LGT
    if (use_lgt_col)
Lester Lu's avatar
Lester Lu committed
489
      ilgt8(tmp[i], out[i], lgtmtx_col[0]);
Lester Lu's avatar
Lester Lu committed
490
491
492
    else
#endif
      IHT_4x8[tx_type].cols(tmp[i], out[i]);
493
494
  }

495
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
496
497

  // Sum with the destination
498
499
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
500
501
      int d = i * stride + j;
      int s = j * outstride + i;
502
503
504
505
506
507
508
509
510
511
512
#if CONFIG_DAALA_TX4 && CONFIG_DAALA_TX8
#if CONFIG_LGT
      if (use_lgt_col)
        // Output Scaling cases 1, 3
        dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
      else
#endif
        // Output scaling cases 2, 4
        dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
#else
      // Output scaling case 1 only
513
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
514
#endif
515
516
517
518
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
519
void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
520
                         const TxfmParam *txfm_param) {
521
  const TX_TYPE tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
522
523
524
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
525
526
527
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
528
  static const transform_2d IHT_8x4[] = {
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
#if CONFIG_DAALA_TX4 && CONFIG_DAALA_TX8
    { daala_idct4, daala_idct8 },  // DCT_DCT  = 0
    { daala_idst4, daala_idct8 },  // ADST_DCT = 1
    { daala_idct4, daala_idst8 },  // DCT_ADST = 2
    { daala_idst4, daala_idst8 },  // ADST_ADST = 3
    { daala_idst4, daala_idct8 },  // FLIPADST_DCT
    { daala_idct4, daala_idst8 },  // DCT_FLIPADST
    { daala_idst4, daala_idst8 },  // FLIPADST_FLIPADST
    { daala_idst4, daala_idst8 },  // ADST_FLIPADST
    { daala_idst4, daala_idst8 },  // FLIPADST_ADST
    { daala_idtx4, daala_idtx8 },  // IDTX
    { daala_idct4, daala_idtx8 },  // V_DCT
    { daala_idtx4, daala_idct8 },  // H_DCT
    { daala_idst4, daala_idtx8 },  // V_ADST
    { daala_idtx4, daala_idst8 },  // H_ADST
    { daala_idst4, daala_idtx8 },  // V_FLIPADST
    { daala_idtx4, daala_idst8 },  // H_FLIPADST
#else
Luca Barbato's avatar
Luca Barbato committed
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
    { aom_idct4_c, aom_idct8_c },    // DCT_DCT
    { aom_iadst4_c, aom_idct8_c },   // ADST_DCT
    { aom_idct4_c, aom_iadst8_c },   // DCT_ADST
    { aom_iadst4_c, aom_iadst8_c },  // ADST_ADST
    { aom_iadst4_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx8_c },          // IDTX
    { aom_idct4_c, iidtx8_c },       // V_DCT
    { iidtx4_c, aom_idct8_c },       // H_DCT
    { aom_iadst4_c, iidtx8_c },      // V_ADST
    { iidtx4_c, aom_iadst8_c },      // H_ADST
    { aom_iadst4_c, iidtx8_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst8_c },      // H_FLIPADST
563
#endif
564
  };
565

566
567
  const int n = 4;
  const int n2 = 8;
568
569

  int i, j;
570
  tran_low_t out[8][4], tmp[8][4], outtmp[8];
571
  tran_low_t *outp = &out[0][0];
572
  int outstride = n;
573

Lester Lu's avatar
Lester Lu committed
574
#if CONFIG_LGT
Lester Lu's avatar
Lester Lu committed
575
576
577
578
  const tran_high_t *lgtmtx_col[1];
  const tran_high_t *lgtmtx_row[1];
  int use_lgt_col = get_lgt4(txfm_param, 1, lgtmtx_col);
  int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row);
Lester Lu's avatar
Lester Lu committed
579
580
#endif

581
582
583
584
585
586
  // Multi-way scaling matrix (bits):
  // LGT/AV1 row,col     input+0, rowTX+1, mid+.5, colTX+.5, out-5 == -3
  // LGT row, Daala col  input+0, rowTX+1, mid+.5, colTX+.5, out-4 == -3
  // Daala row, LGT col  input+1, rowTX+0, mid+0,  colTX+1,  out-5 == -3
  // Daala row,col       input+1, rowTX+0, mid+0,  colTX+0,  out-4 == -3

587
  // inverse transform row vectors and transpose
588
  for (i = 0; i < n; ++i) {
Lester Lu's avatar
Lester Lu committed
589
#if CONFIG_LGT
590
591
592
593
    if (use_lgt_row) {
      // Scaling cases 1 and 2 above
      // No input scaling
      // Row transform (LGT; scales up 1 bit)
Lester Lu's avatar
Lester Lu committed
594
      ilgt8(input, outtmp, lgtmtx_row[0]);
595
596
597
598
      // Transpose and mid scaling up by .5 bit
      for (j = 0; j < n2; ++j)
        tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
    } else {
Lester Lu's avatar
Lester Lu committed
599
#endif
600
601
602
603
604
605
606
607
608
609
610
611
612
613
#if CONFIG_DAALA_TX4 && CONFIG_DAALA_TX8
      // Daala row transform; Scaling cases 3 and 4 above
      tran_low_t temp_in[8];
      // Input scaling up by 1 bit
      for (j = 0; j < n2; j++) temp_in[j] = input[j] * 2;
      // Row transform; Daala does not scale
      IHT_8x4[tx_type].rows(temp_in, outtmp);
      // Transpose; no mid scaling
      for (j = 0; j < n2; ++j) tmp[j][i] = outtmp[j];
#else
    // AV1 row transform; Scaling case 1 only
    // Row transform (AV1 scales up 1 bit)
    IHT_8x4[tx_type].rows(input, outtmp);
    // Transpose and mid scaling up by .5 bit
614
    for (j = 0; j < n2; ++j)
615
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
616
617
618
619
#endif
#if CONFIG_LGT
    }
#endif
620
    input += n2;
621
622
623
  }

  // inverse transform column vectors
624
  // AV1 and LGT scale up by .5 bits; Daala does not scale
625
  for (i = 0; i < n2; ++i) {
Lester Lu's avatar
Lester Lu committed
626
627
#if CONFIG_LGT
    if (use_lgt_col)
Lester Lu's avatar
Lester Lu committed
628
      ilgt4(tmp[i], out[i], lgtmtx_col[0]);
Lester Lu's avatar
Lester Lu committed
629
630
631
    else
#endif
      IHT_8x4[tx_type].cols(tmp[i], out[i]);
632
633
  }

634
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
635
636

  // Sum with the destination
637
638
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
639
640
      int d = i * stride + j;
      int s = j * outstride + i;
641
642
643
644
645
646
647
648
649
650
651
#if CONFIG_DAALA_TX4 && CONFIG_DAALA_TX8
#if CONFIG_LGT
      if (use_lgt_col)
        // Output scaling cases 1, 3
        dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
      else
#endif
        // Output scaling cases 2, 4
        dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
#else
      // Output scaling case 1
652
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
653
#endif
654
655
656
657
    }
  }
}

658
void av1_iht4x16_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
659
                          const TxfmParam *txfm_param) {
660
  const TX_TYPE tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
661
662
663
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
664
665
666
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
  static const transform_2d IHT_4x16[] = {
    { aom_idct16_c, aom_idct4_c },    // DCT_DCT
    { aom_iadst16_c, aom_idct4_c },   // ADST_DCT
    { aom_idct16_c, aom_iadst4_c },   // DCT_ADST
    { aom_iadst16_c, aom_iadst4_c },  // ADST_ADST
    { aom_iadst16_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct16_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx4_c },          // IDTX
    { aom_idct16_c, iidtx4_c },       // V_DCT
    { iidtx16_c, aom_idct4_c },       // H_DCT
    { aom_iadst16_c, iidtx4_c },      // V_ADST
    { iidtx16_c, aom_iadst4_c },      // H_ADST
    { aom_iadst16_c, iidtx4_c },      // V_FLIPADST
    { iidtx16_c, aom_iadst4_c },      // H_FLIPADST
  };

  const int n = 4;
  const int n4 = 16;
  int i, j;
689
  tran_low_t out[4][16], tmp[4][16], outtmp[4];
690
691
692
  tran_low_t *outp = &out[0][0];
  int outstride = n4;

Lester Lu's avatar
Lester Lu committed
693
#if CONFIG_LGT
Lester Lu's avatar
Lester Lu committed
694
695
  const tran_high_t *lgtmtx_row[1];
  int use_lgt_row = get_lgt4(txfm_param, 0, lgtmtx_row);
Lester Lu's avatar
Lester Lu committed
696
697
#endif

698
699
  // inverse transform row vectors and transpose
  for (i = 0; i < n4; ++i) {
Lester Lu's avatar
Lester Lu committed
700
701
#if CONFIG_LGT
    if (use_lgt_row)
Lester Lu's avatar
Lester Lu committed
702
      ilgt4(input, outtmp, lgtmtx_row[0]);
Lester Lu's avatar
Lester Lu committed
703
704
705
    else
#endif
      IHT_4x16[tx_type].rows(input, outtmp);
706
    for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
707
708
709
710
    input += n;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
711
712
713
  for (i = 0; i < n; ++i) {
    IHT_4x16[tx_type].cols(tmp[i], out[i]);
  }
714
715
716
717
718
719
720
721
722
723
724
725
726
727

  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n4, n);

  // Sum with the destination
  for (i = 0; i < n4; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

void av1_iht16x4_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
728
                          const TxfmParam *txfm_param) {
729
  const TX_TYPE tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
730
731
732
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
733
734
735
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
  static const transform_2d IHT_16x4[] = {
    { aom_idct4_c, aom_idct16_c },    // DCT_DCT
    { aom_iadst4_c, aom_idct16_c },   // ADST_DCT
    { aom_idct4_c, aom_iadst16_c },   // DCT_ADST
    { aom_iadst4_c, aom_iadst16_c },  // ADST_ADST
    { aom_iadst4_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst16_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx16_c },          // IDTX
    { aom_idct4_c, iidtx16_c },       // V_DCT
    { iidtx4_c, aom_idct16_c },       // H_DCT
    { aom_iadst4_c, iidtx16_c },      // V_ADST
    { iidtx4_c, aom_iadst16_c },      // H_ADST
    { aom_iadst4_c, iidtx16_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst16_c },      // H_FLIPADST
  };
754

755
756
757
758
  const int n = 4;
  const int n4 = 16;

  int i, j;
759
  tran_low_t out[16][4], tmp[16][4], outtmp[16];
760
761
762
  tran_low_t *outp = &out[0][0];
  int outstride = n;

Lester Lu's avatar
Lester Lu committed
763
#if CONFIG_LGT
Lester Lu's avatar
Lester Lu committed
764
765
  const tran_high_t *lgtmtx_col[1];
  int use_lgt_col = get_lgt4(txfm_param, 1, lgtmtx_col);
Lester Lu's avatar
Lester Lu committed
766
767
#endif

768
769
770
  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_16x4[tx_type].rows(input, outtmp);
771
    for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j];
772
773
774
775
    input += n4;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
776
777
778
  for (i = 0; i < n4; ++i) {
#if CONFIG_LGT
    if (use_lgt_col)
Lester Lu's avatar
Lester Lu committed
779
      ilgt4(tmp[i], out[i], lgtmtx_col[0]);
Lester Lu's avatar
Lester Lu committed
780
781
782
783
    else
#endif
      IHT_16x4[tx_type].cols(tmp[i], out[i]);
  }
784
785
786
787
788
789
790
791
792
793
794
795
796

  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n4);

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n4; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
797
void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
798
                           const TxfmParam *txfm_param) {
799
  const TX_TYPE tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
800
801
802
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
803
804
805
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
806
  static const transform_2d IHT_8x16[] = {
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
#if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16
    { daala_idct16, daala_idct8 },  // DCT_DCT  = 0
    { daala_idst16, daala_idct8 },  // ADST_DCT = 1
    { daala_idct16, daala_idst8 },  // DCT_ADST = 2
    { daala_idst16, daala_idst8 },  // ADST_ADST = 3
    { daala_idst16, daala_idct8 },  // FLIPADST_DCT
    { daala_idct16, daala_idst8 },  // DCT_FLIPADST
    { daala_idst16, daala_idst8 },  // FLIPADST_FLIPADST
    { daala_idst16, daala_idst8 },  // ADST_FLIPADST
    { daala_idst16, daala_idst8 },  // FLIPADST_ADST
    { daala_idtx16, daala_idtx8 },  // IDTX
    { daala_idct16, daala_idtx8 },  // V_DCT
    { daala_idtx16, daala_idct8 },  // H_DCT
    { daala_idst16, daala_idtx8 },  // V_ADST
    { daala_idtx16, daala_idst8 },  // H_ADST
    { daala_idst16, daala_idtx8 },  // V_FLIPADST
    { daala_idtx16, daala_idst8 },  // H_FLIPADST
#else
Luca Barbato's avatar
Luca Barbato committed
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
    { aom_idct16_c, aom_idct8_c },    // DCT_DCT
    { aom_iadst16_c, aom_idct8_c },   // ADST_DCT
    { aom_idct16_c, aom_iadst8_c },   // DCT_ADST
    { aom_iadst16_c, aom_iadst8_c },  // ADST_ADST
    { aom_iadst16_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct16_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx8_c },          // IDTX
    { aom_idct16_c, iidtx8_c },       // V_DCT
    { iidtx16_c, aom_idct8_c },       // H_DCT
    { aom_iadst16_c, iidtx8_c },      // V_ADST
    { iidtx16_c, aom_iadst8_c },      // H_ADST
    { aom_iadst16_c, iidtx8_c },      // V_FLIPADST
    { iidtx16_c, aom_iadst8_c },      // H_FLIPADST
841
#endif
842
843
844
845
846
  };

  const int n = 8;
  const int n2 = 16;
  int i, j;
847
  tran_low_t out[8][16], tmp[8][16], outtmp[8];
848
849
850
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

Lester Lu's avatar
Lester Lu committed
851
#if CONFIG_LGT
Lester Lu's avatar
Lester Lu committed
852
853
  const tran_high_t *lgtmtx_row[1];
  int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row);
Lester Lu's avatar
Lester Lu committed
854
855
#endif

856
857
858
859
860
861
  // Multi-way scaling matrix (bits):
  // LGT/AV1 row, AV1 col  input+0, rowTX+1, mid+.5, colTX+1.5, out-6 == -3
  // LGT row, Daala col    input+0, rowTX+1, mid+0,  colTX+0,   out-4 == -3
  // Daala row, LGT col    N/A (no 16-point LGT)
  // Daala row,col         input+1, rowTX+0, mid+0,  colTX+0,   out-4 == -3

862
863
  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
Lester Lu's avatar
Lester Lu committed
864
#if CONFIG_LGT
865
866
867
868
    if (use_lgt_row) {
      // Scaling cases 1 and 2 above
      // No input scaling
      // Row transform (LGT; scales up 1 bit)
Lester Lu's avatar
Lester Lu committed
869
      ilgt8(input, outtmp, lgtmtx_row[0]);
870
871
872
873
874
875
876
877
878
879
880
      // Transpose and mid scaling
      for (j = 0; j < n; ++j) {
#if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16
        // Mid scaling case 2
        tmp[j][i] = outtmp[j];
#else
        // Mid scaling case 1
        tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
#endif
      }
    } else {
Lester Lu's avatar
Lester Lu committed
881
#endif
882
883
884
885
886
887
888
889
890
891
892
893
894
#if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16
      tran_low_t temp_in[8];
      // Input scaling case 4
      for (j = 0; j < n; j++) temp_in[j] = input[j] * 2;
      // Row transform (Daala does not scale)
      IHT_8x16[tx_type].rows(temp_in, outtmp);
      // Transpose (no mid scaling)
      for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
#else
    // Case 1; no input scaling
    // Row transform (AV1 scales up 1 bit)
    IHT_8x16[tx_type].rows(input, outtmp);
    // Transpose and mid scaling up .5 bits
895
    for (j = 0; j < n; ++j)
896
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
897
898
899
900
#endif
#if CONFIG_LGT
    }
#endif
clang-format's avatar
clang-format committed
901
    input += n;
902
903
904
  }

  // inverse transform column vectors
905
  // AV1 column TX scales up by 1.5 bit, Daala does not scale
906
  for (i = 0; i < n; ++i) {
907
    IHT_8x16[tx_type].cols(tmp[i], out[i]);
908
909
910
911
912
913
914
915
916
  }

  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
917
918
919
920
921
#if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16
      // Output scaling cases 2 and 4
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
#else
      // Output scaling case 1
922
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
923
#endif
924
925
926
927
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
928
void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
929
                           const TxfmParam *txfm_param) {
930
  const TX_TYPE tx_type = txfm_param->tx_type;
Sarah Parker's avatar
Sarah Parker committed
931
932
933
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
934
935
936
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
937
  static const transform_2d IHT_16x8[] = {
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
#if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16
    { daala_idct8, daala_idct16 },  // DCT_DCT  = 0
    { daala_idst8, daala_idct16 },  // ADST_DCT = 1
    { daala_idct8, daala_idst16 },  // DCT_ADST = 2
    { daala_idst8, daala_idst16 },  // ADST_ADST = 3
    { daala_idst8, daala_idct16 },  // FLIPADST_DCT
    { daala_idct8, daala_idst16 },  // DCT_FLIPADST
    { daala_idst8, daala_idst16 },  // FLIPADST_FLIPADST
    { daala_idst8, daala_idst16 },  // ADST_FLIPADST
    { daala_idst8, daala_idst16 },  // FLIPADST_ADST
    { daala_idtx8, daala_idtx16 },  // IDTX
    { daala_idct8, daala_idtx16 },  // V_DCT
    { daala_idtx8, daala_idct16 },  // H_DCT
    { daala_idst8, daala_idtx16 },  // V_ADST
    { daala_idtx8, daala_idst16 },  // H_ADST
    { daala_idst8, daala_idtx16 },  // V_FLIPADST
    { daala_idtx8, daala_idst16 },  // H_FLIPADST
#else
Luca Barbato's avatar
Luca Barbato committed
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
    { aom_idct8_c, aom_idct16_c },    // DCT_DCT
    { aom_iadst8_c, aom_idct16_c },   // ADST_DCT
    { aom_idct8_c, aom_iadst16_c },   // DCT_ADST
    { aom_iadst8_c, aom_iadst16_c },  // ADST_ADST
    { aom_iadst8_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst16_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // ADST_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx16_c },          // IDTX
    { aom_idct8_c, iidtx16_c },       // V_DCT
    { iidtx8_c, aom_idct16_c },       // H_DCT
    { aom_iadst8_c, iidtx16_c },      // V_ADST
    { iidtx8_c, aom_iadst16_c },      // H_ADST
    { aom_iadst8_c, iidtx16_c },      // V_FLIPADST
    { iidtx8_c, aom_iadst16_c },      // H_FLIPADST
972
#endif
973
  };
974

975
976
977
978
  const int n = 8;
  const int n2 = 16;

  int i, j;
979
  tran_low_t out[16][8], tmp[16][8], outtmp[16];
980
981
982
  tran_low_t *outp = &out[0][0];
  int outstride = n;

Lester Lu's avatar
Lester Lu committed
983
#if CONFIG_LGT
Lester Lu's avatar
Lester Lu committed
984
985
  const tran_high_t *lgtmtx_col[1];
  int use_lgt_col = get_lgt8(txfm_param, 1, lgtmtx_col);
Lester Lu's avatar
Lester Lu committed
986
987
#endif

988
989
990
991
992
993
  // Multi-way scaling matrix (bits):
  // AV1 row, LGT/AV1 col  input+0, rowTX+1.5, mid+.5, colTX+1, out-6 == -3
  // LGT row, Daala col    N/A (no 16-point LGT)
  // Daala row, LGT col    input+1, rowTX+0,   mid+1,  colTX+1, out-6 == -3
  // Daala row, col        input+1, rowTX+0,   mid+0,  colTX+0, out-4 == -3

994
995
  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
996
997
998
999
1000
#if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16
    tran_low_t temp_in[16];
    // Input scaling cases 3 and 4
    for (j = 0; j < n2; j++) temp_in[j] = input[j] * 2;
    // Daala row TX, no scaling
For faster browsing, not all history is shown. View entire blame