idct.c 74.4 KB
Newer Older
Jingning Han's avatar
Jingning Han committed
1
/*
Yaowu Xu's avatar
Yaowu Xu committed
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
Jingning Han's avatar
Jingning Han committed
3
 *
Yaowu Xu's avatar
Yaowu Xu committed
4 5 6 7 8 9
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
Jingning Han's avatar
Jingning Han committed
10 11 12 13
 */

#include <math.h>

Yaowu Xu's avatar
Yaowu Xu committed
14
#include "./aom_dsp_rtcd.h"
Geza Lore's avatar
Geza Lore committed
15 16 17
#include "./av1_rtcd.h"
#include "aom_dsp/inv_txfm.h"
#include "aom_ports/mem.h"
18
#include "av1/common/av1_inv_txfm1d_cfg.h"
19 20 21
#include "av1/common/blockd.h"
#include "av1/common/enums.h"
#include "av1/common/idct.h"
Jingning Han's avatar
Jingning Han committed
22

23
int av1_get_tx_scale(const TX_SIZE tx_size) {
24 25 26 27 28 29 30
  if (txsize_sqr_up_map[tx_size] == TX_32X32) return 1;
#if CONFIG_TX64X64
  else if (txsize_sqr_up_map[tx_size] == TX_64X64)
    return 2;
#endif  // CONFIG_TX64X64
  else
    return 0;
31 32
}

33 34 35
// NOTE: The implementations of all inverse transforms need to be aware that
// the input and output could be the same buffer.

36
#if CONFIG_EXT_TX
Debargha Mukherjee's avatar
Debargha Mukherjee committed
37 38
// 4-point identity (IDTX) kernel.
// With CONFIG_DAALA_DCT4 the coefficients are copied unchanged; otherwise
// each one is multiplied by Sqrt2 and passed through dct_const_round_shift.
// Works element by element, so input and output may be the same buffer.
static void iidtx4_c(const tran_low_t *input, tran_low_t *output) {
  for (int k = 0; k < 4; ++k) {
#if CONFIG_DAALA_DCT4
    output[k] = input[k];
#else
    output[k] = (tran_low_t)dct_const_round_shift(input[k] * Sqrt2);
#endif
  }
}

// 8-point identity (IDTX) kernel: every coefficient is doubled.
// Works element by element, so input and output may be the same buffer.
static void iidtx8_c(const tran_low_t *input, tran_low_t *output) {
  for (int k = 0; k < 8; ++k) {
    output[k] = input[k] * 2;
  }
}

// 16-point identity (IDTX) kernel: each coefficient is multiplied by
// 2 * Sqrt2 and then passed through dct_const_round_shift.
// Works element by element, so input and output may be the same buffer.
static void iidtx16_c(const tran_low_t *input, tran_low_t *output) {
  for (int k = 0; k < 16; ++k) {
    output[k] = (tran_low_t)dct_const_round_shift(input[k] * 2 * Sqrt2);
  }
}

// 32-point identity (IDTX) kernel: every coefficient is multiplied by 4.
// Works element by element, so input and output may be the same buffer.
static void iidtx32_c(const tran_low_t *input, tran_low_t *output) {
  for (int k = 0; k < 32; ++k) {
    output[k] = input[k] * 4;
  }
}
63 64 65 66 67 68 69 70

#if CONFIG_TX64X64
// 64-point identity (IDTX) kernel: each coefficient is multiplied by
// 4 * Sqrt2 and then passed through dct_const_round_shift.
// Works element by element, so input and output may be the same buffer.
static void iidtx64_c(const tran_low_t *input, tran_low_t *output) {
  for (int k = 0; k < 64; ++k) {
    output[k] = (tran_low_t)dct_const_round_shift(input[k] * 4 * Sqrt2);
  }
}
#endif  // CONFIG_TX64X64
71
#endif  // CONFIG_EXT_TX
Debargha Mukherjee's avatar
Debargha Mukherjee committed
72

73
// For use in lieu of ADST
74 75 76 77 78 79 80
// 32-point "half right" kernel.
//   output[0..15]  = input[16..31] * 4
//   output[16..31] = aom_idct16_c applied to input[0..15] pre-scaled by Sqrt2
// The first half of the input is copied to a scratch array before any output
// element is written, so the statement order below must be kept for the case
// where input and output are the same buffer.
static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
  tran_low_t scaled_lo[16];
  int k;

  // Multiply the lower half of the input by sqrt(2).
  for (k = 0; k < 16; ++k)
    scaled_lo[k] = (tran_low_t)dct_const_round_shift(input[k] * Sqrt2);

  // The upper half of the input passes straight through, scaled by 4.
  for (k = 0; k < 16; ++k) output[k] = input[16 + k] * 4;

  aom_idct16_c(scaled_lo, output + 16);
  // Note overall scaling factor is 4 times orthogonal
}

88 89 90 91 92
#if CONFIG_TX64X64
// Column-direction 64-point inverse DCT.
// Copies the coefficients into an int32_t scratch array, runs av1_idct64_new
// with the column cos-bit / stage-range tables, and copies the result back
// as tran_low_t.
static void idct64_col_c(const tran_low_t *input, tran_low_t *output) {
  int32_t src[64];
  int32_t dst[64];
  int k;

  for (k = 0; k < 64; ++k) src[k] = (int32_t)input[k];
  av1_idct64_new(src, dst, inv_cos_bit_col_dct_64, inv_stage_range_col_dct_64);
  for (k = 0; k < 64; ++k) output[k] = (tran_low_t)dst[k];
}

// Row-direction 64-point inverse DCT.
// Copies the coefficients into an int32_t scratch array, runs av1_idct64_new
// with the row cos-bit / stage-range tables, and copies the result back as
// tran_low_t.
static void idct64_row_c(const tran_low_t *input, tran_low_t *output) {
  int32_t src[64];
  int32_t dst[64];
  int k;

  for (k = 0; k < 64; ++k) src[k] = (int32_t)input[k];
  av1_idct64_new(src, dst, inv_cos_bit_row_dct_64, inv_stage_range_row_dct_64);
  for (k = 0; k < 64; ++k) output[k] = (tran_low_t)dst[k];
}

// For use in lieu of ADST
// 64-point "half right" kernel, used in lieu of ADST.
//   output[0..31]  = round_shift(input[32..63] * 4 * Sqrt2)
//   output[32..63] = aom_idct32_c applied to input[0..31] pre-scaled by Sqrt2
// The first half of the input is copied to a scratch array before any output
// element is written, so the statement order below must be kept for the case
// where input and output are the same buffer.
static void ihalfright64_c(const tran_low_t *input, tran_low_t *output) {
  tran_low_t scaled_lo[32];
  int k;

  // Multiply the lower half of the input by sqrt(2).
  for (k = 0; k < 32; ++k)
    scaled_lo[k] = (tran_low_t)dct_const_round_shift(input[k] * Sqrt2);

  // The upper half of the input passes through, scaled by 4 * sqrt(2).
  for (k = 0; k < 32; ++k)
    output[k] = (tran_low_t)dct_const_round_shift(input[32 + k] * 4 * Sqrt2);

  aom_idct32_c(scaled_lo, output + 32);
  // Note overall scaling factor is 4 * sqrt(2) times orthogonal
}
#endif  // CONFIG_TX64X64

Jingning Han's avatar
Jingning Han committed
121
// Inverse identity transform and add.
122
#if CONFIG_EXT_TX
123
static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
Jingning Han's avatar
Jingning Han committed
124
                           int bs, int tx_type) {
125
  int r, c;
126
  const int shift = bs < 32 ? 3 : (bs < 64 ? 2 : 1);
Debargha Mukherjee's avatar
Debargha Mukherjee committed
127
  if (tx_type == IDTX) {
Jingning Han's avatar
Jingning Han committed
128 129
    for (r = 0; r < bs; ++r) {
      for (c = 0; c < bs; ++c)
Debargha Mukherjee's avatar
Debargha Mukherjee committed
130 131 132
        dest[c] = clip_pixel_add(dest[c], input[c] >> shift);
      dest += stride;
      input += bs;
Jingning Han's avatar
Jingning Han committed
133
    }
134 135
  }
}
136
#endif  // CONFIG_EXT_TX
137

clang-format's avatar
clang-format committed
138 139 140 141 142
// Flips a strided 2-D view upside down in place: `dest` is advanced to the
// start of its last row (row index size - 1) and `stride` is negated, so that
// stepping by the new stride walks the rows bottom-up. Both `dest` and
// `stride` must be modifiable lvalues.
#define FLIPUD_PTR(dest, stride, size)  \
  do {                                  \
    (dest) += ((size)-1) * (stride);    \
    (stride) = -(stride);               \
  } while (0)
143

144
#if CONFIG_EXT_TX
clang-format's avatar
clang-format committed
145 146 147
// Adjusts the destination and/or source pointer and stride so that the final
// "sum with the destination" loop realises the flips required by tx_type.
//
// Note that the transpose of src will be added to dst. In order to LR
// flip the addends (in dst coordinates), we UD flip the src. To UD flip
// the addends, we UD flip the dst.
//
//   dst, dstride - destination pointer / stride, UD-flipped over sizey rows
//   src, sstride - source pointer / stride, UD-flipped over sizex rows
// Unknown tx_type values trigger assert(0).
static void maybe_flip_strides(uint8_t **dst, int *dstride, tran_low_t **src,
                               int *sstride, int tx_type, int sizey,
                               int sizex) {
  int flip_ud = 0;
  int flip_lr = 0;

  switch (tx_type) {
    // No flipping required.
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case IDTX:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST: break;
    // Vertical flip only.
    case FLIPADST_DCT:
    case FLIPADST_ADST:
    case V_FLIPADST: flip_ud = 1; break;
    // Horizontal flip only.
    case DCT_FLIPADST:
    case ADST_FLIPADST:
    case H_FLIPADST: flip_lr = 1; break;
    // Both directions.
    case FLIPADST_FLIPADST:
      flip_ud = 1;
      flip_lr = 1;
      break;
    default: assert(0); break;
  }

  if (flip_ud) {
    // flip UD
    FLIPUD_PTR(*dst, *dstride, sizey);
  }
  if (flip_lr) {
    // flip LR
    FLIPUD_PTR(*src, *sstride, sizex);
  }
}
182
#endif  // CONFIG_EXT_TX
183

184
#if CONFIG_HIGHBITDEPTH
185
#if CONFIG_EXT_TX && CONFIG_TX64X64
186
static void highbd_inv_idtx_add_c(const tran_low_t *input, uint8_t *dest8,
187
                                  int stride, int bs, int tx_type, int bd) {
188 189 190
  int r, c;
  const int shift = bs < 32 ? 3 : 2;
  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
191

Debargha Mukherjee's avatar
Debargha Mukherjee committed
192
  if (tx_type == IDTX) {
193 194
    for (r = 0; r < bs; ++r) {
      for (c = 0; c < bs; ++c)
Debargha Mukherjee's avatar
Debargha Mukherjee committed
195 196 197
        dest[c] = highbd_clip_pixel_add(dest[c], input[c] >> shift, bd);
      dest += stride;
      input += bs;
198
    }
199 200
  }
}
201
#endif  // CONFIG_EXT_TX && CONFIG_TX64X64
202
#endif  // CONFIG_HIGHBITDEPTH
203

Lester Lu's avatar
Lester Lu committed
204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253
#if CONFIG_LGT
// 4-point inverse LGT: multiplies the input vector by the 4x4 matrix lgtmtx
// (stored row-major, indexed as lgtmtx[i * 4 + j]).
//   output[j] = WRAPLOW(round_shift(sum over i = 0..3 of
//                                   lgtmtx[i * 4 + j] * input[i]))
// An all-zero input yields an all-zero output without touching the matrix.
// All sums are accumulated before any output is stored, so input and output
// may be the same buffer.
void ilgt4(const tran_low_t *input, tran_low_t *output,
           const tran_high_t *lgtmtx) {
  if (input[0] == 0 && input[1] == 0 && input[2] == 0 && input[3] == 0) {
    for (int k = 0; k < 4; ++k) output[k] = 0;
    return;
  }

  tran_high_t acc[4] = { 0 };
  for (int col = 0; col < 4; ++col) {
    for (int row = 0; row < 4; ++row)
      acc[col] += lgtmtx[row * 4 + col] * input[row];
  }

  for (int col = 0; col < 4; ++col)
    output[col] = WRAPLOW(dct_const_round_shift(acc[col]));
}

// 8-point inverse LGT: multiplies the input vector by the 8x8 matrix lgtmtx
// (stored row-major, indexed as lgtmtx[i * 8 + j]).
//   output[j] = WRAPLOW(round_shift(sum over i = 0..7 of
//                                   lgtmtx[i * 8 + j] * input[i]))
// All sums are accumulated before any output is stored, so input and output
// may be the same buffer.
void ilgt8(const tran_low_t *input, tran_low_t *output,
           const tran_high_t *lgtmtx) {
  tran_high_t acc[8] = { 0 };

  for (int col = 0; col < 8; ++col) {
    for (int row = 0; row < 8; ++row)
      acc[col] += lgtmtx[row * 8 + col] * input[row];
  }

  for (int col = 0; col < 8; ++col)
    output[col] = WRAPLOW(dct_const_round_shift(acc[col]));
}

// The get_inv_lgt functions return 1 if LGT is chosen to apply, and 0 otherwise
// Decides whether the 4-point LGT replaces the 1-D transform tx_orig.
// LGT is selected only when tx_orig is aom_iadst4_c. In that case the first
// ntx entries of lgtmtx are pointed at lgt4_170 for inter blocks
// (inv_txfm_param->is_inter non-zero) or lgt4_140 otherwise.
// Returns 1 if LGT is to be applied, 0 if lgtmtx was left untouched.
int get_inv_lgt4(transform_1d tx_orig, const INV_TXFM_PARAM *inv_txfm_param,
                 const tran_high_t *lgtmtx[], int ntx) {
  if (tx_orig != &aom_iadst4_c) return 0;

  // inter/intra split
  for (int k = 0; k < ntx; ++k) {
    if (inv_txfm_param->is_inter)
      lgtmtx[k] = &lgt4_170[0][0];
    else
      lgtmtx[k] = &lgt4_140[0][0];
  }
  return 1;
}

// Decides whether the 8-point LGT replaces the 1-D transform tx_orig.
// LGT is selected only when tx_orig is aom_iadst8_c. In that case the first
// ntx entries of lgtmtx are pointed at lgt8_170 for inter blocks
// (inv_txfm_param->is_inter non-zero) or lgt8_150 otherwise.
// Returns 1 if LGT is to be applied, 0 if lgtmtx was left untouched.
int get_inv_lgt8(transform_1d tx_orig, const INV_TXFM_PARAM *inv_txfm_param,
                 const tran_high_t *lgtmtx[], int ntx) {
  if (tx_orig != &aom_iadst8_c) return 0;

  // inter/intra split
  for (int k = 0; k < ntx; ++k) {
    if (inv_txfm_param->is_inter)
      lgtmtx[k] = &lgt8_170[0][0];
    else
      lgtmtx[k] = &lgt8_150[0][0];
  }
  return 1;
}
#endif  // CONFIG_LGT

Yaowu Xu's avatar
Yaowu Xu committed
254
void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
255 256
                         const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
257
#if !CONFIG_DAALA_DCT4
258 259 260 261
  if (tx_type == DCT_DCT) {
    aom_idct4x4_16_add(input, dest, stride);
    return;
  }
262
#endif
263
  static const transform_2d IHT_4[] = {
Luca Barbato's avatar
Luca Barbato committed
264 265 266 267
    { aom_idct4_c, aom_idct4_c },    // DCT_DCT  = 0
    { aom_iadst4_c, aom_idct4_c },   // ADST_DCT = 1
    { aom_idct4_c, aom_iadst4_c },   // DCT_ADST = 2
    { aom_iadst4_c, aom_iadst4_c },  // ADST_ADST = 3
268
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
269 270 271 272 273 274 275 276 277 278 279 280
    { aom_iadst4_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx4_c },          // IDTX
    { aom_idct4_c, iidtx4_c },       // V_DCT
    { iidtx4_c, aom_idct4_c },       // H_DCT
    { aom_iadst4_c, iidtx4_c },      // V_ADST
    { iidtx4_c, aom_iadst4_c },      // H_ADST
    { aom_iadst4_c, iidtx4_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst4_c },      // H_FLIPADST
281
#endif
282 283 284
  };

  int i, j;
285
  tran_low_t tmp[4][4];
286 287 288
  tran_low_t out[4][4];
  tran_low_t *outp = &out[0][0];
  int outstride = 4;
289

290 291 292 293
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif

Lester Lu's avatar
Lester Lu committed
294 295 296 297 298 299 300
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[4];
  const tran_high_t *lgtmtx_row[4];
  int use_lgt_col = get_inv_lgt4(IHT_4[tx_type].cols, param, lgtmtx_col, 4);
  int use_lgt_row = get_inv_lgt4(IHT_4[tx_type].rows, param, lgtmtx_row, 4);
#endif

301 302
  // inverse transform row vectors
  for (i = 0; i < 4; ++i) {
303 304 305 306 307
#if CONFIG_DAALA_DCT4
    tran_low_t temp_in[4];
    for (j = 0; j < 4; j++) temp_in[j] = input[j] << 1;
    IHT_4[tx_type].rows(temp_in, out[i]);
#else
Lester Lu's avatar
Lester Lu committed
308 309 310 311 312 313
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt4(input, out[i], lgtmtx_row[i]);
    else
#endif
      IHT_4[tx_type].rows(input, out[i]);
314
#endif
clang-format's avatar
clang-format committed
315
    input += 4;
316 317 318
  }

  // transpose
319 320 321
  for (i = 0; i < 4; i++) {
    for (j = 0; j < 4; j++) {
      tmp[j][i] = out[i][j];
322
    }
323 324 325 326
  }

  // inverse transform column vectors
  for (i = 0; i < 4; ++i) {
Lester Lu's avatar
Lester Lu committed
327 328 329 330 331 332
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt4(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_4[tx_type].cols(tmp[i], out[i]);
333 334 335
  }

#if CONFIG_EXT_TX
336
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4, 4);
337 338 339 340
#endif

  // Sum with the destination
  for (i = 0; i < 4; ++i) {
341
    for (j = 0; j < 4; ++j) {
342 343
      int d = i * stride + j;
      int s = j * outstride + i;
344
#if CONFIG_DAALA_DCT4
345
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
346 347 348
#else
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
#endif
349 350 351 352
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
353
void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
354 355
                         const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
356 357 358
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
359
  static const transform_2d IHT_4x8[] = {
Luca Barbato's avatar
Luca Barbato committed
360 361 362 363
    { aom_idct8_c, aom_idct4_c },    // DCT_DCT
    { aom_iadst8_c, aom_idct4_c },   // ADST_DCT
    { aom_idct8_c, aom_iadst4_c },   // DCT_ADST
    { aom_iadst8_c, aom_iadst4_c },  // ADST_ADST
364
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
365 366 367 368 369 370 371 372 373 374 375 376
    { aom_iadst8_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx4_c },          // IDTX
    { aom_idct8_c, iidtx4_c },       // V_DCT
    { iidtx8_c, aom_idct4_c },       // H_DCT
    { aom_iadst8_c, iidtx4_c },      // V_ADST
    { iidtx8_c, aom_iadst4_c },      // H_ADST
    { aom_iadst8_c, iidtx4_c },      // V_FLIPADST
    { iidtx8_c, aom_iadst4_c },      // H_FLIPADST
377
#endif
378 379
  };

380 381
  const int n = 4;
  const int n2 = 8;
382
  int i, j;
383
  tran_low_t out[4][8], tmp[4][8], outtmp[4];
384
  tran_low_t *outp = &out[0][0];
385
  int outstride = n2;
386

Lester Lu's avatar
Lester Lu committed
387 388 389 390 391 392 393
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[4];
  const tran_high_t *lgtmtx_row[8];
  int use_lgt_col = get_inv_lgt8(IHT_4x8[tx_type].cols, param, lgtmtx_col, 4);
  int use_lgt_row = get_inv_lgt4(IHT_4x8[tx_type].rows, param, lgtmtx_row, 8);
#endif

394
  // inverse transform row vectors and transpose
395
  for (i = 0; i < n2; ++i) {
Lester Lu's avatar
Lester Lu committed
396 397 398 399 400 401
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt4(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_4x8[tx_type].rows(input, outtmp);
402
    for (j = 0; j < n; ++j)
403
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
404
    input += n;
405 406 407
  }

  // inverse transform column vectors
408
  for (i = 0; i < n; ++i) {
Lester Lu's avatar
Lester Lu committed
409 410 411 412 413 414
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt8(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_4x8[tx_type].cols(tmp[i], out[i]);
415 416
  }

417
#if CONFIG_EXT_TX
418
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
419
#endif
420 421

  // Sum with the destination
422 423
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
424 425 426 427 428 429 430
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
431
void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
432 433
                         const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
434 435 436
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
437
  static const transform_2d IHT_8x4[] = {
Luca Barbato's avatar
Luca Barbato committed
438 439 440 441
    { aom_idct4_c, aom_idct8_c },    // DCT_DCT
    { aom_iadst4_c, aom_idct8_c },   // ADST_DCT
    { aom_idct4_c, aom_iadst8_c },   // DCT_ADST
    { aom_iadst4_c, aom_iadst8_c },  // ADST_ADST
442
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
443 444 445 446 447 448 449 450 451 452 453 454
    { aom_iadst4_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx8_c },          // IDTX
    { aom_idct4_c, iidtx8_c },       // V_DCT
    { iidtx4_c, aom_idct8_c },       // H_DCT
    { aom_iadst4_c, iidtx8_c },      // V_ADST
    { iidtx4_c, aom_iadst8_c },      // H_ADST
    { aom_iadst4_c, iidtx8_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst8_c },      // H_FLIPADST
455
#endif
456
  };
457

458 459
  const int n = 4;
  const int n2 = 8;
460 461

  int i, j;
462
  tran_low_t out[8][4], tmp[8][4], outtmp[8];
463
  tran_low_t *outp = &out[0][0];
464
  int outstride = n;
465

Lester Lu's avatar
Lester Lu committed
466 467 468 469 470 471 472
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[8];
  const tran_high_t *lgtmtx_row[4];
  int use_lgt_col = get_inv_lgt4(IHT_8x4[tx_type].cols, param, lgtmtx_col, 8);
  int use_lgt_row = get_inv_lgt8(IHT_8x4[tx_type].rows, param, lgtmtx_row, 4);
#endif

473
  // inverse transform row vectors and transpose
474
  for (i = 0; i < n; ++i) {
Lester Lu's avatar
Lester Lu committed
475 476 477 478 479 480
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt8(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_8x4[tx_type].rows(input, outtmp);
481
    for (j = 0; j < n2; ++j)
482
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
483
    input += n2;
484 485 486
  }

  // inverse transform column vectors
487
  for (i = 0; i < n2; ++i) {
Lester Lu's avatar
Lester Lu committed
488 489 490 491 492 493
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt4(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_8x4[tx_type].cols(tmp[i], out[i]);
494 495
  }

496
#if CONFIG_EXT_TX
497
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
498
#endif
499 500

  // Sum with the destination
501 502
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
503 504 505 506 507 508 509
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

510
void av1_iht4x16_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
511 512
                          const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
513 514 515
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539
  static const transform_2d IHT_4x16[] = {
    { aom_idct16_c, aom_idct4_c },    // DCT_DCT
    { aom_iadst16_c, aom_idct4_c },   // ADST_DCT
    { aom_idct16_c, aom_iadst4_c },   // DCT_ADST
    { aom_iadst16_c, aom_iadst4_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst16_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct16_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx4_c },          // IDTX
    { aom_idct16_c, iidtx4_c },       // V_DCT
    { iidtx16_c, aom_idct4_c },       // H_DCT
    { aom_iadst16_c, iidtx4_c },      // V_ADST
    { iidtx16_c, aom_iadst4_c },      // H_ADST
    { aom_iadst16_c, iidtx4_c },      // V_FLIPADST
    { iidtx16_c, aom_iadst4_c },      // H_FLIPADST
#endif
  };

  const int n = 4;
  const int n4 = 16;
  int i, j;
540
  tran_low_t out[4][16], tmp[4][16], outtmp[4];
541 542 543
  tran_low_t *outp = &out[0][0];
  int outstride = n4;

Lester Lu's avatar
Lester Lu committed
544 545 546 547 548
#if CONFIG_LGT
  const tran_high_t *lgtmtx_row[16];
  int use_lgt_row = get_inv_lgt4(IHT_4x16[tx_type].rows, param, lgtmtx_row, 16);
#endif

549 550
  // inverse transform row vectors and transpose
  for (i = 0; i < n4; ++i) {
Lester Lu's avatar
Lester Lu committed
551 552 553 554 555 556
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt4(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_4x16[tx_type].rows(input, outtmp);
557
    for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
558 559 560 561
    input += n;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
562 563 564
  for (i = 0; i < n; ++i) {
    IHT_4x16[tx_type].cols(tmp[i], out[i]);
  }
565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n4, n);
#endif

  // Sum with the destination
  for (i = 0; i < n4; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

void av1_iht16x4_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
581 582
                          const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
583 584 585
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605
  static const transform_2d IHT_16x4[] = {
    { aom_idct4_c, aom_idct16_c },    // DCT_DCT
    { aom_iadst4_c, aom_idct16_c },   // ADST_DCT
    { aom_idct4_c, aom_iadst16_c },   // DCT_ADST
    { aom_iadst4_c, aom_iadst16_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst4_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst16_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx16_c },          // IDTX
    { aom_idct4_c, iidtx16_c },       // V_DCT
    { iidtx4_c, aom_idct16_c },       // H_DCT
    { aom_iadst4_c, iidtx16_c },      // V_ADST
    { iidtx4_c, aom_iadst16_c },      // H_ADST
    { aom_iadst4_c, iidtx16_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst16_c },      // H_FLIPADST
#endif
  };
606

607 608 609 610
  const int n = 4;
  const int n4 = 16;

  int i, j;
611
  tran_low_t out[16][4], tmp[16][4], outtmp[16];
612 613 614
  tran_low_t *outp = &out[0][0];
  int outstride = n;

Lester Lu's avatar
Lester Lu committed
615 616 617 618 619
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[16];
  int use_lgt_col = get_inv_lgt4(IHT_16x4[tx_type].cols, param, lgtmtx_col, 16);
#endif

620 621 622
  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_16x4[tx_type].rows(input, outtmp);
623
    for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j];
624 625 626 627
    input += n4;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
628 629 630 631 632 633 634 635
  for (i = 0; i < n4; ++i) {
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt4(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_16x4[tx_type].cols(tmp[i], out[i]);
  }
636 637 638 639 640 641 642 643 644 645 646 647 648 649 650

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n4);
#endif

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n4; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
651
void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
652 653
                           const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
654 655 656
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
657
  static const transform_2d IHT_8x16[] = {
Luca Barbato's avatar
Luca Barbato committed
658 659 660 661
    { aom_idct16_c, aom_idct8_c },    // DCT_DCT
    { aom_iadst16_c, aom_idct8_c },   // ADST_DCT
    { aom_idct16_c, aom_iadst8_c },   // DCT_ADST
    { aom_iadst16_c, aom_iadst8_c },  // ADST_ADST
662
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
663 664 665 666 667 668 669 670 671 672 673 674
    { aom_iadst16_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct16_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx8_c },          // IDTX
    { aom_idct16_c, iidtx8_c },       // V_DCT
    { iidtx16_c, aom_idct8_c },       // H_DCT
    { aom_iadst16_c, iidtx8_c },      // V_ADST
    { iidtx16_c, aom_iadst8_c },      // H_ADST
    { aom_iadst16_c, iidtx8_c },      // V_FLIPADST
    { iidtx16_c, aom_iadst8_c },      // H_FLIPADST
675
#endif
676 677 678 679 680
  };

  const int n = 8;
  const int n2 = 16;
  int i, j;
681
  tran_low_t out[8][16], tmp[8][16], outtmp[8];
682 683 684
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

Lester Lu's avatar
Lester Lu committed
685 686 687 688 689
#if CONFIG_LGT
  const tran_high_t *lgtmtx_row[16];
  int use_lgt_row = get_inv_lgt8(IHT_8x16[tx_type].rows, param, lgtmtx_row, 16);
#endif

690 691
  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
Lester Lu's avatar
Lester Lu committed
692 693 694 695 696 697
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt8(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_8x16[tx_type].rows(input, outtmp);
698
    for (j = 0; j < n; ++j)
699
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
700
    input += n;
701 702 703 704
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
705
    IHT_8x16[tx_type].cols(tmp[i], out[i]);
706 707
  }

708
#if CONFIG_EXT_TX
709
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
710
#endif
711 712 713 714 715 716 717 718 719 720 721

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
722
void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
723 724
                           const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
725 726 727
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
728
  static const transform_2d IHT_16x8[] = {
Luca Barbato's avatar
Luca Barbato committed
729 730 731 732
    { aom_idct8_c, aom_idct16_c },    // DCT_DCT
    { aom_iadst8_c, aom_idct16_c },   // ADST_DCT
    { aom_idct8_c, aom_iadst16_c },   // DCT_ADST
    { aom_iadst8_c, aom_iadst16_c },  // ADST_ADST
733
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
734 735 736 737 738 739 740 741 742 743 744 745
    { aom_iadst8_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst16_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // ADST_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx16_c },          // IDTX
    { aom_idct8_c, iidtx16_c },       // V_DCT
    { iidtx8_c, aom_idct16_c },       // H_DCT
    { aom_iadst8_c, iidtx16_c },      // V_ADST
    { iidtx8_c, aom_iadst16_c },      // H_ADST
    { aom_iadst8_c, iidtx16_c },      // V_FLIPADST
    { iidtx8_c, aom_iadst16_c },      // H_FLIPADST
746
#endif
747
  };
748

749 750 751 752
  const int n = 8;
  const int n2 = 16;

  int i, j;
753
  tran_low_t out[16][8], tmp[16][8], outtmp[16];
754 755 756
  tran_low_t *outp = &out[0][0];
  int outstride = n;

Lester Lu's avatar
Lester Lu committed
757 758 759 760 761
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[16];
  int use_lgt_col = get_inv_lgt8(IHT_16x8[tx_type].cols, param, lgtmtx_col, 16);
#endif

762 763 764 765
  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_16x8[tx_type].rows(input, outtmp);
    for (j = 0; j < n2; ++j)
766
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
767
    input += n2;
768 769 770 771
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) {
Lester Lu's avatar
Lester Lu committed
772 773 774 775 776 777
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt8(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_16x8[tx_type].cols(tmp[i], out[i]);
778 779
  }

780
#if CONFIG_EXT_TX
781
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
782
#endif
783 784 785 786 787 788 789 790 791 792 793

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

794
void av1_iht8x32_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
795 796
                           const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
797 798 799
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823
  static const transform_2d IHT_8x32[] = {
    { aom_idct32_c, aom_idct8_c },     // DCT_DCT
    { ihalfright32_c, aom_idct8_c },   // ADST_DCT
    { aom_idct32_c, aom_iadst8_c },    // DCT_ADST
    { ihalfright32_c, aom_iadst8_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { ihalfright32_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct32_c, aom_iadst8_c },    // DCT_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // ADST_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx8_c },           // IDTX
    { aom_idct32_c, iidtx8_c },        // V_DCT
    { iidtx32_c, aom_idct8_c },        // H_DCT
    { ihalfright32_c, iidtx8_c },      // V_ADST
    { iidtx32_c, aom_iadst8_c },       // H_ADST
    { ihalfright32_c, iidtx8_c },      // V_FLIPADST
    { iidtx32_c, aom_iadst8_c },       // H_FLIPADST
#endif
  };

  const int n = 8;
  const int n4 = 32;
  int i, j;
824
  tran_low_t out[8][32], tmp[8][32], outtmp[8];
825 826 827
  tran_low_t *outp = &out[0][0];
  int outstride = n4;

Lester Lu's avatar
Lester Lu committed
828 829 830 831 832
#if CONFIG_LGT
  const tran_high_t *lgtmtx_row[32];
  int use_lgt_row = get_inv_lgt8(IHT_8x32[tx_type].rows, param, lgtmtx_row, 32);
#endif

833 834
  // inverse transform row vectors and transpose
  for (i = 0; i < n4; ++i) {
Lester Lu's avatar
Lester Lu committed
835 836 837 838 839 840
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt8(input, outtmp, lgtmtx_row[i]);
    else
#endif
      IHT_8x32[tx_type].rows(input, outtmp);
841
    for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
842 843 844 845
    input += n;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
846 847 848
  for (i = 0; i < n; ++i) {
    IHT_8x32[tx_type].cols(tmp[i], out[i]);
  }
849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n4, n);
#endif

  // Sum with the destination
  for (i = 0; i < n4; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

void av1_iht32x8_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
865 866
                           const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
867 868 869
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889
  static const transform_2d IHT_32x8[] = {
    { aom_idct8_c, aom_idct32_c },     // DCT_DCT
    { aom_iadst8_c, aom_idct32_c },    // ADST_DCT
    { aom_idct8_c, ihalfright32_c },   // DCT_ADST
    { aom_iadst8_c, ihalfright32_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst8_c, aom_idct32_c },    // FLIPADST_DCT
    { aom_idct8_c, ihalfright32_c },   // DCT_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // ADST_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx32_c },           // IDTX
    { aom_idct8_c, iidtx32_c },        // V_DCT
    { iidtx8_c, aom_idct32_c },        // H_DCT
    { aom_iadst8_c, iidtx32_c },       // V_ADST
    { iidtx8_c, ihalfright32_c },      // H_ADST
    { aom_iadst8_c, iidtx32_c },       // V_FLIPADST
    { iidtx8_c, ihalfright32_c },      // H_FLIPADST
#endif
  };
890

891 892 893 894
  const int n = 8;
  const int n4 = 32;

  int i, j;
895
  tran_low_t out[32][8], tmp[32][8], outtmp[32];
896 897 898
  tran_low_t *outp = &out[0][0];
  int outstride = n;

Lester Lu's avatar
Lester Lu committed
899 900 901 902 903
#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[32];
  int use_lgt_col = get_inv_lgt4(IHT_32x8[tx_type].cols, param, lgtmtx_col, 32);
#endif

904 905 906
  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_32x8[tx_type].rows(input, outtmp);
907
    for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j];
908 909 910 911
    input += n4;
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
912 913 914 915 916 917 918 919
  for (i = 0; i < n4; ++i) {
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt8(tmp[i], out[i], lgtmtx_col[i]);
    else
#endif
      IHT_32x8[tx_type].cols(tmp[i], out[i]);
  }
920 921 922 923 924 925 926 927 928 929 930 931 932 933 934

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n4);
#endif

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n4; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
935
void av1_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
936 937
                            const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
938 939 940
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
941
  static const transform_2d IHT_16x32[] = {
Luca Barbato's avatar
Luca Barbato committed
942 943 944 945
    { aom_idct32_c, aom_idct16_c },     // DCT_DCT
    { ihalfright32_c, aom_idct16_c },   // ADST_DCT
    { aom_idct32_c, aom_iadst16_c },    // DCT_ADST
    { ihalfright32_c, aom_iadst16_c },  // ADST_ADST
946
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
947 948 949 950 951 952 953 954 955 956 957 958
    { ihalfright32_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct32_c, aom_iadst16_c },    // DCT_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // ADST_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx16_c },           // IDTX
    { aom_idct32_c, iidtx16_c },        // V_DCT
    { iidtx32_c, aom_idct16_c },        // H_DCT
    { ihalfright32_c, iidtx16_c },      // V_ADST
    { iidtx32_c, aom_iadst16_c },       // H_ADST
    { ihalfright32_c, iidtx16_c },      // V_FLIPADST
    { iidtx32_c, aom_iadst16_c },       // H_FLIPADST
959
#endif
960 961 962 963 964
  };

  const int n = 16;
  const int n2 = 32;
  int i, j;
965
  tran_low_t out[16][32], tmp[16][32], outtmp[16];
966 967 968 969 970 971 972
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
    IHT_16x32[tx_type].rows(input, outtmp);
    for (j = 0; j < n; ++j)
973
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
974
    input += n;
975 976 977
  }

  // inverse transform column vectors
Lester Lu's avatar
Lester Lu committed
978
  for (i = 0; i < n; ++i) IHT_16x32[tx_type].cols(tmp[i], out[i]);
979

980
#if CONFIG_EXT_TX
981
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
982
#endif
983 984 985 986 987 988 989 990 991 992 993

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
994
void av1_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
995 996
                            const INV_TXFM_PARAM *param) {
  int tx_type = param->tx_type;
997 998 999
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
1000
  static const transform_2d IHT_32x16[] = {
Luca Barbato's avatar
Luca Barbato committed
1001 1002 1003 1004
    { aom_idct16_c, aom_idct32_c },     // DCT_DCT
    { aom_iadst16_c, aom_idct32_c },    // ADST_DCT
    { aom_idct16_c, ihalfright32_c },   // DCT_ADST
    { aom_iadst16_c, ihalfright32_c },  // ADST_ADST
1005
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017
    { aom_iadst16_c, aom_idct32_c },    // FLIPADST_DCT
    { aom_idct16_c, ihalfright32_c },   // DCT_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // ADST_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx32_c },           // IDTX
    { aom_idct16_c, iidtx32_c },        // V_DCT
    { iidtx16_c, aom_idct32_c },        // H_DCT
    { aom_iadst16_c, iidtx32_c },       // V_ADST
    { iidtx16_c, ihalfright32_c },      // H_ADST
    { aom_iadst16_c, iidtx32_c },       // V_FLIPADST
    { iidtx16_c, ihalfright32_c },      // H_FLIPADST