/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <math.h>

Yaowu Xu's avatar
Yaowu Xu committed
14
#include "./aom_dsp_rtcd.h"
Geza Lore's avatar
Geza Lore committed
15 16 17 18
#include "./av1_rtcd.h"
#include "aom_dsp/inv_txfm.h"
#include "aom_ports/mem.h"
#include "av1/common/av1_inv_txfm2d_cfg.h"
19 20 21
#include "av1/common/blockd.h"
#include "av1/common/enums.h"
#include "av1/common/idct.h"
Jingning Han's avatar
Jingning Han committed
22

23 24
int get_tx_scale(const MACROBLOCKD *const xd, const TX_TYPE tx_type,
                 const TX_SIZE tx_size) {
clang-format's avatar
clang-format committed
25
  (void)tx_type;
Yaowu Xu's avatar
Yaowu Xu committed
26
#if CONFIG_AOM_HIGHBITDEPTH
27
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
28
    return txsize_sqr_up_map[tx_size] == TX_32X32;
29 30 31 32
  }
#else
  (void)xd;
#endif
33
  return txsize_sqr_up_map[tx_size] == TX_32X32;
34 35
}

36 37 38
// NOTE: All inverse transform implementations must be aware that input and
// output could be the same buffer.

39
#if CONFIG_EXT_TX
Debargha Mukherjee's avatar
Debargha Mukherjee committed
40 41 42 43 44 45 46 47
// 4-point identity-type inverse kernel: every coefficient is multiplied by
// Sqrt2 and passed through dct_const_round_shift(). Each output element
// depends only on the input element at the same index.
static void iidtx4_c(const tran_low_t *input, tran_low_t *output) {
  int k = 0;
  while (k < 4) {
    output[k] = (tran_low_t)dct_const_round_shift(input[k] * Sqrt2);
    ++k;
  }
}

// 8-point identity-type inverse kernel: every coefficient is doubled.
static void iidtx8_c(const tran_low_t *input, tran_low_t *output) {
  int k;
  for (k = 0; k < 8; ++k) {
    output[k] = input[k] * 2;
  }
}

// 16-point identity-type inverse kernel: every coefficient is multiplied by
// 2 * Sqrt2 and passed through dct_const_round_shift().
static void iidtx16_c(const tran_low_t *input, tran_low_t *output) {
  int k = 0;
  while (k < 16) {
    output[k] = (tran_low_t)dct_const_round_shift(input[k] * 2 * Sqrt2);
    ++k;
  }
}

// 32-point identity-type inverse kernel: every coefficient is multiplied by 4.
static void iidtx32_c(const tran_low_t *input, tran_low_t *output) {
  int k;
  for (k = 0; k < 32; ++k) {
    output[k] = input[k] * 4;
  }
}
61
#endif  // CONFIG_EXT_TX
Debargha Mukherjee's avatar
Debargha Mukherjee committed
62

63
// For use in lieu of ADST
64 65 66 67 68 69 70
// 32-point kernel used where an ADST would otherwise appear.
//   output[0..15]  = input[16..31] * 4
//   output[16..31] = aom_idct16_c() of input[0..15] pre-scaled by Sqrt2
// The first half of input is copied (scaled) into a local buffer before any
// write to output, so the statement order below must be kept when input and
// output refer to the same buffer.
static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
  tran_low_t scaled_lo[16];
  int k;

  // Scale the low half by sqrt(2) into scratch storage.
  for (k = 0; k < 16; ++k) {
    scaled_lo[k] = (tran_low_t)dct_const_round_shift(input[k] * Sqrt2);
  }

  // The high half of the input becomes the low half of the output, times 4.
  for (k = 0; k < 16; ++k) {
    output[k] = input[k + 16] * 4;
  }

  // Overall scaling factor is 4 times orthogonal.
  aom_idct16_c(scaled_lo, &output[16]);
}

Yaowu Xu's avatar
Yaowu Xu committed
78
#if CONFIG_AOM_HIGHBITDEPTH
79
#if CONFIG_EXT_TX
Debargha Mukherjee's avatar
Debargha Mukherjee committed
80 81 82 83
// High-bitdepth 4-point identity-type inverse kernel: each coefficient is
// multiplied by Sqrt2, rounded with highbd_dct_const_round_shift() and
// wrapped with HIGHBD_WRAPLOW() for bit depth bd.
static void highbd_iidtx4_c(const tran_low_t *input, tran_low_t *output,
                            int bd) {
  int k = 0;
  while (k < 4) {
    output[k] =
        HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[k] * Sqrt2), bd);
    ++k;
  }
}

// High-bitdepth 8-point identity-type inverse kernel: every coefficient is
// doubled. bd is accepted for signature compatibility and is not used.
static void highbd_iidtx8_c(const tran_low_t *input, tran_low_t *output,
                            int bd) {
  int k;
  (void)bd;
  for (k = 0; k < 8; ++k) {
    output[k] = input[k] * 2;
  }
}

// High-bitdepth 16-point identity-type inverse kernel: each coefficient is
// multiplied by 2 * Sqrt2, rounded with highbd_dct_const_round_shift() and
// wrapped with HIGHBD_WRAPLOW() for bit depth bd.
static void highbd_iidtx16_c(const tran_low_t *input, tran_low_t *output,
                             int bd) {
  int k = 0;
  while (k < 16) {
    output[k] =
        HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[k] * 2 * Sqrt2), bd);
    ++k;
  }
}

// High-bitdepth 32-point identity-type inverse kernel: every coefficient is
// multiplied by 4. bd is accepted for signature compatibility and is not used.
static void highbd_iidtx32_c(const tran_low_t *input, tran_low_t *output,
                             int bd) {
  int k;
  (void)bd;
  for (k = 0; k < 32; ++k) {
    output[k] = input[k] * 4;
  }
}

110 111 112 113 114 115
// High-bitdepth counterpart of the 32-point "half right" kernel.
//   output[0..15]  = input[16..31] * 4
//   output[16..31] = aom_highbd_idct16_c() of input[0..15] pre-scaled by Sqrt2
// The low half of input is copied (scaled) into a local buffer before any
// write to output, so the statement order below must be kept when input and
// output refer to the same buffer.
static void highbd_ihalfright32_c(const tran_low_t *input, tran_low_t *output,
                                  int bd) {
  tran_low_t scaled_lo[16];
  int k;

  // Scale the low half by sqrt(2) into scratch storage.
  for (k = 0; k < 16; ++k) {
    scaled_lo[k] =
        HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[k] * Sqrt2), bd);
  }

  // The high half of the input becomes the low half of the output, times 4.
  for (k = 0; k < 16; ++k) {
    output[k] = input[k + 16] * 4;
  }

  // Overall scaling factor is 4 times orthogonal.
  aom_highbd_idct16_c(scaled_lo, &output[16], bd);
}
125
#endif  // CONFIG_EXT_TX
Yaowu Xu's avatar
Yaowu Xu committed
126
#endif  // CONFIG_AOM_HIGHBITDEPTH
127

Jingning Han's avatar
Jingning Han committed
128
// Inverse identity transform and add.
129
#if CONFIG_EXT_TX
130
static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
Jingning Han's avatar
Jingning Han committed
131
                           int bs, int tx_type) {
132 133
  int r, c;
  const int shift = bs < 32 ? 3 : 2;
Debargha Mukherjee's avatar
Debargha Mukherjee committed
134
  if (tx_type == IDTX) {
Jingning Han's avatar
Jingning Han committed
135 136
    for (r = 0; r < bs; ++r) {
      for (c = 0; c < bs; ++c)
Debargha Mukherjee's avatar
Debargha Mukherjee committed
137 138 139
        dest[c] = clip_pixel_add(dest[c], input[c] >> shift);
      dest += stride;
      input += bs;
Jingning Han's avatar
Jingning Han committed
140
    }
141 142
  }
}
143
#endif  // CONFIG_EXT_TX
144

clang-format's avatar
clang-format committed
145 146 147 148 149
// Re-targets (dest, stride) so that walking `size` rows visits them in the
// opposite order: dest is moved forward by (size - 1) strides and the sign of
// stride is inverted. Both dest and stride must be modifiable lvalues.
#define FLIPUD_PTR(dest, stride, size)   \
  do {                                   \
    (dest) += ((size)-1) * (stride);     \
    (stride) = -(stride);                \
  } while (0)
150

151
#if CONFIG_EXT_TX
clang-format's avatar
clang-format committed
152 153 154
// Applies the flips implied by tx_type to the (pointer, stride) pairs of the
// destination and of the transform output.
//
// The transpose of src is what gets added to dst. To flip the addends
// left/right (in dst coordinates) the src rows are reversed; to flip them
// up/down the dst rows are reversed. sizey is the row count used for the dst
// flip and sizex the row count used for the src flip. Unknown tx_type values
// hit assert(0).
static void maybe_flip_strides(uint8_t **dst, int *dstride, tran_low_t **src,
                               int *sstride, int tx_type, int sizey,
                               int sizex) {
  int flip_ud = 0;
  int flip_lr = 0;

  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case IDTX:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST: break;
    case FLIPADST_DCT:
    case FLIPADST_ADST:
    case V_FLIPADST: flip_ud = 1; break;
    case DCT_FLIPADST:
    case ADST_FLIPADST:
    case H_FLIPADST: flip_lr = 1; break;
    case FLIPADST_FLIPADST:
      flip_ud = 1;
      flip_lr = 1;
      break;
    default: assert(0); break;
  }

  if (flip_ud) {
    FLIPUD_PTR(*dst, *dstride, sizey);
  }
  if (flip_lr) {
    FLIPUD_PTR(*src, *sstride, sizex);
  }
}
189
#endif  // CONFIG_EXT_TX
190

Yaowu Xu's avatar
Yaowu Xu committed
191
#if CONFIG_AOM_HIGHBITDEPTH
192
#if CONFIG_EXT_TX
193
static void highbd_inv_idtx_add_c(const tran_low_t *input, uint8_t *dest8,
194
                                  int stride, int bs, int tx_type, int bd) {
195 196 197
  int r, c;
  const int shift = bs < 32 ? 3 : 2;
  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
198

Debargha Mukherjee's avatar
Debargha Mukherjee committed
199
  if (tx_type == IDTX) {
200 201
    for (r = 0; r < bs; ++r) {
      for (c = 0; c < bs; ++c)
Debargha Mukherjee's avatar
Debargha Mukherjee committed
202 203 204
        dest[c] = highbd_clip_pixel_add(dest[c], input[c] >> shift, bd);
      dest += stride;
      input += bs;
205
    }
206 207
  }
}
208

clang-format's avatar
clang-format committed
209 210 211
// 16-bit destination version of the stride flipper: applies the flips implied
// by tx_type to the (pointer, stride) pairs of dst and src.
//
// The transpose of src is what gets added to dst. To flip the addends
// left/right (in dst coordinates) the src rows are reversed; to flip them
// up/down the dst rows are reversed. sizey is the row count used for the dst
// flip and sizex the row count used for the src flip. Unknown tx_type values
// hit assert(0).
static void maybe_flip_strides16(uint16_t **dst, int *dstride, tran_low_t **src,
                                 int *sstride, int tx_type, int sizey,
                                 int sizex) {
  int flip_ud = 0;
  int flip_lr = 0;

  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case IDTX:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST: break;
    case FLIPADST_DCT:
    case FLIPADST_ADST:
    case V_FLIPADST: flip_ud = 1; break;
    case DCT_FLIPADST:
    case ADST_FLIPADST:
    case H_FLIPADST: flip_lr = 1; break;
    case FLIPADST_FLIPADST:
      flip_ud = 1;
      flip_lr = 1;
      break;
    default: assert(0); break;
  }

  if (flip_ud) {
    FLIPUD_PTR(*dst, *dstride, sizey);
  }
  if (flip_lr) {
    FLIPUD_PTR(*src, *sstride, sizex);
  }
}
246
#endif  // CONFIG_EXT_TX
247
#endif  // CONFIG_AOM_HIGHBITDEPTH
248

Yaowu Xu's avatar
Yaowu Xu committed
249 250
void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         int tx_type) {
251
  static const transform_2d IHT_4[] = {
Luca Barbato's avatar
Luca Barbato committed
252 253 254 255
    { aom_idct4_c, aom_idct4_c },    // DCT_DCT  = 0
    { aom_iadst4_c, aom_idct4_c },   // ADST_DCT = 1
    { aom_idct4_c, aom_iadst4_c },   // DCT_ADST = 2
    { aom_iadst4_c, aom_iadst4_c },  // ADST_ADST = 3
256
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
257 258 259 260 261 262 263 264 265 266 267 268 269
    { aom_iadst4_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx4_c },          // IDTX
    { aom_idct4_c, iidtx4_c },       // V_DCT
    { iidtx4_c, aom_idct4_c },       // H_DCT
    { aom_iadst4_c, iidtx4_c },      // V_ADST
    { iidtx4_c, aom_iadst4_c },      // H_ADST
    { aom_iadst4_c, iidtx4_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst4_c },      // H_FLIPADST
#endif                               // CONFIG_EXT_TX
270 271 272
  };

  int i, j;
273 274 275 276
  tran_low_t tmp;
  tran_low_t out[4][4];
  tran_low_t *outp = &out[0][0];
  int outstride = 4;
277 278 279

  // inverse transform row vectors
  for (i = 0; i < 4; ++i) {
280
    IHT_4[tx_type].rows(input, out[i]);
clang-format's avatar
clang-format committed
281
    input += 4;
282 283 284
  }

  // transpose
clang-format's avatar
clang-format committed
285
  for (i = 1; i < 4; i++) {
286
    for (j = 0; j < i; j++) {
clang-format's avatar
clang-format committed
287
      tmp = out[i][j];
288 289 290
      out[i][j] = out[j][i];
      out[j][i] = tmp;
    }
291 292 293 294
  }

  // inverse transform column vectors
  for (i = 0; i < 4; ++i) {
295 296 297 298
    IHT_4[tx_type].cols(out[i], out[i]);
  }

#if CONFIG_EXT_TX
299
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4, 4);
300 301 302 303
#endif

  // Sum with the destination
  for (i = 0; i < 4; ++i) {
304
    for (j = 0; j < 4; ++j) {
305 306 307
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
308 309 310 311
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
312 313
void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         int tx_type) {
314
  static const transform_2d IHT_4x8[] = {
Luca Barbato's avatar
Luca Barbato committed
315 316 317 318
    { aom_idct8_c, aom_idct4_c },    // DCT_DCT
    { aom_iadst8_c, aom_idct4_c },   // ADST_DCT
    { aom_idct8_c, aom_iadst4_c },   // DCT_ADST
    { aom_iadst8_c, aom_iadst4_c },  // ADST_ADST
319
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
320 321 322 323 324 325 326 327 328 329 330 331
    { aom_iadst8_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx4_c },          // IDTX
    { aom_idct8_c, iidtx4_c },       // V_DCT
    { iidtx8_c, aom_idct4_c },       // H_DCT
    { aom_iadst8_c, iidtx4_c },      // V_ADST
    { iidtx8_c, aom_iadst4_c },      // H_ADST
    { aom_iadst8_c, iidtx4_c },      // V_FLIPADST
    { iidtx8_c, aom_iadst4_c },      // H_FLIPADST
332
#endif
333 334
  };

335 336
  const int n = 4;
  const int n2 = 8;
337 338 339
  int i, j;
  tran_low_t out[4][8], outtmp[4];
  tran_low_t *outp = &out[0][0];
340
  int outstride = n2;
341 342

  // inverse transform row vectors and transpose
343
  for (i = 0; i < n2; ++i) {
344
    IHT_4x8[tx_type].rows(input, outtmp);
345
    for (j = 0; j < n; ++j)
346
      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
347
    input += n;
348 349 350
  }

  // inverse transform column vectors
351
  for (i = 0; i < n; ++i) {
352 353 354
    IHT_4x8[tx_type].cols(out[i], out[i]);
  }

355
#if CONFIG_EXT_TX
356
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
357
#endif
358 359

  // Sum with the destination
360 361
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
362 363 364 365 366 367 368
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
369 370
void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         int tx_type) {
371
  static const transform_2d IHT_8x4[] = {
Luca Barbato's avatar
Luca Barbato committed
372 373 374 375
    { aom_idct4_c, aom_idct8_c },    // DCT_DCT
    { aom_iadst4_c, aom_idct8_c },   // ADST_DCT
    { aom_idct4_c, aom_iadst8_c },   // DCT_ADST
    { aom_iadst4_c, aom_iadst8_c },  // ADST_ADST
376
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
377 378 379 380 381 382 383 384 385 386 387 388
    { aom_iadst4_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx8_c },          // IDTX
    { aom_idct4_c, iidtx8_c },       // V_DCT
    { iidtx4_c, aom_idct8_c },       // H_DCT
    { aom_iadst4_c, iidtx8_c },      // V_ADST
    { iidtx4_c, aom_iadst8_c },      // H_ADST
    { aom_iadst4_c, iidtx8_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst8_c },      // H_FLIPADST
389
#endif
390
  };
391 392
  const int n = 4;
  const int n2 = 8;
393 394 395 396

  int i, j;
  tran_low_t out[8][4], outtmp[8];
  tran_low_t *outp = &out[0][0];
397
  int outstride = n;
398 399

  // inverse transform row vectors and transpose
400
  for (i = 0; i < n; ++i) {
401
    IHT_8x4[tx_type].rows(input, outtmp);
402
    for (j = 0; j < n2; ++j)
403
      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
404
    input += n2;
405 406 407
  }

  // inverse transform column vectors
408
  for (i = 0; i < n2; ++i) {
409 410 411
    IHT_8x4[tx_type].cols(out[i], out[i]);
  }

412
#if CONFIG_EXT_TX
413
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
414
#endif
415 416

  // Sum with the destination
417 418
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
419 420 421 422 423 424 425
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
426 427
void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                           int tx_type) {
428
  static const transform_2d IHT_8x16[] = {
Luca Barbato's avatar
Luca Barbato committed
429 430 431 432
    { aom_idct16_c, aom_idct8_c },    // DCT_DCT
    { aom_iadst16_c, aom_idct8_c },   // ADST_DCT
    { aom_idct16_c, aom_iadst8_c },   // DCT_ADST
    { aom_iadst16_c, aom_iadst8_c },  // ADST_ADST
433
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
434 435 436 437 438 439 440 441 442 443 444 445
    { aom_iadst16_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct16_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx8_c },          // IDTX
    { aom_idct16_c, iidtx8_c },       // V_DCT
    { iidtx16_c, aom_idct8_c },       // H_DCT
    { aom_iadst16_c, iidtx8_c },      // V_ADST
    { iidtx16_c, aom_iadst8_c },      // H_ADST
    { aom_iadst16_c, iidtx8_c },      // V_FLIPADST
    { iidtx16_c, aom_iadst8_c },      // H_FLIPADST
446
#endif
447 448 449 450 451 452 453 454 455 456 457 458 459 460
  };

  const int n = 8;
  const int n2 = 16;
  int i, j;
  tran_low_t out[8][16], outtmp[8];
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
    IHT_8x16[tx_type].rows(input, outtmp);
    for (j = 0; j < n; ++j)
      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
461
    input += n;
462 463 464 465 466 467 468
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
    IHT_8x16[tx_type].cols(out[i], out[i]);
  }

469
#if CONFIG_EXT_TX
470
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
471
#endif
472 473 474 475 476 477 478 479 480 481 482

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
483 484
void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                           int tx_type) {
485
  static const transform_2d IHT_16x8[] = {
Luca Barbato's avatar
Luca Barbato committed
486 487 488 489
    { aom_idct8_c, aom_idct16_c },    // DCT_DCT
    { aom_iadst8_c, aom_idct16_c },   // ADST_DCT
    { aom_idct8_c, aom_iadst16_c },   // DCT_ADST
    { aom_iadst8_c, aom_iadst16_c },  // ADST_ADST
490
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
491 492 493 494 495 496 497 498 499 500 501 502
    { aom_iadst8_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst16_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // ADST_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx16_c },          // IDTX
    { aom_idct8_c, iidtx16_c },       // V_DCT
    { iidtx8_c, aom_idct16_c },       // H_DCT
    { aom_iadst8_c, iidtx16_c },      // V_ADST
    { iidtx8_c, aom_iadst16_c },      // H_ADST
    { aom_iadst8_c, iidtx16_c },      // V_FLIPADST
    { iidtx8_c, aom_iadst16_c },      // H_FLIPADST
503
#endif
504 505 506 507 508 509 510 511 512 513 514 515 516 517
  };
  const int n = 8;
  const int n2 = 16;

  int i, j;
  tran_low_t out[16][8], outtmp[16];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_16x8[tx_type].rows(input, outtmp);
    for (j = 0; j < n2; ++j)
      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
518
    input += n2;
519 520 521 522 523 524 525
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) {
    IHT_16x8[tx_type].cols(out[i], out[i]);
  }

526
#if CONFIG_EXT_TX
527
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
528
#endif
529 530 531 532 533 534 535 536 537 538 539

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
540 541
void av1_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                            int tx_type) {
542
  static const transform_2d IHT_16x32[] = {
Luca Barbato's avatar
Luca Barbato committed
543 544 545 546
    { aom_idct32_c, aom_idct16_c },     // DCT_DCT
    { ihalfright32_c, aom_idct16_c },   // ADST_DCT
    { aom_idct32_c, aom_iadst16_c },    // DCT_ADST
    { ihalfright32_c, aom_iadst16_c },  // ADST_ADST
547
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
548 549 550 551 552 553 554 555 556 557 558 559
    { ihalfright32_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct32_c, aom_iadst16_c },    // DCT_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // ADST_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx16_c },           // IDTX
    { aom_idct32_c, iidtx16_c },        // V_DCT
    { iidtx32_c, aom_idct16_c },        // H_DCT
    { ihalfright32_c, iidtx16_c },      // V_ADST
    { iidtx32_c, aom_iadst16_c },       // H_ADST
    { ihalfright32_c, iidtx16_c },      // V_FLIPADST
    { iidtx32_c, aom_iadst16_c },       // H_FLIPADST
560
#endif
561 562 563 564 565 566 567 568 569 570 571 572 573 574
  };

  const int n = 16;
  const int n2 = 32;
  int i, j;
  tran_low_t out[16][32], outtmp[16];
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
    IHT_16x32[tx_type].rows(input, outtmp);
    for (j = 0; j < n; ++j)
      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
575
    input += n;
576 577 578 579 580 581 582
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
    IHT_16x32[tx_type].cols(out[i], out[i]);
  }

583
#if CONFIG_EXT_TX
584
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
585
#endif
586 587 588 589 590 591 592 593 594 595 596

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
597 598
void av1_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                            int tx_type) {
599
  static const transform_2d IHT_32x16[] = {
Luca Barbato's avatar
Luca Barbato committed
600 601 602 603
    { aom_idct16_c, aom_idct32_c },     // DCT_DCT
    { aom_iadst16_c, aom_idct32_c },    // ADST_DCT
    { aom_idct16_c, ihalfright32_c },   // DCT_ADST
    { aom_iadst16_c, ihalfright32_c },  // ADST_ADST
604
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
605 606 607 608 609 610 611 612 613 614 615 616
    { aom_iadst16_c, aom_idct32_c },    // FLIPADST_DCT
    { aom_idct16_c, ihalfright32_c },   // DCT_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // ADST_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx32_c },           // IDTX
    { aom_idct16_c, iidtx32_c },        // V_DCT
    { iidtx16_c, aom_idct32_c },        // H_DCT
    { aom_iadst16_c, iidtx32_c },       // V_ADST
    { iidtx16_c, ihalfright32_c },      // H_ADST
    { aom_iadst16_c, iidtx32_c },       // V_FLIPADST
    { iidtx16_c, ihalfright32_c },      // H_FLIPADST
617
#endif
618 619 620 621 622 623 624 625 626 627 628 629 630 631
  };
  const int n = 16;
  const int n2 = 32;

  int i, j;
  tran_low_t out[32][16], outtmp[32];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_32x16[tx_type].rows(input, outtmp);
    for (j = 0; j < n2; ++j)
      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
632
    input += n2;
633 634 635 636 637 638 639
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) {
    IHT_32x16[tx_type].cols(out[i], out[i]);
  }

640
#if CONFIG_EXT_TX
641
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
642
#endif
643 644 645 646 647 648 649 650 651 652 653

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
654 655
void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         int tx_type) {
656
  static const transform_2d IHT_8[] = {
Luca Barbato's avatar
Luca Barbato committed
657 658 659 660
    { aom_idct8_c, aom_idct8_c },    // DCT_DCT  = 0
    { aom_iadst8_c, aom_idct8_c },   // ADST_DCT = 1
    { aom_idct8_c, aom_iadst8_c },   // DCT_ADST = 2
    { aom_iadst8_c, aom_iadst8_c },  // ADST_ADST = 3
661
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
662 663 664 665 666 667 668 669 670 671 672 673 674
    { aom_iadst8_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst8_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx8_c },          // IDTX
    { aom_idct8_c, iidtx8_c },       // V_DCT
    { iidtx8_c, aom_idct8_c },       // H_DCT
    { aom_iadst8_c, iidtx8_c },      // V_ADST
    { iidtx8_c, aom_iadst8_c },      // H_ADST
    { aom_iadst8_c, iidtx8_c },      // V_FLIPADST
    { iidtx8_c, aom_iadst8_c },      // H_FLIPADST
#endif                               // CONFIG_EXT_TX
675 676
  };

Jingning Han's avatar
Jingning Han committed
677
  int i, j;
678 679 680 681
  tran_low_t tmp;
  tran_low_t out[8][8];
  tran_low_t *outp = &out[0][0];
  int outstride = 8;
Jingning Han's avatar
Jingning Han committed
682 683 684

  // inverse transform row vectors
  for (i = 0; i < 8; ++i) {
685
    IHT_8[tx_type].rows(input, out[i]);
clang-format's avatar
clang-format committed
686
    input += 8;
687 688 689
  }

  // transpose
clang-format's avatar
clang-format committed
690
  for (i = 1; i < 8; i++) {
691
    for (j = 0; j < i; j++) {
clang-format's avatar
clang-format committed
692
      tmp = out[i][j];
693 694 695
      out[i][j] = out[j][i];
      out[j][i] = tmp;
    }
Jingning Han's avatar
Jingning Han committed
696 697 698 699
  }

  // inverse transform column vectors
  for (i = 0; i < 8; ++i) {
700 701 702 703
    IHT_8[tx_type].cols(out[i], out[i]);
  }

#if CONFIG_EXT_TX
704
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 8, 8);
705 706 707 708
#endif

  // Sum with the destination
  for (i = 0; i < 8; ++i) {
Jingning Han's avatar
Jingning Han committed
709
    for (j = 0; j < 8; ++j) {
710 711 712
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
Jingning Han's avatar
Jingning Han committed
713 714 715 716
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
717 718
void av1_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                            int tx_type) {
719
  static const transform_2d IHT_16[] = {
Luca Barbato's avatar
Luca Barbato committed
720 721 722 723
    { aom_idct16_c, aom_idct16_c },    // DCT_DCT  = 0
    { aom_iadst16_c, aom_idct16_c },   // ADST_DCT = 1
    { aom_idct16_c, aom_iadst16_c },   // DCT_ADST = 2
    { aom_iadst16_c, aom_iadst16_c },  // ADST_ADST = 3
724
#if CONFIG_EXT_TX
Luca Barbato's avatar
Luca Barbato committed
725 726 727 728 729 730 731 732 733 734 735 736 737
    { aom_iadst16_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct16_c, aom_iadst16_c },   // DCT_FLIPADST
    { aom_iadst16_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, aom_iadst16_c },  // ADST_FLIPADST
    { aom_iadst16_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx16_c },          // IDTX
    { aom_idct16_c, iidtx16_c },       // V_DCT
    { iidtx16_c, aom_idct16_c },       // H_DCT
    { aom_iadst16_c, iidtx16_c },      // V_ADST
    { iidtx16_c, aom_iadst16_c },      // H_ADST
    { aom_iadst16_c, iidtx16_c },      // V_FLIPADST
    { iidtx16_c, aom_iadst16_c },      // H_FLIPADST
#endif                                 // CONFIG_EXT_TX
738
  };
739

Jingning Han's avatar
Jingning Han committed
740
  int i, j;
741 742 743 744 745 746 747 748
  tran_low_t tmp;
  tran_low_t out[16][16];
  tran_low_t *outp = &out[0][0];
  int outstride = 16;

  // inverse transform row vectors
  for (i = 0; i < 16; ++i) {
    IHT_16[tx_type].rows(input, out[i]);
clang-format's avatar
clang-format committed
749
    input += 16;
750 751 752
  }

  // transpose
clang-format's avatar
clang-format committed
753
  for (i = 1; i < 16; i++) {
754
    for (j = 0; j < i; j++) {
clang-format's avatar
clang-format committed
755
      tmp = out[i][j];
756 757 758 759
      out[i][j] = out[j][i];
      out[j][i] = tmp;
    }
  }
Jingning Han's avatar
Jingning Han committed
760

761
  // inverse transform column vectors
Jingning Han's avatar
Jingning Han committed
762
  for (i = 0; i < 16; ++i) {
763
    IHT_16[tx_type].cols(out[i], out[i]);
Jingning Han's avatar
Jingning Han committed
764 765
  }

766
#if CONFIG_EXT_TX
767
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 16, 16);
768 769 770
#endif

  // Sum with the destination
Jingning Han's avatar
Jingning Han committed
771 772
  for (i = 0; i < 16; ++i) {
    for (j = 0; j < 16; ++j) {
773 774 775
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
Jingning Han's avatar
Jingning Han committed
776 777 778 779
    }
  }
}

780
#if CONFIG_EXT_TX
Yaowu Xu's avatar
Yaowu Xu committed
781 782
void av1_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                             int tx_type) {
783
  static const transform_2d IHT_32[] = {
Luca Barbato's avatar
Luca Barbato committed
784 785 786
    { aom_idct32_c, aom_idct32_c },      // DCT_DCT
    { ihalfright32_c, aom_idct32_c },    // ADST_DCT
    { aom_idct32_c, ihalfright32_c },    // DCT_ADST
clang-format's avatar
clang-format committed
787
    { ihalfright32_c, ihalfright32_c },  // ADST_ADST
Luca Barbato's avatar
Luca Barbato committed
788 789
    { ihalfright32_c, aom_idct32_c },    // FLIPADST_DCT
    { aom_idct32_c, ihalfright32_c },    // DCT_FLIPADST
clang-format's avatar
clang-format committed
790 791 792 793
    { ihalfright32_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, ihalfright32_c },  // ADST_FLIPADST
    { ihalfright32_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx32_c },            // IDTX
Luca Barbato's avatar
Luca Barbato committed
794 795
    { aom_idct32_c, iidtx32_c },         // V_DCT
    { iidtx32_c, aom_idct32_c },         // H_DCT
clang-format's avatar
clang-format committed
796 797 798 799
    { ihalfright32_c, iidtx16_c },       // V_ADST
    { iidtx16_c, ihalfright32_c },       // H_ADST
    { ihalfright32_c, iidtx16_c },       // V_FLIPADST
    { iidtx16_c, ihalfright32_c },       // H_FLIPADST
800 801 802 803 804 805 806 807 808 809 810
  };

  int i, j;
  tran_low_t tmp;
  tran_low_t out[32][32];
  tran_low_t *outp = &out[0][0];
  int outstride = 32;

  // inverse transform row vectors
  for (i = 0; i < 32; ++i) {
    IHT_32[tx_type].rows(input, out[i]);
clang-format's avatar
clang-format committed
811
    input += 32;
812 813 814
  }

  // transpose
clang-format's avatar
clang-format committed
815
  for (i = 1; i < 32; i++) {
816
    for (j = 0; j < i; j++) {
clang-format's avatar
clang-format committed
817
      tmp = out[i][j];
818 819 820 821 822 823 824 825 826 827
      out[i][j] = out[j][i];
      out[j][i] = tmp;
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 32; ++i) {
    IHT_32[tx_type].cols(out[i], out[i]);
  }

828
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 32, 32);
829 830 831 832 833 834 835 836 837 838 839 840

  // Sum with the destination
  for (i = 0; i < 32; ++i) {
    for (j = 0; j < 32; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}
#endif  // CONFIG_EXT_TX

Jingning Han's avatar
Jingning Han committed
841
// idct
Yaowu Xu's avatar
Yaowu Xu committed
842 843
/*
 * 4x4 inverse DCT + add dispatcher.
 * Calls aom_idct4x4_1_add() when eob is at most 1 and aom_idct4x4_16_add()
 * otherwise; input, dest and stride are forwarded unchanged.
 */
void av1_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                     int eob) {
  if (eob <= 1) {
    aom_idct4x4_1_add(input, dest, stride);
    return;
  }
  aom_idct4x4_16_add(input, dest, stride);
}

Yaowu Xu's avatar
Yaowu Xu committed
850 851
/*
 * 4x4 inverse WHT + add dispatcher.
 * Calls aom_iwht4x4_1_add() when eob is at most 1 and aom_iwht4x4_16_add()
 * otherwise; input, dest and stride are forwarded unchanged.
 */
void av1_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                     int eob) {
  if (eob <= 1) {
    aom_iwht4x4_1_add(input, dest, stride);
    return;
  }
  aom_iwht4x4_16_add(input, dest, stride);
}

Yaowu Xu's avatar
Yaowu Xu committed
858 859
/*
 * 8x8 inverse DCT + add dispatcher, keyed on eob.
 *
 * When dc is 1, input[0] is already the reconstructed value and needs no
 * dequantization; dc is then counted in eob, so eob >= 1.
 *
 * Fewer non-zero coefficients allow a cheaper kernel:
 *   eob == 1   -> aom_idct8x8_1_add  (DC-only)
 *   eob <= 12  -> aom_idct8x8_12_add (only when CONFIG_ADAPT_SCAN is off)
 *   otherwise  -> aom_idct8x8_64_add
 *
 * TODO(yunqingwang): the "eobs = 1" case is also handled in
 * av1_short_idct8x8_c. Combine that with the code here.
 */
void av1_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
                     int eob) {
  if (eob == 1) {
    // DC only DCT coefficient
    aom_idct8x8_1_add(input, dest, stride);
    return;
  }
#if !CONFIG_ADAPT_SCAN
  if (eob <= 12) {
    aom_idct8x8_12_add(input, dest, stride);
    return;
  }
#endif
  aom_idct8x8_64_add(input, dest, stride);
}

Yaowu Xu's avatar
Yaowu Xu committed
878 879
/*
 * 16x16 inverse DCT + add dispatcher, keyed on eob.
 * Fewer non-zero coefficients allow a cheaper kernel:
 *   eob == 1   -> aom_idct16x16_1_add   (DC-only)
 *   eob <= 10  -> aom_idct16x16_10_add  (only when CONFIG_ADAPT_SCAN is off)
 *   otherwise  -> aom_idct16x16_256_add
 */
void av1_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
                       int eob) {
  if (eob == 1) {
    /* DC only DCT coefficient. */
    aom_idct16x16_1_add(input, dest, stride);
    return;
  }
#if !CONFIG_ADAPT_SCAN
  if (eob <= 10) {
    aom_idct16x16_10_add(input, dest, stride);
    return;
  }
#endif
  aom_idct16x16_256_add(input, dest, stride);
}

Yaowu Xu's avatar
Yaowu Xu committed
892 893
/*
 * 32x32 inverse DCT + add dispatcher, keyed on eob:
 *   eob == 1   -> aom_idct32x32_1_add
 *   eob <= 34  -> aom_idct32x32_34_add  (only when CONFIG_ADAPT_SCAN is off)
 *   otherwise  -> aom_idct32x32_1024_add
 */
void av1_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
                       int eob) {
  if (eob == 1) {
    aom_idct32x32_1_add(input, dest, stride);
    return;
  }
#if !CONFIG_ADAPT_SCAN
  if (eob <= 34) {
    // non-zero coeff only in upper-left 8x8
    aom_idct32x32_34_add(input, dest, stride);
    return;
  }
#endif
  aom_idct32x32_1024_add(input, dest, stride);
}