idct.c 73.2 KB
Newer Older
Jingning Han's avatar
Jingning Han committed
1 2 3 4 5 6 7 8 9 10 11 12
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <math.h>

Yaowu Xu's avatar
Yaowu Xu committed
13 14
#include "./av1_rtcd.h"
#include "./aom_dsp_rtcd.h"
15 16 17
#include "av1/common/blockd.h"
#include "av1/common/enums.h"
#include "av1/common/idct.h"
Yaowu Xu's avatar
Yaowu Xu committed
18
#include "av1/common/av1_inv_txfm2d_cfg.h"
19 20
#include "aom_dsp/inv_txfm.h"
#include "aom_ports/mem.h"
Jingning Han's avatar
Jingning Han committed
21

22 23
int get_tx_scale(const MACROBLOCKD *const xd, const TX_TYPE tx_type,
                 const TX_SIZE tx_size) {
clang-format's avatar
clang-format committed
24
  (void)tx_type;
Yaowu Xu's avatar
Yaowu Xu committed
25
#if CONFIG_AOM_HIGHBITDEPTH
26
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
27
    return txsize_sqr_up_map[tx_size] == TX_32X32;
28 29 30 31
  }
#else
  (void)xd;
#endif
32
  return txsize_sqr_up_map[tx_size] == TX_32X32;
33 34
}

35
#if CONFIG_EXT_TX
Debargha Mukherjee's avatar
Debargha Mukherjee committed
36 37 38 39 40 41 42 43
// 4-point inverse identity transform: every coefficient is multiplied by
// Sqrt2 and passed through dct_const_round_shift().
static void iidtx4_c(const tran_low_t *input, tran_low_t *output) {
  int k = 0;
  while (k < 4) {
    output[k] = (tran_low_t)dct_const_round_shift(input[k] * Sqrt2);
    ++k;
  }
}

// 8-point inverse identity transform: every coefficient is doubled.
static void iidtx8_c(const tran_low_t *input, tran_low_t *output) {
  int k = 0;
  while (k < 8) {
    output[k] = input[k] * 2;
    ++k;
  }
}

// 16-point inverse identity transform: every coefficient is multiplied by
// 2 * Sqrt2 and passed through dct_const_round_shift().
static void iidtx16_c(const tran_low_t *input, tran_low_t *output) {
  int k = 0;
  while (k < 16) {
    output[k] = (tran_low_t)dct_const_round_shift(input[k] * 2 * Sqrt2);
    ++k;
  }
}

// 32-point inverse identity transform: every coefficient is multiplied by 4.
static void iidtx32_c(const tran_low_t *input, tran_low_t *output) {
  int k = 0;
  while (k < 32) {
    output[k] = input[k] * 4;
    ++k;
  }
}

58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
// For use in lieu of DST.
//
// 32-point "half right" inverse transform:
//   output[0..15]  = input[16..31] * 4
//   output[16..31] = idct16_c() of input[0..15] pre-multiplied by Sqrt2
//                    (through dct_const_round_shift()).
//
// The Sqrt2-scaled copy of input[0..15] is taken BEFORE anything is written
// to output. The column pass of the rectangular inverse transforms calls the
// 1-D kernels in place (input == output); writing output[0..15] first would
// destroy the very coefficients the idct16 half still has to read.
static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  tran_low_t inputhalf[16];
  // Multiply input by sqrt(2)
  for (i = 0; i < 16; ++i) {
    inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
  }
  // Reads input[16..31] and writes output[0..15]: no overlap even in place.
  for (i = 0; i < 16; ++i) {
    output[i] = input[16 + i] * 4;
  }
  idct16_c(inputhalf, output + 16);
  // Note overall scaling factor is 4 times orthogonal
}

Yaowu Xu's avatar
Yaowu Xu committed
73
#if CONFIG_AOM_HIGHBITDEPTH
Debargha Mukherjee's avatar
Debargha Mukherjee committed
74 75 76 77
// High-bitdepth 4-point inverse identity transform: every coefficient is
// multiplied by Sqrt2, passed through highbd_dct_const_round_shift() and then
// through HIGHBD_WRAPLOW() for the given bit depth.
static void highbd_iidtx4_c(const tran_low_t *input, tran_low_t *output,
                            int bd) {
  int k = 0;
  while (k < 4) {
    output[k] =
        HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[k] * Sqrt2), bd);
    ++k;
  }
}

// High-bitdepth 8-point inverse identity transform: every coefficient is
// doubled. The bit depth is not consulted.
static void highbd_iidtx8_c(const tran_low_t *input, tran_low_t *output,
                            int bd) {
  int k = 0;
  (void)bd;
  while (k < 8) {
    output[k] = input[k] * 2;
    ++k;
  }
}

// High-bitdepth 16-point inverse identity transform: every coefficient is
// multiplied by 2 * Sqrt2, passed through highbd_dct_const_round_shift() and
// then through HIGHBD_WRAPLOW() for the given bit depth.
static void highbd_iidtx16_c(const tran_low_t *input, tran_low_t *output,
                             int bd) {
  int k = 0;
  while (k < 16) {
    output[k] =
        HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[k] * 2 * Sqrt2), bd);
    ++k;
  }
}

// High-bitdepth 32-point inverse identity transform: every coefficient is
// multiplied by 4. The bit depth is not consulted.
static void highbd_iidtx32_c(const tran_low_t *input, tran_low_t *output,
                             int bd) {
  int k = 0;
  (void)bd;
  while (k < 32) {
    output[k] = input[k] * 4;
    ++k;
  }
}

104 105 106 107 108 109 110 111 112
// High-bitdepth 32-point "half right" inverse transform:
//   output[0..15]  = input[16..31] * 4
//   output[16..31] = aom_highbd_idct16_c() of input[0..15] pre-multiplied by
//                    Sqrt2 (highbd_dct_const_round_shift + HIGHBD_WRAPLOW).
//
// The Sqrt2-scaled copy of input[0..15] is taken BEFORE anything is written
// to output, so the result is correct even when input and output are the
// same buffer. NOTE(review): the 8-bit sibling is invoked in place by the
// column pass of the rectangular transforms; the high-bitdepth callers are
// presumably the same -- confirm against them.
static void highbd_ihalfright32_c(const tran_low_t *input, tran_low_t *output,
                                  int bd) {
  int i;
  tran_low_t inputhalf[16];
  // Multiply input by sqrt(2)
  for (i = 0; i < 16; ++i) {
    inputhalf[i] =
        HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[i] * Sqrt2), bd);
  }
  // Reads input[16..31] and writes output[0..15]: no overlap even in place.
  for (i = 0; i < 16; ++i) {
    output[i] = input[16 + i] * 4;
  }
  aom_highbd_idct16_c(inputhalf, output + 16, bd);
  // Note overall scaling factor is 4 times orthogonal
}
Yaowu Xu's avatar
Yaowu Xu committed
119
#endif  // CONFIG_AOM_HIGHBITDEPTH
120

Jingning Han's avatar
Jingning Han committed
121
// Inverse identity transform and add.
122
static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
Jingning Han's avatar
Jingning Han committed
123
                           int bs, int tx_type) {
124 125
  int r, c;
  const int shift = bs < 32 ? 3 : 2;
Debargha Mukherjee's avatar
Debargha Mukherjee committed
126
  if (tx_type == IDTX) {
Jingning Han's avatar
Jingning Han committed
127 128
    for (r = 0; r < bs; ++r) {
      for (c = 0; c < bs; ++c)
Debargha Mukherjee's avatar
Debargha Mukherjee committed
129 130 131
        dest[c] = clip_pixel_add(dest[c], input[c] >> shift);
      dest += stride;
      input += bs;
Jingning Han's avatar
Jingning Han committed
132
    }
133 134 135
  }
}

clang-format's avatar
clang-format committed
136 137 138 139 140
// Re-points `dest` at the start of its last row (row `size - 1`, rows being
// `stride` elements apart) and negates `stride`, so that stepping forward by
// the new stride walks the rows in reverse order. Both `dest` and `stride`
// are assigned to and therefore must be modifiable lvalues.
#define FLIPUD_PTR(dest, stride, size)       \
  do {                                       \
    (dest) = (dest) + ((size)-1) * (stride); \
    (stride) = -(stride);                    \
  } while (0)
141

clang-format's avatar
clang-format committed
142 143 144
// Applies the flips implied by tx_type by rewriting the base pointers and
// strides of the destination and/or the transform output.
//
// Note that the transpose of src will be added to dst. In order to LR
// flip the addends (in dst coordinates), we UD flip the src. To UD flip
// the addends, we UD flip the dst.
//
// An unrecognised tx_type trips assert(0) and flips nothing.
static void maybe_flip_strides(uint8_t **dst, int *dstride, tran_low_t **src,
                               int *sstride, int tx_type, int sizey,
                               int sizex) {
  int flip_ud = 0;  // UD flip of the addends: flip dst
  int flip_lr = 0;  // LR flip of the addends: flip src

  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case IDTX:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST: break;
    case FLIPADST_DCT:
    case FLIPADST_ADST:
    case V_FLIPADST: flip_ud = 1; break;
    case DCT_FLIPADST:
    case ADST_FLIPADST:
    case H_FLIPADST: flip_lr = 1; break;
    case FLIPADST_FLIPADST:
      flip_ud = 1;
      flip_lr = 1;
      break;
    default: assert(0); break;
  }

  if (flip_ud) {
    FLIPUD_PTR(*dst, *dstride, sizey);
  }
  if (flip_lr) {
    FLIPUD_PTR(*src, *sstride, sizex);
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
180
#if CONFIG_AOM_HIGHBITDEPTH
181
// High-bitdepth 4-point inverse DST, computed as a two-stage butterfly.
// All four inputs are read before any output is written.
void highbd_idst4_c(const tran_low_t *input, tran_low_t *output, int bd) {
  tran_low_t mid[4];
  tran_high_t prod0, prod1;
  (void)bd;

  // stage 1: rotate the (3,1) pair and the (2,0) pair
  prod0 = (input[3] + input[1]) * cospi_16_64;
  prod1 = (input[3] - input[1]) * cospi_16_64;
  mid[0] = HIGHBD_WRAPLOW(dct_const_round_shift(prod0), bd);
  mid[1] = HIGHBD_WRAPLOW(dct_const_round_shift(prod1), bd);

  prod0 = input[2] * cospi_24_64 - input[0] * cospi_8_64;
  prod1 = input[2] * cospi_8_64 + input[0] * cospi_24_64;
  mid[2] = HIGHBD_WRAPLOW(dct_const_round_shift(prod0), bd);
  mid[3] = HIGHBD_WRAPLOW(dct_const_round_shift(prod1), bd);

  // stage 2: final add/subtract with alternating signs
  output[0] = HIGHBD_WRAPLOW(mid[0] + mid[3], bd);
  output[1] = HIGHBD_WRAPLOW(-mid[1] - mid[2], bd);
  output[2] = HIGHBD_WRAPLOW(mid[1] - mid[2], bd);
  output[3] = HIGHBD_WRAPLOW(mid[3] - mid[0], bd);
}

202
// High-bitdepth 8-point inverse DST, computed as a four-stage butterfly that
// alternates between two scratch arrays. All inputs are consumed in stage 1,
// before any output element is written.
void highbd_idst8_c(const tran_low_t *input, tran_low_t *output, int bd) {
  tran_low_t ping[8], pong[8];
  tran_high_t prod0, prod1;
  (void)bd;

  // stage 1
  ping[0] = input[7];
  ping[1] = input[5];
  ping[2] = input[3];
  ping[3] = input[1];

  prod0 = input[6] * cospi_28_64 - input[0] * cospi_4_64;
  prod1 = input[6] * cospi_4_64 + input[0] * cospi_28_64;
  ping[4] = HIGHBD_WRAPLOW(dct_const_round_shift(prod0), bd);
  ping[7] = HIGHBD_WRAPLOW(dct_const_round_shift(prod1), bd);

  prod0 = input[2] * cospi_12_64 - input[4] * cospi_20_64;
  prod1 = input[2] * cospi_20_64 + input[4] * cospi_12_64;
  ping[5] = HIGHBD_WRAPLOW(dct_const_round_shift(prod0), bd);
  ping[6] = HIGHBD_WRAPLOW(dct_const_round_shift(prod1), bd);

  // stage 2
  prod0 = (ping[0] + ping[2]) * cospi_16_64;
  prod1 = (ping[0] - ping[2]) * cospi_16_64;
  pong[0] = HIGHBD_WRAPLOW(dct_const_round_shift(prod0), bd);
  pong[1] = HIGHBD_WRAPLOW(dct_const_round_shift(prod1), bd);

  prod0 = ping[1] * cospi_24_64 - ping[3] * cospi_8_64;
  prod1 = ping[1] * cospi_8_64 + ping[3] * cospi_24_64;
  pong[2] = HIGHBD_WRAPLOW(dct_const_round_shift(prod0), bd);
  pong[3] = HIGHBD_WRAPLOW(dct_const_round_shift(prod1), bd);

  pong[4] = HIGHBD_WRAPLOW(ping[4] + ping[5], bd);
  pong[5] = HIGHBD_WRAPLOW(ping[4] - ping[5], bd);
  pong[6] = HIGHBD_WRAPLOW(-ping[6] + ping[7], bd);
  pong[7] = HIGHBD_WRAPLOW(ping[6] + ping[7], bd);

  // stage 3
  ping[0] = HIGHBD_WRAPLOW(pong[0] + pong[3], bd);
  ping[1] = HIGHBD_WRAPLOW(pong[1] + pong[2], bd);
  ping[2] = HIGHBD_WRAPLOW(pong[1] - pong[2], bd);
  ping[3] = HIGHBD_WRAPLOW(pong[0] - pong[3], bd);

  ping[4] = pong[4];
  prod0 = (pong[6] - pong[5]) * cospi_16_64;
  prod1 = (pong[5] + pong[6]) * cospi_16_64;
  ping[5] = HIGHBD_WRAPLOW(dct_const_round_shift(prod0), bd);
  ping[6] = HIGHBD_WRAPLOW(dct_const_round_shift(prod1), bd);
  ping[7] = pong[7];

  // stage 4
  output[0] = HIGHBD_WRAPLOW(ping[0] + ping[7], bd);
  output[1] = HIGHBD_WRAPLOW(-ping[1] - ping[6], bd);
  output[2] = HIGHBD_WRAPLOW(ping[2] + ping[5], bd);
  output[3] = HIGHBD_WRAPLOW(-ping[3] - ping[4], bd);
  output[4] = HIGHBD_WRAPLOW(ping[3] - ping[4], bd);
  output[5] = HIGHBD_WRAPLOW(-ping[2] + ping[5], bd);
  output[6] = HIGHBD_WRAPLOW(ping[1] - ping[6], bd);
  output[7] = HIGHBD_WRAPLOW(-ping[0] + ping[7], bd);
}

// High-bitdepth 16-point inverse DST, computed as a seven-stage butterfly
// that alternates between two scratch arrays. Stage 1 copies every input
// element (in permuted order) into scratch, so nothing is read from `input`
// once the outputs start being written.
void highbd_idst16_c(const tran_low_t *input, tran_low_t *output, int bd) {
  // Stage-1 permutation: ping[k] = input[kInputOrder[k]].
  static const int kInputOrder[16] = { 15, 7, 11, 3, 13, 5, 9, 1,
                                       14, 6, 10, 2, 12, 4, 8, 0 };
  tran_low_t ping[16], pong[16];
  tran_high_t prod0, prod1;
  int k;
  (void)bd;

  // stage 1
  for (k = 0; k < 16; ++k) ping[k] = input[kInputOrder[k]];

  // stage 2
  for (k = 0; k < 8; ++k) pong[k] = ping[k];

  prod0 = ping[8] * cospi_30_64 - ping[15] * cospi_2_64;
  prod1 = ping[8] * cospi_2_64 + ping[15] * cospi_30_64;
  pong[8] = HIGHBD_WRAPLOW(dct_const_round_shift(prod0), bd);
  pong[15] = HIGHBD_WRAPLOW(dct_const_round_shift(prod1), bd);

  prod0 = ping[9] * cospi_14_64 - ping[14] * cospi_18_64;
  prod1 = ping[9] * cospi_18_64 + ping[14] * cospi_14_64;
  pong[9] = HIGHBD_WRAPLOW(dct_const_round_shift(prod0), bd);
  pong[14] = HIGHBD_WRAPLOW(dct_const_round_shift(prod1), bd);

  prod0 = ping[10] * cospi_22_64 - ping[13] * cospi_10_64;
  prod1 = ping[10] * cospi_10_64 + ping[13] * cospi_22_64;
  pong[10] = HIGHBD_WRAPLOW(dct_const_round_shift(prod0), bd);
  pong[13] = HIGHBD_WRAPLOW(dct_const_round_shift(prod1), bd);

  prod0 = ping[11] * cospi_6_64 - ping[12] * cospi_26_64;
  prod1 = ping[11] * cospi_26_64 + ping[12] * cospi_6_64;
  pong[11] = HIGHBD_WRAPLOW(dct_const_round_shift(prod0), bd);
  pong[12] = HIGHBD_WRAPLOW(dct_const_round_shift(prod1), bd);

  // stage 3
  for (k = 0; k < 4; ++k) ping[k] = pong[k];

  prod0 = pong[4] * cospi_28_64 - pong[7] * cospi_4_64;
  prod1 = pong[4] * cospi_4_64 + pong[7] * cospi_28_64;
  ping[4] = HIGHBD_WRAPLOW(dct_const_round_shift(prod0), bd);
  ping[7] = HIGHBD_WRAPLOW(dct_const_round_shift(prod1), bd);

  prod0 = pong[5] * cospi_12_64 - pong[6] * cospi_20_64;
  prod1 = pong[5] * cospi_20_64 + pong[6] * cospi_12_64;
  ping[5] = HIGHBD_WRAPLOW(dct_const_round_shift(prod0), bd);
  ping[6] = HIGHBD_WRAPLOW(dct_const_round_shift(prod1), bd);

  ping[8] = HIGHBD_WRAPLOW(pong[8] + pong[9], bd);
  ping[9] = HIGHBD_WRAPLOW(pong[8] - pong[9], bd);
  ping[10] = HIGHBD_WRAPLOW(-pong[10] + pong[11], bd);
  ping[11] = HIGHBD_WRAPLOW(pong[10] + pong[11], bd);
  ping[12] = HIGHBD_WRAPLOW(pong[12] + pong[13], bd);
  ping[13] = HIGHBD_WRAPLOW(pong[12] - pong[13], bd);
  ping[14] = HIGHBD_WRAPLOW(-pong[14] + pong[15], bd);
  ping[15] = HIGHBD_WRAPLOW(pong[14] + pong[15], bd);

  // stage 4
  prod0 = (ping[0] + ping[1]) * cospi_16_64;
  prod1 = (ping[0] - ping[1]) * cospi_16_64;
  pong[0] = HIGHBD_WRAPLOW(dct_const_round_shift(prod0), bd);
  pong[1] = HIGHBD_WRAPLOW(dct_const_round_shift(prod1), bd);

  prod0 = ping[2] * cospi_24_64 - ping[3] * cospi_8_64;
  prod1 = ping[2] * cospi_8_64 + ping[3] * cospi_24_64;
  pong[2] = HIGHBD_WRAPLOW(dct_const_round_shift(prod0), bd);
  pong[3] = HIGHBD_WRAPLOW(dct_const_round_shift(prod1), bd);

  pong[4] = HIGHBD_WRAPLOW(ping[4] + ping[5], bd);
  pong[5] = HIGHBD_WRAPLOW(ping[4] - ping[5], bd);
  pong[6] = HIGHBD_WRAPLOW(-ping[6] + ping[7], bd);
  pong[7] = HIGHBD_WRAPLOW(ping[6] + ping[7], bd);

  pong[8] = ping[8];
  pong[15] = ping[15];

  prod0 = -ping[9] * cospi_8_64 + ping[14] * cospi_24_64;
  prod1 = ping[9] * cospi_24_64 + ping[14] * cospi_8_64;
  pong[9] = HIGHBD_WRAPLOW(dct_const_round_shift(prod0), bd);
  pong[14] = HIGHBD_WRAPLOW(dct_const_round_shift(prod1), bd);

  prod0 = -ping[10] * cospi_24_64 - ping[13] * cospi_8_64;
  prod1 = -ping[10] * cospi_8_64 + ping[13] * cospi_24_64;
  pong[10] = HIGHBD_WRAPLOW(dct_const_round_shift(prod0), bd);
  pong[13] = HIGHBD_WRAPLOW(dct_const_round_shift(prod1), bd);

  pong[11] = ping[11];
  pong[12] = ping[12];

  // stage 5
  ping[0] = HIGHBD_WRAPLOW(pong[0] + pong[3], bd);
  ping[1] = HIGHBD_WRAPLOW(pong[1] + pong[2], bd);
  ping[2] = HIGHBD_WRAPLOW(pong[1] - pong[2], bd);
  ping[3] = HIGHBD_WRAPLOW(pong[0] - pong[3], bd);

  ping[4] = pong[4];
  prod0 = (pong[6] - pong[5]) * cospi_16_64;
  prod1 = (pong[5] + pong[6]) * cospi_16_64;
  ping[5] = HIGHBD_WRAPLOW(dct_const_round_shift(prod0), bd);
  ping[6] = HIGHBD_WRAPLOW(dct_const_round_shift(prod1), bd);
  ping[7] = pong[7];

  ping[8] = HIGHBD_WRAPLOW(pong[8] + pong[11], bd);
  ping[9] = HIGHBD_WRAPLOW(pong[9] + pong[10], bd);
  ping[10] = HIGHBD_WRAPLOW(pong[9] - pong[10], bd);
  ping[11] = HIGHBD_WRAPLOW(pong[8] - pong[11], bd);
  ping[12] = HIGHBD_WRAPLOW(-pong[12] + pong[15], bd);
  ping[13] = HIGHBD_WRAPLOW(-pong[13] + pong[14], bd);
  ping[14] = HIGHBD_WRAPLOW(pong[13] + pong[14], bd);
  ping[15] = HIGHBD_WRAPLOW(pong[12] + pong[15], bd);

  // stage 6
  pong[0] = HIGHBD_WRAPLOW(ping[0] + ping[7], bd);
  pong[1] = HIGHBD_WRAPLOW(ping[1] + ping[6], bd);
  pong[2] = HIGHBD_WRAPLOW(ping[2] + ping[5], bd);
  pong[3] = HIGHBD_WRAPLOW(ping[3] + ping[4], bd);
  pong[4] = HIGHBD_WRAPLOW(ping[3] - ping[4], bd);
  pong[5] = HIGHBD_WRAPLOW(ping[2] - ping[5], bd);
  pong[6] = HIGHBD_WRAPLOW(ping[1] - ping[6], bd);
  pong[7] = HIGHBD_WRAPLOW(ping[0] - ping[7], bd);

  pong[8] = ping[8];
  pong[9] = ping[9];

  prod0 = (-ping[10] + ping[13]) * cospi_16_64;
  prod1 = (ping[10] + ping[13]) * cospi_16_64;
  pong[10] = HIGHBD_WRAPLOW(dct_const_round_shift(prod0), bd);
  pong[13] = HIGHBD_WRAPLOW(dct_const_round_shift(prod1), bd);

  prod0 = (-ping[11] + ping[12]) * cospi_16_64;
  prod1 = (ping[11] + ping[12]) * cospi_16_64;
  pong[11] = HIGHBD_WRAPLOW(dct_const_round_shift(prod0), bd);
  pong[12] = HIGHBD_WRAPLOW(dct_const_round_shift(prod1), bd);

  pong[14] = ping[14];
  pong[15] = ping[15];

  // stage 7
  output[0] = HIGHBD_WRAPLOW(pong[0] + pong[15], bd);
  output[1] = HIGHBD_WRAPLOW(-pong[1] - pong[14], bd);
  output[2] = HIGHBD_WRAPLOW(pong[2] + pong[13], bd);
  output[3] = HIGHBD_WRAPLOW(-pong[3] - pong[12], bd);
  output[4] = HIGHBD_WRAPLOW(pong[4] + pong[11], bd);
  output[5] = HIGHBD_WRAPLOW(-pong[5] - pong[10], bd);
  output[6] = HIGHBD_WRAPLOW(pong[6] + pong[9], bd);
  output[7] = HIGHBD_WRAPLOW(-pong[7] - pong[8], bd);
  output[8] = HIGHBD_WRAPLOW(pong[7] - pong[8], bd);
  output[9] = HIGHBD_WRAPLOW(-pong[6] + pong[9], bd);
  output[10] = HIGHBD_WRAPLOW(pong[5] - pong[10], bd);
  output[11] = HIGHBD_WRAPLOW(-pong[4] + pong[11], bd);
  output[12] = HIGHBD_WRAPLOW(pong[3] - pong[12], bd);
  output[13] = HIGHBD_WRAPLOW(-pong[2] + pong[13], bd);
  output[14] = HIGHBD_WRAPLOW(pong[1] - pong[14], bd);
  output[15] = HIGHBD_WRAPLOW(-pong[0] + pong[15], bd);
}
Jingning Han's avatar
Jingning Han committed
423

424
static void highbd_inv_idtx_add_c(const tran_low_t *input, uint8_t *dest8,
425
                                  int stride, int bs, int tx_type, int bd) {
426 427 428
  int r, c;
  const int shift = bs < 32 ? 3 : 2;
  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
429

Debargha Mukherjee's avatar
Debargha Mukherjee committed
430
  if (tx_type == IDTX) {
431 432
    for (r = 0; r < bs; ++r) {
      for (c = 0; c < bs; ++c)
Debargha Mukherjee's avatar
Debargha Mukherjee committed
433 434 435
        dest[c] = highbd_clip_pixel_add(dest[c], input[c] >> shift, bd);
      dest += stride;
      input += bs;
436
    }
437 438
  }
}
439

clang-format's avatar
clang-format committed
440 441 442
// 16-bit-destination counterpart of the flip helper: applies the flips
// implied by tx_type by rewriting the base pointers and strides of the
// destination and/or the transform output.
//
// Note that the transpose of src will be added to dst. In order to LR
// flip the addends (in dst coordinates), we UD flip the src. To UD flip
// the addends, we UD flip the dst.
//
// An unrecognised tx_type trips assert(0) and flips nothing.
static void maybe_flip_strides16(uint16_t **dst, int *dstride, tran_low_t **src,
                                 int *sstride, int tx_type, int sizey,
                                 int sizex) {
  int flip_ud = 0;  // UD flip of the addends: flip dst
  int flip_lr = 0;  // LR flip of the addends: flip src

  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case IDTX:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST: break;
    case FLIPADST_DCT:
    case FLIPADST_ADST:
    case V_FLIPADST: flip_ud = 1; break;
    case DCT_FLIPADST:
    case ADST_FLIPADST:
    case H_FLIPADST: flip_lr = 1; break;
    case FLIPADST_FLIPADST:
      flip_ud = 1;
      flip_lr = 1;
      break;
    default: assert(0); break;
  }

  if (flip_ud) {
    FLIPUD_PTR(*dst, *dstride, sizey);
  }
  if (flip_lr) {
    FLIPUD_PTR(*src, *sstride, sizex);
  }
}
Yaowu Xu's avatar
Yaowu Xu committed
477
#endif  // CONFIG_AOM_HIGHBITDEPTH
478 479
#endif  // CONFIG_EXT_TX

Yaowu Xu's avatar
Yaowu Xu committed
480 481
void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         int tx_type) {
482
  static const transform_2d IHT_4[] = {
clang-format's avatar
clang-format committed
483 484 485
    { idct4_c, idct4_c },    // DCT_DCT
    { iadst4_c, idct4_c },   // ADST_DCT
    { idct4_c, iadst4_c },   // DCT_ADST
486
    { iadst4_c, iadst4_c },  // ADST_ADST
487
#if CONFIG_EXT_TX
clang-format's avatar
clang-format committed
488 489
    { iadst4_c, idct4_c },   // FLIPADST_DCT
    { idct4_c, iadst4_c },   // DCT_FLIPADST
490 491 492 493
    { iadst4_c, iadst4_c },  // FLIPADST_FLIPADST
    { iadst4_c, iadst4_c },  // ADST_FLIPADST
    { iadst4_c, iadst4_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx4_c },  // IDTX
clang-format's avatar
clang-format committed
494 495
    { idct4_c, iidtx4_c },   // V_DCT
    { iidtx4_c, idct4_c },   // H_DCT
496 497 498 499
    { iadst4_c, iidtx4_c },  // V_ADST
    { iidtx4_c, iadst4_c },  // H_ADST
    { iadst4_c, iidtx4_c },  // V_FLIPADST
    { iidtx4_c, iadst4_c },  // H_FLIPADST
clang-format's avatar
clang-format committed
500
#endif                       // CONFIG_EXT_TX
501 502 503
  };

  int i, j;
504 505 506 507
  tran_low_t tmp;
  tran_low_t out[4][4];
  tran_low_t *outp = &out[0][0];
  int outstride = 4;
508 509 510

  // inverse transform row vectors
  for (i = 0; i < 4; ++i) {
511
    IHT_4[tx_type].rows(input, out[i]);
clang-format's avatar
clang-format committed
512
    input += 4;
513 514 515
  }

  // transpose
clang-format's avatar
clang-format committed
516
  for (i = 1; i < 4; i++) {
517
    for (j = 0; j < i; j++) {
clang-format's avatar
clang-format committed
518
      tmp = out[i][j];
519 520 521
      out[i][j] = out[j][i];
      out[j][i] = tmp;
    }
522 523 524 525
  }

  // inverse transform column vectors
  for (i = 0; i < 4; ++i) {
526 527 528 529
    IHT_4[tx_type].cols(out[i], out[i]);
  }

#if CONFIG_EXT_TX
530
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4, 4);
531 532 533 534
#endif

  // Sum with the destination
  for (i = 0; i < 4; ++i) {
535
    for (j = 0; j < 4; ++j) {
536 537 538
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
539 540 541 542
    }
  }
}

543
#if CONFIG_EXT_TX
Yaowu Xu's avatar
Yaowu Xu committed
544 545
void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         int tx_type) {
546
  static const transform_2d IHT_4x8[] = {
clang-format's avatar
clang-format committed
547 548 549
    { idct8_c, idct4_c },    // DCT_DCT
    { iadst8_c, idct4_c },   // ADST_DCT
    { idct8_c, iadst4_c },   // DCT_ADST
550
    { iadst8_c, iadst4_c },  // ADST_ADST
clang-format's avatar
clang-format committed
551 552
    { iadst8_c, idct4_c },   // FLIPADST_DCT
    { idct8_c, iadst4_c },   // DCT_FLIPADST
553 554 555 556
    { iadst8_c, iadst4_c },  // FLIPADST_FLIPADST
    { iadst8_c, iadst4_c },  // ADST_FLIPADST
    { iadst8_c, iadst4_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx4_c },  // IDTX
clang-format's avatar
clang-format committed
557 558
    { idct8_c, iidtx4_c },   // V_DCT
    { iidtx8_c, idct4_c },   // H_DCT
559 560 561 562 563 564
    { iadst8_c, iidtx4_c },  // V_ADST
    { iidtx8_c, iadst4_c },  // H_ADST
    { iadst8_c, iidtx4_c },  // V_FLIPADST
    { iidtx8_c, iadst4_c },  // H_FLIPADST
  };

565 566
  const int n = 4;
  const int n2 = 8;
567 568 569
  int i, j;
  tran_low_t out[4][8], outtmp[4];
  tran_low_t *outp = &out[0][0];
570
  int outstride = n2;
571 572

  // inverse transform row vectors and transpose
573
  for (i = 0; i < n2; ++i) {
574
    IHT_4x8[tx_type].rows(input, outtmp);
575
    for (j = 0; j < n; ++j)
576
      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
577
    input += n;
578 579 580
  }

  // inverse transform column vectors
581
  for (i = 0; i < n; ++i) {
582 583 584
    IHT_4x8[tx_type].cols(out[i], out[i]);
  }

585
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
586 587

  // Sum with the destination
588 589
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
590 591 592 593 594 595 596
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
597 598
void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         int tx_type) {
599
  static const transform_2d IHT_8x4[] = {
clang-format's avatar
clang-format committed
600 601 602
    { idct4_c, idct8_c },    // DCT_DCT
    { iadst4_c, idct8_c },   // ADST_DCT
    { idct4_c, iadst8_c },   // DCT_ADST
603
    { iadst4_c, iadst8_c },  // ADST_ADST
clang-format's avatar
clang-format committed
604 605
    { iadst4_c, idct8_c },   // FLIPADST_DCT
    { idct4_c, iadst8_c },   // DCT_FLIPADST
606 607 608 609
    { iadst4_c, iadst8_c },  // FLIPADST_FLIPADST
    { iadst4_c, iadst8_c },  // ADST_FLIPADST
    { iadst4_c, iadst8_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx8_c },  // IDTX
clang-format's avatar
clang-format committed
610 611
    { idct4_c, iidtx8_c },   // V_DCT
    { iidtx4_c, idct8_c },   // H_DCT
612 613 614 615 616
    { iadst4_c, iidtx8_c },  // V_ADST
    { iidtx4_c, iadst8_c },  // H_ADST
    { iadst4_c, iidtx8_c },  // V_FLIPADST
    { iidtx4_c, iadst8_c },  // H_FLIPADST
  };
617 618
  const int n = 4;
  const int n2 = 8;
619 620 621 622

  int i, j;
  tran_low_t out[8][4], outtmp[8];
  tran_low_t *outp = &out[0][0];
623
  int outstride = n;
624 625

  // inverse transform row vectors and transpose
626
  for (i = 0; i < n; ++i) {
627
    IHT_8x4[tx_type].rows(input, outtmp);
628
    for (j = 0; j < n2; ++j)
629
      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
630
    input += n2;
631 632 633
  }

  // inverse transform column vectors
634
  for (i = 0; i < n2; ++i) {
635 636 637
    IHT_8x4[tx_type].cols(out[i], out[i]);
  }

638
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
639 640

  // Sum with the destination
641 642
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
643 644 645 646 647 648 649
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
650 651
void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                           int tx_type) {
652
  static const transform_2d IHT_8x16[] = {
clang-format's avatar
clang-format committed
653 654 655
    { idct16_c, idct8_c },    // DCT_DCT
    { iadst16_c, idct8_c },   // ADST_DCT
    { idct16_c, iadst8_c },   // DCT_ADST
656
    { iadst16_c, iadst8_c },  // ADST_ADST
clang-format's avatar
clang-format committed
657 658
    { iadst16_c, idct8_c },   // FLIPADST_DCT
    { idct16_c, iadst8_c },   // DCT_FLIPADST
659 660 661 662
    { iadst16_c, iadst8_c },  // FLIPADST_FLIPADST
    { iadst16_c, iadst8_c },  // ADST_FLIPADST
    { iadst16_c, iadst8_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx8_c },  // IDTX
clang-format's avatar
clang-format committed
663 664
    { idct16_c, iidtx8_c },   // V_DCT
    { iidtx16_c, idct8_c },   // H_DCT
665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682
    { iadst16_c, iidtx8_c },  // V_ADST
    { iidtx16_c, iadst8_c },  // H_ADST
    { iadst16_c, iidtx8_c },  // V_FLIPADST
    { iidtx16_c, iadst8_c },  // H_FLIPADST
  };

  const int n = 8;
  const int n2 = 16;
  int i, j;
  tran_low_t out[8][16], outtmp[8];
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
    IHT_8x16[tx_type].rows(input, outtmp);
    for (j = 0; j < n; ++j)
      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
683
    input += n;
684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
    IHT_8x16[tx_type].cols(out[i], out[i]);
  }

  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
703 704
void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                           int tx_type) {
705
  static const transform_2d IHT_16x8[] = {
clang-format's avatar
clang-format committed
706 707 708
    { idct8_c, idct16_c },    // DCT_DCT
    { iadst8_c, idct16_c },   // ADST_DCT
    { idct8_c, iadst16_c },   // DCT_ADST
709
    { iadst8_c, iadst16_c },  // ADST_ADST
clang-format's avatar
clang-format committed
710 711
    { iadst8_c, idct16_c },   // FLIPADST_DCT
    { idct8_c, iadst16_c },   // DCT_FLIPADST
712 713 714 715
    { iadst8_c, iadst16_c },  // FLIPADST_FLIPADST
    { iadst8_c, iadst16_c },  // ADST_FLIPADST
    { iadst8_c, iadst16_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx16_c },  // IDTX
clang-format's avatar
clang-format committed
716 717
    { idct8_c, iidtx16_c },   // V_DCT
    { iidtx8_c, idct16_c },   // H_DCT
718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735
    { iadst8_c, iidtx16_c },  // V_ADST
    { iidtx8_c, iadst16_c },  // H_ADST
    { iadst8_c, iidtx16_c },  // V_FLIPADST
    { iidtx8_c, iadst16_c },  // H_FLIPADST
  };
  const int n = 8;
  const int n2 = 16;

  int i, j;
  tran_low_t out[16][8], outtmp[16];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_16x8[tx_type].rows(input, outtmp);
    for (j = 0; j < n2; ++j)
      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
736
    input += n2;
737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) {
    IHT_16x8[tx_type].cols(out[i], out[i]);
  }

  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
756 757
void av1_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                            int tx_type) {
758
  static const transform_2d IHT_16x32[] = {
clang-format's avatar
clang-format committed
759 760 761
    { idct32_c, idct16_c },         // DCT_DCT
    { ihalfright32_c, idct16_c },   // ADST_DCT
    { idct32_c, iadst16_c },        // DCT_ADST
762
    { ihalfright32_c, iadst16_c },  // ADST_ADST
clang-format's avatar
clang-format committed
763 764
    { ihalfright32_c, idct16_c },   // FLIPADST_DCT
    { idct32_c, iadst16_c },        // DCT_FLIPADST
765 766 767
    { ihalfright32_c, iadst16_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, iadst16_c },  // ADST_FLIPADST
    { ihalfright32_c, iadst16_c },  // FLIPADST_ADST
clang-format's avatar
clang-format committed
768 769 770
    { iidtx32_c, iidtx16_c },       // IDTX
    { idct32_c, iidtx16_c },        // V_DCT
    { iidtx32_c, idct16_c },        // H_DCT
771
    { ihalfright32_c, iidtx16_c },  // V_ADST
clang-format's avatar
clang-format committed
772
    { iidtx32_c, iadst16_c },       // H_ADST
773
    { ihalfright32_c, iidtx16_c },  // V_FLIPADST
clang-format's avatar
clang-format committed
774
    { iidtx32_c, iadst16_c },       // H_FLIPADST
775 776 777 778 779 780 781 782 783 784 785 786 787 788
  };

  const int n = 16;
  const int n2 = 32;
  int i, j;
  tran_low_t out[16][32], outtmp[16];
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
    IHT_16x32[tx_type].rows(input, outtmp);
    for (j = 0; j < n; ++j)
      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
789
    input += n;
790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
    IHT_16x32[tx_type].cols(out[i], out[i]);
  }

  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
809 810
void av1_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                            int tx_type) {
811
  static const transform_2d IHT_32x16[] = {
clang-format's avatar
clang-format committed
812 813 814
    { idct16_c, idct32_c },         // DCT_DCT
    { iadst16_c, idct32_c },        // ADST_DCT
    { idct16_c, ihalfright32_c },   // DCT_ADST
815
    { iadst16_c, ihalfright32_c },  // ADST_ADST
clang-format's avatar
clang-format committed
816 817
    { iadst16_c, idct32_c },        // FLIPADST_DCT
    { idct16_c, ihalfright32_c },   // DCT_FLIPADST
818 819 820
    { iadst16_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { iadst16_c, ihalfright32_c },  // ADST_FLIPADST
    { iadst16_c, ihalfright32_c },  // FLIPADST_ADST
clang-format's avatar
clang-format committed
821 822 823 824
    { iidtx16_c, iidtx32_c },       // IDTX
    { idct16_c, iidtx32_c },        // V_DCT
    { iidtx16_c, idct32_c },        // H_DCT
    { iadst16_c, iidtx32_c },       // V_ADST
825
    { iidtx16_c, ihalfright32_c },  // H_ADST
clang-format's avatar
clang-format committed
826
    { iadst16_c, iidtx32_c },       // V_FLIPADST
827 828 829 830 831 832 833 834 835 836 837 838 839 840 841
    { iidtx16_c, ihalfright32_c },  // H_FLIPADST
  };
  const int n = 16;
  const int n2 = 32;

  int i, j;
  tran_low_t out[32][16], outtmp[32];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_32x16[tx_type].rows(input, outtmp);
    for (j = 0; j < n2; ++j)
      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
clang-format's avatar
clang-format committed
842
    input += n2;
843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) {
    IHT_32x16[tx_type].cols(out[i], out[i]);
  }

  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}
#endif  // CONFIG_EXT_TX

Yaowu Xu's avatar
Yaowu Xu committed
863 864
void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         int tx_type) {
865
  static const transform_2d IHT_8[] = {
clang-format's avatar
clang-format committed
866 867 868
    { idct8_c, idct8_c },    // DCT_DCT
    { iadst8_c, idct8_c },   // ADST_DCT
    { idct8_c, iadst8_c },   // DCT_ADST
869
    { iadst8_c, iadst8_c },  // ADST_ADST
870
#if CONFIG_EXT_TX
clang-format's avatar
clang-format committed
871 872
    { iadst8_c, idct8_c },   // FLIPADST_DCT
    { idct8_c, iadst8_c },   // DCT_FLIPADST
873 874 875 876
    { iadst8_c, iadst8_c },  // FLIPADST_FLIPADST
    { iadst8_c, iadst8_c },  // ADST_FLIPADST
    { iadst8_c, iadst8_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx8_c },  // IDTX
clang-format's avatar
clang-format committed
877 878
    { idct8_c, iidtx8_c },   // V_DCT
    { iidtx8_c, idct8_c },   // H_DCT
879 880 881 882
    { iadst8_c, iidtx8_c },  // V_ADST
    { iidtx8_c, iadst8_c },  // H_ADST
    { iadst8_c, iidtx8_c },  // V_FLIPADST
    { iidtx8_c, iadst8_c },  // H_FLIPADST
clang-format's avatar
clang-format committed
883
#endif                       // CONFIG_EXT_TX
884 885
  };

Jingning Han's avatar
Jingning Han committed
886
  int i, j;
887 888 889 890
  tran_low_t tmp;
  tran_low_t out[8][8];
  tran_low_t *outp = &out[0][0];
  int outstride = 8;
Jingning Han's avatar
Jingning Han committed
891 892 893

  // inverse transform row vectors
  for (i = 0; i < 8; ++i) {
894
    IHT_8[tx_type].rows(input, out[i]);
clang-format's avatar
clang-format committed
895
    input += 8;
896 897 898
  }

  // transpose
clang-format's avatar
clang-format committed
899
  for (i = 1; i < 8; i++) {
900
    for (j = 0; j < i; j++) {
clang-format's avatar
clang-format committed
901
      tmp = out[i][j];
902 903 904
      out[i][j] = out[j][i];
      out[j][i] = tmp;
    }
Jingning Han's avatar
Jingning Han committed
905 906 907 908
  }

  // inverse transform column vectors
  for (i = 0; i < 8; ++i) {
909 910 911 912
    IHT_8[tx_type].cols(out[i], out[i]);
  }

#if CONFIG_EXT_TX
913
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 8, 8);
914 915 916 917
#endif

  // Sum with the destination
  for (i = 0; i < 8; ++i) {
Jingning Han's avatar
Jingning Han committed
918
    for (j = 0; j < 8; ++j) {
919 920 921
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
Jingning Han's avatar
Jingning Han committed
922 923 924 925
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
926 927
void av1_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                            int tx_type) {
928
  static const transform_2d IHT_16[] = {
clang-format's avatar