/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include "./aom_dsp_rtcd.h"
#include "./av1_rtcd.h"
#include "aom_dsp/inv_txfm.h"
#include "av1/common/enums.h"
#include "av1/common/av1_txfm.h"
#include "av1/common/av1_inv_txfm1d.h"
#include "av1/common/av1_inv_txfm1d_cfg.h"

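// When NO_INV_TRANSPOSE is nonzero, the rectangular wrappers below run the
// inverse transform on the block directly; otherwise they transpose the
// input, run the transform at the rotated size, and transpose back.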
#define NO_INV_TRANSPOSE 1

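// Clamp every value in buf to the signed range representable in 'bit' bits,
// i.e. [-(2^(bit - 1)), 2^(bit - 1) - 1].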
static INLINE void clamp_buf(int32_t *buf, int32_t size, int8_t bit) {
  const int64_t maxValue = (1LL << (bit - 1)) - 1;
  const int64_t minValue = -(1LL << (bit - 1));

  for (int i = 0; i < size; ++i)
    buf[i] = (int32_t)clamp64(buf[i], minValue, maxValue);
}

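// Map a TXFM_TYPE to the matching 1D inverse transform kernel.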
static INLINE TxfmFunc inv_txfm_type_to_func(TXFM_TYPE txfm_type) {
  switch (txfm_type) {
    case TXFM_TYPE_DCT4: return av1_idct4_new;
    case TXFM_TYPE_DCT8: return av1_idct8_new;
    case TXFM_TYPE_DCT16: return av1_idct16_new;
    case TXFM_TYPE_DCT32: return av1_idct32_new;
#if CONFIG_TX64X64
    case TXFM_TYPE_DCT64: return av1_idct64_new;
#endif  // CONFIG_TX64X64
    case TXFM_TYPE_ADST4: return av1_iadst4_new;
    case TXFM_TYPE_ADST8: return av1_iadst8_new;
    case TXFM_TYPE_ADST16: return av1_iadst16_new;
    case TXFM_TYPE_ADST32: return av1_iadst32_new;
    case TXFM_TYPE_IDENTITY4: return av1_iidentity4_c;
    case TXFM_TYPE_IDENTITY8: return av1_iidentity8_c;
    case TXFM_TYPE_IDENTITY16: return av1_iidentity16_c;
    case TXFM_TYPE_IDENTITY32: return av1_iidentity32_c;
#if CONFIG_TX64X64
    case TXFM_TYPE_IDENTITY64: return av1_iidentity64_c;
#endif  // CONFIG_TX64X64
    default: assert(0); return NULL;
  }
}

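// Column (vertical) 1D transform configurations, indexed by TX_TYPE_1D and
// TX_SIZE. FLIPADST reuses the ADST configurations; the flipping itself is
// handled via cfg->ud_flip/lr_flip in inv_txfm2d_add_c().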
static const TXFM_1D_CFG *inv_txfm_col_cfg_ls[TX_TYPES_1D][TX_SIZES_ALL] = {
  // DCT
  {
      &inv_txfm_1d_col_cfg_dct_4,     &inv_txfm_1d_col_cfg_dct_8,
      &inv_txfm_1d_col_cfg_dct_16,    &inv_txfm_1d_col_cfg_dct_32,
#if CONFIG_TX64X64
      &inv_txfm_1d_col_cfg_dct_64,
#endif  // CONFIG_TX64X64
      &inv_txfm_1d_col_cfg_dct_4x8,   &inv_txfm_1d_col_cfg_dct_8x4,
      &inv_txfm_1d_col_cfg_dct_8x16,  &inv_txfm_1d_col_cfg_dct_16x8,
      &inv_txfm_1d_col_cfg_dct_16x32, &inv_txfm_1d_col_cfg_dct_32x16,
#if CONFIG_TX64X64
      &inv_txfm_1d_col_cfg_dct_32x64, &inv_txfm_1d_col_cfg_dct_64x32,
#endif  // CONFIG_TX64X64
      &inv_txfm_1d_col_cfg_dct_4x16,  &inv_txfm_1d_col_cfg_dct_16x4,
      &inv_txfm_1d_col_cfg_dct_8x32,  &inv_txfm_1d_col_cfg_dct_32x8,
#if CONFIG_TX64X64
      &inv_txfm_1d_col_cfg_dct_16x64, &inv_txfm_1d_col_cfg_dct_64x16,
#endif  // CONFIG_TX64X64
  },
  // ADST
  {
      &inv_txfm_1d_col_cfg_adst_4,
      &inv_txfm_1d_col_cfg_adst_8,
      &inv_txfm_1d_col_cfg_adst_16,
      &inv_txfm_1d_col_cfg_adst_32,
#if CONFIG_TX64X64
      NULL,
#endif  // CONFIG_TX64X64
      &inv_txfm_1d_col_cfg_adst_4x8,
      &inv_txfm_1d_col_cfg_adst_8x4,
      &inv_txfm_1d_col_cfg_adst_8x16,
      &inv_txfm_1d_col_cfg_adst_16x8,
      &inv_txfm_1d_col_cfg_adst_16x32,
      &inv_txfm_1d_col_cfg_adst_32x16,
#if CONFIG_TX64X64
      NULL,
      NULL,
#endif  // CONFIG_TX64X64
      &inv_txfm_1d_col_cfg_adst_4x16,
      &inv_txfm_1d_col_cfg_adst_16x4,
      &inv_txfm_1d_col_cfg_adst_8x32,
      &inv_txfm_1d_col_cfg_adst_32x8,
#if CONFIG_TX64X64
      NULL,
      NULL,
#endif  // CONFIG_TX64X64
  },
  // FLIPADST
  {
      &inv_txfm_1d_col_cfg_adst_4,
      &inv_txfm_1d_col_cfg_adst_8,
      &inv_txfm_1d_col_cfg_adst_16,
      &inv_txfm_1d_col_cfg_adst_32,
#if CONFIG_TX64X64
      NULL,
#endif  // CONFIG_TX64X64
      &inv_txfm_1d_col_cfg_adst_4x8,
      &inv_txfm_1d_col_cfg_adst_8x4,
      &inv_txfm_1d_col_cfg_adst_8x16,
      &inv_txfm_1d_col_cfg_adst_16x8,
      &inv_txfm_1d_col_cfg_adst_16x32,
      &inv_txfm_1d_col_cfg_adst_32x16,
#if CONFIG_TX64X64
      NULL,
      NULL,
#endif  // CONFIG_TX64X64
      &inv_txfm_1d_col_cfg_adst_4x16,
      &inv_txfm_1d_col_cfg_adst_16x4,
      &inv_txfm_1d_col_cfg_adst_8x32,
      &inv_txfm_1d_col_cfg_adst_32x8,
#if CONFIG_TX64X64
      NULL,
      NULL,
#endif  // CONFIG_TX64X64
  },
  // IDENTITY
  {
      &inv_txfm_1d_col_cfg_identity_4,  &inv_txfm_1d_col_cfg_identity_8,
      &inv_txfm_1d_col_cfg_identity_16, &inv_txfm_1d_col_cfg_identity_32,
#if CONFIG_TX64X64
      &inv_txfm_1d_col_cfg_identity_64,
#endif  // CONFIG_TX64X64
      &inv_txfm_1d_col_cfg_identity_8,  &inv_txfm_1d_col_cfg_identity_4,
      &inv_txfm_1d_col_cfg_identity_16, &inv_txfm_1d_col_cfg_identity_8,
      &inv_txfm_1d_col_cfg_identity_32, &inv_txfm_1d_col_cfg_identity_16,
#if CONFIG_TX64X64
      &inv_txfm_1d_col_cfg_identity_64, &inv_txfm_1d_col_cfg_identity_32,
#endif  // CONFIG_TX64X64
      &inv_txfm_1d_col_cfg_identity_16, &inv_txfm_1d_col_cfg_identity_4,
      &inv_txfm_1d_col_cfg_identity_32, &inv_txfm_1d_col_cfg_identity_8,
#if CONFIG_TX64X64
      &inv_txfm_1d_col_cfg_identity_64, &inv_txfm_1d_col_cfg_identity_16,
#endif  // CONFIG_TX64X64
  },
};

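// Row (horizontal) 1D transform configurations, indexed by TX_TYPE_1D and
// TX_SIZE. Rectangular sizes mostly reuse the square configuration whose
// length matches the block width.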
static const TXFM_1D_CFG *inv_txfm_row_cfg_ls[TX_TYPES_1D][TX_SIZES_ALL] = {
  // DCT
  {
      &inv_txfm_1d_row_cfg_dct_4,    &inv_txfm_1d_row_cfg_dct_8,
      &inv_txfm_1d_row_cfg_dct_16,   &inv_txfm_1d_row_cfg_dct_32,
#if CONFIG_TX64X64
      &inv_txfm_1d_row_cfg_dct_64,
#endif  // CONFIG_TX64X64
      &inv_txfm_1d_row_cfg_dct_4,    &inv_txfm_1d_row_cfg_dct_8,
      &inv_txfm_1d_row_cfg_dct_8x16, &inv_txfm_1d_row_cfg_dct_16,
      &inv_txfm_1d_row_cfg_dct_16,   &inv_txfm_1d_row_cfg_dct_32,
#if CONFIG_TX64X64
      &inv_txfm_1d_row_cfg_dct_32,   &inv_txfm_1d_row_cfg_dct_64,
#endif  // CONFIG_TX64X64
      &inv_txfm_1d_row_cfg_dct_4,    &inv_txfm_1d_row_cfg_dct_16,
      &inv_txfm_1d_row_cfg_dct_8x32, &inv_txfm_1d_row_cfg_dct_32,
#if CONFIG_TX64X64
      &inv_txfm_1d_row_cfg_dct_16,   &inv_txfm_1d_row_cfg_dct_64,
#endif  // CONFIG_TX64X64
  },
  // ADST
  {
      &inv_txfm_1d_row_cfg_adst_4,
      &inv_txfm_1d_row_cfg_adst_8,
      &inv_txfm_1d_row_cfg_adst_16,
      &inv_txfm_1d_row_cfg_adst_32,
#if CONFIG_TX64X64
      NULL,
#endif  // CONFIG_TX64X64
      &inv_txfm_1d_row_cfg_adst_4,
      &inv_txfm_1d_row_cfg_adst_8,
      &inv_txfm_1d_row_cfg_adst_8x16,
      &inv_txfm_1d_row_cfg_adst_16,
      &inv_txfm_1d_row_cfg_adst_16,
      &inv_txfm_1d_row_cfg_adst_32,
#if CONFIG_TX64X64
      &inv_txfm_1d_row_cfg_adst_32,
      NULL,
#endif  // CONFIG_TX64X64
      &inv_txfm_1d_row_cfg_adst_4,
      &inv_txfm_1d_row_cfg_adst_16,
      &inv_txfm_1d_row_cfg_adst_8x32,
      &inv_txfm_1d_row_cfg_adst_32,
#if CONFIG_TX64X64
      &inv_txfm_1d_row_cfg_adst_16,
      NULL,
#endif  // CONFIG_TX64X64
  },
  // FLIPADST
  {
      &inv_txfm_1d_row_cfg_adst_4,
      &inv_txfm_1d_row_cfg_adst_8,
      &inv_txfm_1d_row_cfg_adst_16,
      &inv_txfm_1d_row_cfg_adst_32,
#if CONFIG_TX64X64
      NULL,
#endif  // CONFIG_TX64X64
      &inv_txfm_1d_row_cfg_adst_4,
      &inv_txfm_1d_row_cfg_adst_8,
      &inv_txfm_1d_row_cfg_adst_8x16,
      &inv_txfm_1d_row_cfg_adst_16,
      &inv_txfm_1d_row_cfg_adst_16,
      &inv_txfm_1d_row_cfg_adst_32,
#if CONFIG_TX64X64
      &inv_txfm_1d_row_cfg_adst_32,
      NULL,
#endif  // CONFIG_TX64X64
      &inv_txfm_1d_row_cfg_adst_4,
      &inv_txfm_1d_row_cfg_adst_16,
      &inv_txfm_1d_row_cfg_adst_8x32,
      &inv_txfm_1d_row_cfg_adst_32,
#if CONFIG_TX64X64
      &inv_txfm_1d_row_cfg_adst_16,
      NULL,
#endif  // CONFIG_TX64X64
  },
  // IDENTITY
  {
      &inv_txfm_1d_row_cfg_identity_4,  &inv_txfm_1d_row_cfg_identity_8,
      &inv_txfm_1d_row_cfg_identity_16, &inv_txfm_1d_row_cfg_identity_32,
#if CONFIG_TX64X64
      &inv_txfm_1d_row_cfg_identity_64,
#endif  // CONFIG_TX64X64
      &inv_txfm_1d_row_cfg_identity_4,  &inv_txfm_1d_row_cfg_identity_8,
      &inv_txfm_1d_row_cfg_identity_8,  &inv_txfm_1d_row_cfg_identity_16,
      &inv_txfm_1d_row_cfg_identity_16, &inv_txfm_1d_row_cfg_identity_32,
#if CONFIG_TX64X64
      &inv_txfm_1d_row_cfg_identity_32, &inv_txfm_1d_row_cfg_identity_64,
#endif  // CONFIG_TX64X64
      &inv_txfm_1d_row_cfg_identity_4,  &inv_txfm_1d_row_cfg_identity_16,
      &inv_txfm_1d_row_cfg_identity_8,  &inv_txfm_1d_row_cfg_identity_32,
#if CONFIG_TX64X64
      &inv_txfm_1d_row_cfg_identity_16, &inv_txfm_1d_row_cfg_identity_64,
#endif  // CONFIG_TX64X64
  },
};

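// Per-TX_SIZE rounding shifts: shift[0] is applied after the row pass and
// shift[1] after the column pass. The stored values are negative and are
// negated before being passed to av1_round_shift_array(), so they act as
// rounding right shifts.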
static const int8_t inv_shift_4x4[2] = { 0, -4 };
static const int8_t inv_shift_8x8[2] = { 0, -5 };
static const int8_t inv_shift_16x16[2] = { -1, -5 };
static const int8_t inv_shift_32x32[2] = { -1, -5 };
#if CONFIG_TX64X64
static const int8_t inv_shift_64x64[2] = { -1, -5 };
#endif
static const int8_t inv_shift_4x8[2] = { 0, -4 };
static const int8_t inv_shift_8x4[2] = { 0, -4 };
static const int8_t inv_shift_8x16[2] = { -1, -4 };
static const int8_t inv_shift_16x8[2] = { -1, -4 };
static const int8_t inv_shift_16x32[2] = { -1, -4 };
static const int8_t inv_shift_32x16[2] = { -1, -4 };
#if CONFIG_TX64X64
static const int8_t inv_shift_32x64[2] = { -1, -4 };
static const int8_t inv_shift_64x32[2] = { -1, -4 };
#endif
static const int8_t inv_shift_4x16[2] = { -1, -4 };
static const int8_t inv_shift_16x4[2] = { -1, -4 };
static const int8_t inv_shift_8x32[2] = { -1, -5 };
static const int8_t inv_shift_32x8[2] = { -1, -5 };
#if CONFIG_TX64X64
static const int8_t inv_shift_16x64[2] = { -1, -5 };
static const int8_t inv_shift_64x16[2] = { -1, -5 };
#endif  // CONFIG_TX64X64

const int8_t *inv_txfm_shift_ls[TX_SIZES_ALL] = {
  inv_shift_4x4,   inv_shift_8x8,   inv_shift_16x16, inv_shift_32x32,
#if CONFIG_TX64X64
  inv_shift_64x64,
#endif  // CONFIG_TX64X64
  inv_shift_4x8,   inv_shift_8x4,   inv_shift_8x16,  inv_shift_16x8,
  inv_shift_16x32, inv_shift_32x16,
#if CONFIG_TX64X64
  inv_shift_32x64, inv_shift_64x32,
#endif  // CONFIG_TX64X64
  inv_shift_4x16,  inv_shift_16x4,  inv_shift_8x32,  inv_shift_32x8,
#if CONFIG_TX64X64
  inv_shift_16x64, inv_shift_64x16,
#endif  // CONFIG_TX64X64
};

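// Fill cfg with the flip flags, the column/row 1D configurations and the
// rounding shifts for the given 2D transform type and size.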
void av1_get_inv_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size,
                          TXFM_2D_FLIP_CFG *cfg) {
  assert(cfg != NULL);
  set_flip_cfg(tx_type, cfg);
  const TX_TYPE_1D tx_type_col = vtx_tab[tx_type];
  const TX_TYPE_1D tx_type_row = htx_tab[tx_type];
  cfg->col_cfg = inv_txfm_col_cfg_ls[tx_type_col][tx_size];
  cfg->row_cfg = inv_txfm_row_cfg_ls[tx_type_row][tx_size];
  cfg->shift = inv_txfm_shift_ls[tx_size];
}

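// Compute the per-stage bit ranges for the row and column passes from the
// 1D stage ranges, inv_start_range[tx_size], cfg->shift[0] and the bit depth.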
void av1_gen_inv_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
                             const TXFM_2D_FLIP_CFG *cfg, TX_SIZE tx_size,
                             int bd) {
  const int fwd_shift = inv_start_range[tx_size];
  const int8_t *shift = cfg->shift;
  // The i < MAX_TXFM_STAGE_NUM bound mutes the "above array bounds" warning.
  for (int i = 0; i < cfg->row_cfg->stage_num && i < MAX_TXFM_STAGE_NUM; ++i) {
    stage_range_row[i] = cfg->row_cfg->stage_range[i] + fwd_shift + bd + 1;
  }
  // The i < MAX_TXFM_STAGE_NUM bound mutes the "above array bounds" warning.
  for (int i = 0; i < cfg->col_cfg->stage_num && i < MAX_TXFM_STAGE_NUM; ++i) {
    stage_range_col[i] =
        cfg->col_cfg->stage_range[i] + fwd_shift + shift[0] + bd + 1;
  }
}

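// Core 2D inverse transform: run the row (horizontal) 1D transform on each
// input row, then the column (vertical) 1D transform on each column of the
// intermediate buffer, applying the per-pass rounding shifts, clamping, the
// lr/ud flips and, for 1:2 rectangles, the extra 1/sqrt(2) scaling. The
// result is added to the output with pixel clipping.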
static INLINE void inv_txfm2d_add_c(const int32_t *input, uint16_t *output,
                                    int stride, TXFM_2D_FLIP_CFG *cfg,
                                    int32_t *txfm_buf, TX_SIZE tx_size,
                                    int bd) {
  // Note when assigning txfm_size_col, we use the txfm_size from the
  // row configuration and vice versa. This is intentionally done to
  // accurately perform rectangular transforms. When the transform is
  // rectangular, the number of columns will be the same as the
  // txfm_size stored in the row cfg struct. It will make no difference
  // for square transforms.
  const int txfm_size_col = cfg->row_cfg->txfm_size;
  const int txfm_size_row = cfg->col_cfg->txfm_size;
  // Take the shift from the larger dimension in the rectangular case.
  const int8_t *shift = cfg->shift;
  const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
  int8_t stage_range_row[MAX_TXFM_STAGE_NUM];
  int8_t stage_range_col[MAX_TXFM_STAGE_NUM];
  assert(cfg->row_cfg->stage_num <= MAX_TXFM_STAGE_NUM);
  assert(cfg->col_cfg->stage_num <= MAX_TXFM_STAGE_NUM);
  av1_gen_inv_stage_range(stage_range_col, stage_range_row, cfg, tx_size, bd);

  const int8_t *cos_bit_col = cfg->col_cfg->cos_bit;
  const int8_t *cos_bit_row = cfg->row_cfg->cos_bit;
  const TxfmFunc txfm_func_col = inv_txfm_type_to_func(cfg->col_cfg->txfm_type);
  const TxfmFunc txfm_func_row = inv_txfm_type_to_func(cfg->row_cfg->txfm_type);

  // txfm_buf's length is txfm_size_row * txfm_size_col + 2 *
  // AOMMAX(txfm_size_row, txfm_size_col). It is used for intermediate data
  // buffering.
  int32_t *temp_in = txfm_buf;
  int32_t *temp_out = temp_in + AOMMAX(txfm_size_row, txfm_size_col);
  int32_t *buf = temp_out + AOMMAX(txfm_size_row, txfm_size_col);
  int32_t *buf_ptr = buf;
  int c, r;

  // Rows
  for (r = 0; r < txfm_size_row; ++r) {
    if (abs(rect_type) == 1) {
      for (c = 0; c < txfm_size_col; ++c) {
        temp_in[c] = (int32_t)dct_const_round_shift(input[c] * InvSqrt2);
      }
      txfm_func_row(temp_in, buf_ptr, cos_bit_row, stage_range_row);
    } else {
      txfm_func_row(input, buf_ptr, cos_bit_row, stage_range_row);
    }
    av1_round_shift_array(buf_ptr, txfm_size_col, -shift[0]);
    clamp_buf(buf_ptr, txfm_size_col, bd + 8);
    input += txfm_size_col;
    buf_ptr += txfm_size_col;
  }

  // Columns
  for (c = 0; c < txfm_size_col; ++c) {
    if (cfg->lr_flip == 0) {
      for (r = 0; r < txfm_size_row; ++r)
        temp_in[r] = buf[r * txfm_size_col + c];
    } else {
      // flip left right
      for (r = 0; r < txfm_size_row; ++r)
        temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)];
    }
    txfm_func_col(temp_in, temp_out, cos_bit_col, stage_range_col);
    av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
    clamp_buf(temp_out, txfm_size_row, bd + 1);
    if (cfg->ud_flip == 0) {
      for (r = 0; r < txfm_size_row; ++r) {
        output[r * stride + c] =
            highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd);
      }
    } else {
      // flip upside down
      for (r = 0; r < txfm_size_row; ++r) {
        output[r * stride + c] = highbd_clip_pixel_add(
            output[r * stride + c], temp_out[txfm_size_row - r - 1], bd);
      }
    }
  }
}

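// Look up the TXFM_2D_FLIP_CFG for (tx_type, tx_size) and run the core
// transform using the caller-provided txfm_buf scratch space.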
static INLINE void inv_txfm2d_add_facade(const int32_t *input, uint16_t *output,
                                         int stride, int32_t *txfm_buf,
                                         TX_TYPE tx_type, TX_SIZE tx_size,
                                         int bd) {
  TXFM_2D_FLIP_CFG cfg;
  av1_get_inv_txfm_cfg(tx_type, tx_size, &cfg);
  // Forward shift sum uses larger square size, to be consistent with what
  // av1_gen_inv_stage_range() does for inverse shifts.
  inv_txfm2d_add_c(input, output, stride, &cfg, txfm_buf, tx_size, bd);
}
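
// Per-block-size wrappers around inv_txfm2d_add_facade(). Each declares an
// aligned scratch buffer of width * height + 2 * AOMMAX(width, height)
// entries, the size inv_txfm2d_add_c() expects; e.g. the 16x8 wrapper uses
// 16 * 8 + 16 + 16 = 160 entries. For the rectangular sizes, the #else
// branches keep a transpose-based variant that is compiled out while
// NO_INV_TRANSPOSE is 1.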

void av1_inv_txfm2d_add_4x8_c(const int32_t *input, uint16_t *output,
                              int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[4 * 8 + 8 + 8]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_4X8, bd);
}

void av1_inv_txfm2d_add_8x4_c(const int32_t *input, uint16_t *output,
                              int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[8 * 4 + 8 + 8]);
#if NO_INV_TRANSPOSE
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X4, bd);
#else
  int32_t rinput[8 * 4];
  uint16_t routput[8 * 4];
  TX_SIZE tx_size = TX_8X4;
  TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
  TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
  int w = tx_size_wide[tx_size];
  int h = tx_size_high[tx_size];
  int rw = h;
  int rh = w;
  transpose_int32(rinput, rw, input, w, w, h);
  transpose_uint16(routput, rw, output, stride, w, h);
  inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
  transpose_uint16(output, stride, routput, rw, rw, rh);
#endif  // NO_INV_TRANSPOSE
}

void av1_inv_txfm2d_add_8x16_c(const int32_t *input, uint16_t *output,
                               int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[8 * 16 + 16 + 16]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X16, bd);
}

void av1_inv_txfm2d_add_16x8_c(const int32_t *input, uint16_t *output,
                               int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[16 * 8 + 16 + 16]);
#if NO_INV_TRANSPOSE
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X8, bd);
#else
  int32_t rinput[16 * 8];
  uint16_t routput[16 * 8];
  TX_SIZE tx_size = TX_16X8;
  TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
  TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
  int w = tx_size_wide[tx_size];
  int h = tx_size_high[tx_size];
  int rw = h;
  int rh = w;
  transpose_int32(rinput, rw, input, w, w, h);
  transpose_uint16(routput, rw, output, stride, w, h);
  inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
  transpose_uint16(output, stride, routput, rw, rw, rh);
#endif  // NO_INV_TRANSPOSE
}

void av1_inv_txfm2d_add_16x32_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[16 * 32 + 32 + 32]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X32, bd);
}

void av1_inv_txfm2d_add_32x16_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[32 * 16 + 32 + 32]);
#if NO_INV_TRANSPOSE
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X16, bd);
#else
  int32_t rinput[32 * 16];
  uint16_t routput[32 * 16];
  TX_SIZE tx_size = TX_32X16;
  TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
  TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
  int w = tx_size_wide[tx_size];
  int h = tx_size_high[tx_size];
  int rw = h;
  int rh = w;
  transpose_int32(rinput, rw, input, w, w, h);
  transpose_uint16(routput, rw, output, stride, w, h);
  inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
  transpose_uint16(output, stride, routput, rw, rw, rh);
#endif  // NO_INV_TRANSPOSE
}

void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output,
                              int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[4 * 4 + 4 + 4]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_4X4, bd);
}

void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output,
                              int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[8 * 8 + 8 + 8]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X8, bd);
}

void av1_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[16 * 16 + 16 + 16]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X16, bd);
}

void av1_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[32 * 32 + 32 + 32]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X32, bd);
}

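// 64-point transforms: only coefficients in the first 32 rows and first 32
// columns are coded, so each wrapper below copies the input into a
// zero-extended buffer of the full transform size before inverting.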
#if CONFIG_TX64X64
void av1_inv_txfm2d_add_64x64_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  // TODO(urvang): Can the same array be reused, instead of using a new array?
  // Remap 32x32 input into a modified 64x64 input by:
  // - Copying the input values into the top-left 32x32 locations.
  // - Setting the rest of the locations to 0.
  int32_t mod_input[64 * 64];
  for (int row = 0; row < 32; ++row) {
    memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input));
    memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input));
  }
  memset(mod_input + 32 * 64, 0, 32 * 64 * sizeof(*mod_input));
  DECLARE_ALIGNED(32, int, txfm_buf[64 * 64 + 64 + 64]);
  inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_64X64,
                        bd);
}

void av1_inv_txfm2d_add_64x32_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  // Remap 32x32 input into a modified 64x32 input by:
  // - Copying the input values into the top-left 32x32 locations.
  // - Setting the rest of the locations to 0.
  int32_t mod_input[64 * 32];
  for (int row = 0; row < 32; ++row) {
    memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input));
    memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input));
  }
  DECLARE_ALIGNED(32, int, txfm_buf[64 * 32 + 64 + 64]);
#if NO_INV_TRANSPOSE
  inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_64X32,
                        bd);
#else
  int32_t rinput[64 * 32];
  uint16_t routput[64 * 32];
  TX_SIZE tx_size = TX_64X32;
  TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
  TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
  int w = tx_size_wide[tx_size];
  int h = tx_size_high[tx_size];
  int rw = h;
  int rh = w;
  transpose_int32(rinput, rw, mod_input, w, w, h);
  transpose_uint16(routput, rw, output, stride, w, h);
  inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
  transpose_uint16(output, stride, routput, rw, rw, rh);
#endif  // NO_INV_TRANSPOSE
}

void av1_inv_txfm2d_add_32x64_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  // Remap 32x32 input into a modified 32x64 input by:
  // - Copying the input values into the top-left 32x32 locations.
  // - Setting the rest of the locations to 0.
  int32_t mod_input[32 * 64];
  memcpy(mod_input, input, 32 * 32 * sizeof(*mod_input));
  memset(mod_input + 32 * 32, 0, 32 * 32 * sizeof(*mod_input));
  DECLARE_ALIGNED(32, int, txfm_buf[64 * 32 + 64 + 64]);
  inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_32X64,
                        bd);
}

void av1_inv_txfm2d_add_16x64_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  // Remap 16x32 input into a modified 16x64 input by:
  // - Copying the input values into the top-left 16x32 locations.
  // - Setting the rest of the locations to 0.
  int32_t mod_input[16 * 64];
  memcpy(mod_input, input, 16 * 32 * sizeof(*mod_input));
  memset(mod_input + 16 * 32, 0, 16 * 32 * sizeof(*mod_input));
  DECLARE_ALIGNED(32, int, txfm_buf[16 * 64 + 64 + 64]);
  inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_16X64,
                        bd);
}

void av1_inv_txfm2d_add_64x16_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  // Remap 32x16 input into a modified 64x16 input by:
  // - Copying the input values into the top-left 32x16 locations.
  // - Setting the rest of the locations to 0.
  int32_t mod_input[64 * 16];
  for (int row = 0; row < 16; ++row) {
    memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input));
    memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input));
  }
  DECLARE_ALIGNED(32, int, txfm_buf[16 * 64 + 64 + 64]);
#if NO_INV_TRANSPOSE
  inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_64X16,
                        bd);
#else
  int32_t rinput[16 * 64];
  uint16_t routput[16 * 64];
  TX_SIZE tx_size = TX_64X16;
  TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
  TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
  int w = tx_size_wide[tx_size];
  int h = tx_size_high[tx_size];
  int rw = h;
  int rh = w;
  transpose_int32(rinput, rw, mod_input, w, w, h);
  transpose_uint16(routput, rw, output, stride, w, h);
  inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
  transpose_uint16(output, stride, routput, rw, rw, rh);
#endif  // NO_INV_TRANSPOSE
}
#endif  // CONFIG_TX64X64
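
// 1:4 aspect-ratio block sizes; these follow the same pattern as the 1:2
// rectangular wrappers above.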

void av1_inv_txfm2d_add_4x16_c(const int32_t *input, uint16_t *output,
                               int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[4 * 16 + 16 + 16]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_4X16, bd);
}

void av1_inv_txfm2d_add_16x4_c(const int32_t *input, uint16_t *output,
                               int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[4 * 16 + 16 + 16]);
#if NO_INV_TRANSPOSE
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X4, bd);
#else
  int32_t rinput[4 * 16];
  uint16_t routput[4 * 16];
  TX_SIZE tx_size = TX_16X4;
  TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
  TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
  int w = tx_size_wide[tx_size];
  int h = tx_size_high[tx_size];
  int rw = h;
  int rh = w;
  transpose_int32(rinput, rw, input, w, w, h);
  transpose_uint16(routput, rw, output, stride, w, h);
  inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
  transpose_uint16(output, stride, routput, rw, rw, rh);
#endif  // NO_INV_TRANSPOSE
}

void av1_inv_txfm2d_add_8x32_c(const int32_t *input, uint16_t *output,
                               int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[8 * 32 + 32 + 32]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X32, bd);
}

void av1_inv_txfm2d_add_32x8_c(const int32_t *input, uint16_t *output,
                               int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[8 * 32 + 32 + 32]);
#if NO_INV_TRANSPOSE
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X8, bd);
#else
  int32_t rinput[8 * 32];
  uint16_t routput[8 * 32];
  TX_SIZE tx_size = TX_32X8;
  TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
  TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
  int w = tx_size_wide[tx_size];
  int h = tx_size_high[tx_size];
  int rw = h;
  int rh = w;
  transpose_int32(rinput, rw, input, w, w, h);
  transpose_uint16(routput, rw, output, stride, w, h);
  inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
  transpose_uint16(output, stride, routput, rw, rw, rh);
#endif  // NO_INV_TRANSPOSE
}