/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

12 13
#include <assert.h>

14
#include "./aom_dsp_rtcd.h"
Yaowu Xu's avatar
Yaowu Xu committed
15
#include "./av1_rtcd.h"
16
#include "aom_dsp/txfm_common.h"
17
#include "av1/common/enums.h"
Yaowu Xu's avatar
Yaowu Xu committed
18
#include "av1/common/av1_txfm.h"
19 20
#include "av1/encoder/av1_fwd_txfm1d.h"
#include "av1/encoder/av1_fwd_txfm1d_cfg.h"
Angie Chiang's avatar
Angie Chiang committed
21

22 23
#define NO_FWD_TRANSPOSE 1

Yaowu Xu's avatar
Yaowu Xu committed
24
// Maps a 1-D transform type to the forward transform kernel implementing it.
//
// Returns the matching kernel function pointer. For any type not listed below
// (including the 64-point types when CONFIG_TX64X64 is disabled) the function
// asserts; if assertions are compiled out it returns NULL.
static INLINE TxfmFunc fwd_txfm_type_to_func(TXFM_TYPE txfm_type) {
  switch (txfm_type) {
    case TXFM_TYPE_DCT4: return av1_fdct4_new;
    case TXFM_TYPE_DCT8: return av1_fdct8_new;
    case TXFM_TYPE_DCT16: return av1_fdct16_new;
    case TXFM_TYPE_DCT32: return av1_fdct32_new;
#if CONFIG_TX64X64
    case TXFM_TYPE_DCT64: return av1_fdct64_new;
#endif  // CONFIG_TX64X64
    case TXFM_TYPE_ADST4: return av1_fadst4_new;
    case TXFM_TYPE_ADST8: return av1_fadst8_new;
    case TXFM_TYPE_ADST16: return av1_fadst16_new;
    case TXFM_TYPE_ADST32: return av1_fadst32_new;
    case TXFM_TYPE_IDENTITY4: return av1_fidentity4_c;
    case TXFM_TYPE_IDENTITY8: return av1_fidentity8_c;
    case TXFM_TYPE_IDENTITY16: return av1_fidentity16_c;
    case TXFM_TYPE_IDENTITY32: return av1_fidentity32_c;
#if CONFIG_TX64X64
    case TXFM_TYPE_IDENTITY64: return av1_fidentity64_c;
#endif  // CONFIG_TX64X64
    default: assert(0); return NULL;
  }
}

48 49 50
void av1_gen_fwd_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
                             const TXFM_2D_FLIP_CFG *cfg, int bd) {
  // Take the shift from the larger dimension in the rectangular case.
51
  const int8_t *shift = cfg->shift;
52
  // i < MAX_TXFM_STAGE_NUM will mute above array bounds warning
Angie Chiang's avatar
Angie Chiang committed
53 54
  for (int i = 0; i < cfg->stage_num_col && i < MAX_TXFM_STAGE_NUM; ++i) {
    stage_range_col[i] = cfg->stage_range_col[i] + shift[0] + bd + 1;
55 56 57
  }

  // i < MAX_TXFM_STAGE_NUM will mute above array bounds warning
Angie Chiang's avatar
Angie Chiang committed
58 59
  for (int i = 0; i < cfg->stage_num_row && i < MAX_TXFM_STAGE_NUM; ++i) {
    stage_range_row[i] = cfg->stage_range_row[i] + shift[0] + shift[1] + bd + 1;
60 61 62
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
63
static INLINE void fwd_txfm2d_c(const int16_t *input, int32_t *output,
64
                                const int stride, const TXFM_2D_FLIP_CFG *cfg,
65
                                int32_t *buf, int bd) {
66
  int c, r;
67 68 69 70 71 72
  // Note when assigning txfm_size_col, we use the txfm_size from the
  // row configuration and vice versa. This is intentionally done to
  // accurately perform rectangular transforms. When the transform is
  // rectangular, the number of columns will be the same as the
  // txfm_size stored in the row cfg struct. It will make no difference
  // for square transforms.
Angie Chiang's avatar
Angie Chiang committed
73 74
  const int txfm_size_col = tx_size_wide[cfg->tx_size];
  const int txfm_size_row = tx_size_high[cfg->tx_size];
75
  // Take the shift from the larger dimension in the rectangular case.
76
  const int8_t *shift = cfg->shift;
77
  const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
78 79
  int8_t stage_range_col[MAX_TXFM_STAGE_NUM];
  int8_t stage_range_row[MAX_TXFM_STAGE_NUM];
Angie Chiang's avatar
Angie Chiang committed
80 81
  assert(cfg->stage_num_col <= MAX_TXFM_STAGE_NUM);
  assert(cfg->stage_num_row <= MAX_TXFM_STAGE_NUM);
82 83
  av1_gen_fwd_stage_range(stage_range_col, stage_range_row, cfg, bd);

84 85
  const int8_t cos_bit_col = cfg->cos_bit_col;
  const int8_t cos_bit_row = cfg->cos_bit_row;
Angie Chiang's avatar
Angie Chiang committed
86 87
  const TxfmFunc txfm_func_col = fwd_txfm_type_to_func(cfg->txfm_type_col);
  const TxfmFunc txfm_func_row = fwd_txfm_type_to_func(cfg->txfm_type_row);
Angie Chiang's avatar
Angie Chiang committed
88

89
  // use output buffer as temp buffer
clang-format's avatar
clang-format committed
90
  int32_t *temp_in = output;
91
  int32_t *temp_out = output + txfm_size_row;
Angie Chiang's avatar
Angie Chiang committed
92 93

  // Columns
94
  for (c = 0; c < txfm_size_col; ++c) {
95
    if (cfg->ud_flip == 0) {
96
      for (r = 0; r < txfm_size_row; ++r) temp_in[r] = input[r * stride + c];
97
    } else {
98
      for (r = 0; r < txfm_size_row; ++r)
99
        // flip upside down
100 101
        temp_in[r] = input[(txfm_size_row - r - 1) * stride + c];
    }
102
    av1_round_shift_array(temp_in, txfm_size_row, -shift[0]);
103
    txfm_func_col(temp_in, temp_out, cos_bit_col, stage_range_col);
104
    av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
105
    if (cfg->lr_flip == 0) {
106 107
      for (r = 0; r < txfm_size_row; ++r)
        buf[r * txfm_size_col + c] = temp_out[r];
108
    } else {
109
      for (r = 0; r < txfm_size_row; ++r)
110
        // flip from left to right
111
        buf[r * txfm_size_col + (txfm_size_col - c - 1)] = temp_out[r];
112
    }
Angie Chiang's avatar
Angie Chiang committed
113 114 115
  }

  // Rows
116 117 118
  for (r = 0; r < txfm_size_row; ++r) {
    txfm_func_row(buf + r * txfm_size_col, output + r * txfm_size_col,
                  cos_bit_row, stage_range_row);
119 120 121
    if (abs(rect_type) == 1) {
      // Multiply everything by Sqrt2 if the transform is rectangular and the
      // size difference is a factor of 2.
122 123
      for (c = 0; c < txfm_size_col; ++c)
        output[r * txfm_size_col + c] =
124
            (int32_t)fdct_round_shift(output[r * txfm_size_col + c] * Sqrt2);
125
    }
126
    av1_round_shift_array(output + r * txfm_size_col, txfm_size_col, -shift[2]);
Angie Chiang's avatar
Angie Chiang committed
127 128 129
  }
}

130
void av1_fwd_txfm2d_4x8_c(const int16_t *input, int32_t *output, int stride,
131
                          TX_TYPE tx_type, int bd) {
132
  DECLARE_ALIGNED(32, int32_t, txfm_buf[4 * 8]);
133 134 135 136 137
  TXFM_2D_FLIP_CFG cfg;
#if NO_FWD_TRANSPOSE
  av1_get_fwd_txfm_cfg(tx_type, TX_4X8, &cfg);
  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
#else
138
  int16_t rinput[4 * 8];
139 140
  TX_SIZE tx_size = TX_4X8;
  TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
141
  TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
142 143 144 145 146
  int w = tx_size_wide[tx_size];
  int h = tx_size_high[tx_size];
  int rw = h;
  int rh = w;
  transpose_int16(rinput, rw, input, stride, w, h);
147
  av1_get_fwd_txfm_cfg(rtx_type, rtx_size, &cfg);
148
  fwd_txfm2d_c(rinput, txfm_buf, rw, &cfg, output, bd);
149
  transpose_int32(output, w, txfm_buf, rw, rw, rh);
150
#endif  // NO_FWD_TRANSPOSE
151 152 153
}

void av1_fwd_txfm2d_8x4_c(const int16_t *input, int32_t *output, int stride,
154
                          TX_TYPE tx_type, int bd) {
155
  int32_t txfm_buf[8 * 4];
156 157
  TXFM_2D_FLIP_CFG cfg;
  av1_get_fwd_txfm_cfg(tx_type, TX_8X4, &cfg);
158
  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
159 160 161
}

void av1_fwd_txfm2d_8x16_c(const int16_t *input, int32_t *output, int stride,
162
                           TX_TYPE tx_type, int bd) {
163
  DECLARE_ALIGNED(32, int32_t, txfm_buf[8 * 16]);
164 165 166 167 168
  TXFM_2D_FLIP_CFG cfg;
#if NO_FWD_TRANSPOSE
  av1_get_fwd_txfm_cfg(tx_type, TX_8X16, &cfg);
  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
#else
169
  int16_t rinput[8 * 16];
170 171
  TX_SIZE tx_size = TX_8X16;
  TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
172
  TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
173 174 175 176 177
  int w = tx_size_wide[tx_size];
  int h = tx_size_high[tx_size];
  int rw = h;
  int rh = w;
  transpose_int16(rinput, rw, input, stride, w, h);
178
  av1_get_fwd_txfm_cfg(rtx_type, rtx_size, &cfg);
179
  fwd_txfm2d_c(rinput, txfm_buf, rw, &cfg, output, bd);
180
  transpose_int32(output, w, txfm_buf, rw, rw, rh);
181
#endif  // NO_FWD_TRANSPOSE
182 183 184
}

void av1_fwd_txfm2d_16x8_c(const int16_t *input, int32_t *output, int stride,
185
                           TX_TYPE tx_type, int bd) {
186
  int32_t txfm_buf[16 * 8];
187 188
  TXFM_2D_FLIP_CFG cfg;
  av1_get_fwd_txfm_cfg(tx_type, TX_16X8, &cfg);
189
  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
190 191 192
}

void av1_fwd_txfm2d_16x32_c(const int16_t *input, int32_t *output, int stride,
193
                            TX_TYPE tx_type, int bd) {
194
  DECLARE_ALIGNED(32, int32_t, txfm_buf[16 * 32]);
195 196 197 198 199
  TXFM_2D_FLIP_CFG cfg;
#if NO_FWD_TRANSPOSE
  av1_get_fwd_txfm_cfg(tx_type, TX_16X32, &cfg);
  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
#else
200
  int16_t rinput[16 * 32];
201 202
  TX_SIZE tx_size = TX_16X32;
  TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
203
  TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
204 205 206 207 208
  int w = tx_size_wide[tx_size];
  int h = tx_size_high[tx_size];
  int rw = h;
  int rh = w;
  transpose_int16(rinput, rw, input, stride, w, h);
209
  av1_get_fwd_txfm_cfg(rtx_type, rtx_size, &cfg);
210
  fwd_txfm2d_c(rinput, txfm_buf, rw, &cfg, output, bd);
211
  transpose_int32(output, w, txfm_buf, rw, rw, rh);
212
#endif  // NO_FWD_TRANSPOSE
213 214 215
}

void av1_fwd_txfm2d_32x16_c(const int16_t *input, int32_t *output, int stride,
216
                            TX_TYPE tx_type, int bd) {
217
  int32_t txfm_buf[32 * 16];
218 219
  TXFM_2D_FLIP_CFG cfg;
  av1_get_fwd_txfm_cfg(tx_type, TX_32X16, &cfg);
220
  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
221 222
}

223 224
// Forward 2-D transform for a TX_4X16 block.
//
// input:   source samples, read with the given stride (in elements).
// output:  receives the 4 * 16 coefficients.
// tx_type: 2-D transform type, forwarded to av1_get_fwd_txfm_cfg.
// bd:      forwarded to fwd_txfm2d_c.
//
// NO_FWD_TRANSPOSE is defined to 1 in this file, so the direct path is the
// one compiled; the #else path transposes the input, transforms with the
// rotated size/type and transposes the result back.
void av1_fwd_txfm2d_4x16_c(const int16_t *input, int32_t *output, int stride,
                           TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int32_t, txfm_buf[4 * 16]);
  TXFM_2D_FLIP_CFG cfg;
#if NO_FWD_TRANSPOSE
  av1_get_fwd_txfm_cfg(tx_type, TX_4X16, &cfg);
  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
#else
  int16_t rinput[4 * 16];
  TX_SIZE tx_size = TX_4X16;
  TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
  TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
  int w = tx_size_wide[tx_size];
  int h = tx_size_high[tx_size];
  int rw = h;
  int rh = w;
  transpose_int16(rinput, rw, input, stride, w, h);
  av1_get_fwd_txfm_cfg(rtx_type, rtx_size, &cfg);
  fwd_txfm2d_c(rinput, txfm_buf, rw, &cfg, output, bd);
  transpose_int32(output, w, txfm_buf, rw, rw, rh);
#endif  // NO_FWD_TRANSPOSE
}

// Forward 2-D transform for a TX_16X4 block.
//
// input:   source samples, read with the given stride (in elements).
// output:  receives the 16 * 4 coefficients.
// tx_type: 2-D transform type, forwarded to av1_get_fwd_txfm_cfg.
// bd:      forwarded to fwd_txfm2d_c.
void av1_fwd_txfm2d_16x4_c(const int16_t *input, int32_t *output, int stride,
                           TX_TYPE tx_type, int bd) {
  int32_t txfm_buf[16 * 4];
  TXFM_2D_FLIP_CFG cfg;
  av1_get_fwd_txfm_cfg(tx_type, TX_16X4, &cfg);
  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
}

// Forward 2-D transform for a TX_8X32 block.
//
// input:   source samples, read with the given stride (in elements).
// output:  receives the 8 * 32 coefficients.
// tx_type: 2-D transform type, forwarded to av1_get_fwd_txfm_cfg.
// bd:      forwarded to fwd_txfm2d_c.
//
// NO_FWD_TRANSPOSE is defined to 1 in this file, so the direct path is the
// one compiled; the #else path transposes the input, transforms with the
// rotated size/type and transposes the result back.
void av1_fwd_txfm2d_8x32_c(const int16_t *input, int32_t *output, int stride,
                           TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int32_t, txfm_buf[32 * 8]);
  TXFM_2D_FLIP_CFG cfg;
#if NO_FWD_TRANSPOSE
  av1_get_fwd_txfm_cfg(tx_type, TX_8X32, &cfg);
  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
#else
  int16_t rinput[32 * 8];
  TX_SIZE tx_size = TX_8X32;
  TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
  TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
  int w = tx_size_wide[tx_size];
  int h = tx_size_high[tx_size];
  int rw = h;
  int rh = w;
  transpose_int16(rinput, rw, input, stride, w, h);
  av1_get_fwd_txfm_cfg(rtx_type, rtx_size, &cfg);
  fwd_txfm2d_c(rinput, txfm_buf, rw, &cfg, output, bd);
  transpose_int32(output, w, txfm_buf, rw, rw, rh);
#endif  // NO_FWD_TRANSPOSE
}

// Forward 2-D transform for a TX_32X8 block.
//
// input:   source samples, read with the given stride (in elements).
// output:  receives the 32 * 8 coefficients.
// tx_type: 2-D transform type, forwarded to av1_get_fwd_txfm_cfg.
// bd:      forwarded to fwd_txfm2d_c.
void av1_fwd_txfm2d_32x8_c(const int16_t *input, int32_t *output, int stride,
                           TX_TYPE tx_type, int bd) {
  int32_t txfm_buf[32 * 8];
  TXFM_2D_FLIP_CFG cfg;
  av1_get_fwd_txfm_cfg(tx_type, TX_32X8, &cfg);
  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
}

Yaowu Xu's avatar
Yaowu Xu committed
285
void av1_fwd_txfm2d_4x4_c(const int16_t *input, int32_t *output, int stride,
286
                          TX_TYPE tx_type, int bd) {
287
  int32_t txfm_buf[4 * 4];
288 289
  TXFM_2D_FLIP_CFG cfg;
  av1_get_fwd_txfm_cfg(tx_type, TX_4X4, &cfg);
290
  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
Angie Chiang's avatar
Angie Chiang committed
291
}
292

Yaowu Xu's avatar
Yaowu Xu committed
293
void av1_fwd_txfm2d_8x8_c(const int16_t *input, int32_t *output, int stride,
294
                          TX_TYPE tx_type, int bd) {
295
  int32_t txfm_buf[8 * 8];
296 297
  TXFM_2D_FLIP_CFG cfg;
  av1_get_fwd_txfm_cfg(tx_type, TX_8X8, &cfg);
298
  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
299 300
}

Yaowu Xu's avatar
Yaowu Xu committed
301
void av1_fwd_txfm2d_16x16_c(const int16_t *input, int32_t *output, int stride,
302
                            TX_TYPE tx_type, int bd) {
303
  int32_t txfm_buf[16 * 16];
304 305
  TXFM_2D_FLIP_CFG cfg;
  av1_get_fwd_txfm_cfg(tx_type, TX_16X16, &cfg);
306
  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
307 308
}

Yaowu Xu's avatar
Yaowu Xu committed
309
void av1_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output, int stride,
310
                            TX_TYPE tx_type, int bd) {
311
  int32_t txfm_buf[32 * 32];
312 313
  TXFM_2D_FLIP_CFG cfg;
  av1_get_fwd_txfm_cfg(tx_type, TX_32X32, &cfg);
314
  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
315
}
Angie Chiang's avatar
Angie Chiang committed
316

317
#if CONFIG_TX64X64
Yaowu Xu's avatar
Yaowu Xu committed
318
void av1_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output, int stride,
319
                            TX_TYPE tx_type, int bd) {
320
  int32_t txfm_buf[64 * 64];
321 322
  TXFM_2D_FLIP_CFG cfg;
  av1_get_fwd_txfm_cfg(tx_type, TX_64X64, &cfg);
323
  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
324 325 326 327 328 329 330

  // Zero out top-right 32x32 area.
  for (int row = 0; row < 32; ++row) {
    memset(output + row * 64 + 32, 0, 32 * sizeof(*output));
  }
  // Zero out the bottom 64x32 area.
  memset(output + 32 * 64, 0, 32 * 64 * sizeof(*output));
331 332 333 334
  // Re-pack non-zero coeffs in the first 32x32 indices.
  for (int row = 1; row < 32; ++row) {
    memcpy(output + row * 32, output + row * 64, 32 * sizeof(*output));
  }
335 336
}

337
void av1_fwd_txfm2d_32x64_c(const int16_t *input, int32_t *output, int stride,
338
                            TX_TYPE tx_type, int bd) {
339
  DECLARE_ALIGNED(32, int32_t, txfm_buf[32 * 64]);
340 341 342 343 344
  TXFM_2D_FLIP_CFG cfg;
#if NO_FWD_TRANSPOSE
  av1_get_fwd_txfm_cfg(tx_type, TX_32X64, &cfg);
  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
#else
345 346 347 348 349 350 351 352 353 354 355 356
  int16_t rinput[64 * 32];
  TX_SIZE tx_size = TX_32X64;
  TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
  TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
  int w = tx_size_wide[tx_size];
  int h = tx_size_high[tx_size];
  int rw = h;
  int rh = w;
  transpose_int16(rinput, rw, input, stride, w, h);
  av1_get_fwd_txfm_cfg(rtx_type, rtx_size, &cfg);
  fwd_txfm2d_c(rinput, txfm_buf, rw, &cfg, output, bd);
  transpose_int32(output, w, txfm_buf, rw, rw, rh);
357
#endif  // NO_FWD_TRANSPOSE
358 359 360

  // Zero out the bottom 32x32 area.
  memset(output + 32 * 32, 0, 32 * 32 * sizeof(*output));
361
  // Note: no repacking needed here.
362 363 364
}

void av1_fwd_txfm2d_64x32_c(const int16_t *input, int32_t *output, int stride,
365
                            TX_TYPE tx_type, int bd) {
366
  int32_t txfm_buf[64 * 32];
367 368
  TXFM_2D_FLIP_CFG cfg;
  av1_get_fwd_txfm_cfg(tx_type, TX_64X32, &cfg);
369
  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
370 371 372 373 374

  // Zero out right 32x32 area.
  for (int row = 0; row < 32; ++row) {
    memset(output + row * 64 + 32, 0, 32 * sizeof(*output));
  }
375 376 377 378
  // Re-pack non-zero coeffs in the first 32x32 indices.
  for (int row = 1; row < 32; ++row) {
    memcpy(output + row * 32, output + row * 64, 32 * sizeof(*output));
  }
379
}
380 381 382

// Forward 2-D transform for a TX_16X64 block, keeping only the first 32 rows
// of coefficients.
//
// input:   source samples, read with the given stride (in elements).
// output:  must hold 16 * 64 entries; on return entries [16 * 32, 16 * 64)
//          are zero.
// tx_type: 2-D transform type, forwarded to av1_get_fwd_txfm_cfg.
// bd:      forwarded to fwd_txfm2d_c.
//
// NO_FWD_TRANSPOSE is defined to 1 in this file, so the direct path is the
// one compiled; the #else path transposes the input, transforms with the
// rotated size/type and transposes the result back.
void av1_fwd_txfm2d_16x64_c(const int16_t *input, int32_t *output, int stride,
                            TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int32_t, txfm_buf[64 * 16]);
  TXFM_2D_FLIP_CFG cfg;
#if NO_FWD_TRANSPOSE
  av1_get_fwd_txfm_cfg(tx_type, TX_16X64, &cfg);
  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
#else
  int16_t rinput[64 * 16];
  TX_SIZE tx_size = TX_16X64;
  TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
  TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
  int w = tx_size_wide[tx_size];
  int h = tx_size_high[tx_size];
  int rw = h;
  int rh = w;
  transpose_int16(rinput, rw, input, stride, w, h);
  av1_get_fwd_txfm_cfg(rtx_type, rtx_size, &cfg);
  fwd_txfm2d_c(rinput, txfm_buf, rw, &cfg, output, bd);
  transpose_int32(output, w, txfm_buf, rw, rw, rh);
#endif  // NO_FWD_TRANSPOSE
  // Zero out the bottom 16x32 area.
  memset(output + 16 * 32, 0, 16 * 32 * sizeof(*output));
  // Note: no repacking needed here; the row pitch is already 16.
}

// Forward 2-D transform for a TX_64X16 block, keeping only the left 32
// columns of coefficients.
//
// input:   source samples, read with the given stride (in elements).
// output:  must hold 64 * 16 entries. On return the first 32 * 16 entries
//          hold the retained coefficients packed with a row pitch of 32.
// tx_type: 2-D transform type, forwarded to av1_get_fwd_txfm_cfg.
// bd:      forwarded to fwd_txfm2d_c.
//
// NOTE(review): entries [32 * 16, 64 * 16) are not rewritten by the repack
// step; presumably callers only read the first 32 * 16 entries -- confirm.
void av1_fwd_txfm2d_64x16_c(const int16_t *input, int32_t *output, int stride,
                            TX_TYPE tx_type, int bd) {
  int32_t txfm_buf[64 * 16];
  TXFM_2D_FLIP_CFG cfg;
  av1_get_fwd_txfm_cfg(tx_type, TX_64X16, &cfg);
  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
  // Zero out right 32x16 area.
  for (int row = 0; row < 16; ++row) {
    memset(output + row * 64 + 32, 0, 32 * sizeof(*output));
  }
  // Re-pack non-zero coeffs in the first 32x16 indices. Row 0 is already in
  // place, and for row >= 1 the source and destination ranges do not overlap.
  for (int row = 1; row < 16; ++row) {
    memcpy(output + row * 32, output + row * 64, 32 * sizeof(*output));
  }
}
422 423
#endif  // CONFIG_TX64X64

424 425 426 427 428 429 430 431 432 433 434 435 436 437 438
// Per-transform-size shift triples { shift[0], shift[1], shift[2] }.
// fwd_txfm2d_c passes the negated values to av1_round_shift_array:
// shift[0] on the input of the column pass, shift[1] on its output, and
// shift[2] on the output of the row pass.
static const int8_t fwd_shift_4x4[3] = { 2, 0, 0 };
static const int8_t fwd_shift_8x8[3] = { 2, -1, 0 };
static const int8_t fwd_shift_16x16[3] = { 2, -2, 0 };
static const int8_t fwd_shift_32x32[3] = { 2, -4, 0 };
#if CONFIG_TX64X64
static const int8_t fwd_shift_64x64[3] = { 0, -2, -2 };
#endif
static const int8_t fwd_shift_4x8[3] = { 2, -1, 0 };
static const int8_t fwd_shift_8x4[3] = { 2, -1, 0 };
static const int8_t fwd_shift_8x16[3] = { 2, -2, 0 };
static const int8_t fwd_shift_16x8[3] = { 2, -2, 0 };
static const int8_t fwd_shift_16x32[3] = { 2, -4, 0 };
static const int8_t fwd_shift_32x16[3] = { 2, -4, 0 };
#if CONFIG_TX64X64
static const int8_t fwd_shift_32x64[3] = { 0, -2, -2 };
static const int8_t fwd_shift_64x32[3] = { 2, -4, -2 };
#endif
static const int8_t fwd_shift_4x16[3] = { 2, -1, 0 };
static const int8_t fwd_shift_16x4[3] = { 2, -1, 0 };
static const int8_t fwd_shift_8x32[3] = { 2, -2, 0 };
static const int8_t fwd_shift_32x8[3] = { 2, -2, 0 };
#if CONFIG_TX64X64
static const int8_t fwd_shift_16x64[3] = { 0, -2, 0 };
static const int8_t fwd_shift_64x16[3] = { 2, -4, 0 };
#endif  // CONFIG_TX64X64

// Shift triple for each transform size; av1_get_fwd_txfm_cfg indexes this
// table with a TX_SIZE value.
// NOTE(review): the entry order presumably mirrors the TX_SIZE enum order --
// confirm against av1/common/enums.h before reordering.
const int8_t *fwd_txfm_shift_ls[TX_SIZES_ALL] = {
  // Square sizes.
  fwd_shift_4x4,
  fwd_shift_8x8,
  fwd_shift_16x16,
  fwd_shift_32x32,
#if CONFIG_TX64X64
  fwd_shift_64x64,
#endif  // CONFIG_TX64X64
  // 1:2 and 2:1 sizes.
  fwd_shift_4x8,
  fwd_shift_8x4,
  fwd_shift_8x16,
  fwd_shift_16x8,
  fwd_shift_16x32,
  fwd_shift_32x16,
#if CONFIG_TX64X64
  fwd_shift_32x64,
  fwd_shift_64x32,
#endif  // CONFIG_TX64X64
  // 1:4 and 4:1 sizes.
  fwd_shift_4x16,
  fwd_shift_16x4,
  fwd_shift_8x32,
  fwd_shift_32x8,
#if CONFIG_TX64X64
  fwd_shift_16x64,
  fwd_shift_64x16,
#endif  // CONFIG_TX64X64
};

Angie Chiang's avatar
Angie Chiang committed
466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482
// Cosine bit precision for the column pass, indexed [txw_idx][txh_idx] as in
// av1_get_fwd_txfm_cfg.
// NOTE(review): the 0 entries presumably mark width/height combinations that
// are never requested -- confirm.
const int8_t fwd_cos_bit_col[MAX_TXWH_IDX /*txw_idx*/]
                            [MAX_TXWH_IDX /*txh_idx*/] = {
  { 13, 13, 13, 0, 0 },    // txw_idx 0
  { 13, 13, 13, 12, 0 },   // txw_idx 1
  { 13, 13, 13, 12, 13 },  // txw_idx 2
  { 0, 13, 13, 12, 13 },   // txw_idx 3
  { 0, 0, 13, 12, 13 }     // txw_idx 4
};

// Cosine bit precision for the row pass, indexed [txw_idx][txh_idx] as in
// av1_get_fwd_txfm_cfg.
const int8_t fwd_cos_bit_row[MAX_TXWH_IDX /*txw_idx*/]
                            [MAX_TXWH_IDX /*txh_idx*/] = {
  { 13, 13, 13, 0, 0 },    // txw_idx 0
  { 13, 13, 13, 12, 0 },   // txw_idx 1
  { 13, 13, 12, 13, 12 },  // txw_idx 2
  { 0, 12, 13, 12, 11 },   // txw_idx 3
  { 0, 0, 12, 11, 10 }     // txw_idx 4
};
483

Angie Chiang's avatar
Angie Chiang committed
484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503
// Per-stage range tables, one entry per transform stage.
// set_fwd_txfm_non_scale_range halves these with rounding, (v + 1) >> 1, to
// obtain the stage ranges stored in the transform configuration.

// Forward DCT, 4- to 64-point.
const int8_t fdct4_range_mult2[4] = {
  0, 2, 3, 3
};
const int8_t fdct8_range_mult2[6] = {
  0, 2, 4, 5, 5, 5
};
const int8_t fdct16_range_mult2[8] = {
  0, 2, 4, 6, 7, 7, 7, 7
};
const int8_t fdct32_range_mult2[10] = {
  0, 2, 4, 6, 8, 9, 9, 9, 9, 9
};
const int8_t fdct64_range_mult2[12] = {
  0, 2, 4, 6, 8, 10, 11, 11, 11, 11, 11, 11
};

// Forward ADST, 4- to 32-point.
const int8_t fadst4_range_mult2[6] = {
  0, 0, 1, 3, 3, 3
};
const int8_t fadst8_range_mult2[8] = {
  0, 0, 1, 3, 3, 5, 5, 5
};
const int8_t fadst16_range_mult2[10] = {
  0, 0, 1, 3, 3, 5, 5, 7, 7, 7
};
const int8_t fadst32_range_mult2[12] = {
  0, 0, 1, 3, 3, 5, 5, 7, 7, 9, 9, 9
};

// Column-pass contribution added to every row-pass entry, indexed by txh_idx
// in set_fwd_txfm_non_scale_range.
const int8_t max_fwd_range_mult2_col[5] = {
  3, 5, 7, 9, 11
};

// Forward identity transforms (single stage), 4- to 64-point.
const int8_t fidtx4_range_mult2[1] = {
  1
};
const int8_t fidtx8_range_mult2[1] = {
  2
};
const int8_t fidtx16_range_mult2[1] = {
  3
};
const int8_t fidtx32_range_mult2[1] = {
  4
};
const int8_t fidtx64_range_mult2[1] = {
  5
};

Angie Chiang's avatar
Angie Chiang committed
504 505 506 507 508 509 510
// Row-pass stage range used when the row transform has a single stage (the
// identity case in set_fwd_txfm_non_scale_range), indexed [txw_idx][txh_idx].
const int8_t fwd_idtx_range_row[MAX_TXWH_IDX /*txw_idx*/]
                               [MAX_TXWH_IDX /*txh_idx*/] = {
  { 2, 4, 5, 0, 0 },  // txw_idx 0
  { 3, 4, 5, 6, 0 },  // txw_idx 1
  { 4, 5, 6, 7, 8 },  // txw_idx 2
  { 0, 5, 6, 7, 8 },  // txw_idx 3
  { 0, 0, 7, 8, 9 }   // txw_idx 4
};
Angie Chiang's avatar
Angie Chiang committed
511 512 513 514 515 516 517 518 519 520

// Range table for each 1-D transform type; set_fwd_txfm_non_scale_range
// indexes this list with cfg->txfm_type_col / cfg->txfm_type_row.
// NOTE(review): the entry order presumably mirrors the TXFM_TYPE enum order --
// confirm before reordering.
const int8_t *fwd_txfm_range_mult2_list[TXFM_TYPES] = {
  // DCT
  fdct4_range_mult2,
  fdct8_range_mult2,
  fdct16_range_mult2,
  fdct32_range_mult2,
  fdct64_range_mult2,
  // ADST
  fadst4_range_mult2,
  fadst8_range_mult2,
  fadst16_range_mult2,
  fadst32_range_mult2,
  // Identity
  fidtx4_range_mult2,
  fidtx8_range_mult2,
  fidtx16_range_mult2,
  fidtx32_range_mult2,
  fidtx64_range_mult2
};

// Fills cfg->stage_range_col and cfg->stage_range_row with the unscaled
// per-stage ranges for the configured 1-D transform types.
//
// Reads cfg->tx_size, cfg->txfm_type_col/row and cfg->stage_num_col/row, so
// those fields must be set before the call. Both range arrays are zeroed
// first; a pass whose type is TXFM_TYPE_INVALID stays all-zero.
static INLINE void set_fwd_txfm_non_scale_range(TXFM_2D_FLIP_CFG *cfg) {
  const int txw_idx = get_txw_idx(cfg->tx_size);
  const int txh_idx = get_txh_idx(cfg->tx_size);
  av1_zero(cfg->stage_range_col);
  av1_zero(cfg->stage_range_row);

  if (cfg->txfm_type_col != TXFM_TYPE_INVALID) {
    int stage_num_col = cfg->stage_num_col;
    const int8_t *range_mult2_col =
        fwd_txfm_range_mult2_list[cfg->txfm_type_col];
    // Halve the table entries, rounding up.
    for (int i = 0; i < stage_num_col; ++i)
      cfg->stage_range_col[i] = (range_mult2_col[i] + 1) >> 1;
  }

  if (cfg->txfm_type_row != TXFM_TYPE_INVALID) {
    int stage_num_row = cfg->stage_num_row;
    const int8_t *range_mult2_row =
        fwd_txfm_range_mult2_list[cfg->txfm_type_row];
    if (stage_num_row > 1) {
      // non identity: the row pass starts from the column-pass maximum for
      // this height, then adds its own per-stage entry before halving.
      for (int i = 0; i < stage_num_row; ++i)
        cfg->stage_range_row[i] =
            (max_fwd_range_mult2_col[txh_idx] + range_mult2_row[i] + 1) >> 1;
    } else {
      // identity
      // TODO(angiebird): check if this config is correct
      cfg->stage_range_row[0] = fwd_idtx_range_row[txw_idx][txh_idx];
    }
  }
}

551 552 553
// Builds the forward 2-D transform configuration for a transform type and
// size.
//
// tx_type: 2-D transform type; selects the flip flags (via set_flip_cfg) and
//          the vertical/horizontal 1-D types (via vtx_tab / htx_tab).
// tx_size: transform size; selects the shift triple, the cos bits and,
//          together with the 1-D types, the 1-D kernels.
// cfg:     output; must not be NULL. Every field read by fwd_txfm2d_c is
//          filled in here.
void av1_get_fwd_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size,
                          TXFM_2D_FLIP_CFG *cfg) {
  assert(cfg != NULL);
  cfg->tx_size = tx_size;
  set_flip_cfg(tx_type, cfg);
  const TX_TYPE_1D tx_type_1d_col = vtx_tab[tx_type];
  const TX_TYPE_1D tx_type_1d_row = htx_tab[tx_type];
  // Width/height expressed as log2 offsets from entry 0 of the log2 tables.
  const int txw_idx = tx_size_wide_log2[tx_size] - tx_size_wide_log2[0];
  const int txh_idx = tx_size_high_log2[tx_size] - tx_size_high_log2[0];
  cfg->shift = fwd_txfm_shift_ls[tx_size];
  cfg->cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
  cfg->cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
  // The column transform runs over the block height, the row transform over
  // the block width, hence the swapped indices.
  cfg->txfm_type_col = av1_txfm_type_ls[txh_idx][tx_type_1d_col];
  cfg->txfm_type_row = av1_txfm_type_ls[txw_idx][tx_type_1d_row];
  cfg->stage_num_col = av1_txfm_stage_num_list[cfg->txfm_type_col];
  cfg->stage_num_row = av1_txfm_stage_num_list[cfg->txfm_type_row];
  // Must come last: it reads the types and stage counts set above.
  set_fwd_txfm_non_scale_range(cfg);
}