av1_fwd_txfm2d_sse4.c 3.92 KB
Newer Older
1
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

Yaowu Xu's avatar
Yaowu Xu committed
12
#include "./av1_rtcd.h"
13
#include "av1/common/enums.h"
Yaowu Xu's avatar
Yaowu Xu committed
14 15
#include "av1/common/av1_txfm.h"
#include "av1/common/x86/av1_txfm1d_sse4.h"
Angie Chiang's avatar
Angie Chiang committed
16

Yaowu Xu's avatar
Yaowu Xu committed
17
// Widens a square block of 16-bit samples to 32 bits while packing it.
//
// Reads txfm1d_size rows of txfm1d_size samples from `input`, where
// consecutive rows are `stride` elements apart, and stores them in `output`
// with a row pitch of exactly txfm1d_size elements.
static INLINE void int16_array_with_stride_to_int32_array_without_stride(
    const int16_t *input, int stride, int32_t *output, int txfm1d_size) {
  for (int row = 0; row < txfm1d_size; ++row) {
    const int16_t *src_row = input + row * stride;
    int32_t *dst_row = output + row * txfm1d_size;
    for (int col = 0; col < txfm1d_size; ++col) {
      dst_row[col] = (int32_t)src_row[col];
    }
  }
}

// Function-pointer type for the 1-D transform kernels dispatched in this
// file. A kernel takes a read-only __m128i `input` pointer, a writable
// __m128i `output` pointer, and the `cos_bit` / `stage_range` int8_t pointers
// taken from the 1-D transform configuration.
typedef void (*TxfmFuncSSE2)(const __m128i *input, __m128i *output,
                             const int8_t *cos_bit, const int8_t *stage_range);

Yaowu Xu's avatar
Yaowu Xu committed
30
// Selects the SSE4.1 1-D forward transform kernel for `txfm_type`.
//
// Only TXFM_TYPE_DCT32 and TXFM_TYPE_ADST32 are mapped. Any other type trips
// assert(0); when asserts are compiled out the function returns NULL, so the
// caller must not invoke the result for unmapped types.
static INLINE TxfmFuncSSE2 fwd_txfm_type_to_func(TXFM_TYPE txfm_type) {
  if (txfm_type == TXFM_TYPE_DCT32) {
    return av1_fdct32_new_sse4_1;
  }
  if (txfm_type == TXFM_TYPE_ADST32) {
    return av1_fadst32_new_sse4_1;
  }
  assert(0);
  return NULL;
}

Yaowu Xu's avatar
Yaowu Xu committed
39
static INLINE void fwd_txfm2d_sse4_1(const int16_t *input, int32_t *output,
40 41
                                     const int stride,
                                     const TXFM_2D_FLIP_CFG *cfg,
42
                                     int32_t *txfm_buf) {
43 44 45 46 47
  // TODO(sarahparker) This does not currently support rectangular transforms
  // and will break without splitting txfm_size out into row and col size.
  // Rectangular transforms use c code only, so it should be ok for now.
  // It will be corrected when there are sse implementations for rectangular
  // transforms.
Sarah Parker's avatar
Sarah Parker committed
48
  assert(cfg->row_cfg->txfm_size == cfg->col_cfg->txfm_size);
49 50 51 52 53 54 55 56 57 58
  const int txfm_size = cfg->row_cfg->txfm_size;
  const int8_t *shift = cfg->row_cfg->shift;
  const int8_t *stage_range_col = cfg->col_cfg->stage_range;
  const int8_t *stage_range_row = cfg->row_cfg->stage_range;
  const int8_t *cos_bit_col = cfg->col_cfg->cos_bit;
  const int8_t *cos_bit_row = cfg->row_cfg->cos_bit;
  const TxfmFuncSSE2 txfm_func_col =
      fwd_txfm_type_to_func(cfg->col_cfg->txfm_type);
  const TxfmFuncSSE2 txfm_func_row =
      fwd_txfm_type_to_func(cfg->row_cfg->txfm_type);
Angie Chiang's avatar
Angie Chiang committed
59 60 61 62 63 64 65 66

  __m128i *buf_128 = (__m128i *)txfm_buf;
  __m128i *out_128 = (__m128i *)output;
  int num_per_128 = 4;
  int txfm2d_size_128 = txfm_size * txfm_size / num_per_128;

  int16_array_with_stride_to_int32_array_without_stride(input, stride, txfm_buf,
                                                        txfm_size);
67
  round_shift_array_32_sse4_1(buf_128, out_128, txfm2d_size_128, -shift[0]);
Angie Chiang's avatar
Angie Chiang committed
68
  txfm_func_col(out_128, buf_128, cos_bit_col, stage_range_col);
69
  round_shift_array_32_sse4_1(buf_128, out_128, txfm2d_size_128, -shift[1]);
Angie Chiang's avatar
Angie Chiang committed
70 71
  transpose_32(txfm_size, out_128, buf_128);
  txfm_func_row(buf_128, out_128, cos_bit_row, stage_range_row);
72
  round_shift_array_32_sse4_1(out_128, buf_128, txfm2d_size_128, -shift[2]);
Angie Chiang's avatar
Angie Chiang committed
73 74 75
  transpose_32(txfm_size, buf_128, out_128);
}

Yaowu Xu's avatar
Yaowu Xu committed
76 77
void av1_fwd_txfm2d_32x32_sse4_1(const int16_t *input, int32_t *output,
                                 int stride, int tx_type, int bd) {
78
  DECLARE_ALIGNED(16, int32_t, txfm_buf[1024]);
Yaowu Xu's avatar
Yaowu Xu committed
79
  TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_32X32);
Angie Chiang's avatar
Angie Chiang committed
80
  (void)bd;
81
  fwd_txfm2d_sse4_1(input, output, stride, &cfg, txfm_buf);
Angie Chiang's avatar
Angie Chiang committed
82 83
}

84
#if CONFIG_TX64X64
// 64x64 forward 2-D transform entry point (SSE4.1 variant), compiled only
// when CONFIG_TX64X64 is enabled.
//
// Fetches the 64x64 configuration for `tx_type` and hands it, together with
// a 16-byte-aligned scratch buffer of 64 * 64 int32_t values, to
// fwd_txfm2d_sse4_1(). `bd` is accepted but not used.
//
// NOTE(review): fwd_txfm_type_to_func() in this file only maps
// TXFM_TYPE_DCT32 and TXFM_TYPE_ADST32 - confirm that the transform types in
// the 64x64 configuration are covered, otherwise the kernel lookup asserts
// (or yields NULL when asserts are disabled).
void av1_fwd_txfm2d_64x64_sse4_1(const int16_t *input, int32_t *output,
                                 int stride, int tx_type, int bd) {
  (void)bd;
  DECLARE_ALIGNED(16, int32_t, txfm_buf[64 * 64]);
  const TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_64x64_cfg(tx_type);
  fwd_txfm2d_sse4_1(input, output, stride, &cfg, txfm_buf);
}
#endif  // CONFIG_TX64X64