Commit fb77385f authored by Yi Luo's avatar Yi Luo Committed by Gerrit Code Review
Browse files

Merge "Remove unused copies of transform related source code" into nextgenv2

parents 7f6bf9c7 ea1167c3
......@@ -30,8 +30,6 @@ AV1_COMMON_SRCS-yes += common/filter.h
AV1_COMMON_SRCS-yes += common/filter.c
AV1_COMMON_SRCS-yes += common/idct.h
AV1_COMMON_SRCS-yes += common/idct.c
AV1_COMMON_SRCS-yes += common/av1_inv_txfm.h
AV1_COMMON_SRCS-yes += common/av1_inv_txfm.c
AV1_COMMON_SRCS-yes += common/loopfilter.h
AV1_COMMON_SRCS-yes += common/thread_common.h
AV1_COMMON_SRCS-yes += common/mv.h
......@@ -61,8 +59,6 @@ AV1_COMMON_SRCS-yes += common/common_data.h
AV1_COMMON_SRCS-yes += common/scan.c
AV1_COMMON_SRCS-yes += common/scan.h
# TODO(angiebird) the forward transform belongs under encoder/
AV1_COMMON_SRCS-$(CONFIG_AV1_ENCODER) += common/av1_fwd_txfm.h
AV1_COMMON_SRCS-$(CONFIG_AV1_ENCODER) += common/av1_fwd_txfm.c
AV1_COMMON_SRCS-yes += common/av1_txfm.h
AV1_COMMON_SRCS-yes += common/av1_fwd_txfm1d.h
AV1_COMMON_SRCS-yes += common/av1_fwd_txfm1d.c
......@@ -125,9 +121,6 @@ AV1_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idct_intrin_sse2.c
AV1_COMMON_SRCS-$(HAVE_AVX2) += common/x86/hybrid_inv_txfm_avx2.c
ifeq ($(CONFIG_AV1_ENCODER),yes)
AV1_COMMON_SRCS-$(HAVE_SSE2) += common/x86/av1_fwd_txfm_sse2.c
AV1_COMMON_SRCS-$(HAVE_SSE2) += common/x86/av1_fwd_dct32x32_impl_sse2.h
AV1_COMMON_SRCS-$(HAVE_SSE2) += common/x86/av1_fwd_txfm_impl_sse2.h
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/av1_txfm1d_sse4.h
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/av1_fwd_txfm1d_sse4.c
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/av1_fwd_txfm2d_sse4.c
......@@ -145,7 +138,4 @@ ifeq ($(CONFIG_FILTER_INTRA),yes)
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/filterintra_sse4.c
endif
AV1_COMMON_SRCS-$(HAVE_SSE2) += common/x86/av1_inv_txfm_sse2.c
AV1_COMMON_SRCS-$(HAVE_SSE2) += common/x86/av1_inv_txfm_sse2.h
$(eval $(call rtcd_h_template,av1_rtcd,av1/common/av1_rtcd_defs.pl))
This diff is collapsed.
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#ifndef AV1_COMMON_AV1_FWD_TXFM_H_
#define AV1_COMMON_AV1_FWD_TXFM_H_
#include "aom_dsp/txfm_common.h"
#include "aom_dsp/fwd_txfm.h"
// Forward transform entry point exported by this header; its definition is
// not part of this file. `input` is const-qualified, so results are expected
// in `output`.
// NOTE(review): presumably a 32-point 1-D forward DCT, and the effect of
// `round` is not visible here -- confirm against the implementation.
void av1_fdct32(const tran_high_t *input, tran_high_t *output, int round);
#endif // AV1_COMMON_AV1_FWD_TXFM_H_
This diff is collapsed.
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#ifndef AOM_DSP_INV_TXFM_H_
#define AOM_DSP_INV_TXFM_H_
#include <assert.h>
#include "./aom_config.h"
#include "aom_dsp/txfm_common.h"
#include "aom_dsp/inv_txfm.h"
#include "aom_ports/mem.h"
#ifdef __cplusplus
extern "C" {
#endif
// Returns `input` unchanged. When CONFIG_COEFFICIENT_RANGE_CHECKING is
// enabled it additionally asserts that the value lies within
// [INT16_MIN, INT16_MAX]; otherwise no check is performed.
static INLINE tran_high_t check_range(tran_high_t input) {
#if CONFIG_COEFFICIENT_RANGE_CHECKING
// For valid input streams, intermediate stage coefficients should always
// stay within the range of a signed 16 bit integer. Coefficients can go out
// of this range for invalid/corrupt streams. However, strictly checking
// this range for every intermediate coefficient can be burdensome for a
// decoder, therefore the following assertions are only enabled when
// configured with --enable-coefficient-range-checking.
assert(INT16_MIN <= input);
assert(input <= INT16_MAX);
#endif // CONFIG_COEFFICIENT_RANGE_CHECKING
return input;
}
// Returns ROUND_POWER_OF_TWO(input, DCT_CONST_BITS). Both the macro and the
// constant are defined outside this header.
static INLINE tran_high_t dct_const_round_shift(tran_high_t input) {
  return ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
}
#if CONFIG_AOM_HIGHBITDEPTH
// Returns `input` unchanged. When CONFIG_COEFFICIENT_RANGE_CHECKING is
// enabled it additionally asserts that the value fits in a signed integer of
// (8 + bd) bits, where `bd` is the bit depth; otherwise `bd` is unused.
static INLINE tran_high_t highbd_check_range(tran_high_t input, int bd) {
#if CONFIG_COEFFICIENT_RANGE_CHECKING
  // For valid highbitdepth streams, intermediate stage coefficients will
  // stay within the ranges:
  // - 8 bit: signed 16 bit integer
  // - 10 bit: signed 18 bit integer
  // - 12 bit: signed 20 bit integer
  const int32_t int_max = (1 << (7 + bd)) - 1;
  const int32_t int_min = -int_max - 1;
  assert(int_min <= input);
  assert(input <= int_max);
  // With NDEBUG the asserts expand to nothing, so mark both bounds as used to
  // keep the build free of unused-variable warnings.
  (void)int_min;
  (void)int_max;
#endif  // CONFIG_COEFFICIENT_RANGE_CHECKING
  (void)bd;
  return input;
}
// High-bitdepth counterpart of dct_const_round_shift(): returns
// ROUND_POWER_OF_TWO(input, DCT_CONST_BITS). Both the macro and the constant
// are defined outside this header.
static INLINE tran_high_t highbd_dct_const_round_shift(tran_high_t input) {
  return ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
}
#endif // CONFIG_AOM_HIGHBITDEPTH
#if CONFIG_EMULATE_HARDWARE
// When CONFIG_EMULATE_HARDWARE is 1 the transform performs a
// non-normative method to handle overflows. A stream that causes
// overflows in the inverse transform is considered invalid,
// and a hardware implementer is free to choose any reasonable
// method to handle overflows. However to aid in hardware
// verification they can use a specific implementation of the
// WRAPLOW() macro below that is identical to their intended
// hardware implementation (and also use configure options to trigger
// the C-implementation of the transform).
//
// The particular WRAPLOW implementation below performs strict
// overflow wrapping to match common hardware implementations.
// bd of 8 uses tran_low with 16bits, need to remove 16bits
// bd of 10 uses tran_low with 18bits, need to remove 14bits
// bd of 12 uses tran_low with 20bits, need to remove 12bits
// bd of x uses tran_low with 8+x bits, need to remove 24-x bits
//
// The left shift is carried out on an unsigned 32-bit value because
// left-shifting a negative signed integer is undefined behavior in C. The
// shifted value is converted back to int32_t so that the following right
// shift sign-extends the retained low bits.
#define WRAPLOW(x) (((int32_t)((uint32_t)check_range(x) << 16)) >> 16)
#if CONFIG_AOM_HIGHBITDEPTH
#define HIGHBD_WRAPLOW(x, bd)                                              \
  (((int32_t)((uint32_t)highbd_check_range((x), (bd)) << (24 - (bd)))) >> \
   (24 - (bd)))
#endif  // CONFIG_AOM_HIGHBITDEPTH
#else   // CONFIG_EMULATE_HARDWARE
// Without hardware emulation the value is only range-checked (when that is
// configured) and narrowed to int32_t.
#define WRAPLOW(x) ((int32_t)check_range(x))
#if CONFIG_AOM_HIGHBITDEPTH
#define HIGHBD_WRAPLOW(x, bd) ((int32_t)highbd_check_range((x), (bd)))
#endif  // CONFIG_AOM_HIGHBITDEPTH
#endif  // CONFIG_EMULATE_HARDWARE
// Prototypes for C inverse-transform kernels; the definitions are not part of
// this header. `input` is const-qualified, so results are expected in
// `output`.
// NOTE(review): presumably 1-D inverse DCT (idct) / inverse ADST (iadst) of
// the length given in each name -- confirm against the implementation.
void av1_idct4_c(const tran_low_t *input, tran_low_t *output);
void av1_idct8_c(const tran_low_t *input, tran_low_t *output);
void av1_idct16_c(const tran_low_t *input, tran_low_t *output);
void av1_idct32_c(const tran_low_t *input, tran_low_t *output);
void av1_iadst4_c(const tran_low_t *input, tran_low_t *output);
void av1_iadst8_c(const tran_low_t *input, tran_low_t *output);
void av1_iadst16_c(const tran_low_t *input, tran_low_t *output);
#if CONFIG_AOM_HIGHBITDEPTH
// High-bitdepth variants of the inverse-transform prototypes; each takes an
// extra `bd` argument. The definitions are not part of this header.
// NOTE(review): `bd` is presumably the bit depth (8/10/12) as in
// highbd_check_range() -- confirm against the implementation.
void av1_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd);
void av1_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd);
void av1_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd);
void av1_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd);
void av1_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd);
void av1_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd);
// Adds a residual to a high-bitdepth pixel: `trans` is passed through
// HIGHBD_WRAPLOW for bit depth `bd`, added to `dest`, and the sum is handed
// to clip_pixel_highbd() (defined outside this header) for clamping.
static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans,
                                             int bd) {
  const int residual = (int)HIGHBD_WRAPLOW(trans, bd);
  return clip_pixel_highbd(dest + residual, bd);
}
#endif
// Adds a residual to an 8-bit pixel: `trans` is passed through WRAPLOW, added
// to `dest`, and the sum is handed to clip_pixel() (defined outside this
// header) for clamping.
static INLINE uint8_t clip_pixel_add(uint8_t dest, tran_high_t trans) {
  const int residual = (int)WRAPLOW(trans);
  return clip_pixel(dest + residual);
}
#ifdef __cplusplus
} // extern "C"
#endif
#endif // AOM_DSP_INV_TXFM_H_
......@@ -414,62 +414,6 @@ if (aom_config("CONFIG_EXT_TX") eq "yes") {
specialize qw/av1_fht32x16 sse2/;
}
# Forward DCT prototypes (4x4 through 32x32, plus the *_1 and *_rd variants).
# Both branches register the same prototypes; they differ only in the
# specialization list: empty when CONFIG_EMULATE_HARDWARE is enabled, sse2
# otherwise.
if (aom_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
# Hardware emulation: no optimized variant is listed for any function.
add_proto qw/void av1_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/av1_fdct4x4/;
add_proto qw/void av1_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/av1_fdct4x4_1/;
add_proto qw/void av1_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/av1_fdct8x8/;
add_proto qw/void av1_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/av1_fdct8x8_1/;
add_proto qw/void av1_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/av1_fdct16x16/;
add_proto qw/void av1_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/av1_fdct16x16_1/;
add_proto qw/void av1_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/av1_fdct32x32/;
add_proto qw/void av1_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/av1_fdct32x32_rd/;
add_proto qw/void av1_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/av1_fdct32x32_1/;
} else {
# Regular build: every function also lists an sse2 specialization.
add_proto qw/void av1_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/av1_fdct4x4 sse2/;
add_proto qw/void av1_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/av1_fdct4x4_1 sse2/;
add_proto qw/void av1_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/av1_fdct8x8 sse2/;
add_proto qw/void av1_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/av1_fdct8x8_1 sse2/;
add_proto qw/void av1_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/av1_fdct16x16 sse2/;
add_proto qw/void av1_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/av1_fdct16x16_1 sse2/;
add_proto qw/void av1_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/av1_fdct32x32 sse2/;
add_proto qw/void av1_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/av1_fdct32x32_rd sse2/;
add_proto qw/void av1_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/av1_fdct32x32_1 sse2/;
}
if (aom_config("CONFIG_AOM_HIGHBITDEPTH") ne "yes") {
if (aom_config("CONFIG_EXT_TX") ne "yes") {
specialize qw/av1_fht4x4 msa/;
......@@ -478,243 +422,9 @@ if (aom_config("CONFIG_AOM_HIGHBITDEPTH") ne "yes") {
}
}
# High-bitdepth forward DCT prototypes, registered only when
# CONFIG_AOM_HIGHBITDEPTH is enabled. Both inner branches register the same
# prototypes and differ only in the specialization lists.
if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
if (aom_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
# Hardware emulation: no optimized variant is listed for any function.
add_proto qw/void av1_highbd_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/av1_highbd_fdct4x4/;
add_proto qw/void av1_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/av1_highbd_fdct8x8/;
add_proto qw/void av1_highbd_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/av1_highbd_fdct8x8_1/;
add_proto qw/void av1_highbd_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/av1_highbd_fdct16x16/;
add_proto qw/void av1_highbd_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/av1_highbd_fdct16x16_1/;
add_proto qw/void av1_highbd_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/av1_highbd_fdct32x32/;
add_proto qw/void av1_highbd_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/av1_highbd_fdct32x32_rd/;
add_proto qw/void av1_highbd_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/av1_highbd_fdct32x32_1/;
} else {
# Regular build: the full transforms list sse2, while the *_1 variants
# (8x8_1, 16x16_1, 32x32_1) still have an empty specialization list.
add_proto qw/void av1_highbd_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/av1_highbd_fdct4x4 sse2/;
add_proto qw/void av1_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/av1_highbd_fdct8x8 sse2/;
add_proto qw/void av1_highbd_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/av1_highbd_fdct8x8_1/;
add_proto qw/void av1_highbd_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/av1_highbd_fdct16x16 sse2/;
add_proto qw/void av1_highbd_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/av1_highbd_fdct16x16_1/;
add_proto qw/void av1_highbd_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/av1_highbd_fdct32x32 sse2/;
add_proto qw/void av1_highbd_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/av1_highbd_fdct32x32_rd sse2/;
add_proto qw/void av1_highbd_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/av1_highbd_fdct32x32_1/;
}
}
add_proto qw/void av1_fwd_idtx/, "const int16_t *src_diff, tran_low_t *coeff, int stride, int bs, int tx_type";
specialize qw/av1_fwd_idtx/;
# Inverse transform
if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
# Note: as optimized versions of these functions are added, we need to add a check to ensure
# that when CONFIG_EMULATE_HARDWARE is on, they default to the C versions only.
add_proto qw/void av1_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_idct4x4_1_add/;
add_proto qw/void av1_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_idct4x4_16_add/;
add_proto qw/void av1_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_idct8x8_1_add/;
add_proto qw/void av1_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_idct8x8_64_add/;
add_proto qw/void av1_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_idct8x8_12_add/;
add_proto qw/void av1_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_idct16x16_1_add/;
add_proto qw/void av1_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_idct16x16_256_add/;
add_proto qw/void av1_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_idct16x16_10_add/;
add_proto qw/void av1_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_idct32x32_1024_add/;
add_proto qw/void av1_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_idct32x32_34_add/;
add_proto qw/void av1_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_idct32x32_1_add/;
add_proto qw/void av1_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_iwht4x4_1_add/;
add_proto qw/void av1_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_iwht4x4_16_add/;
add_proto qw/void av1_highbd_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/av1_highbd_idct4x4_1_add/;
add_proto qw/void av1_highbd_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/av1_highbd_idct8x8_1_add/;
add_proto qw/void av1_highbd_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/av1_highbd_idct16x16_1_add/;
add_proto qw/void av1_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/av1_highbd_idct32x32_1024_add/;
add_proto qw/void av1_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/av1_highbd_idct32x32_34_add/;
add_proto qw/void av1_highbd_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/av1_highbd_idct32x32_1_add/;
add_proto qw/void av1_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/av1_highbd_iwht4x4_1_add/;
add_proto qw/void av1_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/av1_highbd_iwht4x4_16_add/;
# Force C versions if CONFIG_EMULATE_HARDWARE is 1
if (aom_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
add_proto qw/void av1_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/av1_highbd_idct4x4_16_add/;
add_proto qw/void av1_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/av1_highbd_idct8x8_64_add/;
add_proto qw/void av1_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/av1_highbd_idct8x8_10_add/;
add_proto qw/void av1_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/av1_highbd_idct16x16_256_add/;
add_proto qw/void av1_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/av1_highbd_idct16x16_10_add/;
} else {
add_proto qw/void av1_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/av1_highbd_idct4x4_16_add sse2/;
add_proto qw/void av1_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/av1_highbd_idct8x8_64_add sse2/;
add_proto qw/void av1_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/av1_highbd_idct8x8_10_add sse2/;
add_proto qw/void av1_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/av1_highbd_idct16x16_256_add sse2/;
add_proto qw/void av1_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/av1_highbd_idct16x16_10_add sse2/;
} # CONFIG_EMULATE_HARDWARE
} else {
# Force C versions if CONFIG_EMULATE_HARDWARE is 1
if (aom_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
add_proto qw/void av1_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_idct4x4_1_add/;
add_proto qw/void av1_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_idct4x4_16_add/;
add_proto qw/void av1_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_idct8x8_1_add/;
add_proto qw/void av1_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_idct8x8_64_add/;
add_proto qw/void av1_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_idct8x8_12_add/;
add_proto qw/void av1_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_idct16x16_1_add/;
add_proto qw/void av1_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_idct16x16_256_add/;
add_proto qw/void av1_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_idct16x16_10_add/;
add_proto qw/void av1_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_idct32x32_1024_add/;
add_proto qw/void av1_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_idct32x32_34_add/;
add_proto qw/void av1_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_idct32x32_1_add/;
add_proto qw/void av1_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_iwht4x4_1_add/;
add_proto qw/void av1_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_iwht4x4_16_add/;
} else {
add_proto qw/void av1_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_idct4x4_1_add sse2/;
add_proto qw/void av1_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_idct4x4_16_add sse2/;
add_proto qw/void av1_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_idct8x8_1_add sse2/;
add_proto qw/void av1_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_idct8x8_64_add sse2/;
add_proto qw/void av1_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_idct8x8_12_add sse2/;
add_proto qw/void av1_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_idct16x16_1_add sse2/;
add_proto qw/void av1_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_idct16x16_256_add sse2/;
add_proto qw/void av1_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_idct16x16_10_add sse2/;
add_proto qw/void av1_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_idct32x32_1024_add sse2/;
add_proto qw/void av1_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_idct32x32_34_add sse2/;
add_proto qw/void av1_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_idct32x32_1_add sse2/;
add_proto qw/void av1_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_iwht4x4_1_add/;
add_proto qw/void av1_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/av1_iwht4x4_16_add/;
} # CONFIG_EMULATE_HARDWARE
} # CONFIG_AOM_HIGHBITDEPTH
if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
#fwd txfm
add_proto qw/void av1_fwd_txfm2d_4x4/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
......
This diff is collapsed.
This diff is collapsed.
......@@ -28,13 +28,7 @@ typedef void (*TxfmFuncSSE2)(const __m128i *input, __m128i *output,
static INLINE TxfmFuncSSE2 fwd_txfm_type_to_func(TXFM_TYPE txfm_type) {
switch (txfm_type) {
case TXFM_TYPE_DCT4: return av1_fdct4_new_sse4_1; break;
case TXFM_TYPE_DCT8: return av1_fdct8_new_sse4_1; break;
case TXFM_TYPE_DCT16: return av1_fdct16_new_sse4_1; break;
case TXFM_TYPE_DCT32: return av1_fdct32_new_sse4_1; break;
case TXFM_TYPE_ADST4: return av1_fadst4_new_sse4_1; break;
case TXFM_TYPE_ADST8: return av1_fadst8_new_sse4_1; break;
case TXFM_TYPE_ADST16: return av1_fadst16_new_sse4_1; break;
case TXFM_TYPE_ADST32: return av1_fadst32_new_sse4_1; break;
default: assert(0);
}
......
This diff is collapsed.
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include <emmintrin.h> // SSE2
#include "./aom_config.h"
#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/x86/fwd_txfm_sse2.h"
// Sums the 16 samples of a 4x4 block (rows `stride` int16_t elements apart),
// doubles the total and passes the resulting vector to store_output().
// Row pairs are first added with 16-bit lane arithmetic, then widened to
// 32 bits for the final reduction. The doubled total ends up in 32-bit lane 0.
void av1_fdct4x4_1_sse2(const int16_t *input, tran_low_t *output, int stride) {
  const __m128i kZero = _mm_setzero_si128();

  // Each row is four int16_t values, i.e. one 64-bit load.
  const __m128i row0 = _mm_loadl_epi64((const __m128i *)(input + 0 * stride));
  const __m128i row1 = _mm_loadl_epi64((const __m128i *)(input + 1 * stride));
  const __m128i row2 = _mm_loadl_epi64((const __m128i *)(input + 2 * stride));
  const __m128i row3 = _mm_loadl_epi64((const __m128i *)(input + 3 * stride));

  // Pack two rows per register and add them lane-wise at 16-bit precision.
  const __m128i rows03 = _mm_unpacklo_epi64(row0, row3);
  const __m128i rows12 = _mm_unpacklo_epi64(row1, row2);
  const __m128i sum16 = _mm_add_epi16(rows03, rows12);

  // Sign-extend to 32 bits: place each 16-bit lane in the upper half of a
  // 32-bit lane, then shift right arithmetically by 16.
  const __m128i lo32 = _mm_srai_epi32(_mm_unpacklo_epi16(kZero, sum16), 16);
  const __m128i hi32 = _mm_srai_epi32(_mm_unpackhi_epi16(kZero, sum16), 16);
  const __m128i sum32 = _mm_add_epi32(lo32, hi32);

  // Horizontal reduction of the four 32-bit partial sums into lane 0.
  const __m128i lower_pair = _mm_unpacklo_epi32(sum32, kZero);
  const __m128i upper_pair = _mm_unpackhi_epi32(sum32, kZero);
  const __m128i pair_sum = _mm_add_epi32(lower_pair, upper_pair);
  const __m128i total = _mm_add_epi32(pair_sum, _mm_srli_si128(pair_sum, 8));

  // Scale by two before storing.
  __m128i result = _mm_slli_epi32(total, 1);
  store_output(&result, output);
}
// Sums the 64 samples of an 8x8 block (rows `stride` int16_t elements apart)
// and passes the resulting vector to store_output(); unlike the 4x4 variant
// the total is not scaled. Rows are accumulated with 16-bit lane arithmetic
// and widened to 32 bits for the final reduction into lane 0.
// Rows are read with _mm_load_si128, which requires every row address to be
// 16-byte aligned.
void av1_fdct8x8_1_sse2(const int16_t *input, tran_low_t *output, int stride) {
  const __m128i kZero = _mm_setzero_si128();

  // Each row is eight int16_t values, i.e. one full 128-bit load.
  const __m128i row0 = _mm_load_si128((const __m128i *)(input + 0 * stride));
  const __m128i row1 = _mm_load_si128((const __m128i *)(input + 1 * stride));
  const __m128i row2 = _mm_load_si128((const __m128i *)(input + 2 * stride));
  const __m128i row3 = _mm_load_si128((const __m128i *)(input + 3 * stride));
  const __m128i row4 = _mm_load_si128((const __m128i *)(input + 4 * stride));
  const __m128i row5 = _mm_load_si128((const __m128i *)(input + 5 * stride));
  const __m128i row6 = _mm_load_si128((const __m128i *)(input + 6 * stride));
  const __m128i row7 = _mm_load_si128((const __m128i *)(input + 7 * stride));

  // Column-wise accumulation of all eight rows at 16-bit precision.
  const __m128i rows01 = _mm_add_epi16(row0, row1);
  const __m128i rows23 = _mm_add_epi16(row2, row3);
  const __m128i rows45 = _mm_add_epi16(row4, row5);
  const __m128i rows67 = _mm_add_epi16(row6, row7);
  __m128i sum16 = _mm_add_epi16(rows01, rows23);
  sum16 = _mm_add_epi16(sum16, rows45);
  sum16 = _mm_add_epi16(sum16, rows67);

  // Sign-extend to 32 bits: place each 16-bit lane in the upper half of a
  // 32-bit lane, then shift right arithmetically by 16.
  const __m128i lo32 = _mm_srai_epi32(_mm_unpacklo_epi16(kZero, sum16), 16);
  const __m128i hi32 = _mm_srai_epi32(_mm_unpackhi_epi16(kZero, sum16), 16);
  const __m128i sum32 = _mm_add_epi32(lo32, hi32);

  // Horizontal reduction of the four 32-bit partial sums into lane 0.
  const __m128i lower_pair = _mm_unpacklo_epi32(sum32, kZero);
  const __m128i upper_pair = _mm_unpackhi_epi32(sum32, kZero);
  const __m128i pair_sum = _mm_add_epi32(lower_pair, upper_pair);

  __m128i result = _mm_add_epi32(pair_sum, _mm_srli_si128(pair_sum, 8));
  store_output(&result, output);
}
void av1_fdct16x16_1_sse2(const int16_t *input, tran_low_t *output,
int stride) {
__m128i in0, in1, in2, in3;
__m128i u0, u1;