Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
Xiph.Org
aom-rav1e
Commits
fb77385f
Commit
fb77385f
authored
Nov 02, 2016
by
Yi Luo
Committed by
Gerrit Code Review
Nov 02, 2016
Browse files
Options
Browse Files
Download
Plain Diff
Merge "Remove unused copies of transform related source code" into nextgenv2
parents
7f6bf9c7
ea1167c3
Changes
14
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
19 additions
and
13165 deletions
+19
-13165
av1/av1_common.mk
av1/av1_common.mk
+0
-10
av1/common/av1_fwd_txfm.c
av1/common/av1_fwd_txfm.c
+0
-813
av1/common/av1_fwd_txfm.h
av1/common/av1_fwd_txfm.h
+0
-19
av1/common/av1_inv_txfm.c
av1/common/av1_inv_txfm.c
+0
-2468
av1/common/av1_inv_txfm.h
av1/common/av1_inv_txfm.h
+0
-133
av1/common/av1_rtcd_defs.pl
av1/common/av1_rtcd_defs.pl
+0
-290
av1/common/x86/av1_fwd_dct32x32_impl_sse2.h
av1/common/x86/av1_fwd_dct32x32_impl_sse2.h
+0
-3202
av1/common/x86/av1_fwd_txfm1d_sse4.c
av1/common/x86/av1_fwd_txfm1d_sse4.c
+0
-713
av1/common/x86/av1_fwd_txfm2d_sse4.c
av1/common/x86/av1_fwd_txfm2d_sse4.c
+0
-6
av1/common/x86/av1_fwd_txfm_impl_sse2.h
av1/common/x86/av1_fwd_txfm_impl_sse2.h
+0
-1014
av1/common/x86/av1_fwd_txfm_sse2.c
av1/common/x86/av1_fwd_txfm_sse2.c
+0
-272
av1/common/x86/av1_inv_txfm_sse2.c
av1/common/x86/av1_inv_txfm_sse2.c
+0
-4028
av1/common/x86/av1_inv_txfm_sse2.h
av1/common/x86/av1_inv_txfm_sse2.h
+0
-178
test/av1_inv_txfm_test.cc
test/av1_inv_txfm_test.cc
+19
-19
No files found.
av1/av1_common.mk
View file @
fb77385f
...
...
@@ -30,8 +30,6 @@ AV1_COMMON_SRCS-yes += common/filter.h
AV1_COMMON_SRCS-yes
+=
common/filter.c
AV1_COMMON_SRCS-yes
+=
common/idct.h
AV1_COMMON_SRCS-yes
+=
common/idct.c
AV1_COMMON_SRCS-yes
+=
common/av1_inv_txfm.h
AV1_COMMON_SRCS-yes
+=
common/av1_inv_txfm.c
AV1_COMMON_SRCS-yes
+=
common/loopfilter.h
AV1_COMMON_SRCS-yes
+=
common/thread_common.h
AV1_COMMON_SRCS-yes
+=
common/mv.h
...
...
@@ -61,8 +59,6 @@ AV1_COMMON_SRCS-yes += common/common_data.h
AV1_COMMON_SRCS-yes
+=
common/scan.c
AV1_COMMON_SRCS-yes
+=
common/scan.h
# TODO(angiebird) the forward transform belongs under encoder/
AV1_COMMON_SRCS-$(CONFIG_AV1_ENCODER)
+=
common/av1_fwd_txfm.h
AV1_COMMON_SRCS-$(CONFIG_AV1_ENCODER)
+=
common/av1_fwd_txfm.c
AV1_COMMON_SRCS-yes
+=
common/av1_txfm.h
AV1_COMMON_SRCS-yes
+=
common/av1_fwd_txfm1d.h
AV1_COMMON_SRCS-yes
+=
common/av1_fwd_txfm1d.c
...
...
@@ -125,9 +121,6 @@ AV1_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idct_intrin_sse2.c
AV1_COMMON_SRCS-$(HAVE_AVX2)
+=
common/x86/hybrid_inv_txfm_avx2.c
ifeq
($(CONFIG_AV1_ENCODER),yes)
AV1_COMMON_SRCS-$(HAVE_SSE2)
+=
common/x86/av1_fwd_txfm_sse2.c
AV1_COMMON_SRCS-$(HAVE_SSE2)
+=
common/x86/av1_fwd_dct32x32_impl_sse2.h
AV1_COMMON_SRCS-$(HAVE_SSE2)
+=
common/x86/av1_fwd_txfm_impl_sse2.h
AV1_COMMON_SRCS-$(HAVE_SSE4_1)
+=
common/x86/av1_txfm1d_sse4.h
AV1_COMMON_SRCS-$(HAVE_SSE4_1)
+=
common/x86/av1_fwd_txfm1d_sse4.c
AV1_COMMON_SRCS-$(HAVE_SSE4_1)
+=
common/x86/av1_fwd_txfm2d_sse4.c
...
...
@@ -145,7 +138,4 @@ ifeq ($(CONFIG_FILTER_INTRA),yes)
AV1_COMMON_SRCS-$(HAVE_SSE4_1)
+=
common/x86/filterintra_sse4.c
endif
AV1_COMMON_SRCS-$(HAVE_SSE2)
+=
common/x86/av1_inv_txfm_sse2.c
AV1_COMMON_SRCS-$(HAVE_SSE2)
+=
common/x86/av1_inv_txfm_sse2.h
$(eval
$(call
rtcd_h_template,av1_rtcd,av1/common/av1_rtcd_defs.pl))
av1/common/av1_fwd_txfm.c
deleted
100644 → 0
View file @
7f6bf9c7
This diff is collapsed.
Click to expand it.
av1/common/av1_fwd_txfm.h
deleted
100644 → 0
View file @
7f6bf9c7
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#ifndef AV1_COMMON_AV1_FWD_TXFM_H_
#define AV1_COMMON_AV1_FWD_TXFM_H_
#include "aom_dsp/txfm_common.h"
#include "aom_dsp/fwd_txfm.h"
void
av1_fdct32
(
const
tran_high_t
*
input
,
tran_high_t
*
output
,
int
round
);
#endif // AV1_COMMON_AV1_FWD_TXFM_H_
av1/common/av1_inv_txfm.c
deleted
100644 → 0
View file @
7f6bf9c7
This diff is collapsed.
Click to expand it.
av1/common/av1_inv_txfm.h
deleted
100644 → 0
View file @
7f6bf9c7
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#ifndef AOM_DSP_INV_TXFM_H_
#define AOM_DSP_INV_TXFM_H_
#include <assert.h>
#include "./aom_config.h"
#include "aom_dsp/txfm_common.h"
#include "aom_dsp/inv_txfm.h"
#include "aom_ports/mem.h"
#ifdef __cplusplus
extern
"C"
{
#endif
// Validation hook for an intermediate inverse-transform coefficient.
// Returns `input` unchanged in every build configuration; the only optional
// effect is the pair of assertions below.
static INLINE tran_high_t check_range(tran_high_t input) {
#if CONFIG_COEFFICIENT_RANGE_CHECKING
  // For valid input streams, intermediate stage coefficients always stay
  // within the range of a signed 16 bit integer. They can leave that range
  // only for invalid/corrupt streams. Strictly checking every intermediate
  // coefficient can be burdensome for a decoder, so these assertions are
  // compiled in only when configured with
  // --enable-coefficient-range-checking.
  assert(INT16_MIN <= input);
  assert(input <= INT16_MAX);
#endif  // CONFIG_COEFFICIENT_RANGE_CHECKING
  return input;
}
// Applies ROUND_POWER_OF_TWO to `input` with a shift of DCT_CONST_BITS and
// returns the result as a tran_high_t.
static INLINE tran_high_t dct_const_round_shift(tran_high_t input) {
  return ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
}
#if CONFIG_AOM_HIGHBITDEPTH
// High-bitdepth counterpart of the coefficient validation hook.
// Returns `input` unchanged; `bd` is only consulted when range checking is
// compiled in.
static INLINE tran_high_t highbd_check_range(tran_high_t input, int bd) {
#if CONFIG_COEFFICIENT_RANGE_CHECKING
  // For valid highbitdepth streams, intermediate stage coefficients stay
  // within a signed (8 + bd)-bit range:
  //   -  8 bit: signed 16 bit integer
  //   - 10 bit: signed 18 bit integer
  //   - 12 bit: signed 20 bit integer
  const int32_t int_max = (1 << (bd + 7)) - 1;
  const int32_t int_min = -int_max - 1;
  assert(int_min <= input);
  assert(input <= int_max);
  // Keeps int_min "used" when assert() expands to nothing (NDEBUG builds).
  (void)int_min;
#endif  // CONFIG_COEFFICIENT_RANGE_CHECKING
  // bd is otherwise unreferenced when range checking is disabled.
  (void)bd;
  return input;
}
// High-bitdepth variant of the rounding shift: applies ROUND_POWER_OF_TWO to
// `input` with a shift of DCT_CONST_BITS and returns it as a tran_high_t.
static INLINE tran_high_t highbd_dct_const_round_shift(tran_high_t input) {
  return ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
}
#endif // CONFIG_AOM_HIGHBITDEPTH
#if CONFIG_EMULATE_HARDWARE
// When CONFIG_EMULATE_HARDWARE is 1 the transform performs a
// non-normative method to handle overflows. A stream that causes
// overflows in the inverse transform is considered invalid,
// and a hardware implementer is free to choose any reasonable
// method to handle overflows. However to aid in hardware
// verification they can use a specific implementation of the
// WRAPLOW() macro below that is identical to their intended
// hardware implementation (and also use configure options to trigger
// the C-implementation of the transform).
//
// The particular WRAPLOW implementation below performs strict
// overflow wrapping to match common hardware implementations.
// bd of 8 uses trans_low with 16bits, need to remove 16bits
// bd of 10 uses trans_low with 18bits, need to remove 14bits
// bd of 12 uses trans_low with 20bits, need to remove 12bits
// bd of x uses trans_low with 8+x bits, need to remove 24-x bits
//
// The left shift is carried out on uint32_t: left-shifting a negative signed
// value is undefined behaviour in C, whereas the unsigned shift is fully
// defined. The value is converted back to int32_t before the right shift so
// that the shift can replicate the sign bit (right-shifting a negative value
// is implementation-defined; an arithmetic shift is assumed here, as it was
// in the original expression).
#define WRAPLOW(x) (((int32_t)((uint32_t)check_range(x) << 16)) >> 16)
#if CONFIG_AOM_HIGHBITDEPTH
// `bd` is parenthesized so that an expression argument cannot be re-associated
// by the surrounding subtraction. Note that `bd` is evaluated more than once.
#define HIGHBD_WRAPLOW(x, bd)                                              \
  (((int32_t)((uint32_t)highbd_check_range((x), (bd)) << (24 - (bd)))) >> \
   (24 - (bd)))
#endif  // CONFIG_AOM_HIGHBITDEPTH
#else   // CONFIG_EMULATE_HARDWARE
// Without hardware emulation the coefficient is only range-checked (when
// that is configured) and narrowed to int32_t.
#define WRAPLOW(x) ((int32_t)check_range(x))
#if CONFIG_AOM_HIGHBITDEPTH
#define HIGHBD_WRAPLOW(x, bd) ((int32_t)highbd_check_range((x), (bd)))
#endif  // CONFIG_AOM_HIGHBITDEPTH
#endif  // CONFIG_EMULATE_HARDWARE
void
av1_idct4_c
(
const
tran_low_t
*
input
,
tran_low_t
*
output
);
void
av1_idct8_c
(
const
tran_low_t
*
input
,
tran_low_t
*
output
);
void
av1_idct16_c
(
const
tran_low_t
*
input
,
tran_low_t
*
output
);
void
av1_idct32_c
(
const
tran_low_t
*
input
,
tran_low_t
*
output
);
void
av1_iadst4_c
(
const
tran_low_t
*
input
,
tran_low_t
*
output
);
void
av1_iadst8_c
(
const
tran_low_t
*
input
,
tran_low_t
*
output
);
void
av1_iadst16_c
(
const
tran_low_t
*
input
,
tran_low_t
*
output
);
#if CONFIG_AOM_HIGHBITDEPTH
void
av1_highbd_idct4_c
(
const
tran_low_t
*
input
,
tran_low_t
*
output
,
int
bd
);
void
av1_highbd_idct8_c
(
const
tran_low_t
*
input
,
tran_low_t
*
output
,
int
bd
);
void
av1_highbd_idct16_c
(
const
tran_low_t
*
input
,
tran_low_t
*
output
,
int
bd
);
void
av1_highbd_iadst4_c
(
const
tran_low_t
*
input
,
tran_low_t
*
output
,
int
bd
);
void
av1_highbd_iadst8_c
(
const
tran_low_t
*
input
,
tran_low_t
*
output
,
int
bd
);
void
av1_highbd_iadst16_c
(
const
tran_low_t
*
input
,
tran_low_t
*
output
,
int
bd
);
// Adds an inverse-transform residual to a high-bitdepth pixel.
// `trans` is first passed through HIGHBD_WRAPLOW for bit depth `bd`, then
// added to `dest`; the sum is handed to clip_pixel_highbd() together with
// `bd`, and that function's result is returned.
static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans,
                                             int bd) {
  const int residual = (int)HIGHBD_WRAPLOW(trans, bd);
  return clip_pixel_highbd(dest + residual, bd);
}
#endif
// Adds an inverse-transform residual to an 8-bit pixel.
// `trans` is first passed through WRAPLOW, then added to `dest`; the sum is
// handed to clip_pixel() and that function's result is returned.
static INLINE uint8_t clip_pixel_add(uint8_t dest, tran_high_t trans) {
  const int residual = (int)WRAPLOW(trans);
  return clip_pixel(dest + residual);
}
#ifdef __cplusplus
}
// extern "C"
#endif
#endif // AOM_DSP_INV_TXFM_H_
av1/common/av1_rtcd_defs.pl
View file @
fb77385f
...
...
@@ -414,62 +414,6 @@ if (aom_config("CONFIG_EXT_TX") eq "yes") {
specialize
qw/av1_fht32x16 sse2/
;
}
if
(
aom_config
("
CONFIG_EMULATE_HARDWARE
")
eq
"
yes
")
{
add_proto
qw/void av1_fdct4x4/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/av1_fdct4x4/
;
add_proto
qw/void av1_fdct4x4_1/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/av1_fdct4x4_1/
;
add_proto
qw/void av1_fdct8x8/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/av1_fdct8x8/
;
add_proto
qw/void av1_fdct8x8_1/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/av1_fdct8x8_1/
;
add_proto
qw/void av1_fdct16x16/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/av1_fdct16x16/
;
add_proto
qw/void av1_fdct16x16_1/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/av1_fdct16x16_1/
;
add_proto
qw/void av1_fdct32x32/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/av1_fdct32x32/
;
add_proto
qw/void av1_fdct32x32_rd/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/av1_fdct32x32_rd/
;
add_proto
qw/void av1_fdct32x32_1/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/av1_fdct32x32_1/
;
}
else
{
add_proto
qw/void av1_fdct4x4/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/av1_fdct4x4 sse2/
;
add_proto
qw/void av1_fdct4x4_1/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/av1_fdct4x4_1 sse2/
;
add_proto
qw/void av1_fdct8x8/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/av1_fdct8x8 sse2/
;
add_proto
qw/void av1_fdct8x8_1/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/av1_fdct8x8_1 sse2/
;
add_proto
qw/void av1_fdct16x16/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/av1_fdct16x16 sse2/
;
add_proto
qw/void av1_fdct16x16_1/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/av1_fdct16x16_1 sse2/
;
add_proto
qw/void av1_fdct32x32/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/av1_fdct32x32 sse2/
;
add_proto
qw/void av1_fdct32x32_rd/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/av1_fdct32x32_rd sse2/
;
add_proto
qw/void av1_fdct32x32_1/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/av1_fdct32x32_1 sse2/
;
}
if
(
aom_config
("
CONFIG_AOM_HIGHBITDEPTH
")
ne
"
yes
")
{
if
(
aom_config
("
CONFIG_EXT_TX
")
ne
"
yes
")
{
specialize
qw/av1_fht4x4 msa/
;
...
...
@@ -478,243 +422,9 @@ if (aom_config("CONFIG_AOM_HIGHBITDEPTH") ne "yes") {
}
}
if
(
aom_config
("
CONFIG_AOM_HIGHBITDEPTH
")
eq
"
yes
")
{
if
(
aom_config
("
CONFIG_EMULATE_HARDWARE
")
eq
"
yes
")
{
add_proto
qw/void av1_highbd_fdct4x4/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/av1_highbd_fdct4x4/
;
add_proto
qw/void av1_highbd_fdct8x8/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/av1_highbd_fdct8x8/
;
add_proto
qw/void av1_highbd_fdct8x8_1/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/av1_highbd_fdct8x8_1/
;
add_proto
qw/void av1_highbd_fdct16x16/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/av1_highbd_fdct16x16/
;
add_proto
qw/void av1_highbd_fdct16x16_1/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/av1_highbd_fdct16x16_1/
;
add_proto
qw/void av1_highbd_fdct32x32/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/av1_highbd_fdct32x32/
;
add_proto
qw/void av1_highbd_fdct32x32_rd/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/av1_highbd_fdct32x32_rd/
;
add_proto
qw/void av1_highbd_fdct32x32_1/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/av1_highbd_fdct32x32_1/
;
}
else
{
add_proto
qw/void av1_highbd_fdct4x4/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/av1_highbd_fdct4x4 sse2/
;
add_proto
qw/void av1_highbd_fdct8x8/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/av1_highbd_fdct8x8 sse2/
;
add_proto
qw/void av1_highbd_fdct8x8_1/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/av1_highbd_fdct8x8_1/
;
add_proto
qw/void av1_highbd_fdct16x16/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/av1_highbd_fdct16x16 sse2/
;
add_proto
qw/void av1_highbd_fdct16x16_1/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/av1_highbd_fdct16x16_1/
;
add_proto
qw/void av1_highbd_fdct32x32/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/av1_highbd_fdct32x32 sse2/
;
add_proto
qw/void av1_highbd_fdct32x32_rd/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/av1_highbd_fdct32x32_rd sse2/
;
add_proto
qw/void av1_highbd_fdct32x32_1/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/av1_highbd_fdct32x32_1/
;
}
}
add_proto
qw/void av1_fwd_idtx/
,
"
const int16_t *src_diff, tran_low_t *coeff, int stride, int bs, int tx_type
";
specialize
qw/av1_fwd_idtx/
;
# Inverse transform
if
(
aom_config
("
CONFIG_AOM_HIGHBITDEPTH
")
eq
"
yes
")
{
# Note as optimized versions of these functions are added we need to add a check to ensure
# that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only.
add_proto
qw/void av1_idct4x4_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_idct4x4_1_add/
;
add_proto
qw/void av1_idct4x4_16_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_idct4x4_16_add/
;
add_proto
qw/void av1_idct8x8_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_idct8x8_1_add/
;
add_proto
qw/void av1_idct8x8_64_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_idct8x8_64_add/
;
add_proto
qw/void av1_idct8x8_12_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_idct8x8_12_add/
;
add_proto
qw/void av1_idct16x16_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_idct16x16_1_add/
;
add_proto
qw/void av1_idct16x16_256_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_idct16x16_256_add/
;
add_proto
qw/void av1_idct16x16_10_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_idct16x16_10_add/
;
add_proto
qw/void av1_idct32x32_1024_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_idct32x32_1024_add/
;
add_proto
qw/void av1_idct32x32_34_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_idct32x32_34_add/
;
add_proto
qw/void av1_idct32x32_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_idct32x32_1_add/
;
add_proto
qw/void av1_iwht4x4_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_iwht4x4_1_add/
;
add_proto
qw/void av1_iwht4x4_16_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_iwht4x4_16_add/
;
add_proto
qw/void av1_highbd_idct4x4_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int bd
";
specialize
qw/av1_highbd_idct4x4_1_add/
;
add_proto
qw/void av1_highbd_idct8x8_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int bd
";
specialize
qw/av1_highbd_idct8x8_1_add/
;
add_proto
qw/void av1_highbd_idct16x16_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int bd
";
specialize
qw/av1_highbd_idct16x16_1_add/
;
add_proto
qw/void av1_highbd_idct32x32_1024_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int bd
";
specialize
qw/av1_highbd_idct32x32_1024_add/
;
add_proto
qw/void av1_highbd_idct32x32_34_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int bd
";
specialize
qw/av1_highbd_idct32x32_34_add/
;
add_proto
qw/void av1_highbd_idct32x32_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int bd
";
specialize
qw/av1_highbd_idct32x32_1_add/
;
add_proto
qw/void av1_highbd_iwht4x4_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int bd
";
specialize
qw/av1_highbd_iwht4x4_1_add/
;
add_proto
qw/void av1_highbd_iwht4x4_16_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int bd
";
specialize
qw/av1_highbd_iwht4x4_16_add/
;
# Force C versions if CONFIG_EMULATE_HARDWARE is 1
if
(
aom_config
("
CONFIG_EMULATE_HARDWARE
")
eq
"
yes
")
{
add_proto
qw/void av1_highbd_idct4x4_16_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int bd
";
specialize
qw/av1_highbd_idct4x4_16_add/
;
add_proto
qw/void av1_highbd_idct8x8_64_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int bd
";
specialize
qw/av1_highbd_idct8x8_64_add/
;
add_proto
qw/void av1_highbd_idct8x8_10_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int bd
";
specialize
qw/av1_highbd_idct8x8_10_add/
;
add_proto
qw/void av1_highbd_idct16x16_256_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int bd
";
specialize
qw/av1_highbd_idct16x16_256_add/
;
add_proto
qw/void av1_highbd_idct16x16_10_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int bd
";
specialize
qw/av1_highbd_idct16x16_10_add/
;
}
else
{
add_proto
qw/void av1_highbd_idct4x4_16_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int bd
";
specialize
qw/av1_highbd_idct4x4_16_add sse2/
;
add_proto
qw/void av1_highbd_idct8x8_64_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int bd
";
specialize
qw/av1_highbd_idct8x8_64_add sse2/
;
add_proto
qw/void av1_highbd_idct8x8_10_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int bd
";
specialize
qw/av1_highbd_idct8x8_10_add sse2/
;
add_proto
qw/void av1_highbd_idct16x16_256_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int bd
";
specialize
qw/av1_highbd_idct16x16_256_add sse2/
;
add_proto
qw/void av1_highbd_idct16x16_10_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int bd
";
specialize
qw/av1_highbd_idct16x16_10_add sse2/
;
}
# CONFIG_EMULATE_HARDWARE
}
else
{
# Force C versions if CONFIG_EMULATE_HARDWARE is 1
if
(
aom_config
("
CONFIG_EMULATE_HARDWARE
")
eq
"
yes
")
{
add_proto
qw/void av1_idct4x4_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_idct4x4_1_add/
;
add_proto
qw/void av1_idct4x4_16_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_idct4x4_16_add/
;
add_proto
qw/void av1_idct8x8_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_idct8x8_1_add/
;
add_proto
qw/void av1_idct8x8_64_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_idct8x8_64_add/
;
add_proto
qw/void av1_idct8x8_12_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_idct8x8_12_add/
;
add_proto
qw/void av1_idct16x16_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_idct16x16_1_add/
;
add_proto
qw/void av1_idct16x16_256_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_idct16x16_256_add/
;
add_proto
qw/void av1_idct16x16_10_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_idct16x16_10_add/
;
add_proto
qw/void av1_idct32x32_1024_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_idct32x32_1024_add/
;
add_proto
qw/void av1_idct32x32_34_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_idct32x32_34_add/
;
add_proto
qw/void av1_idct32x32_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_idct32x32_1_add/
;
add_proto
qw/void av1_iwht4x4_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_iwht4x4_1_add/
;
add_proto
qw/void av1_iwht4x4_16_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_iwht4x4_16_add/
;
}
else
{
add_proto
qw/void av1_idct4x4_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_idct4x4_1_add sse2/
;
add_proto
qw/void av1_idct4x4_16_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_idct4x4_16_add sse2/
;
add_proto
qw/void av1_idct8x8_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_idct8x8_1_add sse2/
;
add_proto
qw/void av1_idct8x8_64_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_idct8x8_64_add sse2/
;
add_proto
qw/void av1_idct8x8_12_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_idct8x8_12_add sse2/
;
add_proto
qw/void av1_idct16x16_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_idct16x16_1_add sse2/
;
add_proto
qw/void av1_idct16x16_256_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_idct16x16_256_add sse2/
;
add_proto
qw/void av1_idct16x16_10_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_idct16x16_10_add sse2/
;
add_proto
qw/void av1_idct32x32_1024_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_idct32x32_1024_add sse2/
;
add_proto
qw/void av1_idct32x32_34_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_idct32x32_34_add sse2/
;
add_proto
qw/void av1_idct32x32_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_idct32x32_1_add sse2/
;
add_proto
qw/void av1_iwht4x4_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_iwht4x4_1_add/
;
add_proto
qw/void av1_iwht4x4_16_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/av1_iwht4x4_16_add/
;
}
# CONFIG_EMULATE_HARDWARE
}
# CONFIG_AOM_HIGHBITDEPTH
if
(
aom_config
("
CONFIG_AOM_HIGHBITDEPTH
")
eq
"
yes
")
{
#fwd txfm
add_proto
qw/void av1_fwd_txfm2d_4x4/
,
"
const int16_t *input, int32_t *output, int stride, int tx_type, int bd
";
...
...
av1/common/x86/av1_fwd_dct32x32_impl_sse2.h
deleted
100644 → 0
View file @
7f6bf9c7
This diff is collapsed.
Click to expand it.
av1/common/x86/av1_fwd_txfm1d_sse4.c
View file @
fb77385f
This diff is collapsed.
Click to expand it.
av1/common/x86/av1_fwd_txfm2d_sse4.c
View file @
fb77385f
...
...
@@ -28,13 +28,7 @@ typedef void (*TxfmFuncSSE2)(const __m128i *input, __m128i *output,
static
INLINE
TxfmFuncSSE2
fwd_txfm_type_to_func
(
TXFM_TYPE
txfm_type
)
{
switch
(
txfm_type
)
{
case
TXFM_TYPE_DCT4
:
return
av1_fdct4_new_sse4_1
;
break
;
case
TXFM_TYPE_DCT8
:
return
av1_fdct8_new_sse4_1
;
break
;
case
TXFM_TYPE_DCT16
:
return
av1_fdct16_new_sse4_1
;
break
;
case
TXFM_TYPE_DCT32
:
return
av1_fdct32_new_sse4_1
;
break
;
case
TXFM_TYPE_ADST4
:
return
av1_fadst4_new_sse4_1
;
break
;
case
TXFM_TYPE_ADST8
:
return
av1_fadst8_new_sse4_1
;
break
;
case
TXFM_TYPE_ADST16
:
return
av1_fadst16_new_sse4_1
;
break
;
case
TXFM_TYPE_ADST32
:
return
av1_fadst32_new_sse4_1
;
break
;
default:
assert
(
0
);
}
...
...
av1/common/x86/av1_fwd_txfm_impl_sse2.h
deleted
100644 → 0
View file @
7f6bf9c7
This diff is collapsed.
Click to expand it.
av1/common/x86/av1_fwd_txfm_sse2.c
deleted
100644 → 0
View file @
7f6bf9c7
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include <emmintrin.h> // SSE2
#include "./aom_config.h"
#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/x86/fwd_txfm_sse2.h"
// DC-only forward transform of a 4x4 block using SSE2.
//
// Reads four rows of four int16_t samples (rows are `stride` elements apart),
// adds all sixteen samples together, doubles the total, and passes the
// resulting vector (total in 32-bit lane 0) to store_output() with `output`.
// The very first addition is done in 16-bit lanes, so each two-sample partial
// sum wraps modulo 2^16 before being widened.
void av1_fdct4x4_1_sse2(const int16_t *input, tran_low_t *output, int stride) {
  const __m128i zeros = _mm_setzero_si128();

  // Each row is 4 x int16_t = 64 bits, loaded into the low half of a register.
  const __m128i row0 = _mm_loadl_epi64((const __m128i *)(input + 0 * stride));
  const __m128i row1 = _mm_loadl_epi64((const __m128i *)(input + 1 * stride));
  const __m128i row2 = _mm_loadl_epi64((const __m128i *)(input + 2 * stride));
  const __m128i row3 = _mm_loadl_epi64((const __m128i *)(input + 3 * stride));

  // Pack two rows per register, then add them lane-wise (16-bit lanes).
  const __m128i rows03 = _mm_unpacklo_epi64(row0, row3);
  const __m128i rows12 = _mm_unpacklo_epi64(row1, row2);
  const __m128i pair_sums16 = _mm_add_epi16(rows03, rows12);

  // Sign-extend the eight 16-bit partial sums to 32 bits: interleaving with
  // zero puts each value in the upper half of a 32-bit lane, and the
  // arithmetic shift brings it back down with its sign.
  const __m128i lo32 =
      _mm_srai_epi32(_mm_unpacklo_epi16(zeros, pair_sums16), 16);
  const __m128i hi32 =
      _mm_srai_epi32(_mm_unpackhi_epi16(zeros, pair_sums16), 16);
  const __m128i quad_sums = _mm_add_epi32(lo32, hi32);

  // Horizontal reduction of the four 32-bit lanes into lane 0.
  const __m128i spread_lo = _mm_unpacklo_epi32(quad_sums, zeros);
  const __m128i spread_hi = _mm_unpackhi_epi32(quad_sums, zeros);
  const __m128i half_sums = _mm_add_epi32(spread_lo, spread_hi);
  const __m128i total = _mm_add_epi32(half_sums, _mm_srli_si128(half_sums, 8));

  // The 4x4 DC term is twice the sample sum.
  __m128i result = _mm_slli_epi32(total, 1);
  store_output(&result, output);
}
// DC-only forward transform of an 8x8 block using SSE2.
//
// Reads eight rows of eight int16_t samples (rows are `stride` elements
// apart), adds all sixty-four samples together, and passes the resulting
// vector (total in 32-bit lane 0) to store_output() with `output`.
// Rows are fetched with _mm_load_si128, so every row address must be 16-byte
// aligned. Row accumulation happens in 16-bit lanes, so each per-column sum
// wraps modulo 2^16 before being widened.
void av1_fdct8x8_1_sse2(const int16_t *input, tran_low_t *output, int stride) {
  const __m128i zeros = _mm_setzero_si128();

  const __m128i row0 = _mm_load_si128((const __m128i *)(input + 0 * stride));
  const __m128i row1 = _mm_load_si128((const __m128i *)(input + 1 * stride));
  const __m128i row2 = _mm_load_si128((const __m128i *)(input + 2 * stride));
  const __m128i row3 = _mm_load_si128((const __m128i *)(input + 3 * stride));
  const __m128i row4 = _mm_load_si128((const __m128i *)(input + 4 * stride));
  const __m128i row5 = _mm_load_si128((const __m128i *)(input + 5 * stride));
  const __m128i row6 = _mm_load_si128((const __m128i *)(input + 6 * stride));
  const __m128i row7 = _mm_load_si128((const __m128i *)(input + 7 * stride));

  // Column-wise accumulation of all eight rows in 16-bit lanes.
  const __m128i rows01 = _mm_add_epi16(row0, row1);
  const __m128i rows23 = _mm_add_epi16(row2, row3);
  const __m128i rows45 = _mm_add_epi16(row4, row5);
  const __m128i rows67 = _mm_add_epi16(row6, row7);
  const __m128i rows0123 = _mm_add_epi16(rows01, rows23);
  const __m128i rows012345 = _mm_add_epi16(rows0123, rows45);
  const __m128i column_sums16 = _mm_add_epi16(rows012345, rows67);

  // Sign-extend the eight 16-bit column sums to 32 bits: interleaving with
  // zero puts each value in the upper half of a 32-bit lane, and the
  // arithmetic shift brings it back down with its sign.
  const __m128i lo32 =
      _mm_srai_epi32(_mm_unpacklo_epi16(zeros, column_sums16), 16);
  const __m128i hi32 =
      _mm_srai_epi32(_mm_unpackhi_epi16(zeros, column_sums16), 16);
  const __m128i quad_sums = _mm_add_epi32(lo32, hi32);

  // Horizontal reduction of the four 32-bit lanes into lane 0.
  const __m128i spread_lo = _mm_unpacklo_epi32(quad_sums, zeros);
  const __m128i spread_hi = _mm_unpackhi_epi32(quad_sums, zeros);
  const __m128i half_sums = _mm_add_epi32(spread_lo, spread_hi);

  __m128i total = _mm_add_epi32(half_sums, _mm_srli_si128(half_sums, 8));
  store_output(&total, output);
}
void
av1_fdct16x16_1_sse2
(
const
int16_t
*
input
,
tran_low_t
*
output
,
int
stride
)
{
__m128i
in0
,
in1
,
in2
,
in3
;
__m128i
u0
,
u1
;
__m128i
sum
=
_mm_setzero_si128
();
<