Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Xiph.Org
aom-rav1e
Commits
8a01074d
Commit
8a01074d
authored
Oct 03, 2014
by
Deb Mukherjee
Committed by
Gerrit Code Review
Oct 03, 2014
Browse files
Merge "Incorporate WRAPLOW macro into non-highbitdepth tx"
parents
2c7b94f6
d50716fa
Changes
13
Expand all
Hide whitespace changes
Inline
Side-by-side
configure
View file @
8a01074d
...
...
@@ -281,7 +281,7 @@ EXPERIMENT_LIST="
spatial_svc
vp9_temporal_denoising
fp_mb_stats
emulate_hardware
_highbitdepth
emulate_hardware
"
CONFIG_LIST
=
"
external_build
...
...
test/convolve_test.cc
View file @
8a01074d
...
...
@@ -217,7 +217,7 @@ void high_filter_block2d_8_c(const uint16_t *src_ptr,
(
VP9_FILTER_WEIGHT
>>
1
);
// Rounding
// Normalize back to 0-255...
*
output_ptr
=
clip_pixel_high
(
temp
>>
VP9_FILTER_SHIFT
,
bd
);
*
output_ptr
=
clip_pixel_high
bd
(
temp
>>
VP9_FILTER_SHIFT
,
bd
);
++
src_ptr
;
output_ptr
+=
intermediate_height
;
}
...
...
@@ -245,7 +245,7 @@ void high_filter_block2d_8_c(const uint16_t *src_ptr,
(
VP9_FILTER_WEIGHT
>>
1
);
// Rounding
// Normalize back to 0-255...
*
dst_ptr
++
=
clip_pixel_high
(
temp
>>
VP9_FILTER_SHIFT
,
bd
);
*
dst_ptr
++
=
clip_pixel_high
bd
(
temp
>>
VP9_FILTER_SHIFT
,
bd
);
src_ptr
+=
intermediate_height
;
}
src_ptr
+=
intermediate_next_stride
;
...
...
test/dct16x16_test.cc
View file @
8a01074d
...
...
@@ -745,7 +745,7 @@ INSTANTIATE_TEST_CASE_P(
make_tuple
(
&
vp9_fht16x16_c
,
&
vp9_iht16x16_256_add_c
,
3
,
VPX_BITS_8
)));
#endif
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH
&& !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
NEON
,
Trans16x16DCT
,
::
testing
::
Values
(
...
...
@@ -753,7 +753,7 @@ INSTANTIATE_TEST_CASE_P(
&
vp9_idct16x16_256_add_neon
,
0
,
VPX_BITS_8
)));
#endif
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
&& !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
SSE2
,
Trans16x16DCT
,
::
testing
::
Values
(
...
...
@@ -772,7 +772,7 @@ INSTANTIATE_TEST_CASE_P(
VPX_BITS_8
)));
#endif
#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH
&& !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
SSSE3
,
Trans16x16DCT
,
::
testing
::
Values
(
...
...
test/dct32x32_test.cc
View file @
8a01074d
...
...
@@ -333,7 +333,7 @@ INSTANTIATE_TEST_CASE_P(
&
vp9_idct32x32_1024_add_c
,
1
,
VPX_BITS_8
)));
#endif
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH
&& !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
NEON
,
Trans32x32Test
,
::
testing
::
Values
(
...
...
@@ -343,7 +343,7 @@ INSTANTIATE_TEST_CASE_P(
&
vp9_idct32x32_1024_add_neon
,
1
,
VPX_BITS_8
)));
#endif
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
&& !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
SSE2
,
Trans32x32Test
,
::
testing
::
Values
(
...
...
@@ -353,7 +353,7 @@ INSTANTIATE_TEST_CASE_P(
&
vp9_idct32x32_1024_add_sse2
,
1
,
VPX_BITS_8
)));
#endif
#if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH
&& !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
AVX2
,
Trans32x32Test
,
::
testing
::
Values
(
...
...
test/fdct4x4_test.cc
View file @
8a01074d
...
...
@@ -458,7 +458,7 @@ INSTANTIATE_TEST_CASE_P(
make_tuple
(
&
vp9_fwht4x4_c
,
&
vp9_iwht4x4_16_add_c
,
0
,
VPX_BITS_8
)));
#endif
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH
&& !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
NEON
,
Trans4x4DCT
,
::
testing
::
Values
(
...
...
@@ -473,14 +473,15 @@ INSTANTIATE_TEST_CASE_P(
make_tuple
(
&
vp9_fht4x4_c
,
&
vp9_iht4x4_16_add_neon
,
3
,
VPX_BITS_8
)));
#endif
#if CONFIG_USE_X86INC && HAVE_MMX && !CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_USE_X86INC && HAVE_MMX && !CONFIG_VP9_HIGHBITDEPTH && \
!CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
MMX
,
Trans4x4WHT
,
::
testing
::
Values
(
make_tuple
(
&
vp9_fwht4x4_mmx
,
&
vp9_iwht4x4_16_add_c
,
0
,
VPX_BITS_8
)));
#endif
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
&& !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
SSE2
,
Trans4x4DCT
,
::
testing
::
Values
(
...
...
test/fdct8x8_test.cc
View file @
8a01074d
...
...
@@ -568,7 +568,7 @@ INSTANTIATE_TEST_CASE_P(
make_tuple
(
&
vp9_fht8x8_c
,
&
vp9_iht8x8_64_add_c
,
3
,
VPX_BITS_8
)));
#endif
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH
&& !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
NEON
,
FwdTrans8x8DCT
,
::
testing
::
Values
(
...
...
@@ -583,7 +583,7 @@ INSTANTIATE_TEST_CASE_P(
make_tuple
(
&
vp9_fht8x8_c
,
&
vp9_iht8x8_64_add_neon
,
3
,
VPX_BITS_8
)));
#endif
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
&& !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
SSE2
,
FwdTrans8x8DCT
,
::
testing
::
Values
(
...
...
@@ -598,7 +598,8 @@ INSTANTIATE_TEST_CASE_P(
make_tuple
(
&
vp9_fht8x8_sse2
,
&
vp9_iht8x8_64_add_sse2
,
3
,
VPX_BITS_8
)));
#endif
#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
!CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
SSSE3
,
FwdTrans8x8DCT
,
::
testing
::
Values
(
...
...
test/partial_idct_test.cc
View file @
8a01074d
...
...
@@ -260,7 +260,7 @@ INSTANTIATE_TEST_CASE_P(
TX_4X4
,
1
)));
#endif
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
&& !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
SSE2
,
PartialIDctTest
,
::
testing
::
Values
(
...
...
@@ -294,7 +294,8 @@ INSTANTIATE_TEST_CASE_P(
TX_4X4
,
1
)));
#endif
#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
!CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
SSSE3_64
,
PartialIDctTest
,
::
testing
::
Values
(
...
...
@@ -304,7 +305,7 @@ INSTANTIATE_TEST_CASE_P(
TX_8X8
,
12
)));
#endif
#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH
&& !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
SSSE3
,
PartialIDctTest
,
::
testing
::
Values
(
...
...
vp9/common/vp9_common.h
View file @
8a01074d
...
...
@@ -65,7 +65,7 @@ static INLINE int get_unsigned_bits(unsigned int num_values) {
}
#if CONFIG_VP9_HIGHBITDEPTH
static
INLINE
uint16_t
clip_pixel_high
(
int
val
,
int
bd
)
{
static
INLINE
uint16_t
clip_pixel_high
bd
(
int
val
,
int
bd
)
{
switch
(
bd
)
{
case
8
:
default:
...
...
vp9/common/vp9_convolve.c
View file @
8a01074d
...
...
@@ -299,7 +299,7 @@ static void high_convolve_horiz(const uint8_t *src8, ptrdiff_t src_stride,
int
k
,
sum
=
0
;
for
(
k
=
0
;
k
<
SUBPEL_TAPS
;
++
k
)
sum
+=
src_x
[
k
]
*
x_filter
[
k
];
dst
[
x
]
=
clip_pixel_high
(
ROUND_POWER_OF_TWO
(
sum
,
FILTER_BITS
),
bd
);
dst
[
x
]
=
clip_pixel_high
bd
(
ROUND_POWER_OF_TWO
(
sum
,
FILTER_BITS
),
bd
);
x_q4
+=
x_step_q4
;
}
src
+=
src_stride
;
...
...
@@ -325,7 +325,7 @@ static void high_convolve_avg_horiz(const uint8_t *src8, ptrdiff_t src_stride,
for
(
k
=
0
;
k
<
SUBPEL_TAPS
;
++
k
)
sum
+=
src_x
[
k
]
*
x_filter
[
k
];
dst
[
x
]
=
ROUND_POWER_OF_TWO
(
dst
[
x
]
+
clip_pixel_high
(
ROUND_POWER_OF_TWO
(
sum
,
FILTER_BITS
),
bd
),
1
);
clip_pixel_high
bd
(
ROUND_POWER_OF_TWO
(
sum
,
FILTER_BITS
),
bd
),
1
);
x_q4
+=
x_step_q4
;
}
src
+=
src_stride
;
...
...
@@ -350,7 +350,7 @@ static void high_convolve_vert(const uint8_t *src8, ptrdiff_t src_stride,
int
k
,
sum
=
0
;
for
(
k
=
0
;
k
<
SUBPEL_TAPS
;
++
k
)
sum
+=
src_y
[
k
*
src_stride
]
*
y_filter
[
k
];
dst
[
y
*
dst_stride
]
=
clip_pixel_high
(
dst
[
y
*
dst_stride
]
=
clip_pixel_high
bd
(
ROUND_POWER_OF_TWO
(
sum
,
FILTER_BITS
),
bd
);
y_q4
+=
y_step_q4
;
}
...
...
@@ -377,7 +377,7 @@ static void high_convolve_avg_vert(const uint8_t *src8, ptrdiff_t src_stride,
for
(
k
=
0
;
k
<
SUBPEL_TAPS
;
++
k
)
sum
+=
src_y
[
k
*
src_stride
]
*
y_filter
[
k
];
dst
[
y
*
dst_stride
]
=
ROUND_POWER_OF_TWO
(
dst
[
y
*
dst_stride
]
+
clip_pixel_high
(
ROUND_POWER_OF_TWO
(
sum
,
FILTER_BITS
),
bd
),
1
);
clip_pixel_high
bd
(
ROUND_POWER_OF_TWO
(
sum
,
FILTER_BITS
),
bd
),
1
);
y_q4
+=
y_step_q4
;
}
++
src
;
...
...
vp9/common/vp9_idct.c
View file @
8a01074d
This diff is collapsed.
Click to expand it.
vp9/common/vp9_reconintra.c
View file @
8a01074d
...
...
@@ -251,7 +251,7 @@ static INLINE void high_tm_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
for
(
r
=
0
;
r
<
bs
;
r
++
)
{
for
(
c
=
0
;
c
<
bs
;
c
++
)
dst
[
c
]
=
clip_pixel_high
(
left
[
r
]
+
above
[
c
]
-
ytop_left
,
bd
);
dst
[
c
]
=
clip_pixel_high
bd
(
left
[
r
]
+
above
[
c
]
-
ytop_left
,
bd
);
dst
+=
stride
;
}
}
...
...
vp9/common/vp9_rtcd_defs.pl
View file @
8a01074d
...
...
@@ -331,6 +331,8 @@ $vp9_convolve8_avg_vert_neon_asm=vp9_convolve8_avg_vert_neon;
# dct
#
if
(
vpx_config
("
CONFIG_VP9_HIGHBITDEPTH
")
eq
"
yes
")
{
# Note as optimized versions of these functions are added we need to add a check to ensure
# that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only.
add_proto
qw/void vp9_idct4x4_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct4x4_1_add/
;
...
...
@@ -380,69 +382,123 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto
qw/void vp9_iwht4x4_16_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_iwht4x4_16_add/
;
}
else
{
add_proto
qw/void vp9_idct4x4_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct4x4_1_add sse2 neon_asm dspr2/
;
$vp9_idct4x4_1_add_neon_asm
=
vp9_idct4x4_1_add_neon
;
# Force C versions if CONFIG_EMULATE_HARDWARE is 1
if
(
vpx_config
("
CONFIG_EMULATE_HARDWARE
")
eq
"
yes
")
{
add_proto
qw/void vp9_idct4x4_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct4x4_1_add/
;
add_proto
qw/void vp9_idct4x4_16_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct4x4_16_add sse2 neon_asm dspr2/
;
$vp9_idct4x4_16_add_neon_asm
=
vp9_idct4x4_16_add_neon
;
add_proto
qw/void vp9_idct4x4_16_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct4x4_16_add/
;
add_proto
qw/void vp9_idct8x8_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct8x8_1_add sse2 neon_asm dspr2/
;
$vp9_idct8x8_1_add_neon_asm
=
vp9_idct8x8_1_add_neon
;
add_proto
qw/void vp9_idct8x8_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct8x8_1_add/
;
add_proto
qw/void vp9_idct8x8_64_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct8x8_64_add sse2 neon_asm dspr2/
,
"
$ssse3_x86_64
";
$vp9_idct8x8_64_add_neon_asm
=
vp9_idct8x8_64_add_neon
;
add_proto
qw/void vp9_idct8x8_64_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct8x8_64_add/
;
add_proto
qw/void vp9_idct8x8_12_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct8x8_12_add sse2 neon_asm dspr2/
,
"
$ssse3_x86_64
";
$vp9_idct8x8_12_add_neon_asm
=
vp9_idct8x8_12_add_neon
;
add_proto
qw/void vp9_idct8x8_12_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct8x8_12_add/
;
add_proto
qw/void vp9_idct16x16_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct16x16_1_add sse2 neon_asm dspr2/
;
$vp9_idct16x16_1_add_neon_asm
=
vp9_idct16x16_1_add_neon
;
add_proto
qw/void vp9_idct16x16_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct16x16_1_add/
;
add_proto
qw/void vp9_idct16x16_256_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct16x16_256_add sse2 ssse3 neon_asm dspr2/
;
$vp9_idct16x16_256_add_neon_asm
=
vp9_idct16x16_256_add_neon
;
add_proto
qw/void vp9_idct16x16_256_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct16x16_256_add/
;
add_proto
qw/void vp9_idct16x16_10_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct16x16_10_add sse2 ssse3 neon_asm dspr2/
;
$vp9_idct16x16_10_add_neon_asm
=
vp9_idct16x16_10_add_neon
;
add_proto
qw/void vp9_idct16x16_10_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct16x16_10_add/
;
add_proto
qw/void vp9_idct32x32_1024_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct32x32_1024_add sse2 neon_asm dspr2/
;
$vp9_idct32x32_1024_add_neon_asm
=
vp9_idct32x32_1024_add_neon
;
add_proto
qw/void vp9_idct32x32_1024_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct32x32_1024_add/
;
add_proto
qw/void vp9_idct32x32_34_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct32x32_34_add sse2 neon_asm dspr2/
;
$vp9_idct32x32_34_add_neon_asm
=
vp9_idct32x32_1024_add_neon
;
add_proto
qw/void vp9_idct32x32_34_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct32x32_34_add/
;
add_proto
qw/void vp9_idct32x32_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct32x32_1_add sse2 neon_asm dspr2/
;
$vp9_idct32x32_1_add_neon_asm
=
vp9_idct32x32_1_add_neon
;
add_proto
qw/void vp9_idct32x32_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct32x32_1_add/
;
add_proto
qw/void vp9_iht4x4_16_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type
";
specialize
qw/vp9_iht4x4_16_add sse2 neon_asm dspr2/
;
$vp9_iht4x4_16_add_neon_asm
=
vp9_iht4x4_16_add_neon
;
add_proto
qw/void vp9_iht4x4_16_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type
";
specialize
qw/vp9_iht4x4_16_add/
;
add_proto
qw/void vp9_iht8x8_64_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type
";
specialize
qw/vp9_iht8x8_64_add sse2 neon_asm dspr2/
;
$vp9_iht8x8_64_add_neon_asm
=
vp9_iht8x8_64_add_neon
;
add_proto
qw/void vp9_iht8x8_64_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type
";
specialize
qw/vp9_iht8x8_64_add/
;
add_proto
qw/void vp9_iht16x16_256_add/
,
"
const tran_low_t *input, uint8_t *output, int pitch, int tx_type
";
specialize
qw/vp9_iht16x16_256_add
sse2 dspr2
/
;
add_proto
qw/void vp9_iht16x16_256_add/
,
"
const tran_low_t *input, uint8_t *output, int pitch, int tx_type
";
specialize
qw/vp9_iht16x16_256_add/
;
# dct and add
# dct and add
add_proto
qw/void vp9_iwht4x4_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_iwht4x4_1_add/
;
add_proto
qw/void vp9_iwht4x4_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_iwht4x4_1_add/
;
add_proto
qw/void vp9_iwht4x4_16_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_iwht4x4_16_add/
;
add_proto
qw/void vp9_iwht4x4_16_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_iwht4x4_16_add/
;
}
else
{
add_proto
qw/void vp9_idct4x4_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct4x4_1_add sse2 neon_asm dspr2/
;
$vp9_idct4x4_1_add_neon_asm
=
vp9_idct4x4_1_add_neon
;
add_proto
qw/void vp9_idct4x4_16_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct4x4_16_add sse2 neon_asm dspr2/
;
$vp9_idct4x4_16_add_neon_asm
=
vp9_idct4x4_16_add_neon
;
add_proto
qw/void vp9_idct8x8_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct8x8_1_add sse2 neon_asm dspr2/
;
$vp9_idct8x8_1_add_neon_asm
=
vp9_idct8x8_1_add_neon
;
add_proto
qw/void vp9_idct8x8_64_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct8x8_64_add sse2 neon_asm dspr2/
,
"
$ssse3_x86_64
";
$vp9_idct8x8_64_add_neon_asm
=
vp9_idct8x8_64_add_neon
;
add_proto
qw/void vp9_idct8x8_12_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct8x8_12_add sse2 neon_asm dspr2/
,
"
$ssse3_x86_64
";
$vp9_idct8x8_12_add_neon_asm
=
vp9_idct8x8_12_add_neon
;
add_proto
qw/void vp9_idct16x16_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct16x16_1_add sse2 neon_asm dspr2/
;
$vp9_idct16x16_1_add_neon_asm
=
vp9_idct16x16_1_add_neon
;
add_proto
qw/void vp9_idct16x16_256_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct16x16_256_add sse2 ssse3 neon_asm dspr2/
;
$vp9_idct16x16_256_add_neon_asm
=
vp9_idct16x16_256_add_neon
;
add_proto
qw/void vp9_idct16x16_10_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct16x16_10_add sse2 ssse3 neon_asm dspr2/
;
$vp9_idct16x16_10_add_neon_asm
=
vp9_idct16x16_10_add_neon
;
add_proto
qw/void vp9_idct32x32_1024_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct32x32_1024_add sse2 neon_asm dspr2/
;
$vp9_idct32x32_1024_add_neon_asm
=
vp9_idct32x32_1024_add_neon
;
add_proto
qw/void vp9_idct32x32_34_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct32x32_34_add sse2 neon_asm dspr2/
;
$vp9_idct32x32_34_add_neon_asm
=
vp9_idct32x32_1024_add_neon
;
add_proto
qw/void vp9_idct32x32_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct32x32_1_add sse2 neon_asm dspr2/
;
$vp9_idct32x32_1_add_neon_asm
=
vp9_idct32x32_1_add_neon
;
add_proto
qw/void vp9_iht4x4_16_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type
";
specialize
qw/vp9_iht4x4_16_add sse2 neon_asm dspr2/
;
$vp9_iht4x4_16_add_neon_asm
=
vp9_iht4x4_16_add_neon
;
add_proto
qw/void vp9_iht8x8_64_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type
";
specialize
qw/vp9_iht8x8_64_add sse2 neon_asm dspr2/
;
$vp9_iht8x8_64_add_neon_asm
=
vp9_iht8x8_64_add_neon
;
add_proto
qw/void vp9_iht16x16_256_add/
,
"
const tran_low_t *input, uint8_t *output, int pitch, int tx_type
";
specialize
qw/vp9_iht16x16_256_add sse2 dspr2/
;
# dct and add
add_proto
qw/void vp9_iwht4x4_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_iwht4x4_1_add/
;
add_proto
qw/void vp9_iwht4x4_16_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_iwht4x4_16_add/
;
}
}
# High bitdepth functions
...
...
@@ -689,6 +745,8 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
#
# dct
#
# Note as optimized versions of these functions are added we need to add a check to ensure
# that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only.
add_proto
qw/void vp9_high_idct4x4_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int bd
";
specialize
qw/vp9_high_idct4x4_1_add/
;
...
...
vp9/encoder/vp9_resize.c
View file @
8a01074d
...
...
@@ -571,7 +571,7 @@ static void highbd_interpolate(const uint16_t *const input, int inlength,
sum
+=
filter
[
k
]
*
input
[(
pk
<
0
?
0
:
(
pk
>=
inlength
?
inlength
-
1
:
pk
))];
}
*
optr
++
=
clip_pixel_high
(
ROUND_POWER_OF_TWO
(
sum
,
FILTER_BITS
),
bd
);
*
optr
++
=
clip_pixel_high
bd
(
ROUND_POWER_OF_TWO
(
sum
,
FILTER_BITS
),
bd
);
}
}
else
{
// Initial part.
...
...
@@ -585,7 +585,7 @@ static void highbd_interpolate(const uint16_t *const input, int inlength,
sum
+=
filter
[
k
]
*
input
[(
int_pel
-
INTERP_TAPS
/
2
+
1
+
k
<
0
?
0
:
int_pel
-
INTERP_TAPS
/
2
+
1
+
k
)];
*
optr
++
=
clip_pixel_high
(
ROUND_POWER_OF_TWO
(
sum
,
FILTER_BITS
),
bd
);
*
optr
++
=
clip_pixel_high
bd
(
ROUND_POWER_OF_TWO
(
sum
,
FILTER_BITS
),
bd
);
}
// Middle part.
for
(;
x
<=
x2
;
++
x
,
y
+=
delta
)
{
...
...
@@ -596,7 +596,7 @@ static void highbd_interpolate(const uint16_t *const input, int inlength,
sum
=
0
;
for
(
k
=
0
;
k
<
INTERP_TAPS
;
++
k
)
sum
+=
filter
[
k
]
*
input
[
int_pel
-
INTERP_TAPS
/
2
+
1
+
k
];
*
optr
++
=
clip_pixel_high
(
ROUND_POWER_OF_TWO
(
sum
,
FILTER_BITS
),
bd
);
*
optr
++
=
clip_pixel_high
bd
(
ROUND_POWER_OF_TWO
(
sum
,
FILTER_BITS
),
bd
);
}
// End part.
for
(;
x
<
outlength
;
++
x
,
y
+=
delta
)
{
...
...
@@ -609,7 +609,7 @@ static void highbd_interpolate(const uint16_t *const input, int inlength,
sum
+=
filter
[
k
]
*
input
[(
int_pel
-
INTERP_TAPS
/
2
+
1
+
k
>=
inlength
?
inlength
-
1
:
int_pel
-
INTERP_TAPS
/
2
+
1
+
k
)];
*
optr
++
=
clip_pixel_high
(
ROUND_POWER_OF_TWO
(
sum
,
FILTER_BITS
),
bd
);
*
optr
++
=
clip_pixel_high
bd
(
ROUND_POWER_OF_TWO
(
sum
,
FILTER_BITS
),
bd
);
}
}
}
...
...
@@ -635,7 +635,7 @@ static void highbd_down2_symeven(const uint16_t *const input, int length,
filter
[
j
];
}
sum
>>=
FILTER_BITS
;
*
optr
++
=
clip_pixel_high
(
sum
,
bd
);
*
optr
++
=
clip_pixel_high
bd
(
sum
,
bd
);
}
}
else
{
// Initial part.
...
...
@@ -645,7 +645,7 @@ static void highbd_down2_symeven(const uint16_t *const input, int length,
sum
+=
(
input
[(
i
-
j
<
0
?
0
:
i
-
j
)]
+
input
[
i
+
1
+
j
])
*
filter
[
j
];
}
sum
>>=
FILTER_BITS
;
*
optr
++
=
clip_pixel_high
(
sum
,
bd
);
*
optr
++
=
clip_pixel_high
bd
(
sum
,
bd
);
}
// Middle part.
for
(;
i
<
l2
;
i
+=
2
)
{
...
...
@@ -654,7 +654,7 @@ static void highbd_down2_symeven(const uint16_t *const input, int length,
sum
+=
(
input
[
i
-
j
]
+
input
[
i
+
1
+
j
])
*
filter
[
j
];
}
sum
>>=
FILTER_BITS
;
*
optr
++
=
clip_pixel_high
(
sum
,
bd
);
*
optr
++
=
clip_pixel_high
bd
(
sum
,
bd
);
}
// End part.
for
(;
i
<
length
;
i
+=
2
)
{
...
...
@@ -665,7 +665,7 @@ static void highbd_down2_symeven(const uint16_t *const input, int length,
filter
[
j
];
}
sum
>>=
FILTER_BITS
;
*
optr
++
=
clip_pixel_high
(
sum
,
bd
);
*
optr
++
=
clip_pixel_high
bd
(
sum
,
bd
);
}
}
}
...
...
@@ -691,7 +691,7 @@ static void highbd_down2_symodd(const uint16_t *const input, int length,
filter
[
j
];
}
sum
>>=
FILTER_BITS
;
*
optr
++
=
clip_pixel_high
(
sum
,
bd
);
*
optr
++
=
clip_pixel_high
bd
(
sum
,
bd
);
}
}
else
{
// Initial part.
...
...
@@ -701,7 +701,7 @@ static void highbd_down2_symodd(const uint16_t *const input, int length,
sum
+=
(
input
[(
i
-
j
<
0
?
0
:
i
-
j
)]
+
input
[
i
+
j
])
*
filter
[
j
];
}
sum
>>=
FILTER_BITS
;
*
optr
++
=
clip_pixel_high
(
sum
,
bd
);
*
optr
++
=
clip_pixel_high
bd
(
sum
,
bd
);
}
// Middle part.
for
(;
i
<
l2
;
i
+=
2
)
{
...
...
@@ -710,7 +710,7 @@ static void highbd_down2_symodd(const uint16_t *const input, int length,
sum
+=
(
input
[
i
-
j
]
+
input
[
i
+
j
])
*
filter
[
j
];
}
sum
>>=
FILTER_BITS
;
*
optr
++
=
clip_pixel_high
(
sum
,
bd
);
*
optr
++
=
clip_pixel_high
bd
(
sum
,
bd
);
}
// End part.
for
(;
i
<
length
;
i
+=
2
)
{
...
...
@@ -720,7 +720,7 @@ static void highbd_down2_symodd(const uint16_t *const input, int length,
filter
[
j
];
}
sum
>>=
FILTER_BITS
;
*
optr
++
=
clip_pixel_high
(
sum
,
bd
);
*
optr
++
=
clip_pixel_high
bd
(
sum
,
bd
);
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment