Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
Xiph.Org
aom-rav1e
Commits
8a01074d
Commit
8a01074d
authored
Oct 03, 2014
by
Deb Mukherjee
Committed by
Gerrit Code Review
Oct 03, 2014
Browse files
Options
Browse Files
Download
Plain Diff
Merge "Incorporate WRAPLOW macro into non-highbitdepth tx"
parents
2c7b94f6
d50716fa
Changes
13
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
1162 additions
and
1066 deletions
+1162
-1066
configure
configure
+1
-1
test/convolve_test.cc
test/convolve_test.cc
+2
-2
test/dct16x16_test.cc
test/dct16x16_test.cc
+3
-3
test/dct32x32_test.cc
test/dct32x32_test.cc
+3
-3
test/fdct4x4_test.cc
test/fdct4x4_test.cc
+4
-3
test/fdct8x8_test.cc
test/fdct8x8_test.cc
+4
-3
test/partial_idct_test.cc
test/partial_idct_test.cc
+4
-3
vp9/common/vp9_common.h
vp9/common/vp9_common.h
+1
-1
vp9/common/vp9_convolve.c
vp9/common/vp9_convolve.c
+4
-4
vp9/common/vp9_idct.c
vp9/common/vp9_idct.c
+1019
-984
vp9/common/vp9_reconintra.c
vp9/common/vp9_reconintra.c
+1
-1
vp9/common/vp9_rtcd_defs.pl
vp9/common/vp9_rtcd_defs.pl
+104
-46
vp9/encoder/vp9_resize.c
vp9/encoder/vp9_resize.c
+12
-12
No files found.
configure
View file @
8a01074d
...
...
@@ -281,7 +281,7 @@ EXPERIMENT_LIST="
spatial_svc
vp9_temporal_denoising
fp_mb_stats
emulate_hardware
_highbitdepth
emulate_hardware
"
CONFIG_LIST
=
"
external_build
...
...
test/convolve_test.cc
View file @
8a01074d
...
...
@@ -217,7 +217,7 @@ void high_filter_block2d_8_c(const uint16_t *src_ptr,
(
VP9_FILTER_WEIGHT
>>
1
);
// Rounding
// Normalize back to 0-255...
*
output_ptr
=
clip_pixel_high
(
temp
>>
VP9_FILTER_SHIFT
,
bd
);
*
output_ptr
=
clip_pixel_high
bd
(
temp
>>
VP9_FILTER_SHIFT
,
bd
);
++
src_ptr
;
output_ptr
+=
intermediate_height
;
}
...
...
@@ -245,7 +245,7 @@ void high_filter_block2d_8_c(const uint16_t *src_ptr,
(
VP9_FILTER_WEIGHT
>>
1
);
// Rounding
// Normalize back to 0-255...
*
dst_ptr
++
=
clip_pixel_high
(
temp
>>
VP9_FILTER_SHIFT
,
bd
);
*
dst_ptr
++
=
clip_pixel_high
bd
(
temp
>>
VP9_FILTER_SHIFT
,
bd
);
src_ptr
+=
intermediate_height
;
}
src_ptr
+=
intermediate_next_stride
;
...
...
test/dct16x16_test.cc
View file @
8a01074d
...
...
@@ -745,7 +745,7 @@ INSTANTIATE_TEST_CASE_P(
make_tuple
(
&
vp9_fht16x16_c
,
&
vp9_iht16x16_256_add_c
,
3
,
VPX_BITS_8
)));
#endif
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH
&& !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
NEON
,
Trans16x16DCT
,
::
testing
::
Values
(
...
...
@@ -753,7 +753,7 @@ INSTANTIATE_TEST_CASE_P(
&
vp9_idct16x16_256_add_neon
,
0
,
VPX_BITS_8
)));
#endif
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
&& !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
SSE2
,
Trans16x16DCT
,
::
testing
::
Values
(
...
...
@@ -772,7 +772,7 @@ INSTANTIATE_TEST_CASE_P(
VPX_BITS_8
)));
#endif
#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH
&& !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
SSSE3
,
Trans16x16DCT
,
::
testing
::
Values
(
...
...
test/dct32x32_test.cc
View file @
8a01074d
...
...
@@ -333,7 +333,7 @@ INSTANTIATE_TEST_CASE_P(
&
vp9_idct32x32_1024_add_c
,
1
,
VPX_BITS_8
)));
#endif
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH
&& !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
NEON
,
Trans32x32Test
,
::
testing
::
Values
(
...
...
@@ -343,7 +343,7 @@ INSTANTIATE_TEST_CASE_P(
&
vp9_idct32x32_1024_add_neon
,
1
,
VPX_BITS_8
)));
#endif
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
&& !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
SSE2
,
Trans32x32Test
,
::
testing
::
Values
(
...
...
@@ -353,7 +353,7 @@ INSTANTIATE_TEST_CASE_P(
&
vp9_idct32x32_1024_add_sse2
,
1
,
VPX_BITS_8
)));
#endif
#if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH
&& !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
AVX2
,
Trans32x32Test
,
::
testing
::
Values
(
...
...
test/fdct4x4_test.cc
View file @
8a01074d
...
...
@@ -458,7 +458,7 @@ INSTANTIATE_TEST_CASE_P(
make_tuple
(
&
vp9_fwht4x4_c
,
&
vp9_iwht4x4_16_add_c
,
0
,
VPX_BITS_8
)));
#endif
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH
&& !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
NEON
,
Trans4x4DCT
,
::
testing
::
Values
(
...
...
@@ -473,14 +473,15 @@ INSTANTIATE_TEST_CASE_P(
make_tuple
(
&
vp9_fht4x4_c
,
&
vp9_iht4x4_16_add_neon
,
3
,
VPX_BITS_8
)));
#endif
#if CONFIG_USE_X86INC && HAVE_MMX && !CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_USE_X86INC && HAVE_MMX && !CONFIG_VP9_HIGHBITDEPTH && \
!CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
MMX
,
Trans4x4WHT
,
::
testing
::
Values
(
make_tuple
(
&
vp9_fwht4x4_mmx
,
&
vp9_iwht4x4_16_add_c
,
0
,
VPX_BITS_8
)));
#endif
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
&& !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
SSE2
,
Trans4x4DCT
,
::
testing
::
Values
(
...
...
test/fdct8x8_test.cc
View file @
8a01074d
...
...
@@ -568,7 +568,7 @@ INSTANTIATE_TEST_CASE_P(
make_tuple
(
&
vp9_fht8x8_c
,
&
vp9_iht8x8_64_add_c
,
3
,
VPX_BITS_8
)));
#endif
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH
&& !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
NEON
,
FwdTrans8x8DCT
,
::
testing
::
Values
(
...
...
@@ -583,7 +583,7 @@ INSTANTIATE_TEST_CASE_P(
make_tuple
(
&
vp9_fht8x8_c
,
&
vp9_iht8x8_64_add_neon
,
3
,
VPX_BITS_8
)));
#endif
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
&& !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
SSE2
,
FwdTrans8x8DCT
,
::
testing
::
Values
(
...
...
@@ -598,7 +598,8 @@ INSTANTIATE_TEST_CASE_P(
make_tuple
(
&
vp9_fht8x8_sse2
,
&
vp9_iht8x8_64_add_sse2
,
3
,
VPX_BITS_8
)));
#endif
#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
!CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
SSSE3
,
FwdTrans8x8DCT
,
::
testing
::
Values
(
...
...
test/partial_idct_test.cc
View file @
8a01074d
...
...
@@ -260,7 +260,7 @@ INSTANTIATE_TEST_CASE_P(
TX_4X4
,
1
)));
#endif
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
&& !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
SSE2
,
PartialIDctTest
,
::
testing
::
Values
(
...
...
@@ -294,7 +294,8 @@ INSTANTIATE_TEST_CASE_P(
TX_4X4
,
1
)));
#endif
#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
!CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
SSSE3_64
,
PartialIDctTest
,
::
testing
::
Values
(
...
...
@@ -304,7 +305,7 @@ INSTANTIATE_TEST_CASE_P(
TX_8X8
,
12
)));
#endif
#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH
&& !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
SSSE3
,
PartialIDctTest
,
::
testing
::
Values
(
...
...
vp9/common/vp9_common.h
View file @
8a01074d
...
...
@@ -65,7 +65,7 @@ static INLINE int get_unsigned_bits(unsigned int num_values) {
}
#if CONFIG_VP9_HIGHBITDEPTH
static
INLINE
uint16_t
clip_pixel_high
(
int
val
,
int
bd
)
{
static
INLINE
uint16_t
clip_pixel_high
bd
(
int
val
,
int
bd
)
{
switch
(
bd
)
{
case
8
:
default:
...
...
vp9/common/vp9_convolve.c
View file @
8a01074d
...
...
@@ -299,7 +299,7 @@ static void high_convolve_horiz(const uint8_t *src8, ptrdiff_t src_stride,
int
k
,
sum
=
0
;
for
(
k
=
0
;
k
<
SUBPEL_TAPS
;
++
k
)
sum
+=
src_x
[
k
]
*
x_filter
[
k
];
dst
[
x
]
=
clip_pixel_high
(
ROUND_POWER_OF_TWO
(
sum
,
FILTER_BITS
),
bd
);
dst
[
x
]
=
clip_pixel_high
bd
(
ROUND_POWER_OF_TWO
(
sum
,
FILTER_BITS
),
bd
);
x_q4
+=
x_step_q4
;
}
src
+=
src_stride
;
...
...
@@ -325,7 +325,7 @@ static void high_convolve_avg_horiz(const uint8_t *src8, ptrdiff_t src_stride,
for
(
k
=
0
;
k
<
SUBPEL_TAPS
;
++
k
)
sum
+=
src_x
[
k
]
*
x_filter
[
k
];
dst
[
x
]
=
ROUND_POWER_OF_TWO
(
dst
[
x
]
+
clip_pixel_high
(
ROUND_POWER_OF_TWO
(
sum
,
FILTER_BITS
),
bd
),
1
);
clip_pixel_high
bd
(
ROUND_POWER_OF_TWO
(
sum
,
FILTER_BITS
),
bd
),
1
);
x_q4
+=
x_step_q4
;
}
src
+=
src_stride
;
...
...
@@ -350,7 +350,7 @@ static void high_convolve_vert(const uint8_t *src8, ptrdiff_t src_stride,
int
k
,
sum
=
0
;
for
(
k
=
0
;
k
<
SUBPEL_TAPS
;
++
k
)
sum
+=
src_y
[
k
*
src_stride
]
*
y_filter
[
k
];
dst
[
y
*
dst_stride
]
=
clip_pixel_high
(
dst
[
y
*
dst_stride
]
=
clip_pixel_high
bd
(
ROUND_POWER_OF_TWO
(
sum
,
FILTER_BITS
),
bd
);
y_q4
+=
y_step_q4
;
}
...
...
@@ -377,7 +377,7 @@ static void high_convolve_avg_vert(const uint8_t *src8, ptrdiff_t src_stride,
for
(
k
=
0
;
k
<
SUBPEL_TAPS
;
++
k
)
sum
+=
src_y
[
k
*
src_stride
]
*
y_filter
[
k
];
dst
[
y
*
dst_stride
]
=
ROUND_POWER_OF_TWO
(
dst
[
y
*
dst_stride
]
+
clip_pixel_high
(
ROUND_POWER_OF_TWO
(
sum
,
FILTER_BITS
),
bd
),
1
);
clip_pixel_high
bd
(
ROUND_POWER_OF_TWO
(
sum
,
FILTER_BITS
),
bd
),
1
);
y_q4
+=
y_step_q4
;
}
++
src
;
...
...
vp9/common/vp9_idct.c
View file @
8a01074d
This diff is collapsed.
Click to expand it.
vp9/common/vp9_reconintra.c
View file @
8a01074d
...
...
@@ -251,7 +251,7 @@ static INLINE void high_tm_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
for
(
r
=
0
;
r
<
bs
;
r
++
)
{
for
(
c
=
0
;
c
<
bs
;
c
++
)
dst
[
c
]
=
clip_pixel_high
(
left
[
r
]
+
above
[
c
]
-
ytop_left
,
bd
);
dst
[
c
]
=
clip_pixel_high
bd
(
left
[
r
]
+
above
[
c
]
-
ytop_left
,
bd
);
dst
+=
stride
;
}
}
...
...
vp9/common/vp9_rtcd_defs.pl
View file @
8a01074d
...
...
@@ -331,6 +331,8 @@ $vp9_convolve8_avg_vert_neon_asm=vp9_convolve8_avg_vert_neon;
# dct
#
if
(
vpx_config
("
CONFIG_VP9_HIGHBITDEPTH
")
eq
"
yes
")
{
# Note as optimized versions of these functions are added we need to add a check to ensure
# that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only.
add_proto
qw/void vp9_idct4x4_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct4x4_1_add/
;
...
...
@@ -380,69 +382,123 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto
qw/void vp9_iwht4x4_16_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_iwht4x4_16_add/
;
}
else
{
add_proto
qw/void vp9_idct4x4_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct4x4_1_add sse2 neon_asm dspr2/
;
$vp9_idct4x4_1_add_neon_asm
=
vp9_idct4x4_1_add_neon
;
# Force C versions if CONFIG_EMULATE_HARDWARE is 1
if
(
vpx_config
("
CONFIG_EMULATE_HARDWARE
")
eq
"
yes
")
{
add_proto
qw/void vp9_idct4x4_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct4x4_1_add/
;
add_proto
qw/void vp9_idct4x4_16_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct4x4_16_add sse2 neon_asm dspr2/
;
$vp9_idct4x4_16_add_neon_asm
=
vp9_idct4x4_16_add_neon
;
add_proto
qw/void vp9_idct4x4_16_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct4x4_16_add/
;
add_proto
qw/void vp9_idct8x8_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct8x8_1_add sse2 neon_asm dspr2/
;
$vp9_idct8x8_1_add_neon_asm
=
vp9_idct8x8_1_add_neon
;
add_proto
qw/void vp9_idct8x8_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct8x8_1_add/
;
add_proto
qw/void vp9_idct8x8_64_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct8x8_64_add sse2 neon_asm dspr2/
,
"
$ssse3_x86_64
";
$vp9_idct8x8_64_add_neon_asm
=
vp9_idct8x8_64_add_neon
;
add_proto
qw/void vp9_idct8x8_64_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct8x8_64_add/
;
add_proto
qw/void vp9_idct8x8_12_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct8x8_12_add sse2 neon_asm dspr2/
,
"
$ssse3_x86_64
";
$vp9_idct8x8_12_add_neon_asm
=
vp9_idct8x8_12_add_neon
;
add_proto
qw/void vp9_idct8x8_12_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct8x8_12_add/
;
add_proto
qw/void vp9_idct16x16_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct16x16_1_add sse2 neon_asm dspr2/
;
$vp9_idct16x16_1_add_neon_asm
=
vp9_idct16x16_1_add_neon
;
add_proto
qw/void vp9_idct16x16_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct16x16_1_add/
;
add_proto
qw/void vp9_idct16x16_256_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct16x16_256_add sse2 ssse3 neon_asm dspr2/
;
$vp9_idct16x16_256_add_neon_asm
=
vp9_idct16x16_256_add_neon
;
add_proto
qw/void vp9_idct16x16_256_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct16x16_256_add/
;
add_proto
qw/void vp9_idct16x16_10_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct16x16_10_add sse2 ssse3 neon_asm dspr2/
;
$vp9_idct16x16_10_add_neon_asm
=
vp9_idct16x16_10_add_neon
;
add_proto
qw/void vp9_idct16x16_10_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct16x16_10_add/
;
add_proto
qw/void vp9_idct32x32_1024_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct32x32_1024_add sse2 neon_asm dspr2/
;
$vp9_idct32x32_1024_add_neon_asm
=
vp9_idct32x32_1024_add_neon
;
add_proto
qw/void vp9_idct32x32_1024_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct32x32_1024_add/
;
add_proto
qw/void vp9_idct32x32_34_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct32x32_34_add sse2 neon_asm dspr2/
;
$vp9_idct32x32_34_add_neon_asm
=
vp9_idct32x32_1024_add_neon
;
add_proto
qw/void vp9_idct32x32_34_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct32x32_34_add/
;
add_proto
qw/void vp9_idct32x32_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct32x32_1_add sse2 neon_asm dspr2/
;
$vp9_idct32x32_1_add_neon_asm
=
vp9_idct32x32_1_add_neon
;
add_proto
qw/void vp9_idct32x32_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct32x32_1_add/
;
add_proto
qw/void vp9_iht4x4_16_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type
";
specialize
qw/vp9_iht4x4_16_add sse2 neon_asm dspr2/
;
$vp9_iht4x4_16_add_neon_asm
=
vp9_iht4x4_16_add_neon
;
add_proto
qw/void vp9_iht4x4_16_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type
";
specialize
qw/vp9_iht4x4_16_add/
;
add_proto
qw/void vp9_iht8x8_64_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type
";
specialize
qw/vp9_iht8x8_64_add sse2 neon_asm dspr2/
;
$vp9_iht8x8_64_add_neon_asm
=
vp9_iht8x8_64_add_neon
;
add_proto
qw/void vp9_iht8x8_64_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type
";
specialize
qw/vp9_iht8x8_64_add/
;
add_proto
qw/void vp9_iht16x16_256_add/
,
"
const tran_low_t *input, uint8_t *output, int pitch, int tx_type
";
specialize
qw/vp9_iht16x16_256_add
sse2 dspr2
/
;
add_proto
qw/void vp9_iht16x16_256_add/
,
"
const tran_low_t *input, uint8_t *output, int pitch, int tx_type
";
specialize
qw/vp9_iht16x16_256_add/
;
# dct and add
# dct and add
add_proto
qw/void vp9_iwht4x4_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_iwht4x4_1_add/
;
add_proto
qw/void vp9_iwht4x4_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_iwht4x4_1_add/
;
add_proto
qw/void vp9_iwht4x4_16_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_iwht4x4_16_add/
;
add_proto
qw/void vp9_iwht4x4_16_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_iwht4x4_16_add/
;
}
else
{
add_proto
qw/void vp9_idct4x4_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct4x4_1_add sse2 neon_asm dspr2/
;
$vp9_idct4x4_1_add_neon_asm
=
vp9_idct4x4_1_add_neon
;
add_proto
qw/void vp9_idct4x4_16_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct4x4_16_add sse2 neon_asm dspr2/
;
$vp9_idct4x4_16_add_neon_asm
=
vp9_idct4x4_16_add_neon
;
add_proto
qw/void vp9_idct8x8_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct8x8_1_add sse2 neon_asm dspr2/
;
$vp9_idct8x8_1_add_neon_asm
=
vp9_idct8x8_1_add_neon
;
add_proto
qw/void vp9_idct8x8_64_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct8x8_64_add sse2 neon_asm dspr2/
,
"
$ssse3_x86_64
";
$vp9_idct8x8_64_add_neon_asm
=
vp9_idct8x8_64_add_neon
;
add_proto
qw/void vp9_idct8x8_12_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct8x8_12_add sse2 neon_asm dspr2/
,
"
$ssse3_x86_64
";
$vp9_idct8x8_12_add_neon_asm
=
vp9_idct8x8_12_add_neon
;
add_proto
qw/void vp9_idct16x16_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct16x16_1_add sse2 neon_asm dspr2/
;
$vp9_idct16x16_1_add_neon_asm
=
vp9_idct16x16_1_add_neon
;
add_proto
qw/void vp9_idct16x16_256_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct16x16_256_add sse2 ssse3 neon_asm dspr2/
;
$vp9_idct16x16_256_add_neon_asm
=
vp9_idct16x16_256_add_neon
;
add_proto
qw/void vp9_idct16x16_10_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct16x16_10_add sse2 ssse3 neon_asm dspr2/
;
$vp9_idct16x16_10_add_neon_asm
=
vp9_idct16x16_10_add_neon
;
add_proto
qw/void vp9_idct32x32_1024_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct32x32_1024_add sse2 neon_asm dspr2/
;
$vp9_idct32x32_1024_add_neon_asm
=
vp9_idct32x32_1024_add_neon
;
add_proto
qw/void vp9_idct32x32_34_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct32x32_34_add sse2 neon_asm dspr2/
;
$vp9_idct32x32_34_add_neon_asm
=
vp9_idct32x32_1024_add_neon
;
add_proto
qw/void vp9_idct32x32_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct32x32_1_add sse2 neon_asm dspr2/
;
$vp9_idct32x32_1_add_neon_asm
=
vp9_idct32x32_1_add_neon
;
add_proto
qw/void vp9_iht4x4_16_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type
";
specialize
qw/vp9_iht4x4_16_add sse2 neon_asm dspr2/
;
$vp9_iht4x4_16_add_neon_asm
=
vp9_iht4x4_16_add_neon
;
add_proto
qw/void vp9_iht8x8_64_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type
";
specialize
qw/vp9_iht8x8_64_add sse2 neon_asm dspr2/
;
$vp9_iht8x8_64_add_neon_asm
=
vp9_iht8x8_64_add_neon
;
add_proto
qw/void vp9_iht16x16_256_add/
,
"
const tran_low_t *input, uint8_t *output, int pitch, int tx_type
";
specialize
qw/vp9_iht16x16_256_add sse2 dspr2/
;
# dct and add
add_proto
qw/void vp9_iwht4x4_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_iwht4x4_1_add/
;
add_proto
qw/void vp9_iwht4x4_16_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_iwht4x4_16_add/
;
}
}
# High bitdepth functions
...
...
@@ -689,6 +745,8 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
#
# dct
#
# Note as optimized versions of these functions are added we need to add a check to ensure
# that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only.
add_proto
qw/void vp9_high_idct4x4_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int bd
";
specialize
qw/vp9_high_idct4x4_1_add/
;
...
...
vp9/encoder/vp9_resize.c
View file @
8a01074d
...
...
@@ -571,7 +571,7 @@ static void highbd_interpolate(const uint16_t *const input, int inlength,
sum
+=
filter
[
k
]
*
input
[(
pk
<
0
?
0
:
(
pk
>=
inlength
?
inlength
-
1
:
pk
))];
}
*
optr
++
=
clip_pixel_high
(
ROUND_POWER_OF_TWO
(
sum
,
FILTER_BITS
),
bd
);
*
optr
++
=
clip_pixel_high
bd
(
ROUND_POWER_OF_TWO
(
sum
,
FILTER_BITS
),
bd
);
}
}
else
{
// Initial part.
...
...
@@ -585,7 +585,7 @@ static void highbd_interpolate(const uint16_t *const input, int inlength,
sum
+=
filter
[
k
]
*
input
[(
int_pel
-
INTERP_TAPS
/
2
+
1
+
k
<
0
?
0
:
int_pel
-
INTERP_TAPS
/
2
+
1
+
k
)];
*
optr
++
=
clip_pixel_high
(
ROUND_POWER_OF_TWO
(
sum
,
FILTER_BITS
),
bd
);
*
optr
++
=
clip_pixel_high
bd
(
ROUND_POWER_OF_TWO
(
sum
,
FILTER_BITS
),
bd
);
}
// Middle part.
for
(;
x
<=
x2
;
++
x
,
y
+=
delta
)
{
...
...
@@ -596,7 +596,7 @@ static void highbd_interpolate(const uint16_t *const input, int inlength,
sum
=
0
;
for
(
k
=
0
;
k
<
INTERP_TAPS
;
++
k
)
sum
+=
filter
[
k
]
*
input
[
int_pel
-
INTERP_TAPS
/
2
+
1
+
k
];
*
optr
++
=
clip_pixel_high
(
ROUND_POWER_OF_TWO
(
sum
,
FILTER_BITS
),
bd
);
*
optr
++
=
clip_pixel_high
bd
(
ROUND_POWER_OF_TWO
(
sum
,
FILTER_BITS
),
bd
);
}
// End part.
for
(;
x
<
outlength
;
++
x
,
y
+=
delta
)
{
...
...
@@ -609,7 +609,7 @@ static void highbd_interpolate(const uint16_t *const input, int inlength,
sum
+=
filter
[
k
]
*
input
[(
int_pel
-
INTERP_TAPS
/
2
+
1
+
k
>=
inlength
?
inlength
-
1
:
int_pel
-
INTERP_TAPS
/
2
+
1
+
k
)];
*
optr
++
=
clip_pixel_high
(
ROUND_POWER_OF_TWO
(
sum
,
FILTER_BITS
),
bd
);
*
optr
++
=
clip_pixel_high
bd
(
ROUND_POWER_OF_TWO
(
sum
,
FILTER_BITS
),
bd
);
}
}
}
...
...
@@ -635,7 +635,7 @@ static void highbd_down2_symeven(const uint16_t *const input, int length,
filter
[
j
];
}
sum
>>=
FILTER_BITS
;
*
optr
++
=
clip_pixel_high
(
sum
,
bd
);
*
optr
++
=
clip_pixel_high
bd
(
sum
,
bd
);
}
}
else
{
// Initial part.
...
...
@@ -645,7 +645,7 @@ static void highbd_down2_symeven(const uint16_t *const input, int length,
sum
+=
(
input
[(
i
-
j
<
0
?
0
:
i
-
j
)]
+
input
[
i
+
1
+
j
])
*
filter
[
j
];
}
sum
>>=
FILTER_BITS
;
*
optr
++
=
clip_pixel_high
(
sum
,
bd
);
*
optr
++
=
clip_pixel_high
bd
(
sum
,
bd
);
}
// Middle part.
for
(;
i
<
l2
;
i
+=
2
)
{
...
...
@@ -654,7 +654,7 @@ static void highbd_down2_symeven(const uint16_t *const input, int length,
sum
+=
(
input
[
i
-
j
]
+
input
[
i
+
1
+
j
])
*
filter
[
j
];
}
sum
>>=
FILTER_BITS
;
*
optr
++
=
clip_pixel_high
(
sum
,
bd
);
*
optr
++
=
clip_pixel_high
bd
(
sum
,
bd
);
}
// End part.
for
(;
i
<
length
;
i
+=
2
)
{
...
...
@@ -665,7 +665,7 @@ static void highbd_down2_symeven(const uint16_t *const input, int length,
filter
[
j
];
}
sum
>>=
FILTER_BITS
;
*
optr
++
=
clip_pixel_high
(
sum
,
bd
);
*
optr
++
=
clip_pixel_high
bd
(
sum
,
bd
);
}
}
}
...
...
@@ -691,7 +691,7 @@ static void highbd_down2_symodd(const uint16_t *const input, int length,
filter
[
j
];
}
sum
>>=
FILTER_BITS
;
*
optr
++
=
clip_pixel_high
(
sum
,
bd
);
*
optr
++
=
clip_pixel_high
bd
(
sum
,
bd
);
}
}
else
{
// Initial part.
...
...
@@ -701,7 +701,7 @@ static void highbd_down2_symodd(const uint16_t *const input, int length,
sum
+=
(
input
[(
i
-
j
<
0
?
0
:
i
-
j
)]
+
input
[
i
+
j
])
*
filter
[
j
];
}
sum
>>=
FILTER_BITS
;
*
optr
++
=
clip_pixel_high
(
sum
,
bd
);
*
optr
++
=
clip_pixel_high
bd
(
sum
,
bd
);
}
// Middle part.
for
(;
i
<
l2
;
i
+=
2
)
{
...
...
@@ -710,7 +710,7 @@ static void highbd_down2_symodd(const uint16_t *const input, int length,
sum
+=
(
input
[
i
-
j
]
+
input
[
i
+
j
])
*
filter
[
j
];
}
sum
>>=
FILTER_BITS
;
*
optr
++
=
clip_pixel_high
(
sum
,
bd
);
*
optr
++
=
clip_pixel_high
bd
(
sum
,
bd
);
}
// End part.
for
(;
i
<
length
;
i
+=
2
)
{
...
...
@@ -720,7 +720,7 @@ static void highbd_down2_symodd(const uint16_t *const input, int length,
filter
[
j
];
}
sum
>>=
FILTER_BITS
;
*
optr
++
=
clip_pixel_high
(
sum
,
bd
);
*
optr
++
=
clip_pixel_high
bd
(
sum
,
bd
);
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment