Xiph.Org / aom-rav1e

Commit b81f04a0
Authored Dec 15, 2015 by James Zern
Committed by Gerrit Code Review, Dec 15, 2015

Merge "move vp9_avg to vpx_dsp"

Parents: b7654afb d36659ce

Changes: 23 files
test/vp9_avg_test.cc → test/avg_test.cc
@@ -15,9 +15,7 @@
 #include "third_party/googletest/src/include/gtest/gtest.h"

 #include "./vpx_config.h"
-#if CONFIG_VP9_ENCODER
-#include "./vp9_rtcd.h"
-#endif
+#include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
@@ -323,91 +321,91 @@ using std::tr1::make_tuple;

 INSTANTIATE_TEST_CASE_P(
     C, AverageTest,
     ::testing::Values(
-        make_tuple(16, 16, 1, 8, &vp9_avg_8x8_c),
-        make_tuple(16, 16, 1, 4, &vp9_avg_4x4_c)));
+        make_tuple(16, 16, 1, 8, &vpx_avg_8x8_c),
+        make_tuple(16, 16, 1, 4, &vpx_avg_4x4_c)));

 INSTANTIATE_TEST_CASE_P(
     C, SatdTest,
     ::testing::Values(
-        make_tuple(16, &vp9_satd_c),
-        make_tuple(64, &vp9_satd_c),
-        make_tuple(256, &vp9_satd_c),
-        make_tuple(1024, &vp9_satd_c)));
+        make_tuple(16, &vpx_satd_c),
+        make_tuple(64, &vpx_satd_c),
+        make_tuple(256, &vpx_satd_c),
+        make_tuple(1024, &vpx_satd_c)));

 #if HAVE_SSE2
 INSTANTIATE_TEST_CASE_P(
     SSE2, AverageTest,
     ::testing::Values(
-        make_tuple(16, 16, 0, 8, &vp9_avg_8x8_sse2),
-        make_tuple(16, 16, 5, 8, &vp9_avg_8x8_sse2),
-        make_tuple(32, 32, 15, 8, &vp9_avg_8x8_sse2),
-        make_tuple(16, 16, 0, 4, &vp9_avg_4x4_sse2),
-        make_tuple(16, 16, 5, 4, &vp9_avg_4x4_sse2),
-        make_tuple(32, 32, 15, 4, &vp9_avg_4x4_sse2)));
+        make_tuple(16, 16, 0, 8, &vpx_avg_8x8_sse2),
+        make_tuple(16, 16, 5, 8, &vpx_avg_8x8_sse2),
+        make_tuple(32, 32, 15, 8, &vpx_avg_8x8_sse2),
+        make_tuple(16, 16, 0, 4, &vpx_avg_4x4_sse2),
+        make_tuple(16, 16, 5, 4, &vpx_avg_4x4_sse2),
+        make_tuple(32, 32, 15, 4, &vpx_avg_4x4_sse2)));

 INSTANTIATE_TEST_CASE_P(
     SSE2, IntProRowTest,
     ::testing::Values(
-        make_tuple(16, &vp9_int_pro_row_sse2, &vp9_int_pro_row_c),
-        make_tuple(32, &vp9_int_pro_row_sse2, &vp9_int_pro_row_c),
-        make_tuple(64, &vp9_int_pro_row_sse2, &vp9_int_pro_row_c)));
+        make_tuple(16, &vpx_int_pro_row_sse2, &vpx_int_pro_row_c),
+        make_tuple(32, &vpx_int_pro_row_sse2, &vpx_int_pro_row_c),
+        make_tuple(64, &vpx_int_pro_row_sse2, &vpx_int_pro_row_c)));

 INSTANTIATE_TEST_CASE_P(
     SSE2, IntProColTest,
     ::testing::Values(
-        make_tuple(16, &vp9_int_pro_col_sse2, &vp9_int_pro_col_c),
-        make_tuple(32, &vp9_int_pro_col_sse2, &vp9_int_pro_col_c),
-        make_tuple(64, &vp9_int_pro_col_sse2, &vp9_int_pro_col_c)));
+        make_tuple(16, &vpx_int_pro_col_sse2, &vpx_int_pro_col_c),
+        make_tuple(32, &vpx_int_pro_col_sse2, &vpx_int_pro_col_c),
+        make_tuple(64, &vpx_int_pro_col_sse2, &vpx_int_pro_col_c)));

 INSTANTIATE_TEST_CASE_P(
     SSE2, SatdTest,
     ::testing::Values(
-        make_tuple(16, &vp9_satd_sse2),
-        make_tuple(64, &vp9_satd_sse2),
-        make_tuple(256, &vp9_satd_sse2),
-        make_tuple(1024, &vp9_satd_sse2)));
+        make_tuple(16, &vpx_satd_sse2),
+        make_tuple(64, &vpx_satd_sse2),
+        make_tuple(256, &vpx_satd_sse2),
+        make_tuple(1024, &vpx_satd_sse2)));
 #endif

 #if HAVE_NEON
 INSTANTIATE_TEST_CASE_P(
     NEON, AverageTest,
     ::testing::Values(
-        make_tuple(16, 16, 0, 8, &vp9_avg_8x8_neon),
-        make_tuple(16, 16, 5, 8, &vp9_avg_8x8_neon),
-        make_tuple(32, 32, 15, 8, &vp9_avg_8x8_neon),
-        make_tuple(16, 16, 0, 4, &vp9_avg_4x4_neon),
-        make_tuple(16, 16, 5, 4, &vp9_avg_4x4_neon),
-        make_tuple(32, 32, 15, 4, &vp9_avg_4x4_neon)));
+        make_tuple(16, 16, 0, 8, &vpx_avg_8x8_neon),
+        make_tuple(16, 16, 5, 8, &vpx_avg_8x8_neon),
+        make_tuple(32, 32, 15, 8, &vpx_avg_8x8_neon),
+        make_tuple(16, 16, 0, 4, &vpx_avg_4x4_neon),
+        make_tuple(16, 16, 5, 4, &vpx_avg_4x4_neon),
+        make_tuple(32, 32, 15, 4, &vpx_avg_4x4_neon)));

 INSTANTIATE_TEST_CASE_P(
     NEON, IntProRowTest,
     ::testing::Values(
-        make_tuple(16, &vp9_int_pro_row_neon, &vp9_int_pro_row_c),
-        make_tuple(32, &vp9_int_pro_row_neon, &vp9_int_pro_row_c),
-        make_tuple(64, &vp9_int_pro_row_neon, &vp9_int_pro_row_c)));
+        make_tuple(16, &vpx_int_pro_row_neon, &vpx_int_pro_row_c),
+        make_tuple(32, &vpx_int_pro_row_neon, &vpx_int_pro_row_c),
+        make_tuple(64, &vpx_int_pro_row_neon, &vpx_int_pro_row_c)));

 INSTANTIATE_TEST_CASE_P(
     NEON, IntProColTest,
     ::testing::Values(
-        make_tuple(16, &vp9_int_pro_col_neon, &vp9_int_pro_col_c),
-        make_tuple(32, &vp9_int_pro_col_neon, &vp9_int_pro_col_c),
-        make_tuple(64, &vp9_int_pro_col_neon, &vp9_int_pro_col_c)));
+        make_tuple(16, &vpx_int_pro_col_neon, &vpx_int_pro_col_c),
+        make_tuple(32, &vpx_int_pro_col_neon, &vpx_int_pro_col_c),
+        make_tuple(64, &vpx_int_pro_col_neon, &vpx_int_pro_col_c)));

 INSTANTIATE_TEST_CASE_P(
     NEON, SatdTest,
     ::testing::Values(
-        make_tuple(16, &vp9_satd_neon),
-        make_tuple(64, &vp9_satd_neon),
-        make_tuple(256, &vp9_satd_neon),
-        make_tuple(1024, &vp9_satd_neon)));
+        make_tuple(16, &vpx_satd_neon),
+        make_tuple(64, &vpx_satd_neon),
+        make_tuple(256, &vpx_satd_neon),
+        make_tuple(1024, &vpx_satd_neon)));
 #endif

 #if HAVE_MSA
 INSTANTIATE_TEST_CASE_P(
     MSA, AverageTest,
     ::testing::Values(
-        make_tuple(16, 16, 0, 8, &vp9_avg_8x8_msa),
-        make_tuple(16, 16, 5, 8, &vp9_avg_8x8_msa),
-        make_tuple(32, 32, 15, 8, &vp9_avg_8x8_msa),
-        make_tuple(16, 16, 0, 4, &vp9_avg_4x4_msa),
-        make_tuple(16, 16, 5, 4, &vp9_avg_4x4_msa),
-        make_tuple(32, 32, 15, 4, &vp9_avg_4x4_msa)));
+        make_tuple(16, 16, 0, 8, &vpx_avg_8x8_msa),
+        make_tuple(16, 16, 5, 8, &vpx_avg_8x8_msa),
+        make_tuple(32, 32, 15, 8, &vpx_avg_8x8_msa),
+        make_tuple(16, 16, 0, 4, &vpx_avg_4x4_msa),
+        make_tuple(16, 16, 5, 4, &vpx_avg_4x4_msa),
+        make_tuple(32, 32, 15, 4, &vpx_avg_4x4_msa)));
 #endif

 }  // namespace
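For reference, the AverageTest instantiations above exercise the block-average kernels being renamed from vp9_ to vpx_. A minimal scalar sketch of the value those kernels return, using the same rounding as the NEON versions deleted at the end of this commit ((sum + 32) >> 6 for 8x8, (sum + 8) >> 4 for 4x4); this is an illustration, not the vpx_dsp reference source:

#include <stdint.h>

/* Illustrative only: rounded average of an 8x8 block of pixels,
 * i.e. (sum + 32) >> 6, the result the vpx_avg_8x8_* kernels return. */
static unsigned int avg_8x8_sketch(const uint8_t *s, int p) {
  int i, j;
  unsigned int sum = 0;
  for (i = 0; i < 8; ++i, s += p)
    for (j = 0; j < 8; ++j) sum += s[j];
  return (sum + 32) >> 6;
}

/* 4x4 variant: (sum + 8) >> 4. */
static unsigned int avg_4x4_sketch(const uint8_t *s, int p) {
  int i, j;
  unsigned int sum = 0;
  for (i = 0; i < 4; ++i, s += p)
    for (j = 0; j < 4; ++j) sum += s[j];
  return (sum + 8) >> 4;
}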
test/test.mk
@@ -143,7 +143,6 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct32x32_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_avg_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_error_block_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_quantize_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_subtract_test.cc

@@ -170,6 +169,11 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += vp10_dct_test.cc
 endif # VP10

 ## Multi-codec / unconditional whitebox tests.
+ifeq ($(findstring yes,$(CONFIG_VP9_ENCODER)$(CONFIG_VP10_ENCODER)),yes)
+LIBVPX_TEST_SRCS-yes += avg_test.cc
+endif
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += sad_test.cc

 TEST_INTRA_PRED_SPEED_SRCS-yes := test_intra_pred_speed.cc
vp10/common/vp10_rtcd_defs.pl
@@ -351,42 +351,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
 #
 if (vpx_config("CONFIG_VP10_ENCODER") eq "yes") {
-
-add_proto qw/unsigned int vp10_avg_8x8/, "const uint8_t *, int p";
-specialize qw/vp10_avg_8x8 sse2 neon msa/;
-
-add_proto qw/unsigned int vp10_avg_4x4/, "const uint8_t *, int p";
-specialize qw/vp10_avg_4x4 sse2 msa/;
-
-add_proto qw/void vp10_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
-specialize qw/vp10_minmax_8x8 sse2/;
-
-add_proto qw/void vp10_hadamard_8x8/, "int16_t const *src_diff, int src_stride, int16_t *coeff";
-specialize qw/vp10_hadamard_8x8 sse2/, "$ssse3_x86_64_x86inc";
-
-add_proto qw/void vp10_hadamard_16x16/, "int16_t const *src_diff, int src_stride, int16_t *coeff";
-specialize qw/vp10_hadamard_16x16 sse2/;
-
-add_proto qw/int16_t vp10_satd/, "const int16_t *coeff, int length";
-specialize qw/vp10_satd sse2/;
-
-add_proto qw/void vp10_int_pro_row/, "int16_t *hbuf, uint8_t const *ref, const int ref_stride, const int height";
-specialize qw/vp10_int_pro_row sse2 neon/;
-
-add_proto qw/int16_t vp10_int_pro_col/, "uint8_t const *ref, const int width";
-specialize qw/vp10_int_pro_col sse2 neon/;
-
-add_proto qw/int vp10_vector_var/, "int16_t const *ref, int16_t const *src, const int bwl";
-specialize qw/vp10_vector_var neon sse2/;
-
-if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
-  add_proto qw/unsigned int vp10_highbd_avg_8x8/, "const uint8_t *, int p";
-  specialize qw/vp10_highbd_avg_8x8/;
-  add_proto qw/unsigned int vp10_highbd_avg_4x4/, "const uint8_t *, int p";
-  specialize qw/vp10_highbd_avg_4x4/;
-  add_proto qw/void vp10_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
-  specialize qw/vp10_highbd_minmax_8x8/;
-}

 # ENCODEMB INVOKE
 #
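The add_proto/specialize entries removed above are the run-time CPU detection (RTCD) declarations for these kernels; equivalent declarations now live under the vpx_ prefix in vpx_dsp's rtcd definitions. As a rough, hedged illustration of what a specialize line buys at the C level (the real header is generated by the build's rtcd scripts, so the dispatch pointer and setup names below are hypothetical):

#include <stdint.h>

/* Prototypes in the shape the generated header declares, using the signature
 * from the add_proto line above ("const uint8_t *, int p"). */
unsigned int vpx_avg_8x8_c(const uint8_t *, int p);
unsigned int vpx_avg_8x8_sse2(const uint8_t *, int p);

/* Hypothetical sketch: "specialize qw/vpx_avg_8x8 sse2 .../" means the
 * unsuffixed name resolves at runtime to the best variant the CPU supports,
 * falling back to the C reference. */
static unsigned int (*avg_8x8_dispatch)(const uint8_t *, int p);

static void setup_avg_dispatch_sketch(int cpu_has_sse2) {
  avg_8x8_dispatch = vpx_avg_8x8_c;                       /* C fallback */
  if (cpu_has_sse2) avg_8x8_dispatch = vpx_avg_8x8_sse2;  /* SIMD override */
}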
vp10/encoder/encodeframe.c
@@ -536,16 +536,16 @@ static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d,
     if (x8_idx < pixels_wide && y8_idx < pixels_high) {
 #if CONFIG_VP9_HIGHBITDEPTH
       if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
-        vp10_highbd_minmax_8x8(s + y8_idx * sp + x8_idx, sp,
+        vpx_highbd_minmax_8x8(s + y8_idx * sp + x8_idx, sp,
                                d + y8_idx * dp + x8_idx, dp, &min, &max);
       } else {
-        vp10_minmax_8x8(s + y8_idx * sp + x8_idx, sp,
+        vpx_minmax_8x8(s + y8_idx * sp + x8_idx, sp,
                         d + y8_idx * dp + x8_idx, dp, &min, &max);
       }
 #else
-      vp10_minmax_8x8(s + y8_idx * sp + x8_idx, sp,
+      vpx_minmax_8x8(s + y8_idx * sp + x8_idx, sp,
                       d + y8_idx * dp + x8_idx, dp, &min, &max);
 #endif

@@ -577,18 +577,18 @@ static void fill_variance_4x4avg(const uint8_t *s, int sp, const uint8_t *d,
       int d_avg = 128;
 #if CONFIG_VP9_HIGHBITDEPTH
       if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
-        s_avg = vp10_highbd_avg_4x4(s + y4_idx * sp + x4_idx, sp);
+        s_avg = vpx_highbd_avg_4x4(s + y4_idx * sp + x4_idx, sp);
         if (!is_key_frame)
-          d_avg = vp10_highbd_avg_4x4(d + y4_idx * dp + x4_idx, dp);
+          d_avg = vpx_highbd_avg_4x4(d + y4_idx * dp + x4_idx, dp);
       } else {
-        s_avg = vp10_avg_4x4(s + y4_idx * sp + x4_idx, sp);
+        s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp);
         if (!is_key_frame)
-          d_avg = vp10_avg_4x4(d + y4_idx * dp + x4_idx, dp);
+          d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp);
       }
 #else
-      s_avg = vp10_avg_4x4(s + y4_idx * sp + x4_idx, sp);
+      s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp);
       if (!is_key_frame)
-        d_avg = vp10_avg_4x4(d + y4_idx * dp + x4_idx, dp);
+        d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp);
 #endif
       sum = s_avg - d_avg;
       sse = sum * sum;

@@ -616,18 +616,18 @@ static void fill_variance_8x8avg(const uint8_t *s, int sp, const uint8_t *d,
       int d_avg = 128;
 #if CONFIG_VP9_HIGHBITDEPTH
       if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
-        s_avg = vp10_highbd_avg_8x8(s + y8_idx * sp + x8_idx, sp);
+        s_avg = vpx_highbd_avg_8x8(s + y8_idx * sp + x8_idx, sp);
         if (!is_key_frame)
-          d_avg = vp10_highbd_avg_8x8(d + y8_idx * dp + x8_idx, dp);
+          d_avg = vpx_highbd_avg_8x8(d + y8_idx * dp + x8_idx, dp);
       } else {
-        s_avg = vp10_avg_8x8(s + y8_idx * sp + x8_idx, sp);
+        s_avg = vpx_avg_8x8(s + y8_idx * sp + x8_idx, sp);
         if (!is_key_frame)
-          d_avg = vp10_avg_8x8(d + y8_idx * dp + x8_idx, dp);
+          d_avg = vpx_avg_8x8(d + y8_idx * dp + x8_idx, dp);
       }
 #else
-      s_avg = vp10_avg_8x8(s + y8_idx * sp + x8_idx, sp);
+      s_avg = vpx_avg_8x8(s + y8_idx * sp + x8_idx, sp);
       if (!is_key_frame)
-        d_avg = vp10_avg_8x8(d + y8_idx * dp + x8_idx, dp);
+        d_avg = vpx_avg_8x8(d + y8_idx * dp + x8_idx, dp);
 #endif
       sum = s_avg - d_avg;
       sse = sum * sum;
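compute_minmax_8x8() above now calls the vpx_dsp min/max kernel. As a hedged sketch of what that kernel reports to the variance-partition logic (illustrative scalar code, not the shipped vpx_dsp reference): the smallest and largest absolute source/predictor difference over the 8x8 block.

#include <stdint.h>
#include <stdlib.h>

/* Sketch of the min/max-of-absolute-differences computation that
 * vpx_minmax_8x8 performs over an 8x8 block (illustrative only). */
static void minmax_8x8_sketch(const uint8_t *s, int sp, const uint8_t *d,
                              int dp, int *min, int *max) {
  int i, j;
  *min = 255;
  *max = 0;
  for (i = 0; i < 8; ++i, s += sp, d += dp) {
    for (j = 0; j < 8; ++j) {
      const int diff = abs(s[j] - d[j]);
      if (diff < *min) *min = diff;
      if (diff > *max) *max = diff;
    }
  }
}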
vp10/encoder/mcomp.c
@@ -1759,7 +1759,7 @@ static int vector_match(int16_t *ref, int16_t *src, int bwl) {
   int center, offset = 0;
   int bw = 4 << bwl;  // redundant variable, to be changed in the experiments.
   for (d = 0; d <= bw; d += 16) {
-    this_sad = vp10_vector_var(&ref[d], src, bwl);
+    this_sad = vpx_vector_var(&ref[d], src, bwl);
     if (this_sad < best_sad) {
       best_sad = this_sad;
       offset = d;

@@ -1772,7 +1772,7 @@ static int vector_match(int16_t *ref, int16_t *src, int bwl) {
     // check limit
     if (this_pos < 0 || this_pos > bw)
       continue;
-    this_sad = vp10_vector_var(&ref[this_pos], src, bwl);
+    this_sad = vpx_vector_var(&ref[this_pos], src, bwl);
     if (this_sad < best_sad) {
       best_sad = this_sad;
       center = this_pos;

@@ -1785,7 +1785,7 @@ static int vector_match(int16_t *ref, int16_t *src, int bwl) {
     // check limit
     if (this_pos < 0 || this_pos > bw)
       continue;
-    this_sad = vp10_vector_var(&ref[this_pos], src, bwl);
+    this_sad = vpx_vector_var(&ref[this_pos], src, bwl);
     if (this_sad < best_sad) {
       best_sad = this_sad;
       center = this_pos;

@@ -1798,7 +1798,7 @@ static int vector_match(int16_t *ref, int16_t *src, int bwl) {
     // check limit
     if (this_pos < 0 || this_pos > bw)
       continue;
-    this_sad = vp10_vector_var(&ref[this_pos], src, bwl);
+    this_sad = vpx_vector_var(&ref[this_pos], src, bwl);
     if (this_sad < best_sad) {
       best_sad = this_sad;
       center = this_pos;

@@ -1811,7 +1811,7 @@ static int vector_match(int16_t *ref, int16_t *src, int bwl) {
     // check limit
     if (this_pos < 0 || this_pos > bw)
       continue;
-    this_sad = vp10_vector_var(&ref[this_pos], src, bwl);
+    this_sad = vpx_vector_var(&ref[this_pos], src, bwl);
     if (this_sad < best_sad) {
       best_sad = this_sad;
       center = this_pos;

@@ -1880,25 +1880,25 @@ unsigned int vp10_int_pro_motion_estimation(const VP10_COMP *cpi, MACROBLOCK *x,
   // Set up prediction 1-D reference set
   ref_buf = xd->plane[0].pre[0].buf - (bw >> 1);
   for (idx = 0; idx < search_width; idx += 16) {
-    vp10_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh);
+    vpx_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh);
     ref_buf += 16;
   }

   ref_buf = xd->plane[0].pre[0].buf - (bh >> 1) * ref_stride;
   for (idx = 0; idx < search_height; ++idx) {
-    vbuf[idx] = vp10_int_pro_col(ref_buf, bw) >> norm_factor;
+    vbuf[idx] = vpx_int_pro_col(ref_buf, bw) >> norm_factor;
     ref_buf += ref_stride;
   }

   // Set up src 1-D reference set
   for (idx = 0; idx < bw; idx += 16) {
     src_buf = x->plane[0].src.buf + idx;
-    vp10_int_pro_row(&src_hbuf[idx], src_buf, src_stride, bh);
+    vpx_int_pro_row(&src_hbuf[idx], src_buf, src_stride, bh);
   }

   src_buf = x->plane[0].src.buf;
   for (idx = 0; idx < bh; ++idx) {
-    src_vbuf[idx] = vp10_int_pro_col(src_buf, bw) >> norm_factor;
+    src_vbuf[idx] = vpx_int_pro_col(src_buf, bw) >> norm_factor;
     src_buf += src_stride;
   }
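vp10_int_pro_motion_estimation() above builds 1-D projections of the reference and source blocks and matches them with vpx_vector_var(). A scalar sketch of the two projection kernels being renamed here, consistent with the NEON versions deleted at the end of this commit (the row projection sums each of 16 columns over `height` rows and scales by a height-dependent shift; the column projection is a plain sum of one row of `width` pixels); illustrative only:

#include <stdint.h>

/* Sketch of vpx_int_pro_row: per-column sums over `height` rows, scaled
 * by a height-dependent shift (the NEON code's shift_factor). */
static void int_pro_row_sketch(int16_t hbuf[16], const uint8_t *ref,
                               int ref_stride, int height) {
  int idx, i;
  const int norm_shift = (height >> 5) + 3;
  for (idx = 0; idx < 16; ++idx) {
    int sum = 0;
    for (i = 0; i < height; ++i) sum += ref[i * ref_stride + idx];
    hbuf[idx] = (int16_t)(sum >> norm_shift);
  }
}

/* Sketch of vpx_int_pro_col: sum of one row of `width` pixels. */
static int16_t int_pro_col_sketch(const uint8_t *ref, int width) {
  int idx, sum = 0;
  for (idx = 0; idx < width; ++idx) sum += ref[idx];
  return (int16_t)sum;
}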
vp10/vp10cx.mk
@@ -17,7 +17,6 @@ VP10_CX_SRCS_REMOVE-no += $(VP10_COMMON_SRCS_REMOVE-no)
 VP10_CX_SRCS-yes += vp10_cx_iface.c

-VP10_CX_SRCS-yes += encoder/avg.c
 VP10_CX_SRCS-yes += encoder/bitstream.c
 VP10_CX_SRCS-yes += encoder/context_tree.c
 VP10_CX_SRCS-yes += encoder/context_tree.h

@@ -87,7 +86,6 @@ VP10_CX_SRCS-yes += encoder/temporal_filter.h
 VP10_CX_SRCS-yes += encoder/mbgraph.c
 VP10_CX_SRCS-yes += encoder/mbgraph.h
-VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/avg_intrin_sse2.c
 VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm
 VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.c
 ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)

@@ -102,7 +100,6 @@ endif
 ifeq ($(ARCH_X86_64),yes)
 ifeq ($(CONFIG_USE_X86INC),yes)
 VP10_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3_x86_64.asm
 VP10_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/dct_ssse3_x86_64.asm
 endif
 endif

@@ -119,10 +116,8 @@ ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
 VP10_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/dct_neon.c
 VP10_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/error_neon.c
 endif
-VP10_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/avg_neon.c
 VP10_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/quantize_neon.c
-VP10_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/avg_msa.c
 VP10_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/error_msa.c
 VP10_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/fdct4x4_msa.c
 VP10_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/fdct8x8_msa.c
vp9/common/vp9_rtcd_defs.pl
@@ -194,42 +194,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
 #
 if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
-
-add_proto qw/unsigned int vp9_avg_8x8/, "const uint8_t *, int p";
-specialize qw/vp9_avg_8x8 sse2 neon msa/;
-
-add_proto qw/unsigned int vp9_avg_4x4/, "const uint8_t *, int p";
-specialize qw/vp9_avg_4x4 sse2 neon msa/;
-
-add_proto qw/void vp9_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
-specialize qw/vp9_minmax_8x8 sse2/;
-
-add_proto qw/void vp9_hadamard_8x8/, "int16_t const *src_diff, int src_stride, int16_t *coeff";
-specialize qw/vp9_hadamard_8x8 sse2/, "$ssse3_x86_64_x86inc";
-
-add_proto qw/void vp9_hadamard_16x16/, "int16_t const *src_diff, int src_stride, int16_t *coeff";
-specialize qw/vp9_hadamard_16x16 sse2/;
-
-add_proto qw/int vp9_satd/, "const int16_t *coeff, int length";
-specialize qw/vp9_satd sse2 neon/;
-
-add_proto qw/void vp9_int_pro_row/, "int16_t *hbuf, uint8_t const *ref, const int ref_stride, const int height";
-specialize qw/vp9_int_pro_row sse2 neon/;
-
-add_proto qw/int16_t vp9_int_pro_col/, "uint8_t const *ref, const int width";
-specialize qw/vp9_int_pro_col sse2 neon/;
-
-add_proto qw/int vp9_vector_var/, "int16_t const *ref, int16_t const *src, const int bwl";
-specialize qw/vp9_vector_var neon sse2/;
-
-if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
-  add_proto qw/unsigned int vp9_highbd_avg_8x8/, "const uint8_t *, int p";
-  specialize qw/vp9_highbd_avg_8x8/;
-  add_proto qw/unsigned int vp9_highbd_avg_4x4/, "const uint8_t *, int p";
-  specialize qw/vp9_highbd_avg_4x4/;
-  add_proto qw/void vp9_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
-  specialize qw/vp9_highbd_minmax_8x8/;
-}

 # ENCODEMB INVOKE
 #
vp9/encoder/arm/neon/vp9_avg_neon.c (deleted, 100644 → 0)
/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <arm_neon.h>
#include <assert.h>

#include "./vp9_rtcd.h"
#include "./vpx_config.h"

#include "vpx/vpx_integer.h"

static INLINE unsigned int horizontal_add_u16x8(const uint16x8_t v_16x8) {
  const uint32x4_t a = vpaddlq_u16(v_16x8);
  const uint64x2_t b = vpaddlq_u32(a);
  const uint32x2_t c = vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)),
                                vreinterpret_u32_u64(vget_high_u64(b)));
  return vget_lane_u32(c, 0);
}

unsigned int vp9_avg_4x4_neon(const uint8_t *s, int p) {
  uint16x8_t v_sum;
  uint32x2_t v_s0 = vdup_n_u32(0);
  uint32x2_t v_s1 = vdup_n_u32(0);
  v_s0 = vld1_lane_u32((const uint32_t *)s, v_s0, 0);
  v_s0 = vld1_lane_u32((const uint32_t *)(s + p), v_s0, 1);
  v_s1 = vld1_lane_u32((const uint32_t *)(s + 2 * p), v_s1, 0);
  v_s1 = vld1_lane_u32((const uint32_t *)(s + 3 * p), v_s1, 1);
  v_sum = vaddl_u8(vreinterpret_u8_u32(v_s0), vreinterpret_u8_u32(v_s1));
  return (horizontal_add_u16x8(v_sum) + 8) >> 4;
}

unsigned int vp9_avg_8x8_neon(const uint8_t *s, int p) {
  uint8x8_t v_s0 = vld1_u8(s);
  const uint8x8_t v_s1 = vld1_u8(s + p);
  uint16x8_t v_sum = vaddl_u8(v_s0, v_s1);

  v_s0 = vld1_u8(s + 2 * p);
  v_sum = vaddw_u8(v_sum, v_s0);

  v_s0 = vld1_u8(s + 3 * p);
  v_sum = vaddw_u8(v_sum, v_s0);

  v_s0 = vld1_u8(s + 4 * p);
  v_sum = vaddw_u8(v_sum, v_s0);

  v_s0 = vld1_u8(s + 5 * p);
  v_sum = vaddw_u8(v_sum, v_s0);

  v_s0 = vld1_u8(s + 6 * p);
  v_sum = vaddw_u8(v_sum, v_s0);

  v_s0 = vld1_u8(s + 7 * p);
  v_sum = vaddw_u8(v_sum, v_s0);

  return (horizontal_add_u16x8(v_sum) + 32) >> 6;
}

// coeff: 16 bits, dynamic range [-32640, 32640].
// length: value range {16, 64, 256, 1024}.
int vp9_satd_neon(const int16_t *coeff, int length) {
  const int16x4_t zero = vdup_n_s16(0);
  int32x4_t accum = vdupq_n_s32(0);

  do {
    const int16x8_t src0 = vld1q_s16(coeff);
    const int16x8_t src8 = vld1q_s16(coeff + 8);
    accum = vabal_s16(accum, vget_low_s16(src0), zero);
    accum = vabal_s16(accum, vget_high_s16(src0), zero);
    accum = vabal_s16(accum, vget_low_s16(src8), zero);
    accum = vabal_s16(accum, vget_high_s16(src8), zero);
    length -= 16;
    coeff += 16;
  } while (length != 0);

  {
    // satd: 26 bits, dynamic range [-32640 * 1024, 32640 * 1024]
    const int64x2_t s0 = vpaddlq_s32(accum);  // cascading summation of 'accum'.
    const int32x2_t s1 = vadd_s32(vreinterpret_s32_s64(vget_low_s64(s0)),
                                  vreinterpret_s32_s64(vget_high_s64(s0)));
    const int satd = vget_lane_s32(s1, 0);
    return satd;
  }
}
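/* For reference (not part of the deleted file): a scalar equivalent of the
 * accumulation performed by vp9_satd_neon above -- the sum of absolute
 * coefficient values over `length` entries.  Illustrative sketch only. */
static int satd_scalar_sketch(const int16_t *coeff, int length) {
  int i, satd = 0;
  for (i = 0; i < length; ++i) {
    const int v = coeff[i];
    satd += v < 0 ? -v : v;
  }
  return satd;
}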
void vp9_int_pro_row_neon(int16_t hbuf[16], uint8_t const *ref,
                          const int ref_stride, const int height) {
  int i;
  uint16x8_t vec_sum_lo = vdupq_n_u16(0);
  uint16x8_t vec_sum_hi = vdupq_n_u16(0);
  const int shift_factor = ((height >> 5) + 3) * -1;
  const int16x8_t vec_shift = vdupq_n_s16(shift_factor);

  for (i = 0; i < height; i += 8) {
    const uint8x16_t vec_row1 = vld1q_u8(ref);
    const uint8x16_t vec_row2 = vld1q_u8(ref + ref_stride);
    const uint8x16_t vec_row3 = vld1q_u8(ref + ref_stride * 2);
    const uint8x16_t vec_row4 = vld1q_u8(ref + ref_stride * 3);
    const uint8x16_t vec_row5 = vld1q_u8(ref + ref_stride * 4);
    const uint8x16_t vec_row6 = vld1q_u8(ref + ref_stride * 5);
    const uint8x16_t vec_row7 = vld1q_u8(ref + ref_stride * 6);
    const uint8x16_t vec_row8 = vld1q_u8(ref + ref_stride * 7);

    vec_sum_lo = vaddw_u8(vec_sum_lo, vget_low_u8(vec_row1));
    vec_sum_hi = vaddw_u8(vec_sum_hi, vget_high_u8(vec_row1));

    vec_sum_lo = vaddw_u8(vec_sum_lo, vget_low_u8(vec_row2));
    vec_sum_hi = vaddw_u8(vec_sum_hi, vget_high_u8(vec_row2));

    vec_sum_lo = vaddw_u8(vec_sum_lo, vget_low_u8(vec_row3));
    vec_sum_hi = vaddw_u8(vec_sum_hi, vget_high_u8(vec_row3));

    vec_sum_lo = vaddw_u8(vec_sum_lo, vget_low_u8(vec_row4));
    vec_sum_hi = vaddw_u8(vec_sum_hi, vget_high_u8(vec_row4));

    vec_sum_lo = vaddw_u8(vec_sum_lo, vget_low_u8(vec_row5));
    vec_sum_hi = vaddw_u8(vec_sum_hi, vget_high_u8(vec_row5));

    vec_sum_lo = vaddw_u8(vec_sum_lo, vget_low_u8(vec_row6));
    vec_sum_hi = vaddw_u8(vec_sum_hi, vget_high_u8(vec_row6));

    vec_sum_lo = vaddw_u8(vec_sum_lo, vget_low_u8(vec_row7));
    vec_sum_hi = vaddw_u8(vec_sum_hi, vget_high_u8(vec_row7));

    vec_sum_lo = vaddw_u8(vec_sum_lo, vget_low_u8(vec_row8));
    vec_sum_hi = vaddw_u8(vec_sum_hi, vget_high_u8(vec_row8));

    ref += ref_stride * 8;
  }

  vec_sum_lo = vshlq_u16(vec_sum_lo, vec_shift);
  vec_sum_hi = vshlq_u16(vec_sum_hi, vec_shift);

  vst1q_s16(hbuf, vreinterpretq_s16_u16(vec_sum_lo));
  hbuf += 8;
  vst1q_s16(hbuf, vreinterpretq_s16_u16(vec_sum_hi));
}

int16_t vp9_int_pro_col_neon(uint8_t const *ref, const int width) {
  int i;
  uint16x8_t vec_sum = vdupq_n_u16(0);

  for (i = 0; i < width; i += 16) {
    const uint8x16_t vec_row = vld1q_u8(ref);
    vec_sum = vaddw_u8(vec_sum, vget_low_u8(vec_row));
    vec_sum = vaddw_u8(vec_sum, vget_high_u8(vec_row));
    ref += 16;
  }

  return horizontal_add_u16x8(vec_sum);
}

// ref, src = [0, 510] - max diff = 16-bits