Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
Xiph.Org
aom-rav1e
Commits
938b8dfc
Commit
938b8dfc
authored
Mar 04, 2016
by
Geza Lore
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Extend convolution functions to 128x128 for ext-partition.
Change-Id: I7f7e26cd1d58eb38417200550c6fbf4108c9f942
parent
697bf5be
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
366 additions
and
134 deletions
+366
-134
test/convolve_test.cc
test/convolve_test.cc
+100
-28
test/masked_sad_test.cc
test/masked_sad_test.cc
+0
-2
test/masked_variance_test.cc
test/masked_variance_test.cc
+0
-2
vpx_dsp/vpx_convolve.c
vpx_dsp/vpx_convolve.c
+30
-23
vpx_dsp/vpx_convolve.h
vpx_dsp/vpx_convolve.h
+18
-0
vpx_dsp/vpx_dsp_common.h
vpx_dsp/vpx_dsp_common.h
+6
-0
vpx_dsp/vpx_dsp_rtcd_defs.pl
vpx_dsp/vpx_dsp_rtcd_defs.pl
+35
-43
vpx_dsp/x86/convolve.h
vpx_dsp/x86/convolve.h
+35
-23
vpx_dsp/x86/vpx_convolve_copy_sse2.asm
vpx_dsp/x86/vpx_convolve_copy_sse2.asm
+116
-2
vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c
vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c
+26
-11
No files found.
test/convolve_test.cc
View file @
938b8dfc
...
...
@@ -28,7 +28,7 @@
namespace
{
static
const
unsigned
int
kMaxDimension
=
64
;
static
const
unsigned
int
kMaxDimension
=
MAX_CU_SIZE
;
typedef
void
(
*
ConvolveFunc
)(
const
uint8_t
*
src
,
ptrdiff_t
src_stride
,
uint8_t
*
dst
,
ptrdiff_t
dst_stride
,
...
...
@@ -102,7 +102,7 @@ void filter_block2d_8_c(const uint8_t *src_ptr,
// = 23
// and filter_max_width = 16
//
uint8_t
intermediate_buffer
[
71
*
kMaxDimension
];
uint8_t
intermediate_buffer
[
(
kMaxDimension
+
8
)
*
kMaxDimension
];
const
int
intermediate_next_stride
=
1
-
intermediate_height
*
output_width
;
// Horizontal pass (src -> transposed intermediate).
...
...
@@ -183,9 +183,9 @@ void filter_average_block2d_8_c(const uint8_t *src_ptr,
assert
(
output_width
<=
kMaxDimension
);
assert
(
output_height
<=
kMaxDimension
);
filter_block2d_8_c
(
src_ptr
,
src_stride
,
HFilter
,
VFilter
,
tmp
,
64
,
filter_block2d_8_c
(
src_ptr
,
src_stride
,
HFilter
,
VFilter
,
tmp
,
kMaxDimension
,
output_width
,
output_height
);
block2d_average_c
(
tmp
,
64
,
dst_ptr
,
dst_stride
,
block2d_average_c
(
tmp
,
kMaxDimension
,
dst_ptr
,
dst_stride
,
output_width
,
output_height
);
}
...
...
@@ -214,7 +214,7 @@ void highbd_filter_block2d_8_c(const uint16_t *src_ptr,
* = 23
* and filter_max_width = 16
*/
uint16_t
intermediate_buffer
[
71
*
kMaxDimension
];
uint16_t
intermediate_buffer
[
(
kMaxDimension
+
8
)
*
kMaxDimension
];
const
int
intermediate_next_stride
=
1
-
intermediate_height
*
output_width
;
// Horizontal pass (src -> transposed intermediate).
...
...
@@ -302,9 +302,10 @@ void highbd_filter_average_block2d_8_c(const uint16_t *src_ptr,
assert
(
output_width
<=
kMaxDimension
);
assert
(
output_height
<=
kMaxDimension
);
highbd_filter_block2d_8_c
(
src_ptr
,
src_stride
,
HFilter
,
VFilter
,
tmp
,
64
,
highbd_filter_block2d_8_c
(
src_ptr
,
src_stride
,
HFilter
,
VFilter
,
tmp
,
kMaxDimension
,
output_width
,
output_height
,
bd
);
highbd_block2d_average_c
(
tmp
,
64
,
dst_ptr
,
dst_stride
,
highbd_block2d_average_c
(
tmp
,
kMaxDimension
,
dst_ptr
,
dst_stride
,
output_width
,
output_height
);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
...
...
@@ -351,7 +352,7 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
protected:
static
const
int
kDataAlignment
=
16
;
static
const
int
kOuterBlockSize
=
256
;
static
const
int
kOuterBlockSize
=
4
*
kMaxDimension
;
static
const
int
kInputStride
=
kOuterBlockSize
;
static
const
int
kOutputStride
=
kOuterBlockSize
;
static
const
int
kInputBufferSize
=
kOuterBlockSize
*
kOuterBlockSize
;
...
...
@@ -414,7 +415,8 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
void
CopyOutputToRef
()
{
memcpy
(
output_ref_
,
output_
,
kOutputBufferSize
);
#if CONFIG_VP9_HIGHBITDEPTH
memcpy
(
output16_ref_
,
output16_
,
kOutputBufferSize
);
memcpy
(
output16_ref_
,
output16_
,
kOutputBufferSize
*
sizeof
(
*
output16_ref_
));
#endif
}
...
...
@@ -426,41 +428,41 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
}
uint8_t
*
input
()
const
{
const
int
index
=
BorderTop
()
*
kOuterBlockSize
+
BorderLeft
();
#if CONFIG_VP9_HIGHBITDEPTH
if
(
UUT_
->
use_highbd_
==
0
)
{
return
input_
+
BorderTop
()
*
kOuterBlockSize
+
BorderLeft
()
;
return
input_
+
index
;
}
else
{
return
CONVERT_TO_BYTEPTR
(
input16_
+
BorderTop
()
*
kOuterBlockSize
+
BorderLeft
());
return
CONVERT_TO_BYTEPTR
(
input16_
)
+
index
;
}
#else
return
input_
+
BorderTop
()
*
kOuterBlockSize
+
BorderLeft
()
;
return
input_
+
index
;
#endif
}
uint8_t
*
output
()
const
{
const
int
index
=
BorderTop
()
*
kOuterBlockSize
+
BorderLeft
();
#if CONFIG_VP9_HIGHBITDEPTH
if
(
UUT_
->
use_highbd_
==
0
)
{
return
output_
+
BorderTop
()
*
kOuterBlockSize
+
BorderLeft
()
;
return
output_
+
index
;
}
else
{
return
CONVERT_TO_BYTEPTR
(
output16_
+
BorderTop
()
*
kOuterBlockSize
+
BorderLeft
());
return
CONVERT_TO_BYTEPTR
(
output16_
+
index
);
}
#else
return
output_
+
BorderTop
()
*
kOuterBlockSize
+
BorderLeft
()
;
return
output_
+
index
;
#endif
}
uint8_t
*
output_ref
()
const
{
const
int
index
=
BorderTop
()
*
kOuterBlockSize
+
BorderLeft
();
#if CONFIG_VP9_HIGHBITDEPTH
if
(
UUT_
->
use_highbd_
==
0
)
{
return
output_ref_
+
BorderTop
()
*
kOuterBlockSize
+
BorderLeft
()
;
return
output_ref_
+
index
;
}
else
{
return
CONVERT_TO_BYTEPTR
(
output16_ref_
+
BorderTop
()
*
kOuterBlockSize
+
BorderLeft
());
return
CONVERT_TO_BYTEPTR
(
output16_ref_
+
index
);
}
#else
return
output_ref_
+
BorderTop
()
*
kOuterBlockSize
+
BorderLeft
()
;
return
output_ref_
+
index
;
#endif
}
...
...
@@ -1035,6 +1037,11 @@ const ConvolveFunctions convolve8_c(
wrap_convolve8_vert_c_8
,
wrap_convolve8_avg_vert_c_8
,
wrap_convolve8_c_8
,
wrap_convolve8_avg_c_8
,
8
);
INSTANTIATE_TEST_CASE_P
(
C_8
,
ConvolveTest
,
::
testing
::
Values
(
#if CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
128
,
64
,
&
convolve8_c
),
make_tuple
(
64
,
128
,
&
convolve8_c
),
make_tuple
(
128
,
128
,
&
convolve8_c
),
#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
4
,
4
,
&
convolve8_c
),
make_tuple
(
8
,
4
,
&
convolve8_c
),
make_tuple
(
4
,
8
,
&
convolve8_c
),
...
...
@@ -1057,6 +1064,11 @@ const ConvolveFunctions convolve10_c(
wrap_convolve8_vert_c_10
,
wrap_convolve8_avg_vert_c_10
,
wrap_convolve8_c_10
,
wrap_convolve8_avg_c_10
,
10
);
INSTANTIATE_TEST_CASE_P
(
C_10
,
ConvolveTest
,
::
testing
::
Values
(
#if CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
128
,
64
,
&
convolve10_c
),
make_tuple
(
64
,
128
,
&
convolve10_c
),
make_tuple
(
128
,
128
,
&
convolve10_c
),
#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
4
,
4
,
&
convolve10_c
),
make_tuple
(
8
,
4
,
&
convolve10_c
),
make_tuple
(
4
,
8
,
&
convolve10_c
),
...
...
@@ -1079,6 +1091,11 @@ const ConvolveFunctions convolve12_c(
wrap_convolve8_vert_c_12
,
wrap_convolve8_avg_vert_c_12
,
wrap_convolve8_c_12
,
wrap_convolve8_avg_c_12
,
12
);
INSTANTIATE_TEST_CASE_P
(
C_12
,
ConvolveTest
,
::
testing
::
Values
(
#if CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
128
,
64
,
&
convolve12_c
),
make_tuple
(
64
,
128
,
&
convolve12_c
),
make_tuple
(
128
,
128
,
&
convolve12_c
),
#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
4
,
4
,
&
convolve12_c
),
make_tuple
(
8
,
4
,
&
convolve12_c
),
make_tuple
(
4
,
8
,
&
convolve12_c
),
...
...
@@ -1105,6 +1122,11 @@ const ConvolveFunctions convolve8_c(
vpx_scaled_2d_c
,
vpx_scaled_avg_2d_c
,
0
);
INSTANTIATE_TEST_CASE_P
(
C
,
ConvolveTest
,
::
testing
::
Values
(
#if CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
128
,
64
,
&
convolve8_c
),
make_tuple
(
64
,
128
,
&
convolve8_c
),
make_tuple
(
128
,
128
,
&
convolve8_c
),
#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
4
,
4
,
&
convolve8_c
),
make_tuple
(
8
,
4
,
&
convolve8_c
),
make_tuple
(
4
,
8
,
&
convolve8_c
),
...
...
@@ -1158,7 +1180,12 @@ const ConvolveFunctions convolve12_sse2(
wrap_convolve8_horiz_sse2_12
,
wrap_convolve8_avg_horiz_sse2_12
,
wrap_convolve8_vert_sse2_12
,
wrap_convolve8_avg_vert_sse2_12
,
wrap_convolve8_sse2_12
,
wrap_convolve8_avg_sse2_12
,
12
);
INSTANTIATE_TEST_CASE_P
(
SSE2
,
ConvolveTest
,
::
testing
::
Values
(
INSTANTIATE_TEST_CASE_P
(
SSE2_8
,
ConvolveTest
,
::
testing
::
Values
(
#if CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
128
,
64
,
&
convolve8_sse2
),
make_tuple
(
64
,
128
,
&
convolve8_sse2
),
make_tuple
(
128
,
128
,
&
convolve8_sse2
),
#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
4
,
4
,
&
convolve8_sse2
),
make_tuple
(
8
,
4
,
&
convolve8_sse2
),
make_tuple
(
4
,
8
,
&
convolve8_sse2
),
...
...
@@ -1171,7 +1198,13 @@ INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
make_tuple
(
32
,
32
,
&
convolve8_sse2
),
make_tuple
(
64
,
32
,
&
convolve8_sse2
),
make_tuple
(
32
,
64
,
&
convolve8_sse2
),
make_tuple
(
64
,
64
,
&
convolve8_sse2
),
make_tuple
(
64
,
64
,
&
convolve8_sse2
)));
INSTANTIATE_TEST_CASE_P
(
SSE2_10
,
ConvolveTest
,
::
testing
::
Values
(
#if CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
128
,
64
,
&
convolve10_sse2
),
make_tuple
(
64
,
128
,
&
convolve10_sse2
),
make_tuple
(
128
,
128
,
&
convolve10_sse2
),
#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
4
,
4
,
&
convolve10_sse2
),
make_tuple
(
8
,
4
,
&
convolve10_sse2
),
make_tuple
(
4
,
8
,
&
convolve10_sse2
),
...
...
@@ -1184,7 +1217,13 @@ INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
make_tuple
(
32
,
32
,
&
convolve10_sse2
),
make_tuple
(
64
,
32
,
&
convolve10_sse2
),
make_tuple
(
32
,
64
,
&
convolve10_sse2
),
make_tuple
(
64
,
64
,
&
convolve10_sse2
),
make_tuple
(
64
,
64
,
&
convolve10_sse2
)));
INSTANTIATE_TEST_CASE_P
(
SSE2_12
,
ConvolveTest
,
::
testing
::
Values
(
#if CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
128
,
64
,
&
convolve12_sse2
),
make_tuple
(
64
,
128
,
&
convolve12_sse2
),
make_tuple
(
128
,
128
,
&
convolve12_sse2
),
#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
4
,
4
,
&
convolve12_sse2
),
make_tuple
(
8
,
4
,
&
convolve12_sse2
),
make_tuple
(
4
,
8
,
&
convolve12_sse2
),
...
...
@@ -1213,6 +1252,11 @@ const ConvolveFunctions convolve8_sse2(
vpx_scaled_2d_c
,
vpx_scaled_avg_2d_c
,
0
);
INSTANTIATE_TEST_CASE_P
(
SSE2
,
ConvolveTest
,
::
testing
::
Values
(
#if CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
128
,
64
,
&
convolve8_sse2
),
make_tuple
(
64
,
128
,
&
convolve8_sse2
),
make_tuple
(
128
,
128
,
&
convolve8_sse2
),
#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
4
,
4
,
&
convolve8_sse2
),
make_tuple
(
8
,
4
,
&
convolve8_sse2
),
make_tuple
(
4
,
8
,
&
convolve8_sse2
),
...
...
@@ -1237,9 +1281,14 @@ const ConvolveFunctions convolve8_ssse3(
vpx_convolve8_ssse3
,
vpx_convolve8_avg_ssse3
,
vpx_scaled_horiz_c
,
vpx_scaled_avg_horiz_c
,
vpx_scaled_vert_c
,
vpx_scaled_avg_vert_c
,
vpx_scaled_2d_
c
,
vpx_scaled_avg_2d_c
,
0
);
vpx_scaled_2d_
ssse3
,
vpx_scaled_avg_2d_c
,
0
);
INSTANTIATE_TEST_CASE_P
(
SSSE3
,
ConvolveTest
,
::
testing
::
Values
(
#if CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
128
,
64
,
&
convolve8_ssse3
),
make_tuple
(
64
,
128
,
&
convolve8_ssse3
),
make_tuple
(
128
,
128
,
&
convolve8_ssse3
),
#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
4
,
4
,
&
convolve8_ssse3
),
make_tuple
(
8
,
4
,
&
convolve8_ssse3
),
make_tuple
(
4
,
8
,
&
convolve8_ssse3
),
...
...
@@ -1266,6 +1315,11 @@ const ConvolveFunctions convolve8_avx2(
vpx_scaled_2d_c
,
vpx_scaled_avg_2d_c
,
0
);
INSTANTIATE_TEST_CASE_P
(
AVX2
,
ConvolveTest
,
::
testing
::
Values
(
#if CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
128
,
64
,
&
convolve8_avx2
),
make_tuple
(
64
,
128
,
&
convolve8_avx2
),
make_tuple
(
128
,
128
,
&
convolve8_avx2
),
#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
4
,
4
,
&
convolve8_avx2
),
make_tuple
(
8
,
4
,
&
convolve8_avx2
),
make_tuple
(
4
,
8
,
&
convolve8_avx2
),
...
...
@@ -1281,7 +1335,8 @@ INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values(
make_tuple
(
64
,
64
,
&
convolve8_avx2
)));
#endif // HAVE_AVX2 && HAVE_SSSE3
#if HAVE_NEON
// TODO(any): Make NEON versions support 128x128 128x64 64x128 block sizes
#if HAVE_NEON && !(CONFIG_VP10 && CONFIG_EXT_PARTITION)
#if HAVE_NEON_ASM
const
ConvolveFunctions
convolve8_neon
(
vpx_convolve_copy_neon
,
vpx_convolve_avg_neon
,
...
...
@@ -1303,6 +1358,11 @@ const ConvolveFunctions convolve8_neon(
#endif // HAVE_NEON_ASM
INSTANTIATE_TEST_CASE_P
(
NEON
,
ConvolveTest
,
::
testing
::
Values
(
#if CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
128
,
64
,
&
convolve8_neon
),
make_tuple
(
64
,
128
,
&
convolve8_neon
),
make_tuple
(
128
,
128
,
&
convolve8_neon
),
#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
4
,
4
,
&
convolve8_neon
),
make_tuple
(
8
,
4
,
&
convolve8_neon
),
make_tuple
(
4
,
8
,
&
convolve8_neon
),
...
...
@@ -1318,7 +1378,8 @@ INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values(
make_tuple
(
64
,
64
,
&
convolve8_neon
)));
#endif // HAVE_NEON
#if HAVE_DSPR2
// TODO(any): Make DSPR2 versions support 128x128 128x64 64x128 block sizes
#if HAVE_DSPR2 && !(CONFIG_VP10 && CONFIG_EXT_PARTITION)
const
ConvolveFunctions
convolve8_dspr2
(
vpx_convolve_copy_dspr2
,
vpx_convolve_avg_dspr2
,
vpx_convolve8_horiz_dspr2
,
vpx_convolve8_avg_horiz_dspr2
,
...
...
@@ -1329,6 +1390,11 @@ const ConvolveFunctions convolve8_dspr2(
vpx_scaled_2d_c
,
vpx_scaled_avg_2d_c
,
0
);
INSTANTIATE_TEST_CASE_P
(
DSPR2
,
ConvolveTest
,
::
testing
::
Values
(
#if CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
128
,
64
,
&
convolve8_dspr2
),
make_tuple
(
64
,
128
,
&
convolve8_dspr2
),
make_tuple
(
128
,
128
,
&
convolve8_dspr2
),
#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
4
,
4
,
&
convolve8_dspr2
),
make_tuple
(
8
,
4
,
&
convolve8_dspr2
),
make_tuple
(
4
,
8
,
&
convolve8_dspr2
),
...
...
@@ -1344,7 +1410,8 @@ INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest, ::testing::Values(
make_tuple
(
64
,
64
,
&
convolve8_dspr2
)));
#endif
#if HAVE_MSA
// TODO(any): Make MSA versions support 128x128 128x64 64x128 block sizes
#if HAVE_MSA && !(CONFIG_VP10 && CONFIG_EXT_PARTITION)
const
ConvolveFunctions
convolve8_msa
(
vpx_convolve_copy_msa
,
vpx_convolve_avg_msa
,
vpx_convolve8_horiz_msa
,
vpx_convolve8_avg_horiz_msa
,
...
...
@@ -1355,6 +1422,11 @@ const ConvolveFunctions convolve8_msa(
vpx_scaled_2d_c
,
vpx_scaled_avg_2d_c
,
0
);
INSTANTIATE_TEST_CASE_P
(
MSA
,
ConvolveTest
,
::
testing
::
Values
(
#if CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
128
,
64
,
&
convolve8_msa
),
make_tuple
(
64
,
128
,
&
convolve8_msa
),
make_tuple
(
128
,
128
,
&
convolve8_msa
),
#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
4
,
4
,
&
convolve8_msa
),
make_tuple
(
8
,
4
,
&
convolve8_msa
),
make_tuple
(
4
,
8
,
&
convolve8_msa
),
...
...
test/masked_sad_test.cc
View file @
938b8dfc
...
...
@@ -22,8 +22,6 @@
#include "./vpx_dsp_rtcd.h"
#include "vpx/vpx_integer.h"
#define MAX_CU_SIZE 128
using
libvpx_test
::
ACMRandom
;
namespace
{
...
...
test/masked_variance_test.cc
View file @
938b8dfc
...
...
@@ -25,8 +25,6 @@
#include "vpx_dsp/vpx_filter.h"
#include "vpx_mem/vpx_mem.h"
#define MAX_CU_SIZE 128
using
libvpx_test
::
ACMRandom
;
namespace
{
...
...
vpx_dsp/vpx_convolve.c
View file @
938b8dfc
...
...
@@ -130,18 +130,21 @@ static void convolve(const uint8_t *src, ptrdiff_t src_stride,
// --Must round-up because block may be located at sub-pixel position.
// --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
// --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
uint8_t
temp
[
135
*
64
];
uint8_t
temp
[
MAX_EXT_SIZE
*
MAX_CU_SIZE
];
int
intermediate_height
=
(((
h
-
1
)
*
y_step_q4
+
y0_q4
)
>>
SUBPEL_BITS
)
+
SUBPEL_TAPS
;
assert
(
w
<=
64
);
assert
(
h
<=
64
);
assert
(
w
<=
MAX_CU_SIZE
);
assert
(
h
<=
MAX_CU_SIZE
);
assert
(
y_step_q4
<=
32
);
assert
(
x_step_q4
<=
32
);
convolve_horiz
(
src
-
src_stride
*
(
SUBPEL_TAPS
/
2
-
1
),
src_stride
,
temp
,
64
,
convolve_horiz
(
src
-
src_stride
*
(
SUBPEL_TAPS
/
2
-
1
),
src_stride
,
temp
,
MAX_CU_SIZE
,
x_filters
,
x0_q4
,
x_step_q4
,
w
,
intermediate_height
);
convolve_vert
(
temp
+
64
*
(
SUBPEL_TAPS
/
2
-
1
),
64
,
dst
,
dst_stride
,
convolve_vert
(
temp
+
MAX_CU_SIZE
*
(
SUBPEL_TAPS
/
2
-
1
),
MAX_CU_SIZE
,
dst
,
dst_stride
,
y_filters
,
y0_q4
,
y_step_q4
,
w
,
h
);
}
...
...
@@ -237,13 +240,14 @@ void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
const
int16_t
*
filter_y
,
int
y_step_q4
,
int
w
,
int
h
)
{
/* Fixed size intermediate buffer places limits on parameters. */
DECLARE_ALIGNED
(
16
,
uint8_t
,
temp
[
64
*
64
]);
assert
(
w
<=
64
);
assert
(
h
<=
64
);
DECLARE_ALIGNED
(
16
,
uint8_t
,
temp
[
MAX_CU_SIZE
*
MAX_CU_SIZE
]);
assert
(
w
<=
MAX_CU_SIZE
);
assert
(
h
<=
MAX_CU_SIZE
);
vpx_convolve8_c
(
src
,
src_stride
,
temp
,
64
,
vpx_convolve8_c
(
src
,
src_stride
,
temp
,
MAX_CU_SIZE
,
filter_x
,
x_step_q4
,
filter_y
,
y_step_q4
,
w
,
h
);
vpx_convolve_avg_c
(
temp
,
64
,
dst
,
dst_stride
,
NULL
,
0
,
NULL
,
0
,
w
,
h
);
vpx_convolve_avg_c
(
temp
,
MAX_CU_SIZE
,
dst
,
dst_stride
,
NULL
,
0
,
NULL
,
0
,
w
,
h
);
}
void
vpx_convolve_copy_c
(
const
uint8_t
*
src
,
ptrdiff_t
src_stride
,
...
...
@@ -459,22 +463,23 @@ static void highbd_convolve(const uint8_t *src, ptrdiff_t src_stride,
// --Must round-up because block may be located at sub-pixel position.
// --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
// --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
uint16_t
temp
[
64
*
135
];
uint16_t
temp
[
MAX_EXT_SIZE
*
MAX_CU_SIZE
];
int
intermediate_height
=
(((
h
-
1
)
*
y_step_q4
+
y0_q4
)
>>
SUBPEL_BITS
)
+
SUBPEL_TAPS
;
assert
(
w
<=
64
);
assert
(
h
<=
64
);
assert
(
w
<=
MAX_CU_SIZE
);
assert
(
h
<=
MAX_CU_SIZE
);
assert
(
y_step_q4
<=
32
);
assert
(
x_step_q4
<=
32
);
highbd_convolve_horiz
(
src
-
src_stride
*
(
SUBPEL_TAPS
/
2
-
1
),
src_stride
,
CONVERT_TO_BYTEPTR
(
temp
),
64
,
highbd_convolve_horiz
(
src
-
src_stride
*
(
SUBPEL_TAPS
/
2
-
1
),
src_stride
,
CONVERT_TO_BYTEPTR
(
temp
),
MAX_CU_SIZE
,
x_filters
,
x0_q4
,
x_step_q4
,
w
,
intermediate_height
,
bd
);
highbd_convolve_vert
(
CONVERT_TO_BYTEPTR
(
temp
)
+
64
*
(
SUBPEL_TAPS
/
2
-
1
),
64
,
dst
,
dst_stride
,
y_filters
,
y0_q4
,
y_step_q4
,
w
,
h
,
bd
);
highbd_convolve_vert
(
CONVERT_TO_BYTEPTR
(
temp
)
+
MAX_CU_SIZE
*
(
SUBPEL_TAPS
/
2
-
1
),
MAX_CU_SIZE
,
dst
,
dst_stride
,
y_filters
,
y0_q4
,
y_step_q4
,
w
,
h
,
bd
);
}
...
...
@@ -556,13 +561,15 @@ void vpx_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
const
int16_t
*
filter_y
,
int
y_step_q4
,
int
w
,
int
h
,
int
bd
)
{
// Fixed size intermediate buffer places limits on parameters.
DECLARE_ALIGNED
(
16
,
uint16_t
,
temp
[
64
*
64
]);
assert
(
w
<=
64
);
assert
(
h
<=
64
);
DECLARE_ALIGNED
(
16
,
uint16_t
,
temp
[
MAX_CU_SIZE
*
MAX_CU_SIZE
]);
assert
(
w
<=
MAX_CU_SIZE
);
assert
(
h
<=
MAX_CU_SIZE
);
vpx_highbd_convolve8_c
(
src
,
src_stride
,
CONVERT_TO_BYTEPTR
(
temp
),
64
,
vpx_highbd_convolve8_c
(
src
,
src_stride
,
CONVERT_TO_BYTEPTR
(
temp
),
MAX_CU_SIZE
,
filter_x
,
x_step_q4
,
filter_y
,
y_step_q4
,
w
,
h
,
bd
);
vpx_highbd_convolve_avg_c
(
CONVERT_TO_BYTEPTR
(
temp
),
64
,
dst
,
dst_stride
,
vpx_highbd_convolve_avg_c
(
CONVERT_TO_BYTEPTR
(
temp
),
MAX_CU_SIZE
,
dst
,
dst_stride
,
NULL
,
0
,
NULL
,
0
,
w
,
h
,
bd
);
}
...
...
vpx_dsp/vpx_convolve.h
View file @
938b8dfc
...
...
@@ -17,6 +17,24 @@
extern
"C"
{
#endif
// Note: Fixed-size intermediate buffers place limits on the parameters
// of some functions. 2D filtering proceeds in 2 steps:
// (1) Interpolate horizontally into an intermediate buffer, temp.
// (2) Interpolate temp vertically to derive the sub-pixel result.
// Deriving the maximum number of rows in the temp buffer (135):
// --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
// --Largest block size is 64x64 pixels (without CONFIG_EXT_PARTITION).
// --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
// original frame (in 1/16th pixel units).
// --Must round up because block may be located at sub-pixel position.
// --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
// --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
// With CONFIG_VP10 && CONFIG_EXT_PARTITION the largest block size is 128x128
// pixels, and the same derivation gives 263 rows: (128 - 1) * 32 + 15
// sixteenths of a pixel round up to 255 rows, plus SUBPEL_TAPS (8).
#if CONFIG_VP10 && CONFIG_EXT_PARTITION
# define MAX_EXT_SIZE 263
#else
# define MAX_EXT_SIZE 135
#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
typedef
void
(
*
convolve_fn_t
)(
const
uint8_t
*
src
,
ptrdiff_t
src_stride
,
uint8_t
*
dst
,
ptrdiff_t
dst_stride
,
const
int16_t
*
filter_x
,
int
x_step_q4
,
...
...
vpx_dsp/vpx_dsp_common.h
View file @
938b8dfc
...
...
@@ -20,6 +20,12 @@
extern
"C"
{
#endif
#if CONFIG_VP10 && CONFIG_EXT_PARTITION
# define MAX_CU_SIZE 128
#else
# define MAX_CU_SIZE 64
#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
#define VPXMIN(x, y) (((x) < (y)) ? (x) : (y))
#define VPXMAX(x, y) (((x) > (y)) ? (x) : (y))
...
...
vpx_dsp/vpx_dsp_rtcd_defs.pl
View file @
938b8dfc
...
...
@@ -466,52 +466,44 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
#
# Sub Pixel Filters
#
add_proto
qw/void vpx_convolve_copy/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
specialize
qw/vpx_convolve_copy neon dspr2 msa/
,
"
$sse2_x86inc
";
add_proto
qw/void vpx_convolve_avg/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
specialize
qw/vpx_convolve_avg neon dspr2 msa/
,
"
$sse2_x86inc
";
add_proto
qw/void vpx_convolve8/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
specialize
qw/vpx_convolve8 sse2 ssse3 neon dspr2 msa/
,
"
$avx2_ssse3
";
add_proto
qw/void vpx_convolve8_horiz/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
specialize
qw/vpx_convolve8_horiz sse2 ssse3 neon dspr2 msa/
,
"
$avx2_ssse3
";
add_proto
qw/void vpx_convolve8_vert/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
specialize
qw/vpx_convolve8_vert sse2 ssse3 neon dspr2 msa/
,
"
$avx2_ssse3
";
add_proto
qw/void vpx_convolve8_avg/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
specialize
qw/vpx_convolve8_avg sse2 ssse3 neon dspr2 msa/
;
add_proto
qw/void vpx_convolve_copy/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
add_proto
qw/void vpx_convolve_avg/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
add_proto
qw/void vpx_convolve8/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
add_proto
qw/void vpx_convolve8_horiz/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
add_proto
qw/void vpx_convolve8_vert/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
add_proto
qw/void vpx_convolve8_avg/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
add_proto
qw/void vpx_convolve8_avg_horiz/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
specialize
qw/vpx_convolve8_avg_horiz sse2 ssse3 neon dspr2 msa/
;
add_proto
qw/void vpx_convolve8_avg_vert/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
specialize
qw/vpx_convolve8_avg_vert sse2 ssse3 neon dspr2 msa/
;
add_proto
qw/void vpx_scaled_2d/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
specialize
qw/vpx_scaled_2d ssse3/
;
add_proto
qw/void vpx_scaled_horiz/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
specialize
qw/vpx_scaled_horiz/
;
add_proto
qw/void vpx_scaled_vert/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
specialize
qw/vpx_scaled_vert/
;
add_proto
qw/void vpx_scaled_avg_2d/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
specialize
qw/vpx_scaled_avg_2d/
;
add_proto
qw/void vpx_scaled_avg_horiz/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
specialize
qw/vpx_scaled_avg_horiz/
;
add_proto
qw/void vpx_scaled_avg_vert/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
specialize
qw/vpx_scaled_avg_vert/
;
add_proto
qw/void vpx_convolve8_avg_vert/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
add_proto
qw/void vpx_scaled_2d/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
add_proto
qw/void vpx_scaled_horiz/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
add_proto
qw/void vpx_scaled_vert/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
add_proto
qw/void vpx_scaled_avg_2d/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
add_proto
qw/void vpx_scaled_avg_horiz/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
add_proto
qw/void vpx_scaled_avg_vert/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
specialize
qw/vpx_convolve_copy /
,
"
$sse2_x86inc
";
specialize
qw/vpx_convolve_avg /
,
"
$sse2_x86inc
";
specialize
qw/vpx_convolve8 sse2 ssse3/
,
"
$avx2_ssse3
";
specialize
qw/vpx_convolve8_horiz sse2 ssse3/
,
"
$avx2_ssse3
";
specialize
qw/vpx_convolve8_vert sse2 ssse3/
,
"
$avx2_ssse3
";
specialize
qw/vpx_convolve8_avg sse2 ssse3/
;
specialize
qw/vpx_convolve8_avg_horiz sse2 ssse3/
;
specialize
qw/vpx_convolve8_avg_vert sse2 ssse3/
;
specialize
qw/vpx_scaled_2d ssse3/
;
# TODO(any): These need to be extended to support block sizes up to 128x128
if
(
!
(
vpx_config
("
CONFIG_VP10
")
eq
"
yes
"
&&
vpx_config
("
CONFIG_EXT_PARTITION
")
eq
"
yes
"))
{
specialize
qw/vpx_convolve_copy neon dspr2 msa/
;
specialize
qw/vpx_convolve_avg neon dspr2 msa/
;
specialize
qw/vpx_convolve8 neon dspr2 msa/
;
specialize
qw/vpx_convolve8_horiz neon dspr2 msa/
;
specialize
qw/vpx_convolve8_vert neon dspr2 msa/
;
specialize
qw/vpx_convolve8_avg neon dspr2 msa/
;
specialize
qw/vpx_convolve8_avg_horiz neon dspr2 msa/
;
specialize
qw/vpx_convolve8_avg_vert neon dspr2 msa/
;
}
if
(
vpx_config
("
CONFIG_VP9_HIGHBITDEPTH
")
eq
"
yes
")
{
#
# Sub Pixel Filters
#
add_proto
qw/void vpx_highbd_convolve_copy/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps
";
specialize
qw/vpx_highbd_convolve_copy/
,
"
$sse2_x86inc
";
...
...
vpx_dsp/x86/convolve.h
View file @
938b8dfc
...
...
@@ -15,6 +15,7 @@
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
#include "vpx_dsp/vpx_convolve.h"
typedef
void
filter8_1dfunction
(
const
uint8_t
*
src_ptr
,
...
...
@@ -112,25 +113,27 @@ void vpx_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
int w, int h) { \
assert(filter_x[3] != 128); \
assert(filter_y[3] != 128); \
assert(w <=
64
); \
assert(h <=
64
); \
assert(w <=
MAX_CU_SIZE
); \
assert(h <=
MAX_CU_SIZE
); \
assert(x_step_q4 == 16); \
assert(y_step_q4 == 16); \
if (filter_x[0] || filter_x[1] || filter_x[2]|| \
filter_y[0] || filter_y[1] || filter_y[2]) { \
DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]); \
vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \
DECLARE_ALIGNED(16, uint8_t, fdata2[MAX_CU_SIZE * (MAX_CU_SIZE+7)]); \
vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \
fdata2, MAX_CU_SIZE, \
filter_x, x_step_q4, filter_y, y_step_q4, \
w, h + 7); \
vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \
vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * MAX_CU_SIZE, MAX_CU_SIZE, \
dst, dst_stride, \
filter_x, x_step_q4, filter_y, \
y_step_q4, w, h); \
} else { \
DECLARE_ALIGNED(16, uint8_t, fdata2[
64 * 65
]); \
vpx_convolve8_horiz_##opt(src, src_stride, fdata2,
64
, \
DECLARE_ALIGNED(16, uint8_t, fdata2[
MAX_CU_SIZE * (MAX_CU_SIZE+1)
]); \
vpx_convolve8_horiz_##opt(src, src_stride, fdata2,
MAX_CU_SIZE
, \
filter_x, x_step_q4, filter_y, y_step_q4, \
w, h + 1); \
vpx_convolve8_##avg##vert_##opt(fdata2,
64
, dst, dst_stride, \
vpx_convolve8_##avg##vert_##opt(fdata2,
MAX_CU_SIZE
, dst, dst_stride, \
filter_x, x_step_q4, filter_y, \
y_step_q4, w, h); \
} \
...
...
@@ -250,31 +253,40 @@ void vpx_highbd_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
const int16_t *filter_x, int x_step_q4, \
const int16_t *filter_y, int y_step_q4, \
int w, int h, int bd) { \
assert(w <=
64
); \
assert(h <=
64
); \
assert(w <=
MAX_CU_SIZE
); \
assert(h <=
MAX_CU_SIZE
); \