Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
Xiph.Org
aom-rav1e
Commits
c7d77b32
Commit
c7d77b32
authored
Mar 07, 2016
by
Debargha Mukherjee
Committed by
Gerrit Code Review
Mar 07, 2016
Browse files
Options
Browse Files
Download
Plain Diff
Merge "Extend convolution functions to 128x128 for ext-partition." into nextgenv2
parents
6adfba7c
938b8dfc
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
366 additions
and
134 deletions
+366
-134
test/convolve_test.cc
test/convolve_test.cc
+100
-28
test/masked_sad_test.cc
test/masked_sad_test.cc
+0
-2
test/masked_variance_test.cc
test/masked_variance_test.cc
+0
-2
vpx_dsp/vpx_convolve.c
vpx_dsp/vpx_convolve.c
+30
-23
vpx_dsp/vpx_convolve.h
vpx_dsp/vpx_convolve.h
+18
-0
vpx_dsp/vpx_dsp_common.h
vpx_dsp/vpx_dsp_common.h
+6
-0
vpx_dsp/vpx_dsp_rtcd_defs.pl
vpx_dsp/vpx_dsp_rtcd_defs.pl
+35
-43
vpx_dsp/x86/convolve.h
vpx_dsp/x86/convolve.h
+35
-23
vpx_dsp/x86/vpx_convolve_copy_sse2.asm
vpx_dsp/x86/vpx_convolve_copy_sse2.asm
+116
-2
vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c
vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c
+26
-11
No files found.
test/convolve_test.cc
View file @
c7d77b32
...
...
@@ -28,7 +28,7 @@
namespace
{
static
const
unsigned
int
kMaxDimension
=
64
;
static
const
unsigned
int
kMaxDimension
=
MAX_CU_SIZE
;
typedef
void
(
*
ConvolveFunc
)(
const
uint8_t
*
src
,
ptrdiff_t
src_stride
,
uint8_t
*
dst
,
ptrdiff_t
dst_stride
,
...
...
@@ -102,7 +102,7 @@ void filter_block2d_8_c(const uint8_t *src_ptr,
// = 23
// and filter_max_width = 16
//
uint8_t
intermediate_buffer
[
71
*
kMaxDimension
];
uint8_t
intermediate_buffer
[
(
kMaxDimension
+
8
)
*
kMaxDimension
];
const
int
intermediate_next_stride
=
1
-
intermediate_height
*
output_width
;
// Horizontal pass (src -> transposed intermediate).
...
...
@@ -183,9 +183,9 @@ void filter_average_block2d_8_c(const uint8_t *src_ptr,
assert
(
output_width
<=
kMaxDimension
);
assert
(
output_height
<=
kMaxDimension
);
filter_block2d_8_c
(
src_ptr
,
src_stride
,
HFilter
,
VFilter
,
tmp
,
64
,
filter_block2d_8_c
(
src_ptr
,
src_stride
,
HFilter
,
VFilter
,
tmp
,
kMaxDimension
,
output_width
,
output_height
);
block2d_average_c
(
tmp
,
64
,
dst_ptr
,
dst_stride
,
block2d_average_c
(
tmp
,
kMaxDimension
,
dst_ptr
,
dst_stride
,
output_width
,
output_height
);
}
...
...
@@ -214,7 +214,7 @@ void highbd_filter_block2d_8_c(const uint16_t *src_ptr,
* = 23
* and filter_max_width = 16
*/
uint16_t
intermediate_buffer
[
71
*
kMaxDimension
];
uint16_t
intermediate_buffer
[
(
kMaxDimension
+
8
)
*
kMaxDimension
];
const
int
intermediate_next_stride
=
1
-
intermediate_height
*
output_width
;
// Horizontal pass (src -> transposed intermediate).
...
...
@@ -302,9 +302,10 @@ void highbd_filter_average_block2d_8_c(const uint16_t *src_ptr,
assert
(
output_width
<=
kMaxDimension
);
assert
(
output_height
<=
kMaxDimension
);
highbd_filter_block2d_8_c
(
src_ptr
,
src_stride
,
HFilter
,
VFilter
,
tmp
,
64
,
highbd_filter_block2d_8_c
(
src_ptr
,
src_stride
,
HFilter
,
VFilter
,
tmp
,
kMaxDimension
,
output_width
,
output_height
,
bd
);
highbd_block2d_average_c
(
tmp
,
64
,
dst_ptr
,
dst_stride
,
highbd_block2d_average_c
(
tmp
,
kMaxDimension
,
dst_ptr
,
dst_stride
,
output_width
,
output_height
);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
...
...
@@ -351,7 +352,7 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
protected:
static
const
int
kDataAlignment
=
16
;
static
const
int
kOuterBlockSize
=
256
;
static
const
int
kOuterBlockSize
=
4
*
kMaxDimension
;
static
const
int
kInputStride
=
kOuterBlockSize
;
static
const
int
kOutputStride
=
kOuterBlockSize
;
static
const
int
kInputBufferSize
=
kOuterBlockSize
*
kOuterBlockSize
;
...
...
@@ -414,7 +415,8 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
void
CopyOutputToRef
()
{
memcpy
(
output_ref_
,
output_
,
kOutputBufferSize
);
#if CONFIG_VP9_HIGHBITDEPTH
memcpy
(
output16_ref_
,
output16_
,
kOutputBufferSize
);
memcpy
(
output16_ref_
,
output16_
,
kOutputBufferSize
*
sizeof
(
*
output16_ref_
));
#endif
}
...
...
@@ -426,41 +428,41 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
}
uint8_t
*
input
()
const
{
const
int
index
=
BorderTop
()
*
kOuterBlockSize
+
BorderLeft
();
#if CONFIG_VP9_HIGHBITDEPTH
if
(
UUT_
->
use_highbd_
==
0
)
{
return
input_
+
BorderTop
()
*
kOuterBlockSize
+
BorderLeft
()
;
return
input_
+
index
;
}
else
{
return
CONVERT_TO_BYTEPTR
(
input16_
+
BorderTop
()
*
kOuterBlockSize
+
BorderLeft
());
return
CONVERT_TO_BYTEPTR
(
input16_
)
+
index
;
}
#else
return
input_
+
BorderTop
()
*
kOuterBlockSize
+
BorderLeft
()
;
return
input_
+
index
;
#endif
}
uint8_t
*
output
()
const
{
const
int
index
=
BorderTop
()
*
kOuterBlockSize
+
BorderLeft
();
#if CONFIG_VP9_HIGHBITDEPTH
if
(
UUT_
->
use_highbd_
==
0
)
{
return
output_
+
BorderTop
()
*
kOuterBlockSize
+
BorderLeft
()
;
return
output_
+
index
;
}
else
{
return
CONVERT_TO_BYTEPTR
(
output16_
+
BorderTop
()
*
kOuterBlockSize
+
BorderLeft
());
return
CONVERT_TO_BYTEPTR
(
output16_
+
index
);
}
#else
return
output_
+
BorderTop
()
*
kOuterBlockSize
+
BorderLeft
()
;
return
output_
+
index
;
#endif
}
uint8_t
*
output_ref
()
const
{
const
int
index
=
BorderTop
()
*
kOuterBlockSize
+
BorderLeft
();
#if CONFIG_VP9_HIGHBITDEPTH
if
(
UUT_
->
use_highbd_
==
0
)
{
return
output_ref_
+
BorderTop
()
*
kOuterBlockSize
+
BorderLeft
()
;
return
output_ref_
+
index
;
}
else
{
return
CONVERT_TO_BYTEPTR
(
output16_ref_
+
BorderTop
()
*
kOuterBlockSize
+
BorderLeft
());
return
CONVERT_TO_BYTEPTR
(
output16_ref_
+
index
);
}
#else
return
output_ref_
+
BorderTop
()
*
kOuterBlockSize
+
BorderLeft
()
;
return
output_ref_
+
index
;
#endif
}
...
...
@@ -1035,6 +1037,11 @@ const ConvolveFunctions convolve8_c(
wrap_convolve8_vert_c_8
,
wrap_convolve8_avg_vert_c_8
,
wrap_convolve8_c_8
,
wrap_convolve8_avg_c_8
,
8
);
INSTANTIATE_TEST_CASE_P
(
C_8
,
ConvolveTest
,
::
testing
::
Values
(
#if CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
128
,
64
,
&
convolve8_c
),
make_tuple
(
64
,
128
,
&
convolve8_c
),
make_tuple
(
128
,
128
,
&
convolve8_c
),
#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
4
,
4
,
&
convolve8_c
),
make_tuple
(
8
,
4
,
&
convolve8_c
),
make_tuple
(
4
,
8
,
&
convolve8_c
),
...
...
@@ -1057,6 +1064,11 @@ const ConvolveFunctions convolve10_c(
wrap_convolve8_vert_c_10
,
wrap_convolve8_avg_vert_c_10
,
wrap_convolve8_c_10
,
wrap_convolve8_avg_c_10
,
10
);
INSTANTIATE_TEST_CASE_P
(
C_10
,
ConvolveTest
,
::
testing
::
Values
(
#if CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
128
,
64
,
&
convolve10_c
),
make_tuple
(
64
,
128
,
&
convolve10_c
),
make_tuple
(
128
,
128
,
&
convolve10_c
),
#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
4
,
4
,
&
convolve10_c
),
make_tuple
(
8
,
4
,
&
convolve10_c
),
make_tuple
(
4
,
8
,
&
convolve10_c
),
...
...
@@ -1079,6 +1091,11 @@ const ConvolveFunctions convolve12_c(
wrap_convolve8_vert_c_12
,
wrap_convolve8_avg_vert_c_12
,
wrap_convolve8_c_12
,
wrap_convolve8_avg_c_12
,
12
);
INSTANTIATE_TEST_CASE_P
(
C_12
,
ConvolveTest
,
::
testing
::
Values
(
#if CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
128
,
64
,
&
convolve12_c
),
make_tuple
(
64
,
128
,
&
convolve12_c
),
make_tuple
(
128
,
128
,
&
convolve12_c
),
#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
4
,
4
,
&
convolve12_c
),
make_tuple
(
8
,
4
,
&
convolve12_c
),
make_tuple
(
4
,
8
,
&
convolve12_c
),
...
...
@@ -1105,6 +1122,11 @@ const ConvolveFunctions convolve8_c(
vpx_scaled_2d_c
,
vpx_scaled_avg_2d_c
,
0
);
INSTANTIATE_TEST_CASE_P
(
C
,
ConvolveTest
,
::
testing
::
Values
(
#if CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
128
,
64
,
&
convolve8_c
),
make_tuple
(
64
,
128
,
&
convolve8_c
),
make_tuple
(
128
,
128
,
&
convolve8_c
),
#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
4
,
4
,
&
convolve8_c
),
make_tuple
(
8
,
4
,
&
convolve8_c
),
make_tuple
(
4
,
8
,
&
convolve8_c
),
...
...
@@ -1158,7 +1180,12 @@ const ConvolveFunctions convolve12_sse2(
wrap_convolve8_horiz_sse2_12
,
wrap_convolve8_avg_horiz_sse2_12
,
wrap_convolve8_vert_sse2_12
,
wrap_convolve8_avg_vert_sse2_12
,
wrap_convolve8_sse2_12
,
wrap_convolve8_avg_sse2_12
,
12
);
INSTANTIATE_TEST_CASE_P
(
SSE2
,
ConvolveTest
,
::
testing
::
Values
(
INSTANTIATE_TEST_CASE_P
(
SSE2_8
,
ConvolveTest
,
::
testing
::
Values
(
#if CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
128
,
64
,
&
convolve8_sse2
),
make_tuple
(
64
,
128
,
&
convolve8_sse2
),
make_tuple
(
128
,
128
,
&
convolve8_sse2
),
#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
4
,
4
,
&
convolve8_sse2
),
make_tuple
(
8
,
4
,
&
convolve8_sse2
),
make_tuple
(
4
,
8
,
&
convolve8_sse2
),
...
...
@@ -1171,7 +1198,13 @@ INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
make_tuple
(
32
,
32
,
&
convolve8_sse2
),
make_tuple
(
64
,
32
,
&
convolve8_sse2
),
make_tuple
(
32
,
64
,
&
convolve8_sse2
),
make_tuple
(
64
,
64
,
&
convolve8_sse2
),
make_tuple
(
64
,
64
,
&
convolve8_sse2
)));
INSTANTIATE_TEST_CASE_P
(
SSE2_10
,
ConvolveTest
,
::
testing
::
Values
(
#if CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
128
,
64
,
&
convolve10_sse2
),
make_tuple
(
64
,
128
,
&
convolve10_sse2
),
make_tuple
(
128
,
128
,
&
convolve10_sse2
),
#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
4
,
4
,
&
convolve10_sse2
),
make_tuple
(
8
,
4
,
&
convolve10_sse2
),
make_tuple
(
4
,
8
,
&
convolve10_sse2
),
...
...
@@ -1184,7 +1217,13 @@ INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
make_tuple
(
32
,
32
,
&
convolve10_sse2
),
make_tuple
(
64
,
32
,
&
convolve10_sse2
),
make_tuple
(
32
,
64
,
&
convolve10_sse2
),
make_tuple
(
64
,
64
,
&
convolve10_sse2
),
make_tuple
(
64
,
64
,
&
convolve10_sse2
)));
INSTANTIATE_TEST_CASE_P
(
SSE2_12
,
ConvolveTest
,
::
testing
::
Values
(
#if CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
128
,
64
,
&
convolve12_sse2
),
make_tuple
(
64
,
128
,
&
convolve12_sse2
),
make_tuple
(
128
,
128
,
&
convolve12_sse2
),
#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
4
,
4
,
&
convolve12_sse2
),
make_tuple
(
8
,
4
,
&
convolve12_sse2
),
make_tuple
(
4
,
8
,
&
convolve12_sse2
),
...
...
@@ -1213,6 +1252,11 @@ const ConvolveFunctions convolve8_sse2(
vpx_scaled_2d_c
,
vpx_scaled_avg_2d_c
,
0
);
INSTANTIATE_TEST_CASE_P
(
SSE2
,
ConvolveTest
,
::
testing
::
Values
(
#if CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
128
,
64
,
&
convolve8_sse2
),
make_tuple
(
64
,
128
,
&
convolve8_sse2
),
make_tuple
(
128
,
128
,
&
convolve8_sse2
),
#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
4
,
4
,
&
convolve8_sse2
),
make_tuple
(
8
,
4
,
&
convolve8_sse2
),
make_tuple
(
4
,
8
,
&
convolve8_sse2
),
...
...
@@ -1237,9 +1281,14 @@ const ConvolveFunctions convolve8_ssse3(
vpx_convolve8_ssse3
,
vpx_convolve8_avg_ssse3
,
vpx_scaled_horiz_c
,
vpx_scaled_avg_horiz_c
,
vpx_scaled_vert_c
,
vpx_scaled_avg_vert_c
,
vpx_scaled_2d_
c
,
vpx_scaled_avg_2d_c
,
0
);
vpx_scaled_2d_
ssse3
,
vpx_scaled_avg_2d_c
,
0
);
INSTANTIATE_TEST_CASE_P
(
SSSE3
,
ConvolveTest
,
::
testing
::
Values
(
#if CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
128
,
64
,
&
convolve8_ssse3
),
make_tuple
(
64
,
128
,
&
convolve8_ssse3
),
make_tuple
(
128
,
128
,
&
convolve8_ssse3
),
#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
4
,
4
,
&
convolve8_ssse3
),
make_tuple
(
8
,
4
,
&
convolve8_ssse3
),
make_tuple
(
4
,
8
,
&
convolve8_ssse3
),
...
...
@@ -1266,6 +1315,11 @@ const ConvolveFunctions convolve8_avx2(
vpx_scaled_2d_c
,
vpx_scaled_avg_2d_c
,
0
);
INSTANTIATE_TEST_CASE_P
(
AVX2
,
ConvolveTest
,
::
testing
::
Values
(
#if CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
128
,
64
,
&
convolve8_avx2
),
make_tuple
(
64
,
128
,
&
convolve8_avx2
),
make_tuple
(
128
,
128
,
&
convolve8_avx2
),
#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
4
,
4
,
&
convolve8_avx2
),
make_tuple
(
8
,
4
,
&
convolve8_avx2
),
make_tuple
(
4
,
8
,
&
convolve8_avx2
),
...
...
@@ -1281,7 +1335,8 @@ INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values(
make_tuple
(
64
,
64
,
&
convolve8_avx2
)));
#endif // HAVE_AVX2 && HAVE_SSSE3
#if HAVE_NEON
// TODO(any): Make NEON versions support 128x128 128x64 64x128 block sizes
#if HAVE_NEON && !(CONFIG_VP10 && CONFIG_EXT_PARTITION)
#if HAVE_NEON_ASM
const
ConvolveFunctions
convolve8_neon
(
vpx_convolve_copy_neon
,
vpx_convolve_avg_neon
,
...
...
@@ -1303,6 +1358,11 @@ const ConvolveFunctions convolve8_neon(
#endif // HAVE_NEON_ASM
INSTANTIATE_TEST_CASE_P
(
NEON
,
ConvolveTest
,
::
testing
::
Values
(
#if CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
128
,
64
,
&
convolve8_neon
),
make_tuple
(
64
,
128
,
&
convolve8_neon
),
make_tuple
(
128
,
128
,
&
convolve8_neon
),
#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
4
,
4
,
&
convolve8_neon
),
make_tuple
(
8
,
4
,
&
convolve8_neon
),
make_tuple
(
4
,
8
,
&
convolve8_neon
),
...
...
@@ -1318,7 +1378,8 @@ INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values(
make_tuple
(
64
,
64
,
&
convolve8_neon
)));
#endif // HAVE_NEON
#if HAVE_DSPR2
// TODO(any): Make DSPR2 versions support 128x128 128x64 64x128 block sizes
#if HAVE_DSPR2 && !(CONFIG_VP10 && CONFIG_EXT_PARTITION)
const
ConvolveFunctions
convolve8_dspr2
(
vpx_convolve_copy_dspr2
,
vpx_convolve_avg_dspr2
,
vpx_convolve8_horiz_dspr2
,
vpx_convolve8_avg_horiz_dspr2
,
...
...
@@ -1329,6 +1390,11 @@ const ConvolveFunctions convolve8_dspr2(
vpx_scaled_2d_c
,
vpx_scaled_avg_2d_c
,
0
);
INSTANTIATE_TEST_CASE_P
(
DSPR2
,
ConvolveTest
,
::
testing
::
Values
(
#if CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
128
,
64
,
&
convolve8_dspr2
),
make_tuple
(
64
,
128
,
&
convolve8_dspr2
),
make_tuple
(
128
,
128
,
&
convolve8_dspr2
),
#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
4
,
4
,
&
convolve8_dspr2
),
make_tuple
(
8
,
4
,
&
convolve8_dspr2
),
make_tuple
(
4
,
8
,
&
convolve8_dspr2
),
...
...
@@ -1344,7 +1410,8 @@ INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest, ::testing::Values(
make_tuple
(
64
,
64
,
&
convolve8_dspr2
)));
#endif
#if HAVE_MSA
// TODO(any): Make MSA versions support 128x128 128x64 64x128 block sizes
#if HAVE_MSA && !(CONFIG_VP10 && CONFIG_EXT_PARTITION)
const
ConvolveFunctions
convolve8_msa
(
vpx_convolve_copy_msa
,
vpx_convolve_avg_msa
,
vpx_convolve8_horiz_msa
,
vpx_convolve8_avg_horiz_msa
,
...
...
@@ -1355,6 +1422,11 @@ const ConvolveFunctions convolve8_msa(
vpx_scaled_2d_c
,
vpx_scaled_avg_2d_c
,
0
);
INSTANTIATE_TEST_CASE_P
(
MSA
,
ConvolveTest
,
::
testing
::
Values
(
#if CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
128
,
64
,
&
convolve8_msa
),
make_tuple
(
64
,
128
,
&
convolve8_msa
),
make_tuple
(
128
,
128
,
&
convolve8_msa
),
#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
make_tuple
(
4
,
4
,
&
convolve8_msa
),
make_tuple
(
8
,
4
,
&
convolve8_msa
),
make_tuple
(
4
,
8
,
&
convolve8_msa
),
...
...
test/masked_sad_test.cc
View file @
c7d77b32
...
...
@@ -22,8 +22,6 @@
#include "./vpx_dsp_rtcd.h"
#include "vpx/vpx_integer.h"
#define MAX_CU_SIZE 128
using
libvpx_test
::
ACMRandom
;
namespace
{
...
...
test/masked_variance_test.cc
View file @
c7d77b32
...
...
@@ -25,8 +25,6 @@
#include "vpx_dsp/vpx_filter.h"
#include "vpx_mem/vpx_mem.h"
#define MAX_CU_SIZE 128
using
libvpx_test
::
ACMRandom
;
namespace
{
...
...
vpx_dsp/vpx_convolve.c
View file @
c7d77b32
...
...
@@ -130,18 +130,21 @@ static void convolve(const uint8_t *src, ptrdiff_t src_stride,
// --Must round-up because block may be located at sub-pixel position.
// --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
// --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
uint8_t
temp
[
135
*
64
];
uint8_t
temp
[
MAX_EXT_SIZE
*
MAX_CU_SIZE
];
int
intermediate_height
=
(((
h
-
1
)
*
y_step_q4
+
y0_q4
)
>>
SUBPEL_BITS
)
+
SUBPEL_TAPS
;
assert
(
w
<=
64
);
assert
(
h
<=
64
);
assert
(
w
<=
MAX_CU_SIZE
);
assert
(
h
<=
MAX_CU_SIZE
);
assert
(
y_step_q4
<=
32
);
assert
(
x_step_q4
<=
32
);
convolve_horiz
(
src
-
src_stride
*
(
SUBPEL_TAPS
/
2
-
1
),
src_stride
,
temp
,
64
,
convolve_horiz
(
src
-
src_stride
*
(
SUBPEL_TAPS
/
2
-
1
),
src_stride
,
temp
,
MAX_CU_SIZE
,
x_filters
,
x0_q4
,
x_step_q4
,
w
,
intermediate_height
);
convolve_vert
(
temp
+
64
*
(
SUBPEL_TAPS
/
2
-
1
),
64
,
dst
,
dst_stride
,
convolve_vert
(
temp
+
MAX_CU_SIZE
*
(
SUBPEL_TAPS
/
2
-
1
),
MAX_CU_SIZE
,
dst
,
dst_stride
,
y_filters
,
y0_q4
,
y_step_q4
,
w
,
h
);
}
...
...
@@ -237,13 +240,14 @@ void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
const
int16_t
*
filter_y
,
int
y_step_q4
,
int
w
,
int
h
)
{
/* Fixed size intermediate buffer places limits on parameters. */
DECLARE_ALIGNED
(
16
,
uint8_t
,
temp
[
64
*
64
]);
assert
(
w
<=
64
);
assert
(
h
<=
64
);
DECLARE_ALIGNED
(
16
,
uint8_t
,
temp
[
MAX_CU_SIZE
*
MAX_CU_SIZE
]);
assert
(
w
<=
MAX_CU_SIZE
);
assert
(
h
<=
MAX_CU_SIZE
);
vpx_convolve8_c
(
src
,
src_stride
,
temp
,
64
,
vpx_convolve8_c
(
src
,
src_stride
,
temp
,
MAX_CU_SIZE
,
filter_x
,
x_step_q4
,
filter_y
,
y_step_q4
,
w
,
h
);
vpx_convolve_avg_c
(
temp
,
64
,
dst
,
dst_stride
,
NULL
,
0
,
NULL
,
0
,
w
,
h
);
vpx_convolve_avg_c
(
temp
,
MAX_CU_SIZE
,
dst
,
dst_stride
,
NULL
,
0
,
NULL
,
0
,
w
,
h
);
}
void
vpx_convolve_copy_c
(
const
uint8_t
*
src
,
ptrdiff_t
src_stride
,
...
...
@@ -459,22 +463,23 @@ static void highbd_convolve(const uint8_t *src, ptrdiff_t src_stride,
// --Must round-up because block may be located at sub-pixel position.
// --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
// --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
uint16_t
temp
[
64
*
135
];
uint16_t
temp
[
MAX_EXT_SIZE
*
MAX_CU_SIZE
];
int
intermediate_height
=
(((
h
-
1
)
*
y_step_q4
+
y0_q4
)
>>
SUBPEL_BITS
)
+
SUBPEL_TAPS
;
assert
(
w
<=
64
);
assert
(
h
<=
64
);
assert
(
w
<=
MAX_CU_SIZE
);
assert
(
h
<=
MAX_CU_SIZE
);
assert
(
y_step_q4
<=
32
);
assert
(
x_step_q4
<=
32
);
highbd_convolve_horiz
(
src
-
src_stride
*
(
SUBPEL_TAPS
/
2
-
1
),
src_stride
,
CONVERT_TO_BYTEPTR
(
temp
),
64
,
highbd_convolve_horiz
(
src
-
src_stride
*
(
SUBPEL_TAPS
/
2
-
1
),
src_stride
,
CONVERT_TO_BYTEPTR
(
temp
),
MAX_CU_SIZE
,
x_filters
,
x0_q4
,
x_step_q4
,
w
,
intermediate_height
,
bd
);
highbd_convolve_vert
(
CONVERT_TO_BYTEPTR
(
temp
)
+
64
*
(
SUBPEL_TAPS
/
2
-
1
),
64
,
dst
,
dst_stride
,
y_filters
,
y0_q4
,
y_step_q4
,
w
,
h
,
bd
);
highbd_convolve_vert
(
CONVERT_TO_BYTEPTR
(
temp
)
+
MAX_CU_SIZE
*
(
SUBPEL_TAPS
/
2
-
1
),
MAX_CU_SIZE
,
dst
,
dst_stride
,
y_filters
,
y0_q4
,
y_step_q4
,
w
,
h
,
bd
);
}
...
...
@@ -556,13 +561,15 @@ void vpx_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
const
int16_t
*
filter_y
,
int
y_step_q4
,
int
w
,
int
h
,
int
bd
)
{
// Fixed size intermediate buffer places limits on parameters.
DECLARE_ALIGNED
(
16
,
uint16_t
,
temp
[
64
*
64
]);
assert
(
w
<=
64
);
assert
(
h
<=
64
);
DECLARE_ALIGNED
(
16
,
uint16_t
,
temp
[
MAX_CU_SIZE
*
MAX_CU_SIZE
]);
assert
(
w
<=
MAX_CU_SIZE
);
assert
(
h
<=
MAX_CU_SIZE
);
vpx_highbd_convolve8_c
(
src
,
src_stride
,
CONVERT_TO_BYTEPTR
(
temp
),
64
,
vpx_highbd_convolve8_c
(
src
,
src_stride
,
CONVERT_TO_BYTEPTR
(
temp
),
MAX_CU_SIZE
,
filter_x
,
x_step_q4
,
filter_y
,
y_step_q4
,
w
,
h
,
bd
);
vpx_highbd_convolve_avg_c
(
CONVERT_TO_BYTEPTR
(
temp
),
64
,
dst
,
dst_stride
,
vpx_highbd_convolve_avg_c
(
CONVERT_TO_BYTEPTR
(
temp
),
MAX_CU_SIZE
,
dst
,
dst_stride
,
NULL
,
0
,
NULL
,
0
,
w
,
h
,
bd
);
}
...
...
vpx_dsp/vpx_convolve.h
View file @
c7d77b32
...
...
@@ -17,6 +17,24 @@
extern
"C"
{
#endif
// Note: Fixed-size intermediate buffers place limits on the parameters
// of some functions. 2d filtering proceeds in 2 steps:
// (1) Interpolate horizontally into an intermediate buffer, temp.
// (2) Interpolate temp vertically to derive the sub-pixel result.
// Deriving the maximum number of rows in the temp buffer (135):
// --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
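// --Largest block size is 64x64 pixels (128x128 with CONFIG_VP10 &&
//   CONFIG_EXT_PARTITION, for which the same derivation gives 263 rows).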
// --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
// original frame (in 1/16th pixel units).
// --Must round-up because block may be located at sub-pixel position.
// --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
// --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
#if CONFIG_VP10 && CONFIG_EXT_PARTITION
# define MAX_EXT_SIZE 263
#else
# define MAX_EXT_SIZE 135
#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
typedef
void
(
*
convolve_fn_t
)(
const
uint8_t
*
src
,
ptrdiff_t
src_stride
,
uint8_t
*
dst
,
ptrdiff_t
dst_stride
,
const
int16_t
*
filter_x
,
int
x_step_q4
,
...
...
vpx_dsp/vpx_dsp_common.h
View file @
c7d77b32
...
...
@@ -20,6 +20,12 @@
extern
"C"
{
#endif
#if CONFIG_VP10 && CONFIG_EXT_PARTITION
# define MAX_CU_SIZE 128
#else
# define MAX_CU_SIZE 64
#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
#define VPXMIN(x, y) (((x) < (y)) ? (x) : (y))
#define VPXMAX(x, y) (((x) > (y)) ? (x) : (y))
...
...
vpx_dsp/vpx_dsp_rtcd_defs.pl
View file @
c7d77b32
...
...
@@ -466,52 +466,44 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
#
# Sub Pixel Filters
#
add_proto
qw/void vpx_convolve_copy/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
specialize
qw/vpx_convolve_copy neon dspr2 msa/
,
"
$sse2_x86inc
";
add_proto
qw/void vpx_convolve_avg/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
specialize
qw/vpx_convolve_avg neon dspr2 msa/
,
"
$sse2_x86inc
";
add_proto
qw/void vpx_convolve8/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
specialize
qw/vpx_convolve8 sse2 ssse3 neon dspr2 msa/
,
"
$avx2_ssse3
";
add_proto
qw/void vpx_convolve8_horiz/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
specialize
qw/vpx_convolve8_horiz sse2 ssse3 neon dspr2 msa/
,
"
$avx2_ssse3
";
add_proto
qw/void vpx_convolve8_vert/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
specialize
qw/vpx_convolve8_vert sse2 ssse3 neon dspr2 msa/
,
"
$avx2_ssse3
";
add_proto
qw/void vpx_convolve8_avg/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
specialize
qw/vpx_convolve8_avg sse2 ssse3 neon dspr2 msa/
;
add_proto
qw/void vpx_convolve_copy/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
add_proto
qw/void vpx_convolve_avg/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
add_proto
qw/void vpx_convolve8/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
add_proto
qw/void vpx_convolve8_horiz/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
add_proto
qw/void vpx_convolve8_vert/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
add_proto
qw/void vpx_convolve8_avg/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
add_proto
qw/void vpx_convolve8_avg_horiz/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
specialize
qw/vpx_convolve8_avg_horiz sse2 ssse3 neon dspr2 msa/
;
add_proto
qw/void vpx_convolve8_avg_vert/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
specialize
qw/vpx_convolve8_avg_vert sse2 ssse3 neon dspr2 msa/
;
add_proto
qw/void vpx_scaled_2d/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
specialize
qw/vpx_scaled_2d ssse3/
;
add_proto
qw/void vpx_scaled_horiz/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
specialize
qw/vpx_scaled_horiz/
;
add_proto
qw/void vpx_scaled_vert/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
specialize
qw/vpx_scaled_vert/
;
add_proto
qw/void vpx_scaled_avg_2d/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
specialize
qw/vpx_scaled_avg_2d/
;
add_proto
qw/void vpx_scaled_avg_horiz/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
specialize
qw/vpx_scaled_avg_horiz/
;
add_proto
qw/void vpx_scaled_avg_vert/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
specialize
qw/vpx_scaled_avg_vert/
;
add_proto
qw/void vpx_convolve8_avg_vert/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
add_proto
qw/void vpx_scaled_2d/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
add_proto
qw/void vpx_scaled_horiz/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
add_proto
qw/void vpx_scaled_vert/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
add_proto
qw/void vpx_scaled_avg_2d/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
add_proto
qw/void vpx_scaled_avg_horiz/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
add_proto
qw/void vpx_scaled_avg_vert/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h
";
specialize
qw/vpx_convolve_copy /
,
"
$sse2_x86inc
";
specialize
qw/vpx_convolve_avg /
,
"
$sse2_x86inc
";
specialize
qw/vpx_convolve8 sse2 ssse3/
,
"
$avx2_ssse3
";
specialize
qw/vpx_convolve8_horiz sse2 ssse3/
,
"
$avx2_ssse3
";
specialize
qw/vpx_convolve8_vert sse2 ssse3/
,
"
$avx2_ssse3
";
specialize
qw/vpx_convolve8_avg sse2 ssse3/
;
specialize
qw/vpx_convolve8_avg_horiz sse2 ssse3/
;
specialize
qw/vpx_convolve8_avg_vert sse2 ssse3/
;
specialize
qw/vpx_scaled_2d ssse3/
;
# TODO(any): These need to be extended to up to 128x128 block sizes
if
(
!
(
vpx_config
("
CONFIG_VP10
")
eq
"
yes
"
&&
vpx_config
("
CONFIG_EXT_PARTITION
")
eq
"
yes
"))
{
specialize
qw/vpx_convolve_copy neon dspr2 msa/
;
specialize
qw/vpx_convolve_avg neon dspr2 msa/
;
specialize
qw/vpx_convolve8 neon dspr2 msa/
;
specialize
qw/vpx_convolve8_horiz neon dspr2 msa/
;
specialize
qw/vpx_convolve8_vert neon dspr2 msa/
;
specialize
qw/vpx_convolve8_avg neon dspr2 msa/
;
specialize
qw/vpx_convolve8_avg_horiz neon dspr2 msa/
;
specialize
qw/vpx_convolve8_avg_vert neon dspr2 msa/
;
}
if
(
vpx_config
("
CONFIG_VP9_HIGHBITDEPTH
")
eq
"
yes
")
{
#
# Sub Pixel Filters
#
add_proto
qw/void vpx_highbd_convolve_copy/
,
"
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps
";
specialize
qw/vpx_highbd_convolve_copy/
,
"
$sse2_x86inc
";
...
...
vpx_dsp/x86/convolve.h
View file @
c7d77b32
...
...
@@ -15,6 +15,7 @@
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
#include "vpx_dsp/vpx_convolve.h"
typedef
void
filter8_1dfunction
(
const
uint8_t
*
src_ptr
,
...
...
@@ -112,25 +113,27 @@ void vpx_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
int w, int h) { \
assert(filter_x[3] != 128); \
assert(filter_y[3] != 128); \
assert(w <=
64
); \
assert(h <=
64
); \
assert(w <=
MAX_CU_SIZE
); \
assert(h <=
MAX_CU_SIZE
); \
assert(x_step_q4 == 16); \
assert(y_step_q4 == 16); \
if (filter_x[0] || filter_x[1] || filter_x[2]|| \
filter_y[0] || filter_y[1] || filter_y[2]) { \
DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]); \
vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \
DECLARE_ALIGNED(16, uint8_t, fdata2[MAX_CU_SIZE * (MAX_CU_SIZE+7)]); \
vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \
fdata2, MAX_CU_SIZE, \
filter_x, x_step_q4, filter_y, y_step_q4, \
w, h + 7); \
vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \
vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * MAX_CU_SIZE, MAX_CU_SIZE, \
dst, dst_stride, \
filter_x, x_step_q4, filter_y, \
y_step_q4, w, h); \
} else { \
DECLARE_ALIGNED(16, uint8_t, fdata2[
64 * 65
]); \
vpx_convolve8_horiz_##opt(src, src_stride, fdata2,
64
, \
DECLARE_ALIGNED(16, uint8_t, fdata2[
MAX_CU_SIZE * (MAX_CU_SIZE+1)
]); \
vpx_convolve8_horiz_##opt(src, src_stride, fdata2,
MAX_CU_SIZE
, \
filter_x, x_step_q4, filter_y, y_step_q4, \
w, h + 1); \
vpx_convolve8_##avg##vert_##opt(fdata2,
64
, dst, dst_stride, \
vpx_convolve8_##avg##vert_##opt(fdata2,
MAX_CU_SIZE
, dst, dst_stride, \
filter_x, x_step_q4, filter_y, \
y_step_q4, w, h); \
} \
...
...
@@ -250,31 +253,40 @@ void vpx_highbd_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
const int16_t *filter_x, int x_step_q4, \
const int16_t *filter_y, int y_step_q4, \
int w, int h, int bd) { \
assert(w <=
64
); \
assert(h <=
64
); \
assert(w <=
MAX_CU_SIZE
); \