Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
A
aom-rav1e
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Xiph.Org
aom-rav1e
Commits
dcb29c14
Commit
dcb29c14
authored
Dec 02, 2014
by
Debargha Mukherjee
Committed by
Gerrit Code Review
Dec 02, 2014
Browse files
Options
Browse Files
Download
Plain Diff
Merge "Added high bitdepth sse2 transform functions"
parents
bf758b6a
7e40a55e
Changes
14
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
3370 additions
and
1456 deletions
+3370
-1456
test/dct16x16_test.cc
test/dct16x16_test.cc
+174
-12
test/dct32x32_test.cc
test/dct32x32_test.cc
+28
-8
test/fdct4x4_test.cc
test/fdct4x4_test.cc
+43
-6
test/fdct8x8_test.cc
test/fdct8x8_test.cc
+163
-8
vp9/common/vp9_blockd.h
vp9/common/vp9_blockd.h
+1
-50
vp9/common/vp9_rtcd_defs.pl
vp9/common/vp9_rtcd_defs.pl
+57
-36
vp9/common/x86/vp9_idct_intrin_sse2.c
vp9/common/x86/vp9_idct_intrin_sse2.c
+571
-0
vp9/encoder/vp9_encodeframe.c
vp9/encoder/vp9_encodeframe.c
+2
-2
vp9/encoder/x86/vp9_dct32x32_sse2.c
vp9/encoder/x86/vp9_dct32x32_sse2.c
+665
-226
vp9/encoder/x86/vp9_dct_impl_sse2.c
vp9/encoder/x86/vp9_dct_impl_sse2.c
+1011
-0
vp9/encoder/x86/vp9_dct_mmx.asm
vp9/encoder/x86/vp9_dct_mmx.asm
+31
-0
vp9/encoder/x86/vp9_dct_sse2.c
vp9/encoder/x86/vp9_dct_sse2.c
+249
-1108
vp9/encoder/x86/vp9_dct_sse2.h
vp9/encoder/x86/vp9_dct_sse2.h
+373
-0
vp9/vp9cx.mk
vp9/vp9cx.mk
+2
-0
No files found.
test/dct16x16_test.cc
View file @
dcb29c14
...
...
@@ -264,6 +264,8 @@ typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
// Parameter tuples for the value-parameterized 16x16 transform tests.
//
// Dct16x16Param / Ht16x16Param:
//   (forward transform, inverse transform, int, bit depth)
//   NOTE(review): the int is presumably the transform type; confirm against
//   the fixtures that consume these tuples.
typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t>
    Dct16x16Param;
typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht16x16Param;

// Idct16x16Param, as read by InvTrans16x16DCT::SetUp():
//   (reference inverse transform, inverse transform under test,
//    threshold, bit depth)
typedef std::tr1::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t>
    Idct16x16Param;
void
fdct16x16_ref
(
const
int16_t
*
in
,
tran_low_t
*
out
,
int
stride
,
int
/*tx_type*/
)
{
...
...
@@ -311,7 +313,33 @@ void iht16x16_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
// Four-argument adapter around vp9_highbd_iht16x16_256_add_c that fixes the
// trailing bit-depth argument at 12.
void iht16x16_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
  const int bit_depth = 12;
  vp9_highbd_iht16x16_256_add_c(in, out, stride, tx_type, bit_depth);
}
#endif
// Three-argument adapter around vp9_highbd_idct16x16_10_add_c that fixes the
// trailing bit-depth argument at 10.
void idct16x16_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
  const int bit_depth = 10;
  vp9_highbd_idct16x16_10_add_c(in, out, stride, bit_depth);
}
// Three-argument adapter around vp9_highbd_idct16x16_10_add_c that fixes the
// trailing bit-depth argument at 12.
void idct16x16_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
  const int bit_depth = 12;
  vp9_highbd_idct16x16_10_add_c(in, out, stride, bit_depth);
}
#if HAVE_SSE2
// Three-argument adapter around vp9_highbd_idct16x16_256_add_sse2 that fixes
// the trailing bit-depth argument at 10.
void idct16x16_256_add_10_sse2(const tran_low_t *in, uint8_t *out,
                               int stride) {
  const int bit_depth = 10;
  vp9_highbd_idct16x16_256_add_sse2(in, out, stride, bit_depth);
}
// Three-argument adapter around vp9_highbd_idct16x16_256_add_sse2 that fixes
// the trailing bit-depth argument at 12.
void idct16x16_256_add_12_sse2(const tran_low_t *in, uint8_t *out,
                               int stride) {
  const int bit_depth = 12;
  vp9_highbd_idct16x16_256_add_sse2(in, out, stride, bit_depth);
}
// Three-argument adapter around vp9_highbd_idct16x16_10_add_sse2 that fixes
// the trailing bit-depth argument at 10.
void idct16x16_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  const int bit_depth = 10;
  vp9_highbd_idct16x16_10_add_sse2(in, out, stride, bit_depth);
}
// Three-argument adapter around vp9_highbd_idct16x16_10_add_sse2 that fixes
// the trailing bit-depth argument at 12.
void idct16x16_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  const int bit_depth = 12;
  vp9_highbd_idct16x16_10_add_sse2(in, out, stride, bit_depth);
}
#endif // HAVE_SSE2
#endif // CONFIG_VP9_HIGHBITDEPTH
class
Trans16x16TestBase
{
public:
...
...
@@ -518,7 +546,7 @@ class Trans16x16TestBase {
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED_ARRAY
(
16
,
uint16_t
,
dst16
,
kNumCoeffs
);
DECLARE_ALIGNED_ARRAY
(
16
,
uint16_t
,
src16
,
kNumCoeffs
);
#endif
#endif
// CONFIG_VP9_HIGHBITDEPTH
for
(
int
i
=
0
;
i
<
count_test_block
;
++
i
)
{
double
out_r
[
kNumCoeffs
];
...
...
@@ -534,13 +562,13 @@ class Trans16x16TestBase {
src16
[
j
]
=
rnd
.
Rand16
()
&
mask_
;
dst16
[
j
]
=
rnd
.
Rand16
()
&
mask_
;
in
[
j
]
=
src16
[
j
]
-
dst16
[
j
];
#endif
#endif
// CONFIG_VP9_HIGHBITDEPTH
}
}
reference_16x16_dct_2d
(
in
,
out_r
);
for
(
int
j
=
0
;
j
<
kNumCoeffs
;
++
j
)
coeff
[
j
]
=
round
(
out_r
[
j
]
);
coeff
[
j
]
=
static_cast
<
tran_low_t
>
(
round
(
out_r
[
j
])
);
if
(
bit_depth_
==
VPX_BITS_8
)
{
ASM_REGISTER_STATE_CHECK
(
RunInvTxfm
(
coeff
,
dst
,
16
));
...
...
@@ -548,7 +576,7 @@ class Trans16x16TestBase {
}
else
{
ASM_REGISTER_STATE_CHECK
(
RunInvTxfm
(
coeff
,
CONVERT_TO_BYTEPTR
(
dst16
),
16
));
#endif
#endif
// CONFIG_VP9_HIGHBITDEPTH
}
for
(
int
j
=
0
;
j
<
kNumCoeffs
;
++
j
)
{
...
...
@@ -557,7 +585,7 @@ class Trans16x16TestBase {
bit_depth_
==
VPX_BITS_8
?
dst
[
j
]
-
src
[
j
]
:
dst16
[
j
]
-
src16
[
j
];
#else
const
uint32_t
diff
=
dst
[
j
]
-
src
[
j
];
#endif
#endif
// CONFIG_VP9_HIGHBITDEPTH
const
uint32_t
error
=
diff
*
diff
;
EXPECT_GE
(
1u
,
error
)
<<
"Error: 16x16 IDCT has error "
<<
error
...
...
@@ -565,6 +593,64 @@ class Trans16x16TestBase {
}
}
}
// Compares RunInvTxfm() with |ref_txfm| on sparse 16x16 coefficient blocks.
//
// For each of 10000 blocks, the first 10 positions of the default 16x16 scan
// order receive rnd(thresh) (multiplied by -1 on odd-numbered blocks); all
// remaining coefficients are zero. Both transforms are then run on zeroed
// destination buffers and every output sample is expected to be identical.
//
//   ref_txfm: reference inverse transform.
//   thresh:   argument handed to the random generator for each nonzero
//             coefficient.
void CompareInvReference(IdctFunc ref_txfm, int thresh) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = 10000;
  const int eob = 10;
  const int16_t *scan = vp9_default_scan_orders[TX_16X16].scan;
  DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
  DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
  DECLARE_ALIGNED_ARRAY(16, uint8_t, ref, kNumCoeffs);
#if CONFIG_VP9_HIGHBITDEPTH
  DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref16, kNumCoeffs);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  // Selects between the 8-bit buffers and the 16-bit buffers below.
  const bool is_8bit = (bit_depth_ == VPX_BITS_8);

  for (int block = 0; block < count_test_block; ++block) {
    // +1 on even-numbered blocks, -1 on odd-numbered blocks.
    const int sign = 1 - 2 * (block % 2);

    for (int pos = 0; pos < kNumCoeffs; ++pos) {
      // Random values less than the threshold, either positive or negative,
      // for the first |eob| scan positions; zero everywhere else.
      coeff[scan[pos]] = (pos < eob) ? rnd(thresh) * sign : 0;

      // Reset the destination buffers that will be used for this bit depth.
      if (is_8bit) {
        dst[pos] = 0;
        ref[pos] = 0;
#if CONFIG_VP9_HIGHBITDEPTH
      } else {
        dst16[pos] = 0;
        ref16[pos] = 0;
#endif  // CONFIG_VP9_HIGHBITDEPTH
      }
    }

    // Run the reference first, then the transform under test.
    if (is_8bit) {
      ref_txfm(coeff, ref, pitch_);
      ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
    } else {
#if CONFIG_VP9_HIGHBITDEPTH
      ref_txfm(coeff, CONVERT_TO_BYTEPTR(ref16), pitch_);
      ASM_REGISTER_STATE_CHECK(
          RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_));
#endif  // CONFIG_VP9_HIGHBITDEPTH
    }

    for (int pos = 0; pos < kNumCoeffs; ++pos) {
#if CONFIG_VP9_HIGHBITDEPTH
      const uint32_t delta =
          is_8bit ? dst[pos] - ref[pos] : dst16[pos] - ref16[pos];
#else
      const uint32_t delta = dst[pos] - ref[pos];
#endif  // CONFIG_VP9_HIGHBITDEPTH
      // Squared difference; zero only when the two outputs agree.
      const uint32_t error = delta * delta;
      EXPECT_EQ(0u, error)
          << "Error: 16x16 IDCT Comparison has error " << error
          << " at index " << pos;
    }
  }
}
int
pitch_
;
int
tx_type_
;
vpx_bit_depth_t
bit_depth_
;
...
...
@@ -590,10 +676,10 @@ class Trans16x16DCT
mask_
=
(
1
<<
bit_depth_
)
-
1
;
#if CONFIG_VP9_HIGHBITDEPTH
switch
(
bit_depth_
)
{
case
10
:
case
VPX_BITS_
10
:
inv_txfm_ref
=
idct16x16_10_ref
;
break
;
case
12
:
case
VPX_BITS_
12
:
inv_txfm_ref
=
idct16x16_12_ref
;
break
;
default:
...
...
@@ -703,6 +789,37 @@ TEST_P(Trans16x16HT, QuantCheck) {
RunQuantCheck
(
429
,
729
);
}
// Value-parameterized fixture that checks one inverse 16x16 DCT against a
// reference inverse 16x16 DCT.
//
// Test parameters (Idct16x16Param), as consumed by SetUp():
//   0: reference inverse transform
//   1: inverse transform under test
//   2: threshold forwarded to CompareInvReference()
//   3: bit depth
class InvTrans16x16DCT
    : public Trans16x16TestBase,
      public ::testing::TestWithParam<Idct16x16Param> {
 public:
  virtual ~InvTrans16x16DCT() {}

  // Unpacks the test parameters and derives the per-test constants.
  virtual void SetUp() {
    ref_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
    thresh_ = GET_PARAM(2);
    bit_depth_ = GET_PARAM(3);
    pitch_ = 16;
    // All-ones mask that is bit_depth_ bits wide.
    mask_ = (1 << bit_depth_) - 1;
  }

  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
  // Intentionally empty: this fixture has no forward transform. The
  // parameter names are commented out so the unused arguments do not
  // trigger unused-parameter warnings.
  void RunFwdTxfm(int16_t * /*in*/, tran_low_t * /*out*/, int /*stride*/) {}

  // Dispatches to the inverse transform under test.
  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
    inv_txfm_(out, dst, stride);
  }

  IdctFunc ref_txfm_;  // Reference inverse transform (parameter 0).
  IdctFunc inv_txfm_;  // Inverse transform under test (parameter 1).
  int thresh_;         // Threshold passed to CompareInvReference().
};
// Runs the reference comparison with the reference transform and threshold
// that SetUp() extracted from the test parameters.
TEST_P(InvTrans16x16DCT, CompareReference) {
  this->CompareInvReference(this->ref_txfm_, this->thresh_);
}
using
std
::
tr1
::
make_tuple
;
#if CONFIG_VP9_HIGHBITDEPTH
...
...
@@ -717,7 +834,7 @@ INSTANTIATE_TEST_CASE_P(
C
,
Trans16x16DCT
,
::
testing
::
Values
(
make_tuple
(
&
vp9_fdct16x16_c
,
&
vp9_idct16x16_256_add_c
,
0
,
VPX_BITS_8
)));
#endif
#endif
// CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P
(
...
...
@@ -743,7 +860,7 @@ INSTANTIATE_TEST_CASE_P(
make_tuple
(
&
vp9_fht16x16_c
,
&
vp9_iht16x16_256_add_c
,
1
,
VPX_BITS_8
),
make_tuple
(
&
vp9_fht16x16_c
,
&
vp9_iht16x16_256_add_c
,
2
,
VPX_BITS_8
),
make_tuple
(
&
vp9_fht16x16_c
,
&
vp9_iht16x16_256_add_c
,
3
,
VPX_BITS_8
)));
#endif
#endif
// CONFIG_VP9_HIGHBITDEPTH
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
...
...
@@ -770,7 +887,52 @@ INSTANTIATE_TEST_CASE_P(
VPX_BITS_8
),
make_tuple
(
&
vp9_fht16x16_sse2
,
&
vp9_iht16x16_256_add_sse2
,
3
,
VPX_BITS_8
)));
#endif
#endif // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
// SSE2 / high-bitdepth instantiations of the 16x16 DCT tests.
// Each tuple is (forward transform, inverse transform, int, bit depth).
// The order of the tuples is preserved because it determines the generated
// test indices.
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16DCT,
    ::testing::Values(
        make_tuple(&vp9_highbd_fdct16x16_sse2,
                   &idct16x16_10, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fdct16x16_c,
                   &idct16x16_256_add_10_sse2, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fdct16x16_sse2,
                   &idct16x16_12, 0, VPX_BITS_12),
        make_tuple(&vp9_highbd_fdct16x16_c,
                   &idct16x16_256_add_12_sse2, 0, VPX_BITS_12),
        make_tuple(&vp9_fdct16x16_sse2,
                   &vp9_idct16x16_256_add_c, 0, VPX_BITS_8)));
// SSE2 / high-bitdepth instantiations of the 16x16 hybrid transform tests.
// Each tuple is (forward transform, inverse transform, int, bit depth); the
// int runs over 0..3 for every forward/inverse pairing.
// The order of the tuples is preserved because it determines the generated
// test indices.
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16HT,
    ::testing::Values(
        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 1, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 2, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 3, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 0, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 1, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 2, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 3, VPX_BITS_12),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 0,
                   VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 1,
                   VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 2,
                   VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 3,
                   VPX_BITS_8)));
// SSE2 / high-bitdepth instantiations of the inverse 16x16 DCT comparison.
// Each tuple is (reference inverse transform, inverse transform under test,
// threshold, bit depth).
//
// Optimizations take effect at a threshold of 3155, so we use a value close to
// that to test both branches.
INSTANTIATE_TEST_CASE_P(
    SSE2, InvTrans16x16DCT,
    ::testing::Values(
        make_tuple(&idct16x16_10_add_10_c,
                   &idct16x16_10_add_10_sse2, 3167, VPX_BITS_10),
        make_tuple(&idct16x16_10,
                   &idct16x16_256_add_10_sse2, 3167, VPX_BITS_10),
        make_tuple(&idct16x16_10_add_12_c,
                   &idct16x16_10_add_12_sse2, 3167, VPX_BITS_12),
        make_tuple(&idct16x16_12,
                   &idct16x16_256_add_12_sse2, 3167, VPX_BITS_12)));
#endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
...
...
@@ -778,5 +940,5 @@ INSTANTIATE_TEST_CASE_P(
::
testing
::
Values
(
make_tuple
(
&
vp9_fdct16x16_c
,
&
vp9_idct16x16_256_add_ssse3
,
0
,
VPX_BITS_8
)));
#endif
#endif
// HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
}
// namespace
test/dct32x32_test.cc
View file @
dcb29c14
...
...
@@ -79,6 +79,10 @@ typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmFunc, int, vpx_bit_depth_t>
Trans32x32Param
;
#if CONFIG_VP9_HIGHBITDEPTH
// Three-argument adapter around vp9_highbd_idct32x32_1024_add_c that fixes
// the trailing bit-depth argument at 8.
void idct32x32_8(const tran_low_t *in, uint8_t *out, int stride) {
  const int bit_depth = 8;
  vp9_highbd_idct32x32_1024_add_c(in, out, stride, bit_depth);
}
// Three-argument adapter around vp9_highbd_idct32x32_1024_add_c that fixes
// the trailing bit-depth argument at 10.
void idct32x32_10(const tran_low_t *in, uint8_t *out, int stride) {
  const int bit_depth = 10;
  vp9_highbd_idct32x32_1024_add_c(in, out, stride, bit_depth);
}
...
...
@@ -86,7 +90,7 @@ void idct32x32_10(const tran_low_t *in, uint8_t *out, int stride) {
// Three-argument adapter around vp9_highbd_idct32x32_1024_add_c that fixes
// the trailing bit-depth argument at 12.
void idct32x32_12(const tran_low_t *in, uint8_t *out, int stride) {
  const int bit_depth = 12;
  vp9_highbd_idct32x32_1024_add_c(in, out, stride, bit_depth);
}
#endif
#endif
// CONFIG_VP9_HIGHBITDEPTH
class
Trans32x32Test
:
public
::
testing
::
TestWithParam
<
Trans32x32Param
>
{
public:
...
...
@@ -114,7 +118,7 @@ TEST_P(Trans32x32Test, AccuracyCheck) {
ACMRandom
rnd
(
ACMRandom
::
DeterministicSeed
());
uint32_t
max_error
=
0
;
int64_t
total_error
=
0
;
const
int
count_test_block
=
1000
;
const
int
count_test_block
=
1000
0
;
DECLARE_ALIGNED_ARRAY
(
16
,
int16_t
,
test_input_block
,
kNumCoeffs
);
DECLARE_ALIGNED_ARRAY
(
16
,
tran_low_t
,
test_temp_block
,
kNumCoeffs
);
DECLARE_ALIGNED_ARRAY
(
16
,
uint8_t
,
dst
,
kNumCoeffs
);
...
...
@@ -127,7 +131,7 @@ TEST_P(Trans32x32Test, AccuracyCheck) {
for
(
int
i
=
0
;
i
<
count_test_block
;
++
i
)
{
// Initialize a test block with input range [-mask_, mask_].
for
(
int
j
=
0
;
j
<
kNumCoeffs
;
++
j
)
{
if
(
bit_depth_
==
8
)
{
if
(
bit_depth_
==
VPX_BITS_
8
)
{
src
[
j
]
=
rnd
.
Rand8
();
dst
[
j
]
=
rnd
.
Rand8
();
test_input_block
[
j
]
=
src
[
j
]
-
dst
[
j
];
...
...
@@ -282,7 +286,7 @@ TEST_P(Trans32x32Test, InverseAccuracy) {
reference_32x32_dct_2d
(
in
,
out_r
);
for
(
int
j
=
0
;
j
<
kNumCoeffs
;
++
j
)
coeff
[
j
]
=
round
(
out_r
[
j
]
);
coeff
[
j
]
=
static_cast
<
tran_low_t
>
(
round
(
out_r
[
j
])
);
if
(
bit_depth_
==
VPX_BITS_8
)
{
ASM_REGISTER_STATE_CHECK
(
inv_txfm_
(
coeff
,
dst
,
32
));
#if CONFIG_VP9_HIGHBITDEPTH
...
...
@@ -331,7 +335,7 @@ INSTANTIATE_TEST_CASE_P(
&
vp9_idct32x32_1024_add_c
,
0
,
VPX_BITS_8
),
make_tuple
(
&
vp9_fdct32x32_rd_c
,
&
vp9_idct32x32_1024_add_c
,
1
,
VPX_BITS_8
)));
#endif
#endif
// CONFIG_VP9_HIGHBITDEPTH
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
...
...
@@ -341,7 +345,7 @@ INSTANTIATE_TEST_CASE_P(
&
vp9_idct32x32_1024_add_neon
,
0
,
VPX_BITS_8
),
make_tuple
(
&
vp9_fdct32x32_rd_c
,
&
vp9_idct32x32_1024_add_neon
,
1
,
VPX_BITS_8
)));
#endif
#endif
// HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
...
...
@@ -351,7 +355,23 @@ INSTANTIATE_TEST_CASE_P(
&
vp9_idct32x32_1024_add_sse2
,
0
,
VPX_BITS_8
),
make_tuple
(
&
vp9_fdct32x32_rd_sse2
,
&
vp9_idct32x32_1024_add_sse2
,
1
,
VPX_BITS_8
)));
#endif
#endif // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
// SSE2 / high-bitdepth instantiations of the 32x32 transform tests.
// Each tuple is (forward transform, inverse transform, int, bit depth); in
// this list the int is 1 exactly for the "_rd_" forward transforms.
// The order of the tuples is preserved because it determines the generated
// test indices.
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vp9_highbd_fdct32x32_sse2,
                   &idct32x32_10, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fdct32x32_rd_sse2,
                   &idct32x32_10, 1, VPX_BITS_10),
        make_tuple(&vp9_highbd_fdct32x32_sse2,
                   &idct32x32_12, 0, VPX_BITS_12),
        make_tuple(&vp9_highbd_fdct32x32_rd_sse2,
                   &idct32x32_12, 1, VPX_BITS_12),
        make_tuple(&vp9_fdct32x32_sse2,
                   &vp9_idct32x32_1024_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fdct32x32_rd_sse2,
                   &vp9_idct32x32_1024_add_c, 1, VPX_BITS_8)));
#endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
...
...
@@ -361,5 +381,5 @@ INSTANTIATE_TEST_CASE_P(
&
vp9_idct32x32_1024_add_sse2
,
0
,
VPX_BITS_8
),
make_tuple
(
&
vp9_fdct32x32_rd_avx2
,
&
vp9_idct32x32_1024_add_sse2
,
1
,
VPX_BITS_8
)));
#endif
#endif
// HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
}
// namespace
test/fdct4x4_test.cc
View file @
dcb29c14
...
...
@@ -75,7 +75,17 @@ void iwht4x4_10(const tran_low_t *in, uint8_t *out, int stride) {
// Three-argument adapter around vp9_highbd_iwht4x4_16_add_c that fixes the
// trailing bit-depth argument at 12.
void iwht4x4_12(const tran_low_t *in, uint8_t *out, int stride) {
  const int bit_depth = 12;
  vp9_highbd_iwht4x4_16_add_c(in, out, stride, bit_depth);
}
#endif
#if HAVE_SSE2
// Three-argument adapter around vp9_highbd_idct4x4_16_add_sse2 that fixes the
// trailing bit-depth argument at 10.
void idct4x4_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  const int bit_depth = 10;
  vp9_highbd_idct4x4_16_add_sse2(in, out, stride, bit_depth);
}
// Three-argument adapter around vp9_highbd_idct4x4_16_add_sse2 that fixes the
// trailing bit-depth argument at 12.
void idct4x4_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  const int bit_depth = 12;
  vp9_highbd_idct4x4_16_add_sse2(in, out, stride, bit_depth);
}
#endif // HAVE_SSE2
#endif // CONFIG_VP9_HIGHBITDEPTH
class
Trans4x4TestBase
{
public:
...
...
@@ -416,7 +426,7 @@ INSTANTIATE_TEST_CASE_P(
C
,
Trans4x4DCT
,
::
testing
::
Values
(
make_tuple
(
&
vp9_fdct4x4_c
,
&
vp9_idct4x4_16_add_c
,
0
,
VPX_BITS_8
)));
#endif
#endif
// CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P
(
...
...
@@ -442,7 +452,7 @@ INSTANTIATE_TEST_CASE_P(
make_tuple
(
&
vp9_fht4x4_c
,
&
vp9_iht4x4_16_add_c
,
1
,
VPX_BITS_8
),
make_tuple
(
&
vp9_fht4x4_c
,
&
vp9_iht4x4_16_add_c
,
2
,
VPX_BITS_8
),
make_tuple
(
&
vp9_fht4x4_c
,
&
vp9_iht4x4_16_add_c
,
3
,
VPX_BITS_8
)));
#endif
#endif
// CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P
(
...
...
@@ -456,7 +466,7 @@ INSTANTIATE_TEST_CASE_P(
C
,
Trans4x4WHT
,
::
testing
::
Values
(
make_tuple
(
&
vp9_fwht4x4_c
,
&
vp9_iwht4x4_16_add_c
,
0
,
VPX_BITS_8
)));
#endif
#endif
// CONFIG_VP9_HIGHBITDEPTH
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
...
...
@@ -471,7 +481,7 @@ INSTANTIATE_TEST_CASE_P(
make_tuple
(
&
vp9_fht4x4_c
,
&
vp9_iht4x4_16_add_neon
,
1
,
VPX_BITS_8
),
make_tuple
(
&
vp9_fht4x4_c
,
&
vp9_iht4x4_16_add_neon
,
2
,
VPX_BITS_8
),
make_tuple
(
&
vp9_fht4x4_c
,
&
vp9_iht4x4_16_add_neon
,
3
,
VPX_BITS_8
)));
#endif
#endif
// HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if CONFIG_USE_X86INC && HAVE_MMX && !CONFIG_VP9_HIGHBITDEPTH && \
!CONFIG_EMULATE_HARDWARE
...
...
@@ -494,6 +504,33 @@ INSTANTIATE_TEST_CASE_P(
make_tuple
(
&
vp9_fht4x4_sse2
,
&
vp9_iht4x4_16_add_sse2
,
1
,
VPX_BITS_8
),
make_tuple
(
&
vp9_fht4x4_sse2
,
&
vp9_iht4x4_16_add_sse2
,
2
,
VPX_BITS_8
),
make_tuple
(
&
vp9_fht4x4_sse2
,
&
vp9_iht4x4_16_add_sse2
,
3
,
VPX_BITS_8
)));
#endif
#endif // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
// SSE2 / high-bitdepth instantiations of the 4x4 DCT tests.
// Each tuple is (forward transform, inverse transform, int, bit depth).
// The order of the tuples is preserved because it determines the generated
// test indices.
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans4x4DCT,
    ::testing::Values(
        make_tuple(&vp9_highbd_fdct4x4_c,
                   &idct4x4_10_sse2, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fdct4x4_sse2,
                   &idct4x4_10_sse2, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fdct4x4_c,
                   &idct4x4_12_sse2, 0, VPX_BITS_12),
        make_tuple(&vp9_highbd_fdct4x4_sse2,
                   &idct4x4_12_sse2, 0, VPX_BITS_12),
        make_tuple(&vp9_fdct4x4_sse2,
                   &vp9_idct4x4_16_add_c, 0, VPX_BITS_8)));
// SSE2 / high-bitdepth instantiations of the 4x4 hybrid transform tests.
// Each tuple is (forward transform, inverse transform, int, bit depth); the
// int runs over 0..3 for every forward/inverse pairing.
// The order of the tuples is preserved because it determines the generated
// test indices.
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans4x4HT,
    ::testing::Values(
        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 1, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 2, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 3, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 0, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 1, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 2, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 3, VPX_BITS_12),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 3, VPX_BITS_8)));
#endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
}
// namespace
test/fdct8x8_test.cc
View file @
dcb29c14
...
...
@@ -71,6 +71,7 @@ typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
// Parameter tuples for the value-parameterized 8x8 transform tests.
//
// Dct8x8Param / Ht8x8Param:
//   (forward transform, inverse transform, int, bit depth)
//   NOTE(review): the int is presumably the transform type; confirm against
//   the fixtures that consume these tuples.
typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct8x8Param;
typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht8x8Param;

// Idct8x8Param, as read by InvTrans8x8DCT::SetUp():
//   (reference inverse transform, inverse transform under test,
//    threshold, bit depth)
typedef std::tr1::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t> Idct8x8Param;
void
fdct8x8_ref
(
const
int16_t
*
in
,
tran_low_t
*
out
,
int
stride
,
int
tx_type
)
{
vp9_fdct8x8_c
(
in
,
out
,
stride
);
...
...
@@ -96,7 +97,33 @@ void iht8x8_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
// Four-argument adapter around vp9_highbd_iht8x8_64_add_c that fixes the
// trailing bit-depth argument at 12.
void iht8x8_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
  const int bit_depth = 12;
  vp9_highbd_iht8x8_64_add_c(in, out, stride, tx_type, bit_depth);
}
#endif
// Three-argument adapter around vp9_highbd_idct8x8_10_add_c that fixes the
// trailing bit-depth argument at 10.
void idct8x8_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
  const int bit_depth = 10;
  vp9_highbd_idct8x8_10_add_c(in, out, stride, bit_depth);
}
// Three-argument adapter around vp9_highbd_idct8x8_10_add_c that fixes the
// trailing bit-depth argument at 12.
void idct8x8_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
  const int bit_depth = 12;
  vp9_highbd_idct8x8_10_add_c(in, out, stride, bit_depth);
}
#if HAVE_SSE2
// Three-argument adapter around vp9_highbd_idct8x8_10_add_sse2 that fixes the
// trailing bit-depth argument at 10.
void idct8x8_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  const int bit_depth = 10;
  vp9_highbd_idct8x8_10_add_sse2(in, out, stride, bit_depth);
}
// Three-argument adapter around vp9_highbd_idct8x8_10_add_sse2 that fixes the
// trailing bit-depth argument at 12.
void idct8x8_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  const int bit_depth = 12;
  vp9_highbd_idct8x8_10_add_sse2(in, out, stride, bit_depth);
}
// Three-argument adapter around vp9_highbd_idct8x8_64_add_sse2 that fixes the
// trailing bit-depth argument at 10.
void idct8x8_64_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  const int bit_depth = 10;
  vp9_highbd_idct8x8_64_add_sse2(in, out, stride, bit_depth);
}
// Three-argument adapter around vp9_highbd_idct8x8_64_add_sse2 that fixes the
// trailing bit-depth argument at 12.
void idct8x8_64_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  const int bit_depth = 12;
  vp9_highbd_idct8x8_64_add_sse2(in, out, stride, bit_depth);
}
#endif // HAVE_SSE2
#endif // CONFIG_VP9_HIGHBITDEPTH
class
FwdTrans8x8TestBase
{
public:
...
...
@@ -146,9 +173,10 @@ class FwdTrans8x8TestBase {
memset
(
count_sign_block
,
0
,
sizeof
(
count_sign_block
));
for
(
int
i
=
0
;
i
<
count_test_block
;
++
i
)
{
// Initialize a test block with input range [-
15, 15
].
// Initialize a test block with input range [-
mask_ / 16, mask_ / 16
].
for
(
int
j
=
0
;
j
<
64
;
++
j
)
test_input_block
[
j
]
=
(
rnd
.
Rand8
()
>>
4
)
-
(
rnd
.
Rand8
()
>>
4
);
test_input_block
[
j
]
=
((
rnd
.
Rand16
()
&
mask_
)
>>
4
)
-
((
rnd
.
Rand16
()
&
mask_
)
>>
4
);
ASM_REGISTER_STATE_CHECK
(
RunFwdTxfm
(
test_input_block
,
test_output_block
,
pitch_
));
...
...
@@ -188,7 +216,7 @@ class FwdTrans8x8TestBase {
#endif
for
(
int
i
=
0
;
i
<
count_test_block
;
++
i
)
{
// Initialize a test block with input range [-
255, 255
].
// Initialize a test block with input range [-
mask_, mask_
].
for
(
int
j
=
0
;
j
<
64
;
++
j
)
{
if
(
bit_depth_
==
VPX_BITS_8
)
{
src
[
j
]
=
rnd
.
Rand8
();
...
...
@@ -427,6 +455,63 @@ class FwdTrans8x8TestBase {
}
}
}
// Compares RunInvTxfm() with |ref_txfm| on sparse 8x8 coefficient blocks.
//
// For each of 10000 blocks, the first 12 positions of the default 8x8 scan
// order receive rnd(thresh) (multiplied by -1 on odd-numbered blocks); all
// remaining coefficients are zero. Both transforms are then run on zeroed
// destination buffers and every output sample is expected to be identical.
//
//   ref_txfm: reference inverse transform.
//   thresh:   argument handed to the random generator for each nonzero
//             coefficient.
void CompareInvReference(IdctFunc ref_txfm, int thresh) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = 10000;
  const int eob = 12;
  DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
  DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
  DECLARE_ALIGNED_ARRAY(16, uint8_t, ref, kNumCoeffs);
#if CONFIG_VP9_HIGHBITDEPTH
  DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref16, kNumCoeffs);
#endif
  const int16_t *scan = vp9_default_scan_orders[TX_8X8].scan;

  // Selects between the 8-bit buffers and the 16-bit buffers below.
  const bool is_8bit = (bit_depth_ == VPX_BITS_8);

  for (int block = 0; block < count_test_block; ++block) {
    // +1 on even-numbered blocks, -1 on odd-numbered blocks.
    const int sign = 1 - 2 * (block % 2);

    for (int pos = 0; pos < kNumCoeffs; ++pos) {
      // Random values less than the threshold, either positive or negative,
      // for the first |eob| scan positions; zero everywhere else.
      coeff[scan[pos]] = (pos < eob) ? rnd(thresh) * sign : 0;

      // Reset the destination buffers that will be used for this bit depth.
      if (is_8bit) {
        dst[pos] = 0;
        ref[pos] = 0;
#if CONFIG_VP9_HIGHBITDEPTH
      } else {
        dst16[pos] = 0;
        ref16[pos] = 0;
#endif
      }
    }

    // Run the reference first, then the transform under test.
    if (is_8bit) {
      ref_txfm(coeff, ref, pitch_);
      ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
#if CONFIG_VP9_HIGHBITDEPTH
    } else {
      ref_txfm(coeff, CONVERT_TO_BYTEPTR(ref16), pitch_);
      ASM_REGISTER_STATE_CHECK(
          RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_));
#endif
    }

    for (int pos = 0; pos < kNumCoeffs; ++pos) {
#if CONFIG_VP9_HIGHBITDEPTH
      const uint32_t delta =
          is_8bit ? dst[pos] - ref[pos] : dst16[pos] - ref16[pos];
#else
      const uint32_t delta = dst[pos] - ref[pos];
#endif
      // Squared difference; zero only when the two outputs agree.
      const uint32_t error = delta * delta;
      EXPECT_EQ(0u, error)
          << "Error: 8x8 IDCT has error " << error
          << " at index " << pos;
    }
  }
}
int
pitch_
;
int
tx_type_
;
FhtFunc
fwd_txfm_ref
;
...
...
@@ -526,6 +611,38 @@ TEST_P(FwdTrans8x8HT, ExtremalCheck) {
RunExtremalCheck
();
}
// Value-parameterized fixture that checks one inverse 8x8 DCT against a
// reference inverse 8x8 DCT.
//
// Test parameters (Idct8x8Param), as consumed by SetUp():
//   0: reference inverse transform
//   1: inverse transform under test
//   2: threshold forwarded to CompareInvReference()
//   3: bit depth
class InvTrans8x8DCT
    : public FwdTrans8x8TestBase,
      public ::testing::TestWithParam<Idct8x8Param> {
 public:
  virtual ~InvTrans8x8DCT() {}

  // Unpacks the test parameters and derives the per-test constants.
  virtual void SetUp() {
    ref_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
    thresh_ = GET_PARAM(2);
    pitch_ = 8;
    bit_depth_ = GET_PARAM(3);
    // All-ones mask that is bit_depth_ bits wide.
    mask_ = (1 << bit_depth_) - 1;
  }

  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
  // Dispatches to the inverse transform under test.
  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
    inv_txfm_(out, dst, stride);
  }

  // Intentionally empty: this fixture has no forward transform. The
  // parameter names are commented out so the unused arguments do not
  // trigger unused-parameter warnings.
  void RunFwdTxfm(int16_t * /*out*/, tran_low_t * /*dst*/, int /*stride*/) {}

  IdctFunc ref_txfm_;  // Reference inverse transform (parameter 0).
  IdctFunc inv_txfm_;  // Inverse transform under test (parameter 1).
  int thresh_;         // Threshold passed to CompareInvReference().
};
// Runs the reference comparison with the reference transform and threshold
// that SetUp() extracted from the test parameters.
TEST_P(InvTrans8x8DCT, CompareReference) {
  this->CompareInvReference(this->ref_txfm_, this->thresh_);
}
using
std
::
tr1
::
make_tuple
;
#if CONFIG_VP9_HIGHBITDEPTH
...
...
@@ -540,7 +657,7 @@ INSTANTIATE_TEST_CASE_P(
C
,
FwdTrans8x8DCT
,
::
testing
::
Values
(
make_tuple
(
&
vp9_fdct8x8_c
,
&
vp9_idct8x8_64_add_c
,
0
,
VPX_BITS_8
)));
#endif
#endif
// CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P
(
...
...
@@ -566,7 +683,7 @@ INSTANTIATE_TEST_CASE_P(
make_tuple
(
&
vp9_fht8x8_c
,
&
vp9_iht8x8_64_add_c
,
1
,
VPX_BITS_8
),
make_tuple
(
&
vp9_fht8x8_c
,
&
vp9_iht8x8_64_add_c
,
2
,
VPX_BITS_8
),