Xiph.Org / aom-rav1e / Commits

Commit 339ef0ce, authored Mar 02, 2016 by Debargha Mukherjee,
committed by Gerrit Code Review, Mar 02, 2016
Merge "Adds masked variance and sad functions for wedge" into nextgenv2
Parents: 94256166 1d69ceee
Showing 10 changed files with 4368 additions and 17 deletions:
test/masked_sad_test.cc                     +209   -0
test/masked_variance_test.cc                +752   -0
test/test.mk                                +5     -0
vpx_dsp/sad.c                               +102   -0
vpx_dsp/variance.c                          +338   -17
vpx_dsp/vpx_dsp.mk                          +7     -0
vpx_dsp/vpx_dsp_rtcd_defs.pl                +488   -0
vpx_dsp/vpx_filter.h                        +4     -0
vpx_dsp/x86/masked_sad_intrin_ssse3.c       +367   -0
vpx_dsp/x86/masked_variance_intrin_ssse3.c  +2096  -0
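For orientation before the file-by-file diff: every "masked" kernel added here weights the per-pixel difference between two blocks by a 6-bit mask value in [0, 64], then renormalizes by 64. A minimal standalone sketch of that arithmetic (a toy example of mine, mirroring the masked_sad() formula that appears in vpx_dsp/sad.c below, not code from the commit):

#include <stdio.h>
#include <stdlib.h>

/* Toy 1x4 masked SAD: sad = round(sum(m[i] * |a[i] - b[i]|) / 64),
 * with each mask weight m[i] in [0, 64]. */
int main(void) {
  const unsigned char a[4] = { 10, 20, 30, 40 };
  const unsigned char b[4] = { 12, 20, 26, 41 };
  const unsigned char m[4] = { 64, 64, 32, 0 };  /* full, full, half, none */
  unsigned int sad = 0;
  int i;
  for (i = 0; i < 4; i++) sad += m[i] * abs(a[i] - b[i]);
  sad = (sad + 31) >> 6;  /* divide by 64, rounding half up */
  printf("masked SAD = %u\n", sad);  /* (128 + 0 + 128 + 0 + 31) >> 6 = 4 */
  return 0;
}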
test/masked_sad_test.cc (new file, 0 → 100644)
/*
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <assert.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx/vpx_integer.h"
using libvpx_test::ACMRandom;

namespace {
const int number_of_iterations = 500;

typedef unsigned int (*MaskedSADFunc)(const uint8_t *a, int a_stride,
                                      const uint8_t *b, int b_stride,
                                      const uint8_t *m, int m_stride);
typedef std::tr1::tuple<MaskedSADFunc, MaskedSADFunc> MaskedSADParam;

class MaskedSADTest : public ::testing::TestWithParam<MaskedSADParam> {
 public:
  virtual ~MaskedSADTest() {}
  virtual void SetUp() {
    maskedSAD_op_ = GET_PARAM(0);
    ref_maskedSAD_op_ = GET_PARAM(1);
  }

  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
  MaskedSADFunc maskedSAD_op_;
  MaskedSADFunc ref_maskedSAD_op_;
};

TEST_P(MaskedSADTest, OperationCheck) {
  unsigned int ref_ret, ret;
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  DECLARE_ALIGNED(16, uint8_t, src_ptr[4096]);
  DECLARE_ALIGNED(16, uint8_t, ref_ptr[4096]);
  DECLARE_ALIGNED(16, uint8_t, msk_ptr[4096]);
  int err_count = 0;
  int first_failure = -1;
  int src_stride = 64;
  int ref_stride = 64;
  int msk_stride = 64;
  for (int i = 0; i < number_of_iterations; ++i) {
    for (int j = 0; j < 4096; j++) {
      src_ptr[j] = rnd.Rand8();
      ref_ptr[j] = rnd.Rand8();
      // Mask values must stay in [0, 64]: roughly half the time take a
      // random weight in [0, 63], otherwise the maximum weight 64.
      msk_ptr[j] = ((rnd.Rand8() & 0x7f) > 64) ? rnd.Rand8() & 0x3f : 64;
      assert(msk_ptr[j] <= 64);
    }

    ref_ret = ref_maskedSAD_op_(src_ptr, src_stride, ref_ptr, ref_stride,
                                msk_ptr, msk_stride);
    ASM_REGISTER_STATE_CHECK(ret = maskedSAD_op_(src_ptr, src_stride,
                                                 ref_ptr, ref_stride,
                                                 msk_ptr, msk_stride));
    if (ret != ref_ret) {
      err_count++;
      if (first_failure == -1) first_failure = i;
    }
  }

  EXPECT_EQ(0, err_count)
      << "Error: Masked SAD Test, C output doesn't match SSSE3 output. "
      << "First failed at test case " << first_failure;
}
#if CONFIG_VP9_HIGHBITDEPTH
typedef unsigned int (*HighbdMaskedSADFunc)(const uint8_t *a, int a_stride,
                                            const uint8_t *b, int b_stride,
                                            const uint8_t *m, int m_stride);
typedef std::tr1::tuple<HighbdMaskedSADFunc, HighbdMaskedSADFunc>
    HighbdMaskedSADParam;

class HighbdMaskedSADTest
    : public ::testing::TestWithParam<HighbdMaskedSADParam> {
 public:
  virtual ~HighbdMaskedSADTest() {}
  virtual void SetUp() {
    maskedSAD_op_ = GET_PARAM(0);
    ref_maskedSAD_op_ = GET_PARAM(1);
  }

  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
  HighbdMaskedSADFunc maskedSAD_op_;
  HighbdMaskedSADFunc ref_maskedSAD_op_;
};

TEST_P(HighbdMaskedSADTest, OperationCheck) {
  unsigned int ref_ret, ret;
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  DECLARE_ALIGNED(16, uint16_t, src_ptr[4096]);
  DECLARE_ALIGNED(16, uint16_t, ref_ptr[4096]);
  DECLARE_ALIGNED(16, uint8_t, msk_ptr[4096]);
  uint8_t *src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
  uint8_t *ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
  int err_count = 0;
  int first_failure = -1;
  int src_stride = 64;
  int ref_stride = 64;
  int msk_stride = 64;
  for (int i = 0; i < number_of_iterations; ++i) {
    for (int j = 0; j < 4096; j++) {
      src_ptr[j] = rnd.Rand16() & 0xfff;
      ref_ptr[j] = rnd.Rand16() & 0xfff;
      msk_ptr[j] = ((rnd.Rand8() & 0x7f) > 64) ? rnd.Rand8() & 0x3f : 64;
    }

    ref_ret = ref_maskedSAD_op_(src8_ptr, src_stride, ref8_ptr, ref_stride,
                                msk_ptr, msk_stride);
    ASM_REGISTER_STATE_CHECK(ret = maskedSAD_op_(src8_ptr, src_stride,
                                                 ref8_ptr, ref_stride,
                                                 msk_ptr, msk_stride));
    if (ret != ref_ret) {
      err_count++;
      if (first_failure == -1) first_failure = i;
    }
  }

  EXPECT_EQ(0, err_count)
      << "Error: High BD Masked SAD Test, C output doesn't match SSSE3 output. "
      << "First failed at test case " << first_failure;
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
using std::tr1::make_tuple;

#if HAVE_SSSE3
INSTANTIATE_TEST_CASE_P(
    SSSE3_C_COMPARE, MaskedSADTest, ::testing::Values(
        make_tuple(&vpx_masked_sad64x64_ssse3, &vpx_masked_sad64x64_c),
        make_tuple(&vpx_masked_sad64x32_ssse3, &vpx_masked_sad64x32_c),
        make_tuple(&vpx_masked_sad32x64_ssse3, &vpx_masked_sad32x64_c),
        make_tuple(&vpx_masked_sad32x32_ssse3, &vpx_masked_sad32x32_c),
        make_tuple(&vpx_masked_sad32x16_ssse3, &vpx_masked_sad32x16_c),
        make_tuple(&vpx_masked_sad16x32_ssse3, &vpx_masked_sad16x32_c),
        make_tuple(&vpx_masked_sad16x16_ssse3, &vpx_masked_sad16x16_c),
        make_tuple(&vpx_masked_sad16x8_ssse3, &vpx_masked_sad16x8_c),
        make_tuple(&vpx_masked_sad8x16_ssse3, &vpx_masked_sad8x16_c),
        make_tuple(&vpx_masked_sad8x8_ssse3, &vpx_masked_sad8x8_c),
        make_tuple(&vpx_masked_sad8x4_ssse3, &vpx_masked_sad8x4_c),
        make_tuple(&vpx_masked_sad4x8_ssse3, &vpx_masked_sad4x8_c),
        make_tuple(&vpx_masked_sad4x4_ssse3, &vpx_masked_sad4x4_c)));

#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    SSSE3_C_COMPARE, HighbdMaskedSADTest, ::testing::Values(
        make_tuple(&vp9_highbd_masked_sad64x64_ssse3,
                   &vp9_highbd_masked_sad64x64_c),
        make_tuple(&vp9_highbd_masked_sad64x32_ssse3,
                   &vp9_highbd_masked_sad64x32_c),
        make_tuple(&vp9_highbd_masked_sad32x64_ssse3,
                   &vp9_highbd_masked_sad32x64_c),
        make_tuple(&vp9_highbd_masked_sad32x32_ssse3,
                   &vp9_highbd_masked_sad32x32_c),
        make_tuple(&vp9_highbd_masked_sad32x16_ssse3,
                   &vp9_highbd_masked_sad32x16_c),
        make_tuple(&vp9_highbd_masked_sad16x32_ssse3,
                   &vp9_highbd_masked_sad16x32_c),
        make_tuple(&vp9_highbd_masked_sad16x16_ssse3,
                   &vp9_highbd_masked_sad16x16_c),
        make_tuple(&vp9_highbd_masked_sad16x8_ssse3,
                   &vp9_highbd_masked_sad16x8_c),
        make_tuple(&vp9_highbd_masked_sad8x16_ssse3,
                   &vp9_highbd_masked_sad8x16_c),
        make_tuple(&vp9_highbd_masked_sad8x8_ssse3,
                   &vp9_highbd_masked_sad8x8_c),
        make_tuple(&vp9_highbd_masked_sad8x4_ssse3,
                   &vp9_highbd_masked_sad8x4_c),
        make_tuple(&vp9_highbd_masked_sad4x8_ssse3,
                   &vp9_highbd_masked_sad4x8_c),
        make_tuple(&vp9_highbd_masked_sad4x4_ssse3,
                   &vp9_highbd_masked_sad4x4_c)));
#endif  // CONFIG_VP9_HIGHBITDEPTH
#endif  // HAVE_SSSE3
}  // namespace
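One detail of the test above worth spelling out: the mask expression ((rnd.Rand8() & 0x7f) > 64) ? rnd.Rand8() & 0x3f : 64 keeps every mask byte in [0, 64] (as the assert confirms), picking the full weight 64 with probability 65/128 and a uniform value in [0, 63] otherwise. A small sketch of the same distribution, with rand() standing in for ACMRandom (an assumption made for self-containment; it is not the harness's RNG):

#include <stdio.h>
#include <stdlib.h>

/* Mirrors the test's mask generator; always returns a value in [0, 64]. */
static unsigned char random_mask(void) {
  return (unsigned char)(((rand() & 0x7f) > 64) ? (rand() & 0x3f) : 64);
}

int main(void) {
  int count64 = 0, i;
  const int n = 1 << 16;
  for (i = 0; i < n; i++) count64 += (random_mask() == 64);
  /* Expect roughly 65/128 ~ 0.508 of the draws to be the full weight. */
  printf("P(mask == 64) ~= %.3f\n", count64 / (double)n);
  return 0;
}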
test/masked_variance_test.cc (new file, 0 → 100644)

(This diff is collapsed in the web view.)
test/test.mk

@@ -168,6 +168,11 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += vp10_dct_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ANS) += vp10_ans_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += sum_squares_test.cc
+ifeq ($(CONFIG_EXT_INTER),yes)
+LIBVPX_TEST_SRCS-$(HAVE_SSSE3) += masked_variance_test.cc
+LIBVPX_TEST_SRCS-$(HAVE_SSSE3) += masked_sad_test.cc
+endif
 endif # VP10

 ## Multi-codec / unconditional whitebox tests.
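Worth noting: both new test files are registered under $(HAVE_SSSE3) inside the $(CONFIG_EXT_INTER) guard, so they are compiled only when the toolchain has SSSE3 support and the ext-inter experiment is enabled; without both, the comparison tests above are skipped entirely at build time.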
vpx_dsp/sad.c

@@ -316,3 +316,105 @@ highbd_sadMxNxK(4, 4, 8)
highbd_sadMxNx4D(4, 4)
#endif  // CONFIG_VP9_HIGHBITDEPTH

#if CONFIG_VP10 && CONFIG_EXT_INTER
static INLINE unsigned int masked_sad(const uint8_t *a, int a_stride,
                                      const uint8_t *b, int b_stride,
                                      const uint8_t *m, int m_stride,
                                      int width, int height) {
  int y, x;
  unsigned int sad = 0;

  for (y = 0; y < height; y++) {
    for (x = 0; x < width; x++)
      sad += m[x] * abs(a[x] - b[x]);

    a += a_stride;
    b += b_stride;
    m += m_stride;
  }
  sad = (sad + 31) >> 6;

  return sad;
}

#define MASKSADMxN(m, n) \
unsigned int vpx_masked_sad##m##x##n##_c(const uint8_t *src, int src_stride, \
                                         const uint8_t *ref, int ref_stride, \
                                         const uint8_t *msk, int msk_stride) { \
  return masked_sad(src, src_stride, ref, ref_stride, msk, msk_stride, m, n); \
}

#if CONFIG_EXT_PARTITION
MASKSADMxN(128, 128)
MASKSADMxN(128, 64)
MASKSADMxN(64, 128)
#endif  // CONFIG_EXT_PARTITION
MASKSADMxN(64, 64)
MASKSADMxN(64, 32)
MASKSADMxN(32, 64)
MASKSADMxN(32, 32)
MASKSADMxN(32, 16)
MASKSADMxN(16, 32)
MASKSADMxN(16, 16)
MASKSADMxN(16, 8)
MASKSADMxN(8, 16)
MASKSADMxN(8, 8)
MASKSADMxN(8, 4)
MASKSADMxN(4, 8)
MASKSADMxN(4, 4)

#if CONFIG_VP9_HIGHBITDEPTH
static INLINE unsigned int highbd_masked_sad(const uint8_t *a8, int a_stride,
                                             const uint8_t *b8, int b_stride,
                                             const uint8_t *m, int m_stride,
                                             int width, int height) {
  int y, x;
  unsigned int sad = 0;
  const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
  const uint16_t *b = CONVERT_TO_SHORTPTR(b8);

  for (y = 0; y < height; y++) {
    for (x = 0; x < width; x++)
      sad += m[x] * abs(a[x] - b[x]);

    a += a_stride;
    b += b_stride;
    m += m_stride;
  }
  sad = (sad + 31) >> 6;

  return sad;
}

#define HIGHBD_MASKSADMXN(m, n) \
unsigned int vpx_highbd_masked_sad##m##x##n##_c(const uint8_t *src, \
                                                int src_stride, \
                                                const uint8_t *ref, \
                                                int ref_stride, \
                                                const uint8_t *msk, \
                                                int msk_stride) { \
  return highbd_masked_sad(src, src_stride, ref, ref_stride, \
                           msk, msk_stride, m, n); \
}

#if CONFIG_EXT_PARTITION
HIGHBD_MASKSADMXN(128, 128)
HIGHBD_MASKSADMXN(128, 64)
HIGHBD_MASKSADMXN(64, 128)
#endif  // CONFIG_EXT_PARTITION
HIGHBD_MASKSADMXN(64, 64)
HIGHBD_MASKSADMXN(64, 32)
HIGHBD_MASKSADMXN(32, 64)
HIGHBD_MASKSADMXN(32, 32)
HIGHBD_MASKSADMXN(32, 16)
HIGHBD_MASKSADMXN(16, 32)
HIGHBD_MASKSADMXN(16, 16)
HIGHBD_MASKSADMXN(16, 8)
HIGHBD_MASKSADMXN(8, 16)
HIGHBD_MASKSADMXN(8, 8)
HIGHBD_MASKSADMXN(8, 4)
HIGHBD_MASKSADMXN(4, 8)
HIGHBD_MASKSADMXN(4, 4)
#endif  // CONFIG_VP9_HIGHBITDEPTH
#endif  // CONFIG_VP10 && CONFIG_EXT_INTER
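For readers skimming the macro layer above: each MASKSADMxN(m, n) invocation stamps out one fixed-size C wrapper around masked_sad(). For example, MASKSADMxN(8, 8) expands (whitespace reflowed) to:

unsigned int vpx_masked_sad8x8_c(const uint8_t *src, int src_stride,
                                 const uint8_t *ref, int ref_stride,
                                 const uint8_t *msk, int msk_stride) {
  return masked_sad(src, src_stride, ref, ref_stride, msk, msk_stride, 8, 8);
}

which is exactly the symbol the SSSE3 unit tests above compare against.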
vpx_dsp/variance.c

@@ -15,8 +15,9 @@
 #include "vpx/vpx_integer.h"
 #include "vpx_dsp/variance.h"
 #include "vpx_dsp/vpx_filter.h"

-static const uint8_t bilinear_filters[8][2] = {
+const uint8_t vpx_bilinear_filters[BIL_SUBPEL_SHIFTS][2] = {
   { 128, 0 },
   { 112, 16 },
   { 96, 32 },

@@ -175,9 +176,9 @@ uint32_t vpx_sub_pixel_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
   uint8_t temp2[H * W]; \
 \
   var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \
-                                    bilinear_filters[xoffset]); \
+                                    vpx_bilinear_filters[xoffset]); \
   var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                     bilinear_filters[yoffset]); \
+                                     vpx_bilinear_filters[yoffset]); \
 \
   return vpx_variance##W##x##H##_c(temp2, W, b, b_stride, sse); \
 }

@@ -195,9 +196,9 @@ uint32_t vpx_sub_pixel_avg_variance##W##x##H##_c(const uint8_t *a, \
   DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \
 \
   var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \
-                                    bilinear_filters[xoffset]); \
+                                    vpx_bilinear_filters[xoffset]); \
   var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                     bilinear_filters[yoffset]); \
+                                     vpx_bilinear_filters[yoffset]); \
 \
   vpx_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \
 \

@@ -500,9 +501,9 @@ uint32_t vpx_highbd_8_sub_pixel_variance##W##x##H##_c( \
   uint16_t temp2[H * W]; \
 \
   highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
-                                           W, bilinear_filters[xoffset]); \
+                                           W, vpx_bilinear_filters[xoffset]); \
   highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                            bilinear_filters[yoffset]); \
+                                            vpx_bilinear_filters[yoffset]); \
 \
   return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \
                                             dst_stride, sse); \

@@ -517,9 +518,9 @@ uint32_t vpx_highbd_10_sub_pixel_variance##W##x##H##_c( \
   uint16_t temp2[H * W]; \
 \
   highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
-                                           W, bilinear_filters[xoffset]); \
+                                           W, vpx_bilinear_filters[xoffset]); \
   highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                            bilinear_filters[yoffset]); \
+                                            vpx_bilinear_filters[yoffset]); \
 \
   return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
                                              W, dst, dst_stride, sse); \

@@ -534,9 +535,9 @@ uint32_t vpx_highbd_12_sub_pixel_variance##W##x##H##_c( \
   uint16_t temp2[H * W]; \
 \
   highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
-                                           W, bilinear_filters[xoffset]); \
+                                           W, vpx_bilinear_filters[xoffset]); \
   highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                            bilinear_filters[yoffset]); \
+                                            vpx_bilinear_filters[yoffset]); \
 \
   return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
                                              W, dst, dst_stride, sse); \

@@ -554,9 +555,9 @@ uint32_t vpx_highbd_8_sub_pixel_avg_variance##W##x##H##_c( \
   DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
 \
   highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
-                                           W, bilinear_filters[xoffset]); \
+                                           W, vpx_bilinear_filters[xoffset]); \
   highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                            bilinear_filters[yoffset]); \
+                                            vpx_bilinear_filters[yoffset]); \
 \
   vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
                            CONVERT_TO_BYTEPTR(temp2), W); \

@@ -576,9 +577,9 @@ uint32_t vpx_highbd_10_sub_pixel_avg_variance##W##x##H##_c( \
   DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
 \
   highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
-                                           W, bilinear_filters[xoffset]); \
+                                           W, vpx_bilinear_filters[xoffset]); \
   highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                            bilinear_filters[yoffset]); \
+                                            vpx_bilinear_filters[yoffset]); \
 \
   vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
                            CONVERT_TO_BYTEPTR(temp2), W); \

@@ -598,9 +599,9 @@ uint32_t vpx_highbd_12_sub_pixel_avg_variance##W##x##H##_c( \
   DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
 \
   highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
-                                           W, bilinear_filters[xoffset]); \
+                                           W, vpx_bilinear_filters[xoffset]); \
   highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                            bilinear_filters[yoffset]); \
+                                            vpx_bilinear_filters[yoffset]); \
 \
   vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
                            CONVERT_TO_BYTEPTR(temp2), W); \

@@ -654,3 +655,323 @@ void vpx_highbd_comp_avg_pred(uint16_t *comp_pred, const uint8_t *pred8,
  }
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

#if CONFIG_VP10 && CONFIG_EXT_INTER
void masked_variance(const uint8_t *a, int a_stride,
                     const uint8_t *b, int b_stride,
                     const uint8_t *m, int m_stride,
                     int w, int h, unsigned int *sse, int *sum) {
  int i, j;

  int64_t sum64 = 0;
  uint64_t sse64 = 0;

  for (i = 0; i < h; i++) {
    for (j = 0; j < w; j++) {
      const int diff = (a[j] - b[j]) * (m[j]);
      sum64 += diff;
      sse64 += diff * diff;
    }

    a += a_stride;
    b += b_stride;
    m += m_stride;
  }

  *sum = (sum64 >= 0) ? ((sum64 + 31) >> 6) : -((-sum64 + 31) >> 6);
  *sse = (sse64 + 2047) >> 12;
}
#define MASK_VAR(W, H) \
unsigned int vpx_masked_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
const uint8_t *b, int b_stride, \
const uint8_t *m, int m_stride, \
unsigned int *sse) { \
int sum; \
masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H, sse, &sum); \
return *sse - (((int64_t)sum * sum) / (W * H)); \
}
#define MASK_SUBPIX_VAR(W, H) \
unsigned int vpx_masked_sub_pixel_variance##W##x##H##_c( \
const uint8_t *src, int src_stride, \
int xoffset, int yoffset, \
const uint8_t *dst, int dst_stride, \
const uint8_t *msk, int msk_stride, \
unsigned int *sse) { \
uint16_t fdata3[(H + 1) * W]; \
uint8_t temp2[H * W]; \
\
var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
vpx_bilinear_filters[xoffset]); \
var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
vpx_bilinear_filters[yoffset]); \
\
return vpx_masked_variance##W##x##H##_c(temp2, W, dst, dst_stride, \
msk, msk_stride, sse); \
}
MASK_VAR(4, 4)
MASK_SUBPIX_VAR(4, 4)
MASK_VAR(4, 8)
MASK_SUBPIX_VAR(4, 8)
MASK_VAR(8, 4)
MASK_SUBPIX_VAR(8, 4)
MASK_VAR(8, 8)
MASK_SUBPIX_VAR(8, 8)
MASK_VAR(8, 16)
MASK_SUBPIX_VAR(8, 16)
MASK_VAR(16, 8)
MASK_SUBPIX_VAR(16, 8)
MASK_VAR(16, 16)
MASK_SUBPIX_VAR(16, 16)
MASK_VAR(16, 32)
MASK_SUBPIX_VAR(16, 32)
MASK_VAR(32, 16)
MASK_SUBPIX_VAR(32, 16)
MASK_VAR(32, 32)
MASK_SUBPIX_VAR(32, 32)
MASK_VAR(32, 64)
MASK_SUBPIX_VAR(32, 64)
MASK_VAR(64, 32)
MASK_SUBPIX_VAR(64, 32)
MASK_VAR(64, 64)
MASK_SUBPIX_VAR(64, 64)
#if CONFIG_EXT_PARTITION
MASK_VAR(64, 128)
MASK_SUBPIX_VAR(64, 128)
MASK_VAR(128, 64)
MASK_SUBPIX_VAR(128, 64)
MASK_VAR(128, 128)
MASK_SUBPIX_VAR(128, 128)
#endif  // CONFIG_EXT_PARTITION
#if CONFIG_VP9_HIGHBITDEPTH
void highbd_masked_variance64(const uint8_t *a8, int a_stride,
                              const uint8_t *b8, int b_stride,
                              const uint8_t *m, int m_stride,
                              int w, int h, uint64_t *sse64, int *sum) {
  int i, j;
  uint16_t *a = CONVERT_TO_SHORTPTR(a8);
  uint16_t *b = CONVERT_TO_SHORTPTR(b8);

  int64_t sum64 = 0;
  *sse64 = 0;

  for (i = 0; i < h; i++) {
    for (j = 0; j < w; j++) {
      const int diff = (a[j] - b[j]) * (m[j]);
      sum64 += diff;
      *sse64 += (int64_t)diff * diff;
    }

    a += a_stride;
    b += b_stride;
    m += m_stride;
  }

  *sum = (sum64 >= 0) ? ((sum64 + 31) >> 6) : -((-sum64 + 31) >> 6);
  *sse64 = (*sse64 + 2047) >> 12;
}

void highbd_masked_variance(const uint8_t *a8, int a_stride,
                            const uint8_t *b8, int b_stride,
                            const uint8_t *m, int m_stride,
                            int w, int h, unsigned int *sse, int *sum) {
  uint64_t sse64;
  highbd_masked_variance64(a8, a_stride, b8, b_stride, m, m_stride, w, h,
                           &sse64, sum);
  *sse = (unsigned int)sse64;
}

void highbd_10_masked_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride,
                               const uint8_t *m, int m_stride,
                               int w, int h, unsigned int *sse, int *sum) {
  uint64_t sse64;
  highbd_masked_variance64(a8, a_stride, b8, b_stride, m, m_stride, w, h,
                           &sse64, sum);
  *sum = ROUND_POWER_OF_TWO(*sum, 2);
  *sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 4);
}

void highbd_12_masked_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride,
                               const uint8_t *m, int m_stride,
                               int w, int h, unsigned int *sse, int *sum) {
  uint64_t sse64;
  highbd_masked_variance64(a8, a_stride, b8, b_stride, m, m_stride, w, h,
                           &sse64, sum);
  *sum = ROUND_POWER_OF_TWO(*sum, 4);
  *sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 8);
}
#define HIGHBD_MASK_VAR(W, H) \
unsigned int vpx_highbd_masked_variance##W##x##H##_c(const uint8_t *a, \
int a_stride, \
const uint8_t *b, \
int b_stride, \
const uint8_t *m, \
int m_stride, \
unsigned int *sse) { \
int sum; \
highbd_masked_variance(a, a_stride, b, b_stride, m, m_stride, \
W, H, sse, &sum); \
return *sse - (((int64_t)sum * sum) / (W * H)); \
} \
\
unsigned int vpx_highbd_10_masked_variance##W##x##H##_c(const uint8_t *a, \
int a_stride, \
const uint8_t *b, \
int b_stride, \
const uint8_t *m, \
int m_stride, \
unsigned int *sse) { \
int sum; \
highbd_10_masked_variance(a, a_stride, b, b_stride, m, m_stride, \
W, H, sse, &sum); \
return *sse - (((int64_t)sum * sum) / (W * H)); \
} \
\
unsigned int vpx_highbd_12_masked_variance##W##x##H##_c(const uint8_t *a, \
int a_stride, \
const uint8_t *b, \
int b_stride, \
const uint8_t *m, \
int m_stride, \
unsigned int *sse) { \
int sum; \
highbd_12_masked_variance(a, a_stride, b, b_stride, m, m_stride, \
W, H, sse, &sum); \
return *sse - (((int64_t)sum * sum) / (W * H)); \
}
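A sanity check on the renormalization constants used by masked_variance() and highbd_masked_variance64() above: each difference is pre-scaled by a mask weight of at most 64 = 2^6, so the accumulated sum carries an extra 2^6 factor (removed by the rounded >> 6) and the accumulated sse an extra 2^12 factor (removed by (sse64 + 2047) >> 12). A small standalone check (my own sketch, not part of the commit), using a full-weight mask where the masked variance must reduce to the plain variance:

#include <stdio.h>

int main(void) {
  /* 2x2 block with per-pixel differences {1, 1, 3, 3}, mask = 64 everywhere. */
  const int d[4] = { 1, 1, 3, 3 };
  long long sum64 = 0, sse64 = 0;
  int i;
  for (i = 0; i < 4; i++) {
    const int diff = d[i] * 64;  /* mask weight 64 applied to each diff */
    sum64 += diff;
    sse64 += (long long)diff * diff;
  }
  {
    const int sum = (int)((sum64 + 31) >> 6);                       /* 8  */
    const unsigned int sse = (unsigned int)((sse64 + 2047) >> 12);  /* 20 */
    /* Plain variance of {1, 1, 3, 3}: sse - sum^2 / N = 20 - 64 / 4 = 4. */
    printf("sum = %d, sse = %u, variance = %lld\n", sum, sse,
           (long long)sse - ((long long)sum * sum) / 4);
  }
  return 0;
}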