Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
Xiph.Org
aom-rav1e
Commits
667db87a
Commit
667db87a
authored
Jun 10, 2016
by
James Zern
Committed by
Gerrit Code Review
Jun 10, 2016
Browse files
Options
Browse Files
Download
Plain Diff
Merge "Revert "Optimize wedge partition selection."" into nextgenv2
parents
9d924a0c
95340fcc
Changes
9
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
120 additions
and
1034 deletions
+120
-1034
test/test.mk
test/test.mk
+0
-1
test/vp10_wedge_utils_test.cc
test/vp10_wedge_utils_test.cc
+0
-399
vp10/common/reconinter.c
vp10/common/reconinter.c
+8
-0
vp10/common/reconinter.h
vp10/common/reconinter.h
+1
-0
vp10/common/vp10_rtcd_defs.pl
vp10/common/vp10_rtcd_defs.pl
+0
-9
vp10/encoder/rdopt.c
vp10/encoder/rdopt.c
+111
-226
vp10/encoder/wedge_utils.c
vp10/encoder/wedge_utils.c
+0
-135
vp10/encoder/x86/wedge_utils_sse2.c
vp10/encoder/x86/wedge_utils_sse2.c
+0
-260
vp10/vp10cx.mk
vp10/vp10cx.mk
+0
-4
No files found.
test/test.mk
View file @
667db87a
...
...
@@ -185,7 +185,6 @@ ifeq ($(CONFIG_EXT_INTER),yes)
LIBVPX_TEST_SRCS-$(HAVE_SSSE3)
+=
masked_variance_test.cc
LIBVPX_TEST_SRCS-$(HAVE_SSSE3)
+=
masked_sad_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER)
+=
blend_mask6_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER)
+=
vp10_wedge_utils_test.cc
endif
ifeq
($(CONFIG_VP9_HIGHBITDEPTH),yes)
...
...
test/vp10_wedge_utils_test.cc
deleted
100644 → 0
View file @
9d924a0c
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "./vpx_config.h"
#include "vpx_ports/mem.h"
#include "./vpx_dsp_rtcd.h"
#include "./vp10_rtcd.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vp10/common/enums.h"
#include "test/array_utils.h"
#include "test/assertion_helpers.h"
#include "test/function_equivalence_test.h"
#include "test/randomise.h"
#include "test/register_state_check.h"
#include "test/snapshot.h"
#define WEDGE_WEIGHT_BITS 6
#define MAX_MASK_VALUE (1 << (WEDGE_WEIGHT_BITS))
using std::tr1::make_tuple;
using libvpx_test::FunctionEquivalenceTest;
using libvpx_test::Snapshot;
using libvpx_test::Randomise;
using libvpx_test::array_utils::arraySet;
using libvpx_test::assertion_helpers::ArraysEq;
using libvpx_test::assertion_helpers::ArraysEqWithin;

namespace {

// Largest magnitude representable in a signed 13 bit integer (4095). The
// optimisation tests below drive residuals over [-int13_max, int13_max].
static const int16_t int13_max = (1 << 12) - 1;
//////////////////////////////////////////////////////////////////////////////
// vp10_wedge_sse_from_residuals - functionality
//////////////////////////////////////////////////////////////////////////////
// Fixture for the functional tests of vp10_wedge_sse_from_residuals: it only
// supplies the snapshot/randomise helpers used by the TEST_F bodies.
class WedgeUtilsSSEFuncTest : public testing::Test {
 protected:
  Snapshot snapshot;
  Randomise randomise;
};
// Reference blend of two residual vectors: r[i] is the mask-weighted mix of
// r0[i] (weight m[i]) and r1[i] (weight MAX_MASK_VALUE - m[i]), rounded down
// by WEDGE_WEIGHT_BITS. N is the number of elements; all arrays contiguous.
static void equiv_blend_residuals(int16_t *r, const int16_t *r0,
                                  const int16_t *r1, const uint8_t *m, int N) {
  for (int px = 0; px < N; ++px) {
    const int32_t w0 = m[px];
    const int32_t w1 = MAX_MASK_VALUE - w0;
    const int16_t mixed = w0 * r0[px] + w1 * r1[px];
    // Note that this rounding is designed to match the result you would get
    // when actually blending the 2 predictors and computing the residuals.
    r[px] = ROUND_POWER_OF_TWO(mixed - 1, WEDGE_WEIGHT_BITS);
  }
}
// Reference SSE of the blended residuals: blends r0/r1 with mask m exactly as
// equiv_blend_residuals does (including the "- 1" rounding and the int16_t
// truncation of the weighted sum) and accumulates the squares over N pixels.
static uint64_t equiv_sse_from_residuals(const int16_t *r0, const int16_t *r1,
                                         const uint8_t *m, int N) {
  uint64_t sse = 0;
  for (int px = 0; px < N; ++px) {
    const int32_t w0 = m[px];
    const int32_t w1 = MAX_MASK_VALUE - w0;
    // Truncation to int16_t is intentional; it mirrors the blend above.
    const int16_t mixed = w0 * r0[px] + w1 * r1[px];
    const int32_t rounded = ROUND_POWER_OF_TWO(mixed - 1, WEDGE_WEIGHT_BITS);
    sse += rounded * rounded;
  }
  return sse;
}
// Checks that blending residuals (equiv_blend_residuals) is equivalent to
// blending the predictors with vpx_blend_mask6 and then taking residuals, and
// that equiv_sse_from_residuals matches the SSE of those blended residuals.
TEST_F(WedgeUtilsSSEFuncTest, ResidualBlendingEquiv) {
  for (int iter = 0; iter < 1000 && !HasFatalFailure(); ++iter) {
    uint8_t s[MAX_SB_SQUARE];
    uint8_t p0[MAX_SB_SQUARE];
    uint8_t p1[MAX_SB_SQUARE];
    uint8_t p[MAX_SB_SQUARE];
    int16_t r0[MAX_SB_SQUARE];
    int16_t r1[MAX_SB_SQUARE];
    int16_t r_ref[MAX_SB_SQUARE];
    int16_t r_tst[MAX_SB_SQUARE];
    uint8_t m[MAX_SB_SQUARE];

    randomise(s);
    randomise(m, 0, MAX_MASK_VALUE + 1);

    // Random power-of-two block dimensions, 8 up to the superblock size.
    const int w = 1 << randomise.uniform<uint32_t>(3, MAX_SB_SIZE_LOG2);
    const int h = 1 << randomise.uniform<uint32_t>(3, MAX_SB_SIZE_LOG2);
    const int N = w * h;

    // Predictors stay within +/-16 of the source, as real residuals would.
    for (int px = 0; px < N; ++px) {
      p0[px] = clamp(s[px] + randomise.uniform<int>(-16, 17), 0, UINT8_MAX);
      p1[px] = clamp(s[px] + randomise.uniform<int>(-16, 17), 0, UINT8_MAX);
    }

    vpx_blend_mask6(p, w, p0, w, p1, w, m, w, h, w, 0, 0);

    vpx_subtract_block(h, w, r0, w, s, w, p0, w);
    vpx_subtract_block(h, w, r1, w, s, w, p1, w);
    vpx_subtract_block(h, w, r_ref, w, s, w, p, w);

    equiv_blend_residuals(r_tst, r0, r1, m, N);

    ASSERT_TRUE(ArraysEqWithin(r_ref, r_tst, 0, N));

    uint64_t ref_sse = vpx_sum_squares_i16(r_ref, N);
    uint64_t tst_sse = equiv_sse_from_residuals(r0, r1, m, N);

    ASSERT_EQ(ref_sse, tst_sse);
  }
}
// Scaled-domain reference SSE: squares the mask-weighted residual sum without
// any intermediate truncation, then divides once by MAX_MASK_VALUE**2 (i.e.
// 2 * WEDGE_WEIGHT_BITS) with rounding. Used to validate the production
// formulation in vp10_wedge_sse_from_residuals.
static uint64_t sse_from_residuals(const int16_t *r0, const int16_t *r1,
                                   const uint8_t *m, int N) {
  uint64_t sum = 0;
  for (int px = 0; px < N; ++px) {
    const int32_t w0 = m[px];
    const int32_t w1 = MAX_MASK_VALUE - w0;
    const int32_t mixed = w0 * r0[px] + w1 * r1[px];
    sum += mixed * mixed;
  }
  return ROUND_POWER_OF_TWO(sum, 2 * WEDGE_WEIGHT_BITS);
}
// Checks that vp10_wedge_sse_from_residuals, which works from r1 and the
// predictor difference d (with r0 = r1 + d), matches the direct blended-SSE
// reference sse_from_residuals computed from r0 and r1.
TEST_F(WedgeUtilsSSEFuncTest, ResidualBlendingMethod) {
  for (int iter = 0; iter < 1000 && !HasFatalFailure(); ++iter) {
    int16_t r0[MAX_SB_SQUARE];
    int16_t r1[MAX_SB_SQUARE];
    int16_t d[MAX_SB_SQUARE];
    uint8_t m[MAX_SB_SQUARE];

    randomise(r1, 2 * INT8_MIN, 2 * INT8_MAX + 1);
    randomise(d, 2 * INT8_MIN, 2 * INT8_MAX + 1);
    randomise(m, 0, MAX_MASK_VALUE + 1);

    // N is always a positive multiple of 64, as the kernels require.
    const int N = 64 * randomise.uniform<uint32_t>(1, MAX_SB_SQUARE / 64);

    for (int px = 0; px < N; ++px) r0[px] = r1[px] + d[px];

    uint64_t ref_res, tst_res;
    ref_res = sse_from_residuals(r0, r1, m, N);
    tst_res = vp10_wedge_sse_from_residuals(r1, d, m, N);
    ASSERT_EQ(ref_res, tst_res);
  }
}
//////////////////////////////////////////////////////////////////////////////
// vp10_wedge_sse_from_residuals - optimizations
//////////////////////////////////////////////////////////////////////////////
// Signature shared by vp10_wedge_sse_from_residuals and its SIMD variants.
typedef uint64_t (*FSSE)(const int16_t *r1, const int16_t *d, const uint8_t *m,
                         int N);
// Equivalence harness for vp10_wedge_sse_from_residuals optimisations:
// runs reference and optimised functions on the same inputs, compares the
// results, and verifies that neither call modified its input arrays.
class WedgeUtilsSSEOptTest : public FunctionEquivalenceTest<FSSE> {
 protected:
  void Common() {
    const int N = 64 * randomise.uniform<uint32_t>(1, MAX_SB_SQUARE / 64);

    snapshot(r1);
    snapshot(d);
    snapshot(m);

    uint64_t ref_res, tst_res;
    ref_res = ref_func_(r1, d, m, N);
    ASM_REGISTER_STATE_CHECK(tst_res = tst_func_(r1, d, m, N));
    ASSERT_EQ(ref_res, tst_res);

    // Inputs must be left untouched by both implementations.
    ASSERT_TRUE(ArraysEq(snapshot.get(r1), r1));
    ASSERT_TRUE(ArraysEq(snapshot.get(d), d));
    ASSERT_TRUE(ArraysEq(snapshot.get(m), m));
  }

  Snapshot snapshot;
  Randomise randomise;

  DECLARE_ALIGNED(16, int16_t, r1[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(16, int16_t, d[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(16, uint8_t, m[MAX_SB_SQUARE]);
};
// Random residuals/differences in the signed 13-bit range, random mask.
TEST_P(WedgeUtilsSSEOptTest, RandomValues) {
  for (int iter = 0; iter < 10000 && !HasFatalFailure(); ++iter) {
    randomise(r1, -int13_max, int13_max + 1);
    randomise(d, -int13_max, int13_max + 1);
    randomise(m, 0, 65);
    Common();
  }
}
// Saturate every input at +/- the 13-bit extreme, with the mask at its
// maximum, to probe overflow behaviour of the optimised kernels.
TEST_P(WedgeUtilsSSEOptTest, ExtremeValues) {
  for (int iter = 0; iter < 10000 && !HasFatalFailure(); ++iter) {
    if (randomise.uniform<bool>())
      arraySet(r1, int13_max);
    else
      arraySet(r1, -int13_max);

    if (randomise.uniform<bool>())
      arraySet(d, int13_max);
    else
      arraySet(d, -int13_max);

    arraySet(m, MAX_MASK_VALUE);

    Common();
  }
}
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(
    SSE2, WedgeUtilsSSEOptTest,
    ::testing::Values(make_tuple(&vp10_wedge_sse_from_residuals_c,
                                 &vp10_wedge_sse_from_residuals_sse2)));
#endif  // HAVE_SSE2
//////////////////////////////////////////////////////////////////////////////
// vp10_wedge_sign_from_residuals
//////////////////////////////////////////////////////////////////////////////
// Signature shared by vp10_wedge_sign_from_residuals and its SIMD variants.
typedef int (*FSign)(const int16_t *ds, const uint8_t *m, int N,
                     int64_t limit);
// Equivalence harness for vp10_wedge_sign_from_residuals optimisations. It
// derives 'ds' (saturated r0**2 - r1**2) and the decision 'limit' from two
// residual vectors, runs both implementations, and checks results and input
// immutability.
class WedgeUtilsSignOptTest : public FunctionEquivalenceTest<FSign> {
 protected:
  static const int maxSize = 8196;  // Size limited by SIMD implementation.

  void Common() {
    const int maxN = VPXMIN(maxSize, MAX_SB_SQUARE);
    const int N = 64 * randomise.uniform<uint32_t>(1, maxN / 64);

    // limit = MAX_MASK_VALUE/2 * (sum(r0**2) - sum(r1**2)), matching the
    // right-hand side of the sign decision documented in wedge_utils.c.
    int64_t limit;
    limit = (int64_t)vpx_sum_squares_i16(r0, N);
    limit -= (int64_t)vpx_sum_squares_i16(r1, N);
    limit *= (1 << WEDGE_WEIGHT_BITS) / 2;

    // ds holds r0**2 - r1**2, saturated to int16_t as the kernel expects.
    for (int px = 0; px < N; ++px)
      ds[px] = clamp(r0[px] * r0[px] - r1[px] * r1[px], INT16_MIN, INT16_MAX);

    snapshot(r0);
    snapshot(r1);
    snapshot(ds);
    snapshot(m);

    int ref_res, tst_res;
    ref_res = ref_func_(ds, m, N, limit);
    ASM_REGISTER_STATE_CHECK(tst_res = tst_func_(ds, m, N, limit));
    ASSERT_EQ(ref_res, tst_res);

    // Inputs must be left untouched by both implementations.
    ASSERT_TRUE(ArraysEq(snapshot.get(r0), r0));
    ASSERT_TRUE(ArraysEq(snapshot.get(r1), r1));
    ASSERT_TRUE(ArraysEq(snapshot.get(ds), ds));
    ASSERT_TRUE(ArraysEq(snapshot.get(m), m));
  }

  Snapshot snapshot;
  Randomise randomise;

  DECLARE_ALIGNED(16, int16_t, r0[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(16, int16_t, r1[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(16, int16_t, ds[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(16, uint8_t, m[MAX_SB_SQUARE]);
};
// Random residuals in the signed 13-bit range, random mask.
TEST_P(WedgeUtilsSignOptTest, RandomValues) {
  for (int iter = 0; iter < 10000 && !HasFatalFailure(); ++iter) {
    randomise(r0, -int13_max, int13_max + 1);
    randomise(r1, -int13_max, int13_max + 1);
    randomise(m, 0, MAX_MASK_VALUE + 1);
    Common();
  }
}
// Drive ds to its extremes: one residual vector is all zero while the other
// saturates at +/-int13_max, with the mask at its maximum.
TEST_P(WedgeUtilsSignOptTest, ExtremeValues) {
  for (int iter = 0; iter < 10000 && !HasFatalFailure(); ++iter) {
    switch (randomise.uniform<int>(4)) {
      case 0:
        arraySet(r0, 0);
        arraySet(r1, int13_max);
        break;
      case 1:
        arraySet(r0, int13_max);
        arraySet(r1, 0);
        break;
      case 2:
        arraySet(r0, 0);
        arraySet(r1, -int13_max);
        break;
      default:
        arraySet(r0, -int13_max);
        arraySet(r1, 0);
        break;
    }

    arraySet(m, MAX_MASK_VALUE);

    Common();
  }
}
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(
    SSE2, WedgeUtilsSignOptTest,
    ::testing::Values(make_tuple(&vp10_wedge_sign_from_residuals_c,
                                 &vp10_wedge_sign_from_residuals_sse2)));
#endif  // HAVE_SSE2
//////////////////////////////////////////////////////////////////////////////
// vp10_wedge_compute_delta_squares
//////////////////////////////////////////////////////////////////////////////
// Signature shared by vp10_wedge_compute_delta_squares and its SIMD variants.
typedef void (*FDS)(int16_t *d, const int16_t *a, const int16_t *b, int N);
// Equivalence harness for vp10_wedge_compute_delta_squares optimisations.
// The two output buffers are pre-filled with random noise so that any element
// an implementation fails to write shows up as a mismatch.
class WedgeUtilsDeltaSquaresOptTest : public FunctionEquivalenceTest<FDS> {
 protected:
  void Common() {
    const int N = 64 * randomise.uniform<uint32_t>(1, MAX_SB_SQUARE / 64);

    randomise(d_ref);
    randomise(d_tst);

    snapshot(a);
    snapshot(b);

    ref_func_(d_ref, a, b, N);
    ASM_REGISTER_STATE_CHECK(tst_func_(d_tst, a, b, N));

    ASSERT_TRUE(ArraysEqWithin(d_ref, d_tst, 0, N));

    // Inputs must be left untouched by both implementations.
    ASSERT_TRUE(ArraysEq(snapshot.get(a), a));
    ASSERT_TRUE(ArraysEq(snapshot.get(b), b));
  }

  Snapshot snapshot;
  Randomise randomise;

  DECLARE_ALIGNED(16, int16_t, a[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(16, int16_t, b[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(16, int16_t, d_ref[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(16, int16_t, d_tst[MAX_SB_SQUARE]);
};
// Fully random inputs ('b' avoids INT16_MIN so -b cannot overflow).
TEST_P(WedgeUtilsDeltaSquaresOptTest, RandomValues) {
  for (int iter = 0; iter < 10000 && !HasFatalFailure(); ++iter) {
    randomise(a);
    randomise(b, -INT16_MAX, INT16_MAX + 1);
    Common();
  }
}
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(
    SSE2, WedgeUtilsDeltaSquaresOptTest,
    ::testing::Values(make_tuple(&vp10_wedge_compute_delta_squares_c,
                                 &vp10_wedge_compute_delta_squares_sse2)));
#endif  // HAVE_SSE2
}
// namespace
vp10/common/reconinter.c
View file @
667db87a
...
...
@@ -2440,6 +2440,7 @@ static void build_wedge_inter_predictor_from_buf(MACROBLOCKD *xd, int plane,
int
wedge_offset_x
,
int
wedge_offset_y
,
#endif // CONFIG_SUPERTX
int
mi_x
,
int
mi_y
,
uint8_t
*
ext_dst0
,
int
ext_dst_stride0
,
uint8_t
*
ext_dst1
,
...
...
@@ -2453,6 +2454,8 @@ static void build_wedge_inter_predictor_from_buf(MACROBLOCKD *xd, int plane,
(
void
)
block
;
(
void
)
bw
;
(
void
)
bh
;
(
void
)
mi_x
;
(
void
)
mi_y
;
if
(
is_compound
&&
is_interinter_wedge_used
(
mbmi
->
sb_type
)
...
...
@@ -2516,9 +2519,12 @@ static void build_wedge_inter_predictor_from_buf(MACROBLOCKD *xd, int plane,
void
vp10_build_wedge_inter_predictor_from_buf
(
MACROBLOCKD
*
xd
,
BLOCK_SIZE
bsize
,
int
plane_from
,
int
plane_to
,
int
mi_row
,
int
mi_col
,
uint8_t
*
ext_dst0
[
3
],
int
ext_dst_stride0
[
3
],
uint8_t
*
ext_dst1
[
3
],
int
ext_dst_stride1
[
3
])
{
int
plane
;
const
int
mi_x
=
mi_col
*
MI_SIZE
;
const
int
mi_y
=
mi_row
*
MI_SIZE
;
for
(
plane
=
plane_from
;
plane
<=
plane_to
;
++
plane
)
{
const
BLOCK_SIZE
plane_bsize
=
get_plane_block_size
(
bsize
,
&
xd
->
plane
[
plane
]);
...
...
@@ -2537,6 +2543,7 @@ void vp10_build_wedge_inter_predictor_from_buf(
#if CONFIG_SUPERTX
0
,
0
,
#endif
mi_x
,
mi_y
,
ext_dst0
[
plane
],
ext_dst_stride0
[
plane
],
ext_dst1
[
plane
],
...
...
@@ -2547,6 +2554,7 @@ void vp10_build_wedge_inter_predictor_from_buf(
#if CONFIG_SUPERTX
0
,
0
,
#endif
mi_x
,
mi_y
,
ext_dst0
[
plane
],
ext_dst_stride0
[
plane
],
ext_dst1
[
plane
],
...
...
vp10/common/reconinter.h
View file @
667db87a
...
...
@@ -646,6 +646,7 @@ void vp10_build_inter_predictors_for_planes_single_buf(
void
vp10_build_wedge_inter_predictor_from_buf
(
MACROBLOCKD
*
xd
,
BLOCK_SIZE
bsize
,
int
plane_from
,
int
plane_to
,
int
mi_row
,
int
mi_col
,
uint8_t
*
ext_dst0
[
3
],
int
ext_dst_stride0
[
3
],
uint8_t
*
ext_dst1
[
3
],
int
ext_dst_stride1
[
3
]);
#endif // CONFIG_EXT_INTER
...
...
vp10/common/vp10_rtcd_defs.pl
View file @
667db87a
...
...
@@ -690,15 +690,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
}
# End vp10_high encoder functions
if
(
vpx_config
("
CONFIG_EXT_INTER
")
eq
"
yes
")
{
add_proto
qw/uint64_t vp10_wedge_sse_from_residuals/
,
"
const int16_t *r1, const int16_t *d, const uint8_t *m, int N
";
specialize
qw/vp10_wedge_sse_from_residuals sse2/
;
add_proto
qw/int vp10_wedge_sign_from_residuals/
,
"
const int16_t *ds, const uint8_t *m, int N, int64_t limit
";
specialize
qw/vp10_wedge_sign_from_residuals sse2/
;
add_proto
qw/void vp10_wedge_compute_delta_squares/
,
"
int16_t *d, const int16_t *a, const int16_t *b, int N
";
specialize
qw/vp10_wedge_compute_delta_squares sse2/
;
}
}
# end encoder functions
1
;
vp10/encoder/rdopt.c
View file @
667db87a
This diff is collapsed.
Click to expand it.
vp10/encoder/wedge_utils.c
deleted
100644 → 0
View file @
9d924a0c
/*
* Copyright (c) 2016 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <assert.h>
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vp10/common/reconinter.h"
#define MAX_MASK_VALUE (1 << WEDGE_WEIGHT_BITS)
/**
* Computes SSE of a compound predictor constructed from 2 fundamental
* predictors p0 and p1 using blending with mask.
*
* r1: Residuals of p1.
* (source - p1)
* d: Difference of p1 and p0.
* (p1 - p0)
* m: The blending mask
* N: Number of pixels
*
* 'r1', 'd', and 'm' are contiguous.
*
* Computes:
* Sum((MAX_MASK_VALUE*r1 + mask*d)**2), which is equivalent to:
* Sum((mask*r0 + (MAX_MASK_VALUE-mask)*r1)**2),
* where r0 is (source - p0), and r1 is (source - p1), which is in turn
* is equivalent to:
* Sum((source*MAX_MASK_VALUE - (mask*p0 + (MAX_MASK_VALUE-mask)*p1))**2),
* which is the SSE of the residuals of the compound predictor scaled up by
* MAX_MASK_VALUE**2.
*
* Note that we clamp the partial term in the loop to 16 bits signed. This is
* to facilitate equivalent SIMD implementation. It should have no effect if
* residuals are within 16 - WEDGE_WEIGHT_BITS (=10) signed, which always
* holds for 8 bit input, and on real input, it should hold practically always,
* as residuals are expected to be small.
*/
uint64_t vp10_wedge_sse_from_residuals_c(const int16_t *r1, const int16_t *d,
                                         const uint8_t *m, int N) {
  uint64_t csse = 0;
  int i;

  // SIMD variants process 64 elements at a time; the C version keeps the
  // same contract.
  assert(N % 64 == 0);

  for (i = 0; i < N; ++i) {
    // Weighted term in the scaled (x MAX_MASK_VALUE) domain; clamped to
    // 16 bits signed to stay equivalent to the SIMD implementations (see
    // the function comment above).
    int32_t t = MAX_MASK_VALUE * r1[i] + m[i] * d[i];
    t = clamp(t, INT16_MIN, INT16_MAX);
    csse += t * t;
  }

  // Undo the MAX_MASK_VALUE**2 scaling with rounding.
  return ROUND_POWER_OF_TWO(csse, 2 * WEDGE_WEIGHT_BITS);
}
/**
* Choose the mask sign for a compound predictor.
*
* ds: Difference of the squares of the residuals.
* r0**2 - r1**2
* m: The blending mask
* N: Number of pixels
* limit: Pre-computed threshold value.
* MAX_MASK_VALUE/2 * (sum(r0**2) - sum(r1**2))
*
* 'ds' and 'm' are contiguous.
*
* Returns true if the negated mask has lower SSE compared to the positive
* mask. Computation is based on:
* Sum((mask*r0 + (MAX_MASK_VALUE-mask)*r1)**2)
* >
* Sum(((MAX_MASK_VALUE-mask)*r0 + mask*r1)**2)
*
* which can be simplified to:
*
* Sum(mask*(r0**2 - r1**2)) > MAX_MASK_VALUE/2 * (sum(r0**2) - sum(r1**2))
*
* The right hand side does not depend on the mask, and needs to be passed as
* the 'limit' parameter.
*
* After pre-computing (r0**2 - r1**2), which is passed in as 'ds', the left
* hand side is simply a scalar product between an int16_t and uint8_t vector.
*
* Note that for efficiency, ds is stored on 16 bits. Real input residuals
* being small, this should not cause a noticeable issue.
*/
int vp10_wedge_sign_from_residuals_c(const int16_t *ds, const uint8_t *m,
                                     int N, int64_t limit) {
  int64_t acc = 0;

  // SIMD variants process 64 elements at a time; N is also assumed > 0,
  // since the loop below tests its condition after the first iteration.
  assert(N % 64 == 0);

  // Scalar product of the squared-residual differences and the mask.
  do {
    acc += *ds++ * *m++;
  } while (--N);

  // True when the negated mask gives the lower SSE (see comment above).
  return acc > limit;
}
/**
* Compute the element-wise difference of the squares of 2 arrays.
*
* d: Difference of the squares of the inputs: a**2 - b**2
* a: First input array
* b: Second input array
* N: Number of elements
*
* 'd', 'a', and 'b' are contiguous.
*
* The result is saturated to signed 16 bits.
*/
void
vp10_wedge_compute_delta_squares_c
(
int16_t
*
d
,
const
int16_t
*
a
,
const
int16_t
*
b
,
int
N
)
{
int
i
;
assert
(
N
%
64
==
0
);
for
(
i
=
0
;
i
<
N
;
i
++
)
d
[
i
]
=
clamp
(
a
[
i
]
*
a
[
i
]
-
b
[
i