Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
Xiph.Org
aom-rav1e
Commits
63e49be3
Commit
63e49be3
authored
Sep 29, 2014
by
Deb Mukherjee
Committed by
Gerrit Code Review
Sep 29, 2014
Browse files
Options
Browse Files
Download
Plain Diff
Merge "Adds two new subpel search methods"
parents
e6479c8c
4e9c0d2a
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
191 additions
and
44 deletions
+191
-44
vp9/encoder/vp9_mcomp.c
vp9/encoder/vp9_mcomp.c
+183
-44
vp9/encoder/vp9_mcomp.h
vp9/encoder/vp9_mcomp.h
+2
-0
vp9/encoder/vp9_speed_features.c
vp9/encoder/vp9_speed_features.c
+4
-0
vp9/encoder/vp9_speed_features.h
vp9/encoder/vp9_speed_features.h
+2
-0
No files found.
vp9/encoder/vp9_mcomp.c
View file @
63e49be3
...
...
@@ -286,7 +286,71 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
bestmv->row *= 8; \
bestmv->col *= 8;
int
vp9_find_best_sub_pixel_tree_pruned
(
const
MACROBLOCK
*
x
,
#if CONFIG_VP9_HIGHBITDEPTH
#define SETUP_CENTER_ERROR \
if (second_pred != NULL) { \
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { \
DECLARE_ALIGNED_ARRAY(16, uint16_t, comp_pred16, 64 * 64); \
vp9_high_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset, \
y_stride); \
besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, z, src_stride, \
sse1); \
} else { \
DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); \
vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); \
besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); \
} \
} else { \
besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1); \
} \
*distortion = besterr; \
besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
#else
#define SETUP_CENTER_ERROR \
if (second_pred != NULL) { \
DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); \
vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); \
besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); \
} else { \
besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1); \
} \
*distortion = besterr; \
besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
#endif // CONFIG_VP9_HIGHBITDEPTH
static
INLINE
int
divide_and_round
(
const
int
n
,
const
int
d
)
{
return
((
n
<
0
)
^
(
d
<
0
))
?
((
n
-
d
/
2
)
/
d
)
:
((
n
+
d
/
2
)
/
d
);
}
static
INLINE
int
is_sad_list_wellbehaved
(
int
*
sad_list
)
{
return
sad_list
[
0
]
>=
sad_list
[
1
]
&&
sad_list
[
0
]
>=
sad_list
[
2
]
&&
sad_list
[
0
]
>=
sad_list
[
3
]
&&
sad_list
[
0
]
>=
sad_list
[
4
];
}
// Returns surface minima estimate at given precision in 1/2^n bits.
// Assume a model for the cost surface: S = A(x - x0)^2 + B(y - y0)^2 + C
// For a given set of costs S0, S1, S2, S3, S4 at points
// (y, x) = (0, 0), (0, -1), (1, 0), (0, 1) and (-1, 0) respectively,
// the solution for the location of the minima (x0, y0) is given by:
// x0 = 1/2 (S1 - S3)/(S1 + S3 - 2*S0),
// y0 = 1/2 (S4 - S2)/(S4 + S2 - 2*S0).
// The code below is an integerized version of that.
static
void
get_cost_surf_min
(
int
*
sad_list
,
int
*
ir
,
int
*
ic
,
int
bits
)
{
*
ic
=
divide_and_round
((
sad_list
[
1
]
-
sad_list
[
3
])
*
(
1
<<
(
bits
-
1
)),
(
sad_list
[
1
]
-
2
*
sad_list
[
0
]
+
sad_list
[
3
]));
*
ir
=
divide_and_round
((
sad_list
[
4
]
-
sad_list
[
2
])
*
(
1
<<
(
bits
-
1
)),
(
sad_list
[
4
]
-
2
*
sad_list
[
0
]
+
sad_list
[
2
]));
}
int
vp9_find_best_sub_pixel_surface_fit
(
const
MACROBLOCK
*
x
,
MV
*
bestmv
,
const
MV
*
ref_mv
,
int
allow_hp
,
int
error_per_bit
,
...
...
@@ -300,30 +364,127 @@ int vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x,
const
uint8_t
*
second_pred
,
int
w
,
int
h
)
{
SETUP_SUBPEL_SEARCH
;
if
(
second_pred
!=
NULL
)
{
#if CONFIG_VP9_HIGHBITDEPTH
if
(
xd
->
cur_buf
->
flags
&
YV12_FLAG_HIGHBITDEPTH
)
{
DECLARE_ALIGNED_ARRAY
(
16
,
uint16_t
,
comp_pred16
,
64
*
64
);
vp9_high_comp_avg_pred
(
comp_pred16
,
second_pred
,
w
,
h
,
y
+
offset
,
y_stride
);
besterr
=
vfp
->
vf
(
CONVERT_TO_BYTEPTR
(
comp_pred16
),
w
,
z
,
src_stride
,
sse1
);
}
else
{
DECLARE_ALIGNED_ARRAY
(
16
,
uint8_t
,
comp_pred
,
64
*
64
);
vp9_comp_avg_pred
(
comp_pred
,
second_pred
,
w
,
h
,
y
+
offset
,
y_stride
);
besterr
=
vfp
->
vf
(
comp_pred
,
w
,
z
,
src_stride
,
sse1
);
SETUP_CENTER_ERROR
;
(
void
)
halfiters
;
(
void
)
quarteriters
;
(
void
)
eighthiters
;
(
void
)
whichdir
;
(
void
)
allow_hp
;
(
void
)
forced_stop
;
(
void
)
hstep
;
if
(
sad_list
&&
sad_list
[
0
]
!=
INT_MAX
&&
sad_list
[
1
]
!=
INT_MAX
&&
sad_list
[
2
]
!=
INT_MAX
&&
sad_list
[
3
]
!=
INT_MAX
&&
sad_list
[
4
]
!=
INT_MAX
&&
is_sad_list_wellbehaved
(
sad_list
))
{
int
ir
,
ic
;
unsigned
int
minpt
;
get_cost_surf_min
(
sad_list
,
&
ir
,
&
ic
,
3
);
if
(
ir
!=
0
||
ic
!=
0
)
{
CHECK_BETTER
(
minpt
,
tr
+
ir
,
tc
+
ic
);
}
}
bestmv
->
row
=
br
;
bestmv
->
col
=
bc
;
if
((
abs
(
bestmv
->
col
-
ref_mv
->
col
)
>
(
MAX_FULL_PEL_VAL
<<
3
))
||
(
abs
(
bestmv
->
row
-
ref_mv
->
row
)
>
(
MAX_FULL_PEL_VAL
<<
3
)))
return
INT_MAX
;
return
besterr
;
}
int
vp9_find_best_sub_pixel_tree_pruned_more
(
const
MACROBLOCK
*
x
,
MV
*
bestmv
,
const
MV
*
ref_mv
,
int
allow_hp
,
int
error_per_bit
,
const
vp9_variance_fn_ptr_t
*
vfp
,
int
forced_stop
,
int
iters_per_step
,
int
*
sad_list
,
int
*
mvjcost
,
int
*
mvcost
[
2
],
int
*
distortion
,
unsigned
int
*
sse1
,
const
uint8_t
*
second_pred
,
int
w
,
int
h
)
{
SETUP_SUBPEL_SEARCH
;
SETUP_CENTER_ERROR
;
if
(
sad_list
&&
sad_list
[
0
]
!=
INT_MAX
&&
sad_list
[
1
]
!=
INT_MAX
&&
sad_list
[
2
]
!=
INT_MAX
&&
sad_list
[
3
]
!=
INT_MAX
&&
sad_list
[
4
]
!=
INT_MAX
&&
is_sad_list_wellbehaved
(
sad_list
))
{
unsigned
int
minpt
;
int
ir
,
ic
;
get_cost_surf_min
(
sad_list
,
&
ir
,
&
ic
,
1
);
if
(
ir
!=
0
||
ic
!=
0
)
{
CHECK_BETTER
(
minpt
,
tr
+
ir
*
hstep
,
tc
+
ic
*
hstep
);
}
#else
DECLARE_ALIGNED_ARRAY
(
16
,
uint8_t
,
comp_pred
,
64
*
64
);
vp9_comp_avg_pred
(
comp_pred
,
second_pred
,
w
,
h
,
y
+
offset
,
y_stride
);
besterr
=
vfp
->
vf
(
comp_pred
,
w
,
z
,
src_stride
,
sse1
);
#endif // CONFIG_VP9_HIGHBITDEPTH
}
else
{
besterr
=
vfp
->
vf
(
y
+
offset
,
y_stride
,
z
,
src_stride
,
sse1
);
FIRST_LEVEL_CHECKS
;
if
(
halfiters
>
1
)
{
SECOND_LEVEL_CHECKS
;
}
}
*
distortion
=
besterr
;
besterr
+=
mv_err_cost
(
bestmv
,
ref_mv
,
mvjcost
,
mvcost
,
error_per_bit
);
tr
=
br
;
tc
=
bc
;
// Each subsequent iteration checks at least one point in common with
// the last iteration could be 2 ( if diag selected) 1/4 pel
// Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
if
(
forced_stop
!=
2
)
{
hstep
>>=
1
;
FIRST_LEVEL_CHECKS
;
if
(
quarteriters
>
1
)
{
SECOND_LEVEL_CHECKS
;
}
tr
=
br
;
tc
=
bc
;
}
if
(
allow_hp
&&
vp9_use_mv_hp
(
ref_mv
)
&&
forced_stop
==
0
)
{
hstep
>>=
1
;
FIRST_LEVEL_CHECKS
;
if
(
eighthiters
>
1
)
{
SECOND_LEVEL_CHECKS
;
}
tr
=
br
;
tc
=
bc
;
}
// These lines insure static analysis doesn't warn that
// tr and tc aren't used after the above point.
(
void
)
tr
;
(
void
)
tc
;
bestmv
->
row
=
br
;
bestmv
->
col
=
bc
;
if
((
abs
(
bestmv
->
col
-
ref_mv
->
col
)
>
(
MAX_FULL_PEL_VAL
<<
3
))
||
(
abs
(
bestmv
->
row
-
ref_mv
->
row
)
>
(
MAX_FULL_PEL_VAL
<<
3
)))
return
INT_MAX
;
return
besterr
;
}
int
vp9_find_best_sub_pixel_tree_pruned
(
const
MACROBLOCK
*
x
,
MV
*
bestmv
,
const
MV
*
ref_mv
,
int
allow_hp
,
int
error_per_bit
,
const
vp9_variance_fn_ptr_t
*
vfp
,
int
forced_stop
,
int
iters_per_step
,
int
*
sad_list
,
int
*
mvjcost
,
int
*
mvcost
[
2
],
int
*
distortion
,
unsigned
int
*
sse1
,
const
uint8_t
*
second_pred
,
int
w
,
int
h
)
{
SETUP_SUBPEL_SEARCH
;
SETUP_CENTER_ERROR
;
if
(
sad_list
&&
sad_list
[
0
]
!=
INT_MAX
&&
sad_list
[
1
]
!=
INT_MAX
&&
sad_list
[
2
]
!=
INT_MAX
&&
sad_list
[
3
]
!=
INT_MAX
&&
...
...
@@ -415,29 +576,7 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
const
uint8_t
*
second_pred
,
int
w
,
int
h
)
{
SETUP_SUBPEL_SEARCH
;
if
(
second_pred
!=
NULL
)
{
#if CONFIG_VP9_HIGHBITDEPTH
if
(
xd
->
cur_buf
->
flags
&
YV12_FLAG_HIGHBITDEPTH
)
{
DECLARE_ALIGNED_ARRAY
(
16
,
uint16_t
,
comp_pred16
,
64
*
64
);
vp9_high_comp_avg_pred
(
comp_pred16
,
second_pred
,
w
,
h
,
y
+
offset
,
y_stride
);
besterr
=
vfp
->
vf
(
CONVERT_TO_BYTEPTR
(
comp_pred16
),
w
,
z
,
src_stride
,
sse1
);
}
else
{
DECLARE_ALIGNED_ARRAY
(
16
,
uint8_t
,
comp_pred
,
64
*
64
);
vp9_comp_avg_pred
(
comp_pred
,
second_pred
,
w
,
h
,
y
+
offset
,
y_stride
);
besterr
=
vfp
->
vf
(
comp_pred
,
w
,
z
,
src_stride
,
sse1
);
}
#else
DECLARE_ALIGNED_ARRAY
(
16
,
uint8_t
,
comp_pred
,
64
*
64
);
vp9_comp_avg_pred
(
comp_pred
,
second_pred
,
w
,
h
,
y
+
offset
,
y_stride
);
besterr
=
vfp
->
vf
(
comp_pred
,
w
,
z
,
src_stride
,
sse1
);
#endif // CONFIG_VP9_HIGHBITDEPTH
}
else
{
besterr
=
vfp
->
vf
(
y
+
offset
,
y_stride
,
z
,
src_stride
,
sse1
);
}
*
distortion
=
besterr
;
besterr
+=
mv_err_cost
(
bestmv
,
ref_mv
,
mvjcost
,
mvcost
,
error_per_bit
);
SETUP_CENTER_ERROR
;
(
void
)
sad_list
;
// to silence compiler warning
// Each subsequent iteration checks at least one point in
...
...
vp9/encoder/vp9_mcomp.h
View file @
63e49be3
...
...
@@ -108,6 +108,8 @@ typedef int (fractional_mv_step_fp) (
extern
fractional_mv_step_fp
vp9_find_best_sub_pixel_tree
;
extern
fractional_mv_step_fp
vp9_find_best_sub_pixel_tree_pruned
;
extern
fractional_mv_step_fp
vp9_find_best_sub_pixel_tree_pruned_more
;
extern
fractional_mv_step_fp
vp9_find_best_sub_pixel_surface_fit
;
typedef
int
(
*
vp9_full_search_fn_t
)(
const
MACROBLOCK
*
x
,
const
MV
*
ref_mv
,
int
sad_per_bit
,
...
...
vp9/encoder/vp9_speed_features.c
View file @
63e49be3
...
...
@@ -421,6 +421,10 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
cpi
->
find_fractional_mv_step
=
vp9_find_best_sub_pixel_tree
;
}
else
if
(
sf
->
mv
.
subpel_search_method
==
SUBPEL_TREE_PRUNED
)
{
cpi
->
find_fractional_mv_step
=
vp9_find_best_sub_pixel_tree_pruned
;
}
else
if
(
sf
->
mv
.
subpel_search_method
==
SUBPEL_TREE_PRUNED_MORE
)
{
cpi
->
find_fractional_mv_step
=
vp9_find_best_sub_pixel_tree_pruned_more
;
}
else
if
(
sf
->
mv
.
subpel_search_method
==
SUBPEL_SURFACE_FIT
)
{
cpi
->
find_fractional_mv_step
=
vp9_find_best_sub_pixel_surface_fit
;
}
cpi
->
mb
.
optimize
=
sf
->
optimize_coefficients
==
1
&&
oxcf
->
pass
!=
1
;
...
...
vp9/encoder/vp9_speed_features.h
View file @
63e49be3
...
...
@@ -79,6 +79,8 @@ typedef enum {
typedef
enum
{
SUBPEL_TREE
=
0
,
SUBPEL_TREE_PRUNED
=
1
,
SUBPEL_TREE_PRUNED_MORE
=
2
,
SUBPEL_SURFACE_FIT
=
3
,
// Other methods to come
}
SUBPEL_SEARCH_METHODS
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment