Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Xiph.Org
aom-rav1e
Commits
87aec58f
Commit
87aec58f
authored
Jul 06, 2016
by
Yue Chen
Committed by
Gerrit Code Review
Jul 06, 2016
Browse files
Merge "Refactoring in preparation for OBMC optimizations." into nextgenv2
parents
4a187713
007aa7dd
Changes
9
Hide whitespace changes
Inline
Side-by-side
vp10/encoder/encoder.c
View file @
87aec58f
...
...
@@ -1228,19 +1228,16 @@ MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad4x4)
#define MAKE_OBFP_SAD_WRAPPER(fnname) \
static unsigned int fnname##_bits8(const uint8_t *ref, int ref_stride, \
const int *wsrc, int wsrc_stride, \
const int *msk, int msk_stride) { \
return fnname(ref, ref_stride, wsrc, wsrc_stride, msk, msk_stride); \
const int32_t *wsrc, const int32_t *msk) { \
return fnname(ref, ref_stride, wsrc, msk); \
} \
static unsigned int fnname##_bits10(const uint8_t *ref, int ref_stride, \
const int *wsrc, int wsrc_stride, \
const int *msk, int msk_stride) { \
return fnname(ref, ref_stride, wsrc, wsrc_stride, msk, msk_stride) >> 2; \
const int32_t *wsrc, const int32_t *msk) { \
return fnname(ref, ref_stride, wsrc, msk) >> 2; \
} \
static unsigned int fnname##_bits12(const uint8_t *ref, int ref_stride, \
const int *wsrc, int wsrc_stride, \
const int *msk, int msk_stride) { \
return fnname(ref, ref_stride, wsrc, wsrc_stride, msk, msk_stride) >> 4; \
const int32_t *wsrc, const int32_t *msk) { \
return fnname(ref, ref_stride, wsrc, msk) >> 4; \
}
#if CONFIG_EXT_PARTITION
...
...
vp10/encoder/mcomp.c
View file @
87aec58f
...
...
@@ -3104,8 +3104,7 @@ int vp10_masked_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x,
#if CONFIG_OBMC
/* returns subpixel variance error function */
#define DIST(r, c) \
vfp->osvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
src_stride, mask, mask_stride, &sse)
vfp->osvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, mask, &sse)
/* checks if (r, c) has better score than previous best */
#define MVC(r, c) \
...
...
@@ -3135,8 +3134,8 @@ int vp10_masked_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x,
#define CHECK_BETTER1(v, r, c) \
if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
thismse = upsampled_obmc_pref_error(xd, \
mask,
mask_stride,
\
vfp, z,
src_stride,
\
mask,
\
vfp, z,
\
upre(y, y_stride, r, c), \
y_stride, \
w, h, &sse); \
...
...
@@ -3151,14 +3150,12 @@ int vp10_masked_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x,
v = INT_MAX; \
}
static
unsigned
int
setup_obmc_center_error
(
const
int
*
mask
,
int
mask_stride
,
static
unsigned
int
setup_obmc_center_error
(
const
int32_t
*
mask
,
const
MV
*
bestmv
,
const
MV
*
ref_mv
,
int
error_per_bit
,
const
vp10_variance_fn_ptr_t
*
vfp
,
const
int
*
const
wsrc
,
const
int
wsrc_stride
,
const
int32_t
*
const
wsrc
,
const
uint8_t
*
const
y
,
int
y_stride
,
int
offset
,
...
...
@@ -3166,18 +3163,16 @@ static unsigned int setup_obmc_center_error(const int *mask,
unsigned
int
*
sse1
,
int
*
distortion
)
{
unsigned
int
besterr
;
besterr
=
vfp
->
ovf
(
y
+
offset
,
y_stride
,
wsrc
,
wsrc_stride
,
mask
,
mask_stride
,
sse1
);
besterr
=
vfp
->
ovf
(
y
+
offset
,
y_stride
,
wsrc
,
mask
,
sse1
);
*
distortion
=
besterr
;
besterr
+=
mv_err_cost
(
bestmv
,
ref_mv
,
mvjcost
,
mvcost
,
error_per_bit
);
return
besterr
;
}
static
int
upsampled_obmc_pref_error
(
const
MACROBLOCKD
*
xd
,
const
int
*
mask
,
int
mask_stride
,
const
int
32_t
*
mask
,
const
vp10_variance_fn_ptr_t
*
vfp
,
const
int
*
const
wsrc
,
const
int
wsrc_stride
,
const
int32_t
*
const
wsrc
,
const
uint8_t
*
const
y
,
int
y_stride
,
int
w
,
int
h
,
unsigned
int
*
sse
)
{
unsigned
int
besterr
;
...
...
@@ -3186,8 +3181,7 @@ static int upsampled_obmc_pref_error(const MACROBLOCKD *xd,
DECLARE_ALIGNED
(
16
,
uint16_t
,
pred16
[
MAX_SB_SQUARE
]);
vpx_highbd_upsampled_pred
(
pred16
,
w
,
h
,
y
,
y_stride
);
besterr
=
vfp
->
ovf
(
CONVERT_TO_BYTEPTR
(
pred16
),
w
,
wsrc
,
wsrc_stride
,
mask
,
mask_stride
,
sse
);
besterr
=
vfp
->
ovf
(
CONVERT_TO_BYTEPTR
(
pred16
),
w
,
wsrc
,
mask
,
sse
);
}
else
{
DECLARE_ALIGNED
(
16
,
uint8_t
,
pred
[
MAX_SB_SQUARE
]);
#else
...
...
@@ -3196,7 +3190,7 @@ static int upsampled_obmc_pref_error(const MACROBLOCKD *xd,
#endif // CONFIG_VP9_HIGHBITDEPTH
vpx_upsampled_pred
(
pred
,
w
,
h
,
y
,
y_stride
);
besterr
=
vfp
->
ovf
(
pred
,
w
,
wsrc
,
wsrc_stride
,
mask
,
mask_stride
,
sse
);
besterr
=
vfp
->
ovf
(
pred
,
w
,
wsrc
,
mask
,
sse
);
#if CONFIG_VP9_HIGHBITDEPTH
}
#endif
...
...
@@ -3205,15 +3199,14 @@ static int upsampled_obmc_pref_error(const MACROBLOCKD *xd,
static
unsigned
int
upsampled_setup_obmc_center_error
(
const
MACROBLOCKD
*
xd
,
const
int
*
mask
,
int
mask_stride
,
const
int
32_t
*
mask
,
const
MV
*
bestmv
,
const
MV
*
ref_mv
,
int
error_per_bit
,
const
vp10_variance_fn_ptr_t
*
vfp
,
const
int
*
const
wsrc
,
const
int
wsrc_stride
,
const
int
32_t
*
const
wsrc
,
const
uint8_t
*
const
y
,
int
y_stride
,
int
w
,
int
h
,
int
offset
,
int
*
mvjcost
,
int
*
mvcost
[
2
],
unsigned
int
*
sse1
,
int
*
distortion
)
{
unsigned
int
besterr
=
upsampled_obmc_pref_error
(
xd
,
mask
,
mask_stride
,
vfp
,
wsrc
,
wsrc_stride
,
unsigned
int
besterr
=
upsampled_obmc_pref_error
(
xd
,
mask
,
vfp
,
wsrc
,
y
+
offset
,
y_stride
,
w
,
h
,
sse1
);
*
distortion
=
besterr
;
...
...
@@ -3222,8 +3215,8 @@ static unsigned int upsampled_setup_obmc_center_error(
}
int
vp10_find_best_obmc_sub_pixel_tree_up
(
VP10_COMP
*
cpi
,
MACROBLOCK
*
x
,
const
int
*
wsrc
,
int
wsrc_stride
,
const
int
*
mask
,
int
mask_stride
,
const
int
32_t
*
wsrc
,
const
int
32_t
*
mask
,
int
mi_row
,
int
mi_col
,
MV
*
bestmv
,
const
MV
*
ref_mv
,
int
allow_hp
,
int
error_per_bit
,
...
...
@@ -3235,7 +3228,6 @@ int vp10_find_best_obmc_sub_pixel_tree_up(VP10_COMP *cpi, MACROBLOCK *x,
int
use_upsampled_ref
)
{
const
int
*
const
z
=
wsrc
;
const
int
*
const
src_address
=
z
;
const
int
src_stride
=
wsrc_stride
;
MACROBLOCKD
*
xd
=
&
x
->
e_mbd
;
struct
macroblockd_plane
*
const
pd
=
&
xd
->
plane
[
0
];
MB_MODE_INFO
*
mbmi
=
&
xd
->
mi
[
0
]
->
mbmi
;
...
...
@@ -3287,14 +3279,14 @@ int vp10_find_best_obmc_sub_pixel_tree_up(VP10_COMP *cpi, MACROBLOCK *x,
// use_upsampled_ref can be 0 or 1
if
(
use_upsampled_ref
)
besterr
=
upsampled_setup_obmc_center_error
(
xd
,
mask
,
mask_stride
,
bestmv
,
ref_mv
,
error_per_bit
,
vfp
,
z
,
src_stride
,
y
,
y_stride
,
xd
,
mask
,
bestmv
,
ref_mv
,
error_per_bit
,
vfp
,
z
,
y
,
y_stride
,
w
,
h
,
(
offset
<<
3
),
mvjcost
,
mvcost
,
sse1
,
distortion
);
else
besterr
=
setup_obmc_center_error
(
mask
,
mask_stride
,
bestmv
,
ref_mv
,
error_per_bit
,
vfp
,
z
,
src_stride
,
y
,
y_stride
,
mask
,
bestmv
,
ref_mv
,
error_per_bit
,
vfp
,
z
,
y
,
y_stride
,
offset
,
mvjcost
,
mvcost
,
sse1
,
distortion
);
for
(
iter
=
0
;
iter
<
round
;
++
iter
)
{
...
...
@@ -3308,16 +3300,15 @@ int vp10_find_best_obmc_sub_pixel_tree_up(VP10_COMP *cpi, MACROBLOCK *x,
if
(
use_upsampled_ref
)
{
const
uint8_t
*
const
pre_address
=
y
+
tr
*
y_stride
+
tc
;
thismse
=
upsampled_obmc_pref_error
(
xd
,
mask
,
mask_stride
,
vfp
,
src_address
,
src_stride
,
thismse
=
upsampled_obmc_pref_error
(
xd
,
mask
,
vfp
,
src_address
,
pre_address
,
y_stride
,
w
,
h
,
&
sse
);
}
else
{
const
uint8_t
*
const
pre_address
=
y
+
(
tr
>>
3
)
*
y_stride
+
(
tc
>>
3
);
thismse
=
vfp
->
osvf
(
pre_address
,
y_stride
,
sp
(
tc
),
sp
(
tr
),
src_address
,
src_stride
,
mask
,
mask_stride
,
&
sse
);
src_address
,
mask
,
&
sse
);
}
cost_array
[
idx
]
=
thismse
+
...
...
@@ -3345,15 +3336,14 @@ int vp10_find_best_obmc_sub_pixel_tree_up(VP10_COMP *cpi, MACROBLOCK *x,
if
(
use_upsampled_ref
)
{
const
uint8_t
*
const
pre_address
=
y
+
tr
*
y_stride
+
tc
;
thismse
=
upsampled_obmc_pref_error
(
xd
,
mask
,
mask_stride
,
vfp
,
src_address
,
src_stride
,
thismse
=
upsampled_obmc_pref_error
(
xd
,
mask
,
vfp
,
src_address
,
pre_address
,
y_stride
,
w
,
h
,
&
sse
);
}
else
{
const
uint8_t
*
const
pre_address
=
y
+
(
tr
>>
3
)
*
y_stride
+
(
tc
>>
3
);
thismse
=
vfp
->
osvf
(
pre_address
,
y_stride
,
sp
(
tc
),
sp
(
tr
),
src_address
,
src_stride
,
mask
,
mask_stride
,
&
sse
);
src_address
,
mask
,
&
sse
);
}
cost_array
[
4
]
=
thismse
+
...
...
@@ -3417,8 +3407,8 @@ int vp10_find_best_obmc_sub_pixel_tree_up(VP10_COMP *cpi, MACROBLOCK *x,
#undef CHECK_BETTER
static
int
get_obmc_mvpred_var
(
const
MACROBLOCK
*
x
,
const
int
*
wsrc
,
int
wsrc_stride
,
const
int
*
mask
,
int
mask_stride
,
const
int
32_t
*
wsrc
,
const
int
32_t
*
mask
,
const
MV
*
best_mv
,
const
MV
*
center_mv
,
const
vp10_variance_fn_ptr_t
*
vfp
,
int
use_mvcost
,
int
is_second
)
{
...
...
@@ -3428,14 +3418,14 @@ static int get_obmc_mvpred_var(const MACROBLOCK *x,
unsigned
int
unused
;
return
vfp
->
ovf
(
get_buf_from_mv
(
in_what
,
best_mv
),
in_what
->
stride
,
wsrc
,
wsrc_stride
,
mask
,
mask_stride
,
&
unused
)
+
wsrc
,
mask
,
&
unused
)
+
(
use_mvcost
?
mv_err_cost
(
&
mv
,
center_mv
,
x
->
nmvjointcost
,
x
->
mvcost
,
x
->
errorperbit
)
:
0
);
}
int
obmc_refining_search_sad
(
const
MACROBLOCK
*
x
,
const
int
*
wsrc
,
int
wsrc_stride
,
const
int
*
mask
,
int
mask_stride
,
const
int
32_t
*
wsrc
,
const
int
32_t
*
mask
,
MV
*
ref_mv
,
int
error_per_bit
,
int
search_range
,
const
vp10_variance_fn_ptr_t
*
fn_ptr
,
...
...
@@ -3445,8 +3435,7 @@ int obmc_refining_search_sad(const MACROBLOCK *x,
const
struct
buf_2d
*
const
in_what
=
&
xd
->
plane
[
0
].
pre
[
is_second
];
const
MV
fcenter_mv
=
{
center_mv
->
row
>>
3
,
center_mv
->
col
>>
3
};
unsigned
int
best_sad
=
fn_ptr
->
osdf
(
get_buf_from_mv
(
in_what
,
ref_mv
),
in_what
->
stride
,
wsrc
,
wsrc_stride
,
mask
,
mask_stride
)
+
in_what
->
stride
,
wsrc
,
mask
)
+
mvsad_err_cost
(
x
,
ref_mv
,
&
fcenter_mv
,
error_per_bit
);
int
i
,
j
;
...
...
@@ -3458,8 +3447,7 @@ int obmc_refining_search_sad(const MACROBLOCK *x,
ref_mv
->
col
+
neighbors
[
j
].
col
};
if
(
is_mv_in
(
x
,
&
mv
))
{
unsigned
int
sad
=
fn_ptr
->
osdf
(
get_buf_from_mv
(
in_what
,
&
mv
),
in_what
->
stride
,
wsrc
,
wsrc_stride
,
mask
,
mask_stride
);
in_what
->
stride
,
wsrc
,
mask
);
if
(
sad
<
best_sad
)
{
sad
+=
mvsad_err_cost
(
x
,
&
mv
,
&
fcenter_mv
,
error_per_bit
);
if
(
sad
<
best_sad
)
{
...
...
@@ -3482,8 +3470,7 @@ int obmc_refining_search_sad(const MACROBLOCK *x,
int
obmc_diamond_search_sad
(
const
MACROBLOCK
*
x
,
const
search_site_config
*
cfg
,
const
int
*
wsrc
,
int
wsrc_stride
,
const
int
*
mask
,
int
mask_stride
,
const
int32_t
*
wsrc
,
const
int32_t
*
mask
,
MV
*
ref_mv
,
MV
*
best_mv
,
int
search_param
,
int
sad_per_bit
,
int
*
num00
,
...
...
@@ -3511,8 +3498,7 @@ int obmc_diamond_search_sad(const MACROBLOCK *x,
*
best_mv
=
*
ref_mv
;
// Check the starting position
best_sad
=
fn_ptr
->
osdf
(
best_address
,
in_what
->
stride
,
wsrc
,
wsrc_stride
,
mask
,
mask_stride
)
+
best_sad
=
fn_ptr
->
osdf
(
best_address
,
in_what
->
stride
,
wsrc
,
mask
)
+
mvsad_err_cost
(
x
,
best_mv
,
&
fcenter_mv
,
sad_per_bit
);
i
=
1
;
...
...
@@ -3523,7 +3509,7 @@ int obmc_diamond_search_sad(const MACROBLOCK *x,
best_mv
->
col
+
ss
[
i
].
mv
.
col
};
if
(
is_mv_in
(
x
,
&
mv
))
{
int
sad
=
fn_ptr
->
osdf
(
best_address
+
ss
[
i
].
offset
,
in_what
->
stride
,
wsrc
,
wsrc_stride
,
mask
,
mask_stride
);
wsrc
,
mask
);
if
(
sad
<
best_sad
)
{
sad
+=
mvsad_err_cost
(
x
,
&
mv
,
&
fcenter_mv
,
sad_per_bit
);
if
(
sad
<
best_sad
)
{
...
...
@@ -3547,8 +3533,7 @@ int obmc_diamond_search_sad(const MACROBLOCK *x,
best_mv
->
col
+
ss
[
best_site
].
mv
.
col
};
if
(
is_mv_in
(
x
,
&
this_mv
))
{
int
sad
=
fn_ptr
->
osdf
(
best_address
+
ss
[
best_site
].
offset
,
in_what
->
stride
,
wsrc
,
wsrc_stride
,
mask
,
mask_stride
);
in_what
->
stride
,
wsrc
,
mask
);
if
(
sad
<
best_sad
)
{
sad
+=
mvsad_err_cost
(
x
,
&
this_mv
,
&
fcenter_mv
,
sad_per_bit
);
if
(
sad
<
best_sad
)
{
...
...
@@ -3571,8 +3556,7 @@ int obmc_diamond_search_sad(const MACROBLOCK *x,
}
int
vp10_obmc_full_pixel_diamond
(
const
VP10_COMP
*
cpi
,
MACROBLOCK
*
x
,
const
int
*
wsrc
,
int
wsrc_stride
,
const
int
*
mask
,
int
mask_stride
,
const
int32_t
*
wsrc
,
const
int32_t
*
mask
,
MV
*
mvp_full
,
int
step_param
,
int
sadpb
,
int
further_steps
,
int
do_refine
,
const
vp10_variance_fn_ptr_t
*
fn_ptr
,
...
...
@@ -3581,13 +3565,12 @@ int vp10_obmc_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x,
MV
temp_mv
;
int
thissme
,
n
,
num00
=
0
;
int
bestsme
=
obmc_diamond_search_sad
(
x
,
&
cpi
->
ss_cfg
,
wsrc
,
wsrc_stride
,
mask
,
mask_stride
,
wsrc
,
mask
,
mvp_full
,
&
temp_mv
,
step_param
,
sadpb
,
&
n
,
fn_ptr
,
ref_mv
,
is_second
);
if
(
bestsme
<
INT_MAX
)
bestsme
=
get_obmc_mvpred_var
(
x
,
wsrc
,
wsrc_stride
,
mask
,
mask_stride
,
bestsme
=
get_obmc_mvpred_var
(
x
,
wsrc
,
mask
,
&
temp_mv
,
ref_mv
,
fn_ptr
,
1
,
is_second
);
*
dst_mv
=
temp_mv
;
...
...
@@ -3603,13 +3586,12 @@ int vp10_obmc_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x,
num00
--
;
}
else
{
thissme
=
obmc_diamond_search_sad
(
x
,
&
cpi
->
ss_cfg
,
wsrc
,
wsrc_stride
,
mask
,
mask_stride
,
wsrc
,
mask
,
mvp_full
,
&
temp_mv
,
step_param
+
n
,
sadpb
,
&
num00
,
fn_ptr
,
ref_mv
,
is_second
);
if
(
thissme
<
INT_MAX
)
thissme
=
get_obmc_mvpred_var
(
x
,
wsrc
,
wsrc_stride
,
mask
,
mask_stride
,
thissme
=
get_obmc_mvpred_var
(
x
,
wsrc
,
mask
,
&
temp_mv
,
ref_mv
,
fn_ptr
,
1
,
is_second
);
// check to see if refining search is needed.
...
...
@@ -3627,11 +3609,11 @@ int vp10_obmc_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x,
if
(
do_refine
)
{
const
int
search_range
=
8
;
MV
best_mv
=
*
dst_mv
;
thissme
=
obmc_refining_search_sad
(
x
,
wsrc
,
wsrc_stride
,
mask
,
mask_stride
,
thissme
=
obmc_refining_search_sad
(
x
,
wsrc
,
mask
,
&
best_mv
,
sadpb
,
search_range
,
fn_ptr
,
ref_mv
,
is_second
);
if
(
thissme
<
INT_MAX
)
thissme
=
get_obmc_mvpred_var
(
x
,
wsrc
,
wsrc_stride
,
mask
,
mask_stride
,
thissme
=
get_obmc_mvpred_var
(
x
,
wsrc
,
mask
,
&
best_mv
,
ref_mv
,
fn_ptr
,
1
,
is_second
);
if
(
thissme
<
bestsme
)
{
bestsme
=
thissme
;
...
...
vp10/encoder/mcomp.h
View file @
87aec58f
...
...
@@ -198,16 +198,16 @@ int vp10_masked_full_pixel_diamond(const struct VP10_COMP *cpi, MACROBLOCK *x,
#if CONFIG_OBMC
int
vp10_obmc_full_pixel_diamond
(
const
struct
VP10_COMP
*
cpi
,
MACROBLOCK
*
x
,
const
int
*
wsrc
,
int
wsrc_stride
,
const
int
*
mask
,
int
mask_stride
,
const
int
32_t
*
wsrc
,
const
int
32_t
*
mask
,
MV
*
mvp_full
,
int
step_param
,
int
sadpb
,
int
further_steps
,
int
do_refine
,
const
vp10_variance_fn_ptr_t
*
fn_ptr
,
const
MV
*
ref_mv
,
MV
*
dst_mv
,
int
is_second
);
int
vp10_find_best_obmc_sub_pixel_tree_up
(
struct
VP10_COMP
*
cpi
,
MACROBLOCK
*
x
,
const
int
*
wsrc
,
int
wsrc_stride
,
const
int
*
mask
,
int
mask_stride
,
const
int
32_t
*
wsrc
,
const
int
32_t
*
mask
,
int
mi_row
,
int
mi_col
,
MV
*
bestmv
,
const
MV
*
ref_mv
,
int
allow_hp
,
int
error_per_bit
,
...
...
vp10/encoder/rdopt.c
View file @
87aec58f
...
...
@@ -6073,8 +6073,7 @@ static INLINE void restore_dst_buf(MACROBLOCKD *xd,
#if CONFIG_OBMC
static void single_motion_search_obmc(VP10_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, int mi_row, int mi_col,
const int* wsrc, int wsrc_stride,
const int* mask, int mask_stride,
const int32_t* wsrc, const int32_t* mask,
#if CONFIG_EXT_INTER
int ref_idx,
int mv_idx,
...
...
@@ -6173,8 +6172,7 @@ static void single_motion_search_obmc(VP10_COMP *cpi, MACROBLOCK *x,
mvp_full.col >>= 3;
mvp_full.row >>= 3;
bestsme = vp10_obmc_full_pixel_diamond(cpi, x, wsrc, wsrc_stride,
mask, mask_stride,
bestsme = vp10_obmc_full_pixel_diamond(cpi, x, wsrc, mask,
&mvp_full, step_param, sadpb,
MAX_MVSEARCH_STEPS - 1 - step_param,
1, &cpi->fn_ptr[bsize],
...
...
@@ -6188,8 +6186,7 @@ static void single_motion_search_obmc(VP10_COMP *cpi, MACROBLOCK *x,
if (bestsme < INT_MAX) {
int dis;
vp10_find_best_obmc_sub_pixel_tree_up(cpi, x,
wsrc, wsrc_stride,
mask, mask_stride,
wsrc, mask,
mi_row, mi_col,
&tmp_mv->as_mv, &ref_mv,
cm->allow_high_precision_mv,
...
...
@@ -6796,8 +6793,8 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
#if CONFIG_OBMC
uint8_t *dst_buf1[3], int dst_stride1[3],
uint8_t *dst_buf2[3], int dst_stride2[3],
int *wsrc, int wsrc_strides
,
int *mask2d, int mask2d_strides
,
const int32_t *const wsrc
,
const int32_t *const mask2d
,
#endif // CONFIG_OBMC
#if CONFIG_EXT_INTER
int_mv single_newmvs[2][MAX_REF_FRAMES],
...
...
@@ -7739,8 +7736,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
pred_mv.as_int = mbmi->mv[0].as_int;
single_motion_search_obmc(cpi, x, bsize, mi_row, mi_col,
wsrc, wsrc_strides,
mask2d, mask2d_strides,
wsrc, mask2d,
#if CONFIG_EXT_INTER
0, mv_idx,
#endif // CONFIG_EXT_INTER
...
...
@@ -8494,13 +8490,11 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * MAX_SB_SQUARE]);
#endif // CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, int, weighted_src_buf[MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, int, mask2d_buf[MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, int
32_t
, weighted_src_buf[MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, int
32_t
, mask2d_buf[MAX_SB_SQUARE]);
uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE];
int dst_stride1[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
int dst_stride2[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
int weighted_src_stride = MAX_SB_SIZE;
int mask2d_stride = MAX_SB_SIZE;
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
...
...
@@ -8605,8 +8599,7 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
calc_target_weighted_pred(cm, x, xd, mi_row, mi_col,
dst_buf1[0], dst_stride1[0],
dst_buf2[0], dst_stride2[0],
mask2d_buf, mask2d_stride,
weighted_src_buf, weighted_src_stride);
mask2d_buf, weighted_src_buf);
#endif // CONFIG_OBMC
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
...
...
@@ -9143,8 +9136,8 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
#if CONFIG_OBMC
dst_buf1, dst_stride1,
dst_buf2, dst_stride2,
weighted_src_buf,
weighted_src_stride,
mask2d_buf,
mask2d_stride,
weighted_src_buf,
mask2d_buf,
#endif // CONFIG_OBMC
#if CONFIG_EXT_INTER
single_newmvs,
...
...
@@ -9258,8 +9251,7 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
dst_buf1, dst_stride1,
dst_buf2, dst_stride2,
weighted_src_buf,
weighted_src_stride,
mask2d_buf, mask2d_stride,
mask2d_buf,
#endif // CONFIG_OBMC
#if CONFIG_EXT_INTER
dummy_single_newmvs,
...
...
@@ -10970,14 +10962,16 @@ void calc_target_weighted_pred(VP10_COMMON *cm,
int mi_row, int mi_col,
uint8_t *above_buf, int above_stride,
uint8_t *left_buf, int left_stride,
int *mask_buf,
int mask_stride,
int *weighted_src_buf
, int weighted_src_stride
) {
int
32_t
*mask_buf,
int
32_t
*weighted_src_buf) {
BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
int row, col, i, mi_step;
int bw = 8 * xd->n8_w;
int bh = 8 * xd->n8_h;
int *dst = weighted_src_buf;
int *mask2d = mask_buf;
const int mask_stride = bw;
const int weighted_src_stride = bw;
int32_t *dst = weighted_src_buf;
int32_t *mask2d = mask_buf;
uint8_t *src;
#if CONFIG_VP9_HIGHBITDEPTH
int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
...
...
@@ -11009,11 +11003,11 @@ void calc_target_weighted_pred(VP10_COMMON *cm,
int bw = (mi_step * MI_SIZE) >> pd->subsampling_x;
int bh = overlap >> pd->subsampling_y;
int dst_stride = weighted_src_stride;
int *dst = weighted_src_buf + (i * MI_SIZE >> pd->subsampling_x);
int
32_t
*dst = weighted_src_buf + (i * MI_SIZE >> pd->subsampling_x);
int tmp_stride = above_stride;
uint8_t *tmp = above_buf + (i * MI_SIZE >> pd->subsampling_x);
int mask2d_stride = mask_stride;
int *mask2d = mask_buf + (i * MI_SIZE >> pd->subsampling_x);
int
32_t
*mask2d = mask_buf + (i * MI_SIZE >> pd->subsampling_x);
const uint8_t *mask1d[2];
setup_obmc_mask(bh, mask1d);
...
...
@@ -11078,14 +11072,14 @@ void calc_target_weighted_pred(VP10_COMMON *cm,
int bw = overlap >> pd->subsampling_x;
int bh = (mi_step * MI_SIZE) >> pd->subsampling_y;
int dst_stride = weighted_src_stride;
int *dst = weighted_src_buf +
int
32_t
*dst = weighted_src_buf +
(i * MI_SIZE * dst_stride >> pd->subsampling_y);
int tmp_stride = left_stride;
uint8_t *tmp = left_buf +
(i * MI_SIZE * tmp_stride >> pd->subsampling_y);
int mask2d_stride = mask_stride;
int *mask2d = mask_buf +
(i * MI_SIZE * mask2d_stride >> pd->subsampling_y);
int
32_t
*mask2d = mask_buf +
(i * MI_SIZE * mask2d_stride >> pd->subsampling_y);
const uint8_t *mask1d[2];
setup_obmc_mask(bw, mask1d);
...
...
vp10/encoder/rdopt.h
View file @
87aec58f
...
...
@@ -97,8 +97,7 @@ void calc_target_weighted_pred(VP10_COMMON *cm,
int
mi_row
,
int
mi_col
,
uint8_t
*
above_buf
,
int
above_stride
,
uint8_t
*
left_buf
,
int
left_stride
,
int
*
mask_buf
,
int
mask_stride
,
int
*
weighted_src_buf
,
int
weighted_src_stride
);
int32_t
*
mask_buf
,
int32_t
*
weighted_src_buf
);
#endif // CONFIG_OBMC
#ifdef __cplusplus
...
...
vpx_dsp/sad.c
View file @
87aec58f
...
...
@@ -456,21 +456,19 @@ HIGHBD_MASKSADMXN(4, 4)
// b: target weighted prediction (has been *4096 to keep precision)
// m: 2d weights (scaled by 4096)
static
INLINE
unsigned
int
obmc_sad
(
const
uint8_t
*
a
,
int
a_stride
,
const
int
*
b
,
int
b_stride
,
const
int
*
m
,
int
m_stride
,
const
int
32_t
*
b
,
const
int
32_t
*
m
,
int
width
,
int
height
)
{
int
y
,
x
;
unsigned
int
sad
=
0
;
for
(
y
=
0
;
y
<
height
;
y
++
)
{
for
(
x
=
0
;
x
<
width
;
x
++
)
{
int
abs_diff
=
abs
(
b
[
x
]
-
a
[
x
]
*
m
[
x
]);
sad
+=
(
abs_diff
+
2048
)
>>
12
;
}
for
(
x
=
0
;
x
<
width
;
x
++
)
sad
+=
ROUND_POWER_OF_TWO
(
abs
(
b
[
x
]
-
a
[
x
]
*
m
[
x
]),
12
);
a
+=
a_stride
;
b
+=
b_stride
;
m
+=
m_stride
;
b
+=
width
;
m
+=
width
;
}
return
sad
;
...
...
@@ -478,9 +476,9 @@ static INLINE unsigned int obmc_sad(const uint8_t *a, int a_stride,
#define OBMCSADMxN(m, n) \
unsigned int vpx_obmc_sad##m##x##n##_c(const uint8_t *ref, int ref_stride, \
const int *wsrc,
int wsrc_stride,
\
const int
*msk, in
t msk
_stride) {
\
return obmc_sad(ref, ref_stride, wsrc,
wsrc_stride, msk, msk_stride, m, n);
\
const int
32_t
*wsrc,
\
const int
32_
t
*
msk
) {
\
return obmc_sad(ref, ref_stride, wsrc,
msk, m, n);
\
}
#if CONFIG_EXT_PARTITION
...
...
@@ -504,22 +502,20 @@ OBMCSADMxN(4, 4)
#if CONFIG_VP9_HIGHBITDEPTH
static
INLINE
unsigned
int
highbd_obmc_sad
(
const
uint8_t
*
a8
,
int
a_stride
,
const
int
*
b
,
int
b_stride
,
const
int
*
m
,
int
m_stride
,
const
int
32_t
*
b
,
const
int
32_t
*
m
,
int
width
,
int
height
)
{
int
y
,
x
;
unsigned
int
sad
=
0
;
const
uint16_t
*
a
=
CONVERT_TO_SHORTPTR
(
a8
);
for
(
y
=
0
;
y
<
height
;
y
++
)
{
for
(
x
=
0
;
x
<
width
;
x
++
)
{
int
abs_diff
=
abs
(
b
[
x
]
-
a
[
x
]
*
m
[
x
]);
sad
+=
(
abs_diff
+
2048
)
>>
12
;
}
for
(
x
=
0
;
x
<
width
;
x
++
)
sad
+=
ROUND_POWER_OF_TWO
(
abs
(
b
[
x
]
-
a
[
x
]
*
m
[
x
]),
12
);
a
+=
a_stride
;
b
+=
b_stride
;
m
+=
m_stride
;
b
+=
width
;
m
+=
width
;
}
return
sad
;
...
...
@@ -528,12 +524,9 @@ static INLINE unsigned int highbd_obmc_sad(const uint8_t *a8, int a_stride,
#define HIGHBD_OBMCSADMXN(m, n) \
unsigned int vpx_highbd_obmc_sad##m##x##n##_c(const uint8_t *ref, \
int ref_stride, \
const int *wsrc, \
int wsrc_stride, \
const int *msk, \
int msk_stride) { \
return highbd_obmc_sad(ref, ref_stride, wsrc, wsrc_stride, \
msk, msk_stride, m, n); \
const int32_t *wsrc, \
const int32_t *msk) { \
return highbd_obmc_sad(ref, ref_stride, wsrc, msk, m, n); \
}
#if CONFIG_EXT_PARTITION
...
...
vpx_dsp/variance.c
View file @
87aec58f
...
...
@@ -1026,8 +1026,8 @@ HIGHBD_MASK_SUBPIX_VAR(128, 128)
#if CONFIG_VP10 && CONFIG_OBMC
void
obmc_variance
(
const
uint8_t
*
a
,
int
a_stride
,
const
int
*
b
,
int
b_stride
,
const
int
*
m
,
int
m_stride
,
const
int
32_t
*
b
,
const
int
32_t
*
m
,
int
w
,
int
h
,
unsigned
int
*
sse
,
int
*
sum
)
{
int
i
,
j
;
...
...
@@ -1036,26 +1036,24 @@ void obmc_variance(const uint8_t *a, int a_stride,
for
(
i
=
0
;
i
<
h
;
i
++
)
{
for
(
j
=
0
;
j
<
w
;
j
++
)
{