Xiph.Org / aom-rav1e / Commits / f2b34c3a

Commit f2b34c3a
Authored Jul 13, 2016 by Yue Chen; committed by Gerrit Code Review on Jul 13, 2016

Merge "Optimize and cleanup obmc predictor and rd search." into nextgenv2

Parents: ebc2d34c, 4c4f04ac
Changes: 6
vp10/common/reconinter.c
...
...
@@ -1298,97 +1298,63 @@ void vp10_build_inter_predictors_sb_extend(MACROBLOCKD *xd,
#endif  // CONFIG_SUPERTX

#if CONFIG_OBMC
-// obmc_mask_N[is_neighbor_predictor][overlap_position]
-static const uint8_t obmc_mask_1[2][1] = {
-    { 55 }, { 9 }
-};
+// obmc_mask_N[overlap_position]
+static const uint8_t obmc_mask_1[1] = { 55 };

-static const uint8_t obmc_mask_2[2][2] = {
-    { 45, 62 }, { 19, 2 }
-};
+static const uint8_t obmc_mask_2[2] = { 45, 62 };

-static const uint8_t obmc_mask_4[2][4] = {
-    { 39, 50, 59, 64 }, { 25, 14, 5, 0 }
-};
+static const uint8_t obmc_mask_4[4] = { 39, 50, 59, 64 };

-static const uint8_t obmc_mask_8[2][8] = {
-    { 36, 42, 48, 53, 57, 61, 63, 64 },
-    { 28, 22, 16, 11, 7, 3, 1, 0 }
-};
+static const uint8_t obmc_mask_8[8] = { 36, 42, 48, 53, 57, 61, 63, 64 };

-static const uint8_t obmc_mask_16[2][16] = {
-    { 34, 37, 40, 43, 46, 49, 52, 54, 56, 58, 60, 61, 63, 64, 64, 64 },
-    { 30, 27, 24, 21, 18, 15, 12, 10, 8, 6, 4, 3, 1, 0, 0, 0 }
-};
+static const uint8_t obmc_mask_16[16] = {
+  34, 37, 40, 43, 46, 49, 52, 54, 56, 58, 60, 61, 63, 64, 64, 64
+};

-static const uint8_t obmc_mask_32[2][32] = {
-    { 33, 35, 36, 38, 40, 41, 43, 44, 45, 47, 48, 50, 51, 52, 53, 55,
-      56, 57, 58, 59, 60, 60, 61, 62, 62, 63, 63, 64, 64, 64, 64, 64 },
-    { 31, 29, 28, 26, 24, 23, 21, 20, 19, 17, 16, 14, 13, 12, 11, 9,
-      8, 7, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0, 0, 0, 0 }
-};
+static const uint8_t obmc_mask_32[32] = {
+  33, 35, 36, 38, 40, 41, 43, 44, 45, 47, 48, 50, 51, 52, 53, 55,
+  56, 57, 58, 59, 60, 60, 61, 62, 62, 63, 63, 64, 64, 64, 64, 64
+};

#if CONFIG_EXT_PARTITION
-static const uint8_t obmc_mask_64[2][64] = {
-    { 33, 34, 35, 35, 36, 37, 38, 39, 40, 40, 41, 42, 43, 44, 44, 44,
-      45, 46, 47, 47, 48, 49, 50, 51, 51, 51, 52, 52, 53, 54, 55, 56,
-      56, 56, 57, 57, 58, 58, 59, 60, 60, 60, 60, 60, 61, 62, 62, 62,
-      62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, },
-    { 31, 30, 29, 29, 28, 27, 26, 25, 24, 24, 23, 22, 21, 20, 20, 20,
-      19, 18, 17, 17, 16, 15, 14, 13, 13, 13, 12, 12, 11, 10, 9, 8,
-      8, 8, 7, 7, 6, 6, 5, 4, 4, 4, 4, 4, 3, 2, 2, 2,
-      2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }
-};
+static const uint8_t obmc_mask_64[64] = {
+  33, 34, 35, 35, 36, 37, 38, 39, 40, 40, 41, 42, 43, 44, 44, 44,
+  45, 46, 47, 47, 48, 49, 50, 51, 51, 51, 52, 52, 53, 54, 55, 56,
+  56, 56, 57, 57, 58, 58, 59, 60, 60, 60, 60, 60, 61, 62, 62, 62,
+  62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+};
#endif  // CONFIG_EXT_PARTITION

-void setup_obmc_mask(int length, const uint8_t *mask[2]) {
+const uint8_t *vp10_get_obmc_mask(int length) {
  switch (length) {
    case 1:
-      mask[0] = obmc_mask_1[0];
-      mask[1] = obmc_mask_1[1];
-      break;
+      return obmc_mask_1;
    case 2:
-      mask[0] = obmc_mask_2[0];
-      mask[1] = obmc_mask_2[1];
-      break;
+      return obmc_mask_2;
    case 4:
-      mask[0] = obmc_mask_4[0];
-      mask[1] = obmc_mask_4[1];
-      break;
+      return obmc_mask_4;
    case 8:
-      mask[0] = obmc_mask_8[0];
-      mask[1] = obmc_mask_8[1];
-      break;
+      return obmc_mask_8;
    case 16:
-      mask[0] = obmc_mask_16[0];
-      mask[1] = obmc_mask_16[1];
-      break;
+      return obmc_mask_16;
    case 32:
-      mask[0] = obmc_mask_32[0];
-      mask[1] = obmc_mask_32[1];
-      break;
+      return obmc_mask_32;
#if CONFIG_EXT_PARTITION
    case 64:
-      mask[0] = obmc_mask_64[0];
-      mask[1] = obmc_mask_64[1];
-      break;
+      return obmc_mask_64;
#endif  // CONFIG_EXT_PARTITION
    default:
-      mask[0] = NULL;
-      mask[1] = NULL;
      assert(0);
-      break;
+      return NULL;
  }
}
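Note on the tables above: in every removed 2-D obmc_mask_N table the second row is the 64-complement of the first (36+28, 42+22, ..., 64+0), so only the first row carries information and the neighbor weight can always be recovered as 64 - m; that is presumably why the new code keeps only a 1-D table per length. A small stand-alone check of that invariant, using names that are illustrative rather than part of the tree:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Values copied from the removed 2-D obmc_mask_8 table in the diff above. */
static const uint8_t old_mask_8[2][8] = {
  { 36, 42, 48, 53, 57, 61, 63, 64 },
  { 28, 22, 16, 11, 7, 3, 1, 0 }
};

/* The retained 1-D table from the new code. */
static const uint8_t new_mask_8[8] = { 36, 42, 48, 53, 57, 61, 63, 64 };

int main(void) {
  int i;
  /* The neighbor row is always 64 minus the current-predictor row, so the
   * 1-D table plus an implicit complement carries the same information. */
  for (i = 0; i < 8; ++i) {
    assert(old_mask_8[0][i] == new_mask_8[i]);
    assert(old_mask_8[1][i] == 64 - new_mask_8[i]);
  }
  printf("obmc_mask_8: second row == 64 - first row\n");
  return 0;
}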
...
...
@@ -1398,168 +1364,101 @@ void setup_obmc_mask(int length, const uint8_t *mask[2]) {
// xd->plane[].dst.buf
void vp10_build_obmc_inter_prediction(VP10_COMMON *cm,
                                      MACROBLOCKD *xd, int mi_row, int mi_col,
-                                      int use_tmp_dst_buf,
-                                      uint8_t *final_buf[MAX_MB_PLANE],
-                                      int final_stride[MAX_MB_PLANE],
-                                      uint8_t *tmp_buf1[MAX_MB_PLANE],
-                                      int tmp_stride1[MAX_MB_PLANE],
-                                      uint8_t *tmp_buf2[MAX_MB_PLANE],
-                                      int tmp_stride2[MAX_MB_PLANE]) {
-  const TileInfo *const tile = &xd->tile;
-  BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
-  int plane, i, mi_step;
-  int above_available = mi_row > tile->mi_row_start;
-#if CONFIG_VP9_HIGHBITDEPTH
-  int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-
-  if (use_tmp_dst_buf) {
-    for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
-      const struct macroblockd_plane *pd = &xd->plane[plane];
-      int bw = (xd->n8_w * 8) >> pd->subsampling_x;
-      int bh = (xd->n8_h * 8) >> pd->subsampling_y;
-      int row;
-#if CONFIG_VP9_HIGHBITDEPTH
-      if (is_hbd) {
-        uint16_t *final_buf16 = CONVERT_TO_SHORTPTR(final_buf[plane]);
-        uint16_t *bmc_buf16 = CONVERT_TO_SHORTPTR(pd->dst.buf);
-        for (row = 0; row < bh; ++row)
-          memcpy(final_buf16 + row * final_stride[plane],
-                 bmc_buf16 + row * pd->dst.stride, bw * sizeof(uint16_t));
-      } else {
-#endif
-        for (row = 0; row < bh; ++row)
-          memcpy(final_buf[plane] + row * final_stride[plane],
-                 pd->dst.buf + row * pd->dst.stride, bw);
-#if CONFIG_VP9_HIGHBITDEPTH
-      }
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-    }
-  }
-
-  // handle above row
-  for (i = 0; above_available && i < VPXMIN(xd->n8_w, cm->mi_cols - mi_col);
-       i += mi_step) {
-    int mi_row_offset = -1;
-    int mi_col_offset = i;
-    int overlap;
-    MODE_INFO *above_mi = xd->mi[mi_col_offset +
-                                 mi_row_offset * xd->mi_stride];
-    MB_MODE_INFO *above_mbmi = &above_mi->mbmi;
-
-    mi_step = VPXMIN(xd->n8_w,
-                     num_8x8_blocks_wide_lookup[above_mbmi->sb_type]);
-
-    if (!is_neighbor_overlappable(above_mbmi))
-      continue;
-
-    overlap = num_4x4_blocks_high_lookup[bsize] << 1;
-
-    for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
-      const struct macroblockd_plane *pd = &xd->plane[plane];
-      int bw = (mi_step * MI_SIZE) >> pd->subsampling_x;
-      int bh = overlap >> pd->subsampling_y;
-      int row, col;
-      int dst_stride = use_tmp_dst_buf ? final_stride[plane] : pd->dst.stride;
-      uint8_t *dst = use_tmp_dst_buf ?
-          &final_buf[plane][(i * MI_SIZE) >> pd->subsampling_x] :
-          &pd->dst.buf[(i * MI_SIZE) >> pd->subsampling_x];
-      int tmp_stride = tmp_stride1[plane];
-      uint8_t *tmp = &tmp_buf1[plane][(i * MI_SIZE) >> pd->subsampling_x];
-      const uint8_t *mask[2];
-
-      setup_obmc_mask(bh, mask);
-
-#if CONFIG_VP9_HIGHBITDEPTH
-      if (is_hbd) {
-        uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
-        uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
-
-        for (row = 0; row < bh; ++row) {
-          for (col = 0; col < bw; ++col)
-            dst16[col] = ROUND_POWER_OF_TWO(mask[0][row] * dst16[col] +
-                                            mask[1][row] * tmp16[col], 6);
-          dst16 += dst_stride;
-          tmp16 += tmp_stride;
-        }
-      } else {
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-      for (row = 0; row < bh; ++row) {
-        for (col = 0; col < bw; ++col)
-          dst[col] = ROUND_POWER_OF_TWO(mask[0][row] * dst[col] +
-                                        mask[1][row] * tmp[col], 6);
-        dst += dst_stride;
-        tmp += tmp_stride;
-      }
-#if CONFIG_VP9_HIGHBITDEPTH
-      }
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-    }
-  }  // each mi in the above row
-
-  if (mi_col == 0 || (mi_col - 1 < tile->mi_col_start))
-    return;
-
-  // handle left column
-  for (i = 0; i < VPXMIN(xd->n8_h, cm->mi_rows - mi_row); i += mi_step) {
-    int mi_row_offset = i;
-    int mi_col_offset = -1;
-    int overlap;
-    MODE_INFO *left_mi = xd->mi[mi_col_offset +
-                                mi_row_offset * xd->mi_stride];
-    MB_MODE_INFO *left_mbmi = &left_mi->mbmi;
-
-    mi_step = VPXMIN(xd->n8_h,
-                     num_8x8_blocks_high_lookup[left_mbmi->sb_type]);
-
-    if (!is_neighbor_overlappable(left_mbmi))
-      continue;
-
-    overlap = num_4x4_blocks_wide_lookup[bsize] << 1;
-
-    for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
-      const struct macroblockd_plane *pd = &xd->plane[plane];
-      int bw = overlap >> pd->subsampling_x;
-      int bh = (mi_step * MI_SIZE) >> pd->subsampling_y;
-      int row, col;
-      int dst_stride = use_tmp_dst_buf ? final_stride[plane] : pd->dst.stride;
-      uint8_t *dst = use_tmp_dst_buf ?
-          &final_buf[plane][(i * MI_SIZE * dst_stride) >> pd->subsampling_y] :
-          &pd->dst.buf[(i * MI_SIZE * dst_stride) >> pd->subsampling_y];
-      int tmp_stride = tmp_stride2[plane];
-      uint8_t *tmp = &tmp_buf2[plane]
-                         [(i * MI_SIZE * tmp_stride) >> pd->subsampling_y];
-      const uint8_t *mask[2];
-
-      setup_obmc_mask(bw, mask);
-
-#if CONFIG_VP9_HIGHBITDEPTH
-      if (is_hbd) {
-        uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
-        uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
-
-        for (row = 0; row < bh; ++row) {
-          for (col = 0; col < bw; ++col)
-            dst16[col] = ROUND_POWER_OF_TWO(mask[0][col] * dst16[col] +
-                                            mask[1][col] * tmp16[col], 6);
-          dst16 += dst_stride;
-          tmp16 += tmp_stride;
-        }
-      } else {
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-      for (row = 0; row < bh; ++row) {
-        for (col = 0; col < bw; ++col)
-          dst[col] = ROUND_POWER_OF_TWO(mask[0][col] * dst[col] +
-                                        mask[1][col] * tmp[col], 6);
-        dst += dst_stride;
-        tmp += tmp_stride;
-      }
-#if CONFIG_VP9_HIGHBITDEPTH
-      }
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-    }
-  }  // each mi in the left column
+                                      uint8_t *above[MAX_MB_PLANE],
+                                      int above_stride[MAX_MB_PLANE],
+                                      uint8_t *left[MAX_MB_PLANE],
+                                      int left_stride[MAX_MB_PLANE]) {
+  const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+  int plane, i;
+#if CONFIG_VP9_HIGHBITDEPTH
+  const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+  // handle above row
+  if (xd->up_available) {
+    const int overlap = num_4x4_blocks_high_lookup[bsize] * 2;
+    const int miw = VPXMIN(xd->n8_w, cm->mi_cols - mi_col);
+    const int mi_row_offset = -1;
+
+    assert(miw > 0);
+
+    i = 0;
+    do {  // for each mi in the above row
+      const int mi_col_offset = i;
+      const MB_MODE_INFO *const above_mbmi =
+          &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
+      const int mi_step =
+          VPXMIN(xd->n8_w, num_8x8_blocks_wide_lookup[above_mbmi->sb_type]);
+
+      if (is_neighbor_overlappable(above_mbmi)) {
+        for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+          const struct macroblockd_plane *pd = &xd->plane[plane];
+          const int bw = (mi_step * MI_SIZE) >> pd->subsampling_x;
+          const int bh = overlap >> pd->subsampling_y;
+          const int dst_stride = pd->dst.stride;
+          uint8_t *const dst =
+              &pd->dst.buf[(i * MI_SIZE) >> pd->subsampling_x];
+          const int tmp_stride = above_stride[plane];
+          const uint8_t *const tmp =
+              &above[plane][(i * MI_SIZE) >> pd->subsampling_x];
+          const uint8_t *const mask = vp10_get_obmc_mask(bh);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+          if (is_hbd)
+            vpx_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride,
+                                       tmp, tmp_stride, mask, bh, bw, xd->bd);
+          else
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+            vpx_blend_a64_vmask(dst, dst_stride, dst, dst_stride,
+                                tmp, tmp_stride, mask, bh, bw);
+        }
+      }
+      i += mi_step;
+    } while (i < miw);
+  }
+
+  // handle left column
+  if (xd->left_available) {
+    const int overlap = num_4x4_blocks_wide_lookup[bsize] * 2;
+    const int mih = VPXMIN(xd->n8_h, cm->mi_rows - mi_row);
+    const int mi_col_offset = -1;
+
+    assert(mih > 0);
+
+    i = 0;
+    do {  // for each mi in the left column
+      const int mi_row_offset = i;
+      const MB_MODE_INFO *const left_mbmi =
+          &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
+      const int mi_step =
+          VPXMIN(xd->n8_h, num_8x8_blocks_high_lookup[left_mbmi->sb_type]);
+
+      if (is_neighbor_overlappable(left_mbmi)) {
+        for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+          const struct macroblockd_plane *pd = &xd->plane[plane];
+          const int bw = overlap >> pd->subsampling_x;
+          const int bh = (mi_step * MI_SIZE) >> pd->subsampling_y;
+          const int dst_stride = pd->dst.stride;
+          uint8_t *const dst =
+              &pd->dst.buf[(i * MI_SIZE * dst_stride) >> pd->subsampling_y];
+          const int tmp_stride = left_stride[plane];
+          const uint8_t *const tmp =
+              &left[plane][(i * MI_SIZE * tmp_stride) >> pd->subsampling_y];
+          const uint8_t *const mask = vp10_get_obmc_mask(bw);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+          if (is_hbd)
+            vpx_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride,
+                                       tmp, tmp_stride, mask, bh, bw, xd->bd);
+          else
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+            vpx_blend_a64_hmask(dst, dst_stride, dst, dst_stride,
+                                tmp, tmp_stride, mask, bh, bw);
+        }
+      }
+      i += mi_step;
+    } while (i < mih);
+  }
}
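The vpx_blend_a64_vmask / vpx_blend_a64_hmask calls that replace the removed loops compute, per sample, the same 6-bit rounded weighted average the loops spelled out: the current prediction weighted by the mask value, the neighbor's prediction weighted by its 64-complement, with one mask entry per row (vmask, above neighbors) or per column (hmask, left neighbors). The scalar sketch below mirrors the removed loops; it is only an illustration of the arithmetic under that assumption, not the actual vpx_dsp implementation (which also has SIMD and high-bit-depth variants), and its names are made up:

#include <stdint.h>
#include <stdio.h>

#define A64_MAX_ALPHA 64  /* mask values in this patch run 0..64 */

/* Round (v / 2^n) to nearest, matching ROUND_POWER_OF_TWO in the removed code. */
static int round_power_of_two(int v, int n) {
  return (v + (1 << (n - 1))) >> n;
}

/* Vertical-mask blend: one mask value per row, applied across the row.
 * dst may alias src0, exactly as the calls above pass dst twice, so the
 * neighbor's prediction is blended into the block prediction in place. */
static void blend_a64_vmask_ref(uint8_t *dst, int dst_stride,
                                const uint8_t *src0, int src0_stride,
                                const uint8_t *src1, int src1_stride,
                                const uint8_t *mask, int h, int w) {
  int i, j;
  for (i = 0; i < h; ++i) {
    const int m0 = mask[i];             /* weight of src0 (current pred)   */
    const int m1 = A64_MAX_ALPHA - m0;  /* weight of src1 (neighbor pred)  */
    for (j = 0; j < w; ++j)
      dst[i * dst_stride + j] = (uint8_t)round_power_of_two(
          m0 * src0[i * src0_stride + j] + m1 * src1[i * src1_stride + j], 6);
  }
}

int main(void) {
  uint8_t cur[2][4] = { { 100, 100, 100, 100 }, { 100, 100, 100, 100 } };
  const uint8_t nbr[2][4] = { { 60, 60, 60, 60 }, { 60, 60, 60, 60 } };
  const uint8_t mask[2] = { 36, 42 };  /* first two entries of obmc_mask_8 */

  /* Blend two overlap rows in place, as the above-row path does. */
  blend_a64_vmask_ref(&cur[0][0], 4, &cur[0][0], 4, &nbr[0][0], 4, mask, 2, 4);
  printf("row0 -> %u, row1 -> %u\n", cur[0][0], cur[1][0]);
  return 0;
}

The hmask variant differs only in indexing the mask by column instead of by row, which is why the left-column path switches to it while keeping the same argument order.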
#if CONFIG_EXT_INTER
...
...
vp10/common/reconinter.h
...
...
@@ -562,16 +562,13 @@ static INLINE int vp10_is_interp_needed(const MACROBLOCKD *const xd) {
#endif // CONFIG_EXT_INTERP
#if CONFIG_OBMC
-void setup_obmc_mask(int length, const uint8_t *mask[2]);
+const uint8_t *vp10_get_obmc_mask(int length);

void vp10_build_obmc_inter_prediction(VP10_COMMON *cm,
                                      MACROBLOCKD *xd, int mi_row, int mi_col,
-                                      int use_tmp_dst_buf,
-                                      uint8_t *final_buf[MAX_MB_PLANE],
-                                      int final_stride[MAX_MB_PLANE],
-                                      uint8_t *tmp_buf1[MAX_MB_PLANE],
-                                      int tmp_stride1[MAX_MB_PLANE],
-                                      uint8_t *tmp_buf2[MAX_MB_PLANE],
-                                      int tmp_stride2[MAX_MB_PLANE]);
+                                      uint8_t *above[MAX_MB_PLANE],
+                                      int above_stride[MAX_MB_PLANE],
+                                      uint8_t *left[MAX_MB_PLANE],
+                                      int left_stride[MAX_MB_PLANE]);
void vp10_build_prediction_by_above_preds(VP10_COMMON *cm,
                                          MACROBLOCKD *xd,
                                          int mi_row, int mi_col,
...
...
vp10/decoder/decodeframe.c
...
...
@@ -1385,7 +1385,7 @@ static void decode_block(VP10Decoder *const pbi, MACROBLOCKD *const xd,
                                          dst_buf2, dst_stride2);
      vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm),
                            mi_row, mi_col);
-      vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, 0, NULL, NULL,
+      vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col,
                                       dst_buf1, dst_stride1,
                                       dst_buf2, dst_stride2);
    }
...
...
vp10/encoder/encodeframe.c
...
...
@@ -5091,7 +5091,7 @@ static void encode_superblock(VP10_COMP *cpi, ThreadData *td,
                                          dst_stride2);
      vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm),
                            mi_row, mi_col);
-      vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, 0, NULL, NULL,
+      vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col,
                                       dst_buf1, dst_stride1,
                                       dst_buf2, dst_stride2);
    }
...
...
vp10/encoder/rdopt.c
...
...
@@ -15,6 +15,7 @@
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_dsp/blend.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/system_state.h"
...
...
@@ -7780,8 +7781,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
#endif // CONFIG_EXT_INTER
}
-      vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, 0,
-                                       NULL, NULL,
+      vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col,
                                       dst_buf1, dst_stride1,
                                       dst_buf2, dst_stride2);
model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1,
...
...
@@ -8399,6 +8399,18 @@ static void pick_ext_intra_iframe(VP10_COMP *cpi, MACROBLOCK *x,
}
#endif // CONFIG_EXT_INTRA
+#if CONFIG_OBMC
+static void calc_target_weighted_pred(
+    const VP10_COMMON *cm,
+    const MACROBLOCK *x,
+    const MACROBLOCKD *xd,
+    int mi_row, int mi_col,
+    const uint8_t *above, int above_stride,
+    const uint8_t *left, int left_stride,
+    int32_t *mask_buf,
+    int32_t *wsrc_buf);
+#endif  // CONFIG_OBMC
void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
TileDataEnc *tile_data,
MACROBLOCK *x,
...
...
@@ -9579,7 +9591,7 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
#if CONFIG_OBMC
if (mbmi->motion_variation == OBMC_CAUSAL)
-      vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col,
-                                       0, NULL, NULL,
+      vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col,
dst_buf1, dst_stride1,
dst_buf2, dst_stride2);
#endif // CONFIG_OBMC
...
...
@@ -10980,189 +10992,225 @@ void vp10_rd_pick_inter_mode_sub8x8(struct VP10_COMP *cpi,
}
#if CONFIG_OBMC
-void calc_target_weighted_pred(VP10_COMMON *cm,
-                               MACROBLOCK *x,
-                               MACROBLOCKD *xd,
-                               int mi_row, int mi_col,
-                               uint8_t *above_buf, int above_stride,
-                               uint8_t *left_buf, int left_stride,
-                               int32_t *mask_buf,
-                               int32_t *weighted_src_buf) {
-  BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
-  int row, col, i, mi_step;
-  int bw = 8 * xd->n8_w;
-  int bh = 8 * xd->n8_h;
+// This function has a structure similar to vp10_build_obmc_inter_prediction
+//
+// The OBMC predictor is computed as:
+//
+//  PObmc(x,y) =
+//    VPX_BLEND_A64(Mh(x),
+//                  VPX_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
+//                  PLeft(x, y))
+//
+// Scaling up by VPX_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
+// rounding, this can be written as:
+//
+//  VPX_BLEND_A64_MAX_ALPHA * VPX_BLEND_A64_MAX_ALPHA * Pobmc(x,y) =
+//    Mh(x) * Mv(y) * P(x,y) +
+//      Mh(x) * Cv(y) * Pabove(x,y) +
+//      VPX_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
+//
+// Where :
+//
+//  Cv(y) = VPX_BLEND_A64_MAX_ALPHA - Mv(y)
+//  Ch(y) = VPX_BLEND_A64_MAX_ALPHA - Mh(y)
+//
+// This function computes 'wsrc' and 'mask' as:
+//
+//  wsrc(x, y) =
+//    VPX_BLEND_A64_MAX_ALPHA * VPX_BLEND_A64_MAX_ALPHA * src(x, y) -
+//      Mh(x) * Cv(y) * Pabove(x,y) +
+//      VPX_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
+//
+//  mask(x, y) = Mh(x) * Mv(y)
+//
+// These can then be used to efficiently approximate the error for any
+// predictor P in the context of the provided neighbouring predictors by
+// computing:
+//
+//  error(x, y) =
+//    wsrc(x, y) - mask(x, y) * P(x, y) / (VPX_BLEND_A64_MAX_ALPHA ** 2)
+//
+static void calc_target_weighted_pred(
+    const VP10_COMMON *cm,
+    const MACROBLOCK *x,
+    const MACROBLOCKD *xd,
+    int mi_row, int mi_col,
+    const uint8_t *above, int above_stride,
+    const uint8_t *left, int left_stride,
+    int32_t *mask_buf,
+    int32_t *wsrc_buf) {
+  const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+  int row, col, i;
+  const int bw = 8 * xd->n8_w;
+  const int bh = 8 * xd->n8_h;
+  const int wsrc_stride = bw;
  const int mask_stride = bw;
-  const int weighted_src_stride = bw;
-  int32_t *dst = weighted_src_buf;
-  int32_t *mask2d = mask_buf;
-  uint8_t *src;
+  const int src_scale = VPX_BLEND_A64_MAX_ALPHA * VPX_BLEND_A64_MAX_ALPHA;
#if CONFIG_VP9_HIGHBITDEPTH
-  int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
+  const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
+#else
+  const int is_hbd = 0;
#endif  // CONFIG_VP9_HIGHBITDEPTH

-  for (row = 0; row < bh; ++row) {
-    for (col = 0; col < bw; ++col) {
-      dst[col] = 0;
-      mask2d[col] = 64;
-    }
-    dst += weighted_src_stride;
-    mask2d += mask_stride;
-  }
+  // plane 0 should not be subsampled
+  assert(xd->plane[0].subsampling_x == 0);
+  assert(xd->plane[0].subsampling_y == 0);
+
+  vp10_zero_array(wsrc_buf, bw * bh);
+  for (i = 0; i < bw * bh; ++i)
+    mask_buf[i] = VPX_BLEND_A64_MAX_ALPHA;

  // handle above row
+  if (xd->up_available) {
-  for (i = 0; i < VPXMIN(xd->n8_w, cm->mi_cols - mi_col); i += mi_step) {
-    int mi_row_offset = -1;
-    int mi_col_offset = i;
-    MODE_INFO *above_mi = xd->mi[mi_col_offset +
-                                 mi_row_offset * xd->mi_stride];
-    MB_MODE_INFO *above_mbmi = &above_mi->mbmi;
-    int overlap = num_4x4_blocks_high_lookup[bsize] << 1;
-
-    mi_step = VPXMIN(xd->n8_w,
-                     num_8x8_blocks_wide_lookup[above_mbmi->sb_type]);
+    const int overlap = num_4x4_blocks_high_lookup[bsize] * 2;
+    const int miw = VPXMIN(xd->n8_w, cm->mi_cols - mi_col);
+    const int mi_row_offset = -1;
+    const uint8_t *const mask1d = vp10_get_obmc_mask(overlap);
+
+    assert(miw > 0);
+
+    i = 0;
+    do {  // for each mi in the above row
+      const int mi_col_offset = i;
+      const MB_MODE_INFO *const above_mbmi =
+          &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
+      const int mi_step =
+          VPXMIN(xd->n8_w, num_8x8_blocks_wide_lookup[above_mbmi->sb_type]);
+      const int neighbor_bw = mi_step * MI_SIZE;

      if (is_neighbor_overlappable(above_mbmi)) {
        const struct macroblockd_plane *pd = &xd->plane[0];
-        int bw = (mi_step * MI_SIZE) >> pd->subsampling_x;
-        int bh = overlap >> pd->subsampling_y;
-        int dst_stride = weighted_src_stride;
-        int32_t *dst = weighted_src_buf + (i * MI_SIZE >> pd->subsampling_x);
-        int tmp_stride = above_stride;
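A worked single-pixel example of the wsrc/mask construction described in the comment block above, reading both the Pabove and PLeft terms as subtracted from the scaled source (the indentation in the comment puts them under the leading minus), which is what makes error = wsrc - mask * P agree with the scaled OBMC residual. All sample and mask values here are made up for illustration, and VPX_BLEND_A64_MAX_ALPHA is taken to be the 64 used by the masks in this patch:

#include <stdint.h>
#include <stdio.h>

#define MAX_ALPHA 64  /* stand-in for VPX_BLEND_A64_MAX_ALPHA */

int main(void) {
  /* One pixel: source sample, above/left neighbor predictors, a candidate
   * predictor P, and mask values Mv(y) (vertical, above overlap) and
   * Mh(x) (horizontal, left overlap). */
  const int src = 120, p_above = 130, p_left = 110, p = 125;
  const int mv = 50, mh = 60;
  const int cv = MAX_ALPHA - mv, ch = MAX_ALPHA - mh;

  /* wsrc and mask as in the comment block: scaled source minus the
   * contributions of the neighbor predictors. */
  const int32_t wsrc = MAX_ALPHA * MAX_ALPHA * src
                       - mh * cv * p_above
                       - MAX_ALPHA * ch * p_left;
  const int32_t mask = mh * mv;

  /* Approximated error for predictor P, still scaled by MAX_ALPHA**2. */
  const int32_t error = wsrc - mask * p;

  /* Direct evaluation for comparison: blend P with the above predictor
   * vertically, then with the left predictor horizontally, at full scale,
   * and subtract from the scaled source. */
  const int32_t pobmc_scaled =
      mh * (mv * p + cv * p_above) + MAX_ALPHA * ch * p_left;
  const int32_t error_direct = MAX_ALPHA * MAX_ALPHA * src - pobmc_scaled;

  printf("error via wsrc/mask: %d, direct: %d\n", error, error_direct);
  return 0;
}

Both prints give the same value, which is the point of precomputing wsrc and mask once and then evaluating many candidate predictors cheaply during the rd search.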