Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
A
aom-rav1e
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Xiph.Org
aom-rav1e
Commits
b49ac0b1
Commit
b49ac0b1
authored
Nov 09, 2015
by
Yaowu Xu
Committed by
hui su
Nov 09, 2015
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' into nextgenv2
Change-Id: I8811bfd8fc132b9f515707e795bb6308e4bf263b
parents
bc54f9dc
420e8d6d
Changes
16
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
895 additions
and
2160 deletions
+895
-2160
test/sad_test.cc
test/sad_test.cc
+367
-599
test/sixtap_predict_test.cc
test/sixtap_predict_test.cc
+22
-44
test/variance_test.cc
test/variance_test.cc
+462
-1149
vp10/encoder/encoder.h
vp10/encoder/encoder.h
+1
-1
vp10/encoder/rd.c
vp10/encoder/rd.c
+3
-2
vp10/encoder/rdopt.c
vp10/encoder/rdopt.c
+7
-5
vp9/common/vp9_rtcd_defs.pl
vp9/common/vp9_rtcd_defs.pl
+1
-1
vp9/encoder/vp9_encoder.c
vp9/encoder/vp9_encoder.c
+0
-30
vp9/encoder/vp9_encoder.h
vp9/encoder/vp9_encoder.h
+1
-1
vp9/encoder/vp9_mcomp.c
vp9/encoder/vp9_mcomp.c
+16
-10
vp9/encoder/vp9_mcomp.h
vp9/encoder/vp9_mcomp.h
+3
-3
vp9/encoder/vp9_noise_estimate.c
vp9/encoder/vp9_noise_estimate.c
+1
-0
vp9/encoder/vp9_rd.c
vp9/encoder/vp9_rd.c
+6
-4
vp9/encoder/vp9_rdopt.c
vp9/encoder/vp9_rdopt.c
+5
-2
vp9/encoder/x86/vp9_diamond_search_sad_avx.c
vp9/encoder/x86/vp9_diamond_search_sad_avx.c
+0
-308
vp9/vp9cx.mk
vp9/vp9cx.mk
+0
-1
No files found.
test/sad_test.cc
View file @
b49ac0b1
This diff is collapsed.
Click to expand it.
test/sixtap_predict_test.cc
View file @
b49ac0b1
...
...
@@ -186,70 +186,48 @@ TEST_P(SixtapPredictTest, TestWithRandomData) {
using
std
::
tr1
::
make_tuple
;
const
SixtapPredictFunc
sixtap_16x16_c
=
vp8_sixtap_predict16x16_c
;
const
SixtapPredictFunc
sixtap_8x8_c
=
vp8_sixtap_predict8x8_c
;
const
SixtapPredictFunc
sixtap_8x4_c
=
vp8_sixtap_predict8x4_c
;
const
SixtapPredictFunc
sixtap_4x4_c
=
vp8_sixtap_predict4x4_c
;
INSTANTIATE_TEST_CASE_P
(
C
,
SixtapPredictTest
,
::
testing
::
Values
(
make_tuple
(
16
,
16
,
sixtap_
16x16_c
),
make_tuple
(
8
,
8
,
sixtap_
8x8_c
),
make_tuple
(
8
,
4
,
sixtap_
8x4_c
),
make_tuple
(
4
,
4
,
sixtap_
4x4_c
)));
make_tuple
(
16
,
16
,
&
vp8_sixtap_predict
16x16_c
),
make_tuple
(
8
,
8
,
&
vp8_sixtap_predict
8x8_c
),
make_tuple
(
8
,
4
,
&
vp8_sixtap_predict
8x4_c
),
make_tuple
(
4
,
4
,
&
vp8_sixtap_predict
4x4_c
)));
#if HAVE_NEON
const
SixtapPredictFunc
sixtap_16x16_neon
=
vp8_sixtap_predict16x16_neon
;
const
SixtapPredictFunc
sixtap_8x8_neon
=
vp8_sixtap_predict8x8_neon
;
const
SixtapPredictFunc
sixtap_8x4_neon
=
vp8_sixtap_predict8x4_neon
;
INSTANTIATE_TEST_CASE_P
(
NEON
,
SixtapPredictTest
,
::
testing
::
Values
(
make_tuple
(
16
,
16
,
sixtap_
16x16_neon
),
make_tuple
(
8
,
8
,
sixtap_
8x8_neon
),
make_tuple
(
8
,
4
,
sixtap_
8x4_neon
)));
make_tuple
(
16
,
16
,
&
vp8_sixtap_predict
16x16_neon
),
make_tuple
(
8
,
8
,
&
vp8_sixtap_predict
8x8_neon
),
make_tuple
(
8
,
4
,
&
vp8_sixtap_predict
8x4_neon
)));
#endif
#if HAVE_MMX
const
SixtapPredictFunc
sixtap_16x16_mmx
=
vp8_sixtap_predict16x16_mmx
;
const
SixtapPredictFunc
sixtap_8x8_mmx
=
vp8_sixtap_predict8x8_mmx
;
const
SixtapPredictFunc
sixtap_8x4_mmx
=
vp8_sixtap_predict8x4_mmx
;
const
SixtapPredictFunc
sixtap_4x4_mmx
=
vp8_sixtap_predict4x4_mmx
;
INSTANTIATE_TEST_CASE_P
(
MMX
,
SixtapPredictTest
,
::
testing
::
Values
(
make_tuple
(
16
,
16
,
sixtap_
16x16_mmx
),
make_tuple
(
8
,
8
,
sixtap_
8x8_mmx
),
make_tuple
(
8
,
4
,
sixtap_
8x4_mmx
),
make_tuple
(
4
,
4
,
sixtap_
4x4_mmx
)));
make_tuple
(
16
,
16
,
&
vp8_sixtap_predict
16x16_mmx
),
make_tuple
(
8
,
8
,
&
vp8_sixtap_predict
8x8_mmx
),
make_tuple
(
8
,
4
,
&
vp8_sixtap_predict
8x4_mmx
),
make_tuple
(
4
,
4
,
&
vp8_sixtap_predict
4x4_mmx
)));
#endif
#if HAVE_SSE2
const
SixtapPredictFunc
sixtap_16x16_sse2
=
vp8_sixtap_predict16x16_sse2
;
const
SixtapPredictFunc
sixtap_8x8_sse2
=
vp8_sixtap_predict8x8_sse2
;
const
SixtapPredictFunc
sixtap_8x4_sse2
=
vp8_sixtap_predict8x4_sse2
;
INSTANTIATE_TEST_CASE_P
(
SSE2
,
SixtapPredictTest
,
::
testing
::
Values
(
make_tuple
(
16
,
16
,
sixtap_
16x16_sse2
),
make_tuple
(
8
,
8
,
sixtap_
8x8_sse2
),
make_tuple
(
8
,
4
,
sixtap_
8x4_sse2
)));
make_tuple
(
16
,
16
,
&
vp8_sixtap_predict
16x16_sse2
),
make_tuple
(
8
,
8
,
&
vp8_sixtap_predict
8x8_sse2
),
make_tuple
(
8
,
4
,
&
vp8_sixtap_predict
8x4_sse2
)));
#endif
#if HAVE_SSSE3
const
SixtapPredictFunc
sixtap_16x16_ssse3
=
vp8_sixtap_predict16x16_ssse3
;
const
SixtapPredictFunc
sixtap_8x8_ssse3
=
vp8_sixtap_predict8x8_ssse3
;
const
SixtapPredictFunc
sixtap_8x4_ssse3
=
vp8_sixtap_predict8x4_ssse3
;
const
SixtapPredictFunc
sixtap_4x4_ssse3
=
vp8_sixtap_predict4x4_ssse3
;
INSTANTIATE_TEST_CASE_P
(
SSSE3
,
SixtapPredictTest
,
::
testing
::
Values
(
make_tuple
(
16
,
16
,
sixtap_
16x16_ssse3
),
make_tuple
(
8
,
8
,
sixtap_
8x8_ssse3
),
make_tuple
(
8
,
4
,
sixtap_
8x4_ssse3
),
make_tuple
(
4
,
4
,
sixtap_
4x4_ssse3
)));
make_tuple
(
16
,
16
,
&
vp8_sixtap_predict
16x16_ssse3
),
make_tuple
(
8
,
8
,
&
vp8_sixtap_predict
8x8_ssse3
),
make_tuple
(
8
,
4
,
&
vp8_sixtap_predict
8x4_ssse3
),
make_tuple
(
4
,
4
,
&
vp8_sixtap_predict
4x4_ssse3
)));
#endif
#if HAVE_MSA
const
SixtapPredictFunc
sixtap_16x16_msa
=
vp8_sixtap_predict16x16_msa
;
const
SixtapPredictFunc
sixtap_8x8_msa
=
vp8_sixtap_predict8x8_msa
;
const
SixtapPredictFunc
sixtap_8x4_msa
=
vp8_sixtap_predict8x4_msa
;
const
SixtapPredictFunc
sixtap_4x4_msa
=
vp8_sixtap_predict4x4_msa
;
INSTANTIATE_TEST_CASE_P
(
MSA
,
SixtapPredictTest
,
::
testing
::
Values
(
make_tuple
(
16
,
16
,
sixtap_
16x16_msa
),
make_tuple
(
8
,
8
,
sixtap_
8x8_msa
),
make_tuple
(
8
,
4
,
sixtap_
8x4_msa
),
make_tuple
(
4
,
4
,
sixtap_
4x4_msa
)));
make_tuple
(
16
,
16
,
&
vp8_sixtap_predict
16x16_msa
),
make_tuple
(
8
,
8
,
&
vp8_sixtap_predict
8x8_msa
),
make_tuple
(
8
,
4
,
&
vp8_sixtap_predict
8x4_msa
),
make_tuple
(
4
,
4
,
&
vp8_sixtap_predict
4x4_msa
)));
#endif
}
// namespace
test/variance_test.cc
View file @
b49ac0b1
This diff is collapsed.
Click to expand it.
vp10/encoder/encoder.h
View file @
b49ac0b1
...
...
@@ -456,7 +456,7 @@ typedef struct VP10_COMP {
int
mbmode_cost
[
INTRA_MODES
];
unsigned
int
inter_mode_cost
[
INTER_MODE_CONTEXTS
][
INTER_MODES
];
int
intra_uv_mode_cost
[
INTRA_MODES
];
int
intra_uv_mode_cost
[
INTRA_MODES
]
[
INTRA_MODES
]
;
int
y_mode_costs
[
INTRA_MODES
][
INTRA_MODES
][
INTRA_MODES
];
int
switchable_interp_costs
[
SWITCHABLE_FILTER_CONTEXTS
][
SWITCHABLE_FILTERS
];
int
partition_cost
[
PARTITION_CONTEXTS
][
PARTITION_TYPES
];
...
...
vp10/encoder/rd.c
View file @
b49ac0b1
...
...
@@ -76,8 +76,9 @@ static void fill_mode_costs(VP10_COMP *cpi) {
vp10_intra_mode_tree
);
vp10_cost_tokens
(
cpi
->
mbmode_cost
,
fc
->
y_mode_prob
[
1
],
vp10_intra_mode_tree
);
vp10_cost_tokens
(
cpi
->
intra_uv_mode_cost
,
fc
->
uv_mode_prob
[
TM_PRED
],
vp10_intra_mode_tree
);
for
(
i
=
0
;
i
<
INTRA_MODES
;
++
i
)
vp10_cost_tokens
(
cpi
->
intra_uv_mode_cost
[
i
],
fc
->
uv_mode_prob
[
i
],
vp10_intra_mode_tree
);
for
(
i
=
0
;
i
<
SWITCHABLE_FILTER_CONTEXTS
;
++
i
)
vp10_cost_tokens
(
cpi
->
switchable_interp_costs
[
i
],
...
...
vp10/encoder/rdopt.c
View file @
b49ac0b1
...
...
@@ -2489,7 +2489,7 @@ static int rd_pick_ext_intra_sbuv(VP10_COMP *cpi, MACROBLOCK *x,
this_rate
=
this_rate_tokenonly
+
vp10_cost_bit
(
cpi
->
common
.
fc
->
ext_intra_probs
[
1
],
1
)
+
vp10_cost_bit
(
DR_EXT_INTRA_PROB
,
0
)
+
cpi
->
intra_uv_mode_cost
[
mbmi
->
uv_mode
]
+
cpi
->
intra_uv_mode_cost
[
mbmi
->
mode
][
mbmi
->
uv_mode
]
+
write_uniform_cost
(
FILTER_INTRA_MODES
,
mode
);
this_rd
=
RDCOST
(
x
->
rdmult
,
x
->
rddiv
,
this_rate
,
this_distortion
);
if
(
this_rd
<
*
best_rd
)
{
...
...
@@ -2533,7 +2533,7 @@ static int rd_pick_ext_intra_sbuv(VP10_COMP *cpi, MACROBLOCK *x,
this_rate
=
this_rate_tokenonly
+
vp10_cost_bit
(
cpi
->
common
.
fc
->
ext_intra_probs
[
1
],
1
)
+
(
DR_ONLY
?
0
:
vp10_cost_bit
(
DR_EXT_INTRA_PROB
,
1
))
+
cpi
->
intra_uv_mode_cost
[
mbmi
->
uv_mode
]
+
cpi
->
intra_uv_mode_cost
[
mbmi
->
mode
][
mbmi
->
uv_mode
]
+
write_uniform_cost
(
EXT_INTRA_ANGLES
,
angle
);
this_rd
=
RDCOST
(
x
->
rdmult
,
x
->
rddiv
,
this_rate
,
this_distortion
);
if
(
this_rd
<
*
best_rd
)
{
...
...
@@ -2573,7 +2573,7 @@ static int rd_pick_ext_intra_sbuv(VP10_COMP *cpi, MACROBLOCK *x,
this_rate
=
this_rate_tokenonly
+
vp10_cost_bit
(
cpi
->
common
.
fc
->
ext_intra_probs
[
1
],
1
)
+
(
DR_ONLY
?
0
:
vp10_cost_bit
(
DR_EXT_INTRA_PROB
,
1
))
+
cpi
->
intra_uv_mode_cost
[
mbmi
->
uv_mode
]
+
cpi
->
intra_uv_mode_cost
[
mbmi
->
mode
][
mbmi
->
uv_mode
]
+
write_uniform_cost
(
EXT_INTRA_ANGLES
,
angle
);
this_rd
=
RDCOST
(
x
->
rdmult
,
x
->
rddiv
,
this_rate
,
this_distortion
);
if
(
this_rd
<
*
best_rd
)
{
...
...
@@ -2635,7 +2635,8 @@ static int64_t rd_pick_intra_sbuv_mode(VP10_COMP *cpi, MACROBLOCK *x,
if
(
!
super_block_uvrd
(
cpi
,
x
,
&
this_rate_tokenonly
,
&
this_distortion
,
&
s
,
&
this_sse
,
bsize
,
best_rd
))
continue
;
this_rate
=
this_rate_tokenonly
+
cpi
->
intra_uv_mode_cost
[
mode
];
this_rate
=
this_rate_tokenonly
+
cpi
->
intra_uv_mode_cost
[
xd
->
mi
[
0
]
->
mbmi
.
mode
][
mode
];
#if CONFIG_EXT_INTRA
if
(
mode
==
DC_PRED
)
this_rate
+=
vp10_cost_bit
(
cpi
->
common
.
fc
->
ext_intra_probs
[
1
],
0
);
...
...
@@ -2683,7 +2684,8 @@ static int64_t rd_sbuv_dcpred(const VP10_COMP *cpi, MACROBLOCK *x,
memset
(
x
->
skip_txfm
,
SKIP_TXFM_NONE
,
sizeof
(
x
->
skip_txfm
));
super_block_uvrd
(
cpi
,
x
,
rate_tokenonly
,
distortion
,
skippable
,
&
unused
,
bsize
,
INT64_MAX
);
*
rate
=
*
rate_tokenonly
+
cpi
->
intra_uv_mode_cost
[
DC_PRED
];
*
rate
=
*
rate_tokenonly
+
cpi
->
intra_uv_mode_cost
[
x
->
e_mbd
.
mi
[
0
]
->
mbmi
.
mode
][
DC_PRED
];
return
RDCOST
(
x
->
rdmult
,
x
->
rddiv
,
*
rate
,
*
distortion
);
}
...
...
vp9/common/vp9_rtcd_defs.pl
View file @
b49ac0b1
...
...
@@ -312,7 +312,7 @@ $vp9_full_search_sad_sse3=vp9_full_search_sadx3;
$vp9_full_search_sad_sse4_1
=
vp9_full_search_sadx8
;
add_proto
qw/int vp9_diamond_search_sad/
,
"
const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv
";
specialize
qw/vp9_diamond_search_sad
avx
/
;
specialize
qw/vp9_diamond_search_sad/
;
add_proto
qw/int vp9_full_range_search/
,
"
const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv
";
specialize
qw/vp9_full_range_search/
;
...
...
vp9/encoder/vp9_encoder.c
View file @
b49ac0b1
...
...
@@ -1570,30 +1570,7 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
#endif
#define log2f(x) (log (x) / (float) M_LOG2_E)
/***********************************************************************
* Read before modifying 'cal_nmvjointsadcost' or 'cal_nmvsadcosts' *
***********************************************************************
* The following 2 functions ('cal_nmvjointsadcost' and *
* 'cal_nmvsadcosts') are used to calculate cost lookup tables *
* used by 'vp9_diamond_search_sad'. The C implementation of the *
* function is generic, but the AVX intrinsics optimised version *
* relies on the following properties of the computed tables: *
* For cal_nmvjointsadcost: *
* - mvjointsadcost[1] == mvjointsadcost[2] == mvjointsadcost[3] *
* For cal_nmvsadcosts: *
* - For all i: mvsadcost[0][i] == mvsadcost[1][i] *
* (Equal costs for both components) *
* - For all i: mvsadcost[0][i] == mvsadcost[0][-i] *
* (Cost function is even) *
* If these do not hold, then the AVX optimised version of the *
* 'vp9_diamond_search_sad' function cannot be used as it is, in which *
* case you can revert to using the C function instead. *
***********************************************************************/
static
void
cal_nmvjointsadcost
(
int
*
mvjointsadcost
)
{
/*********************************************************************
* Warning: Read the comments above before modifying this function *
*********************************************************************/
mvjointsadcost
[
0
]
=
600
;
mvjointsadcost
[
1
]
=
300
;
mvjointsadcost
[
2
]
=
300
;
...
...
@@ -1601,9 +1578,6 @@ static void cal_nmvjointsadcost(int *mvjointsadcost) {
}
static
void
cal_nmvsadcosts
(
int
*
mvsadcost
[
2
])
{
/*********************************************************************
* Warning: Read the comments above before modifying this function *
*********************************************************************/
int
i
=
1
;
mvsadcost
[
0
][
0
]
=
0
;
...
...
@@ -1765,10 +1739,6 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
cpi
->
first_time_stamp_ever
=
INT64_MAX
;
/*********************************************************************
* Warning: Read the comments around 'cal_nmvjointsadcost' and *
* 'cal_nmvsadcosts' before modifying how these tables are computed. *
*********************************************************************/
cal_nmvjointsadcost
(
cpi
->
td
.
mb
.
nmvjointsadcost
);
cpi
->
td
.
mb
.
nmvcost
[
0
]
=
&
cpi
->
nmvcosts
[
0
][
MV_MAX
];
cpi
->
td
.
mb
.
nmvcost
[
1
]
=
&
cpi
->
nmvcosts
[
1
][
MV_MAX
];
...
...
vp9/encoder/vp9_encoder.h
View file @
b49ac0b1
...
...
@@ -470,7 +470,7 @@ typedef struct VP9_COMP {
int
mbmode_cost
[
INTRA_MODES
];
unsigned
int
inter_mode_cost
[
INTER_MODE_CONTEXTS
][
INTER_MODES
];
int
intra_uv_mode_cost
[
FRAME_TYPES
][
INTRA_MODES
];
int
intra_uv_mode_cost
[
FRAME_TYPES
][
INTRA_MODES
]
[
INTRA_MODES
]
;
int
y_mode_costs
[
INTRA_MODES
][
INTRA_MODES
][
INTRA_MODES
];
int
switchable_interp_costs
[
SWITCHABLE_FILTER_CONTEXTS
][
SWITCHABLE_FILTERS
];
int
partition_cost
[
PARTITION_CONTEXTS
][
PARTITION_TYPES
];
...
...
vp9/encoder/vp9_mcomp.c
View file @
b49ac0b1
...
...
@@ -101,8 +101,11 @@ static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
}
void
vp9_init_dsmotion_compensation
(
search_site_config
*
cfg
,
int
stride
)
{
int
len
;
int
ss_count
=
0
;
int
len
,
ss_count
=
1
;
cfg
->
ss_mv
[
0
].
col
=
0
;
cfg
->
ss_mv
[
0
].
row
=
0
;
cfg
->
ss_os
[
0
]
=
0
;
for
(
len
=
MAX_FIRST_STEP
;
len
>
0
;
len
/=
2
)
{
// Generate offsets for 4 search sites per step.
...
...
@@ -114,13 +117,16 @@ void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) {
}
}
cfg
->
ss_count
=
ss_count
;
cfg
->
searches_per_step
=
4
;
cfg
->
total_steps
=
ss_count
/
cfg
->
searches_per_step
;
}
void
vp9_init3smotion_compensation
(
search_site_config
*
cfg
,
int
stride
)
{
int
len
;
int
ss_count
=
0
;
int
len
,
ss_count
=
1
;
cfg
->
ss_mv
[
0
].
col
=
0
;
cfg
->
ss_mv
[
0
].
row
=
0
;
cfg
->
ss_os
[
0
]
=
0
;
for
(
len
=
MAX_FIRST_STEP
;
len
>
0
;
len
/=
2
)
{
// Generate offsets for 8 search sites per step.
...
...
@@ -135,8 +141,8 @@ void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
}
}
cfg
->
ss_count
=
ss_count
;
cfg
->
searches_per_step
=
8
;
cfg
->
total_steps
=
ss_count
/
cfg
->
searches_per_step
;
}
/*
...
...
@@ -1606,8 +1612,8 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
const
uint8_t
*
best_address
;
unsigned
int
bestsad
=
INT_MAX
;
int
best_site
=
-
1
;
int
last_site
=
-
1
;
int
best_site
=
0
;
int
last_site
=
0
;
int
ref_row
;
int
ref_col
;
...
...
@@ -1620,7 +1626,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
// const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
const
MV
*
ss_mv
=
&
cfg
->
ss_mv
[
search_param
*
cfg
->
searches_per_step
];
const
intptr_t
*
ss_os
=
&
cfg
->
ss_os
[
search_param
*
cfg
->
searches_per_step
];
const
int
tot_steps
=
(
cfg
->
total_steps
)
-
search_param
;
const
int
tot_steps
=
(
cfg
->
ss_count
/
cfg
->
searches_per_step
)
-
search_param
;
const
MV
fcenter_mv
=
{
center_mv
->
row
>>
3
,
center_mv
->
col
>>
3
};
clamp_mv
(
ref_mv
,
x
->
mv_col_min
,
x
->
mv_col_max
,
x
->
mv_row_min
,
x
->
mv_row_max
);
...
...
@@ -1638,7 +1644,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
bestsad
=
fn_ptr
->
sdf
(
what
,
what_stride
,
in_what
,
in_what_stride
)
+
mvsad_err_cost
(
x
,
best_mv
,
&
fcenter_mv
,
sad_per_bit
);
i
=
0
;
i
=
1
;
for
(
step
=
0
;
step
<
tot_steps
;
step
++
)
{
int
all_in
=
1
,
t
;
...
...
vp9/encoder/vp9_mcomp.h
View file @
b49ac0b1
...
...
@@ -33,10 +33,10 @@ extern "C" {
typedef
struct
search_site_config
{
// motion search sites
MV
ss_mv
[
8
*
MAX_MVSEARCH_STEPS
];
// Motion vector
intptr_t
ss_os
[
8
*
MAX_MVSEARCH_STEPS
];
// Offset
MV
ss_mv
[
8
*
MAX_MVSEARCH_STEPS
+
1
];
// Motion vector
intptr_t
ss_os
[
8
*
MAX_MVSEARCH_STEPS
+
1
];
// Offset
int
ss_count
;
int
searches_per_step
;
int
total_steps
;
}
search_site_config
;
void
vp9_init_dsmotion_compensation
(
search_site_config
*
cfg
,
int
stride
);
...
...
vp9/encoder/vp9_noise_estimate.c
View file @
b49ac0b1
...
...
@@ -53,6 +53,7 @@ int enable_noise_estimation(VP9_COMP *const cpi) {
cpi
->
oxcf
.
aq_mode
==
CYCLIC_REFRESH_AQ
&&
cpi
->
oxcf
.
speed
>=
5
&&
cpi
->
resize_state
==
ORIG
&&
cpi
->
resize_pending
==
0
&&
!
cpi
->
use_svc
&&
cpi
->
oxcf
.
content
!=
VP9E_CONTENT_SCREEN
&&
cpi
->
common
.
width
>=
640
&&
...
...
vp9/encoder/vp9_rd.c
View file @
b49ac0b1
...
...
@@ -76,10 +76,12 @@ static void fill_mode_costs(VP9_COMP *cpi) {
vp9_intra_mode_tree
);
vp9_cost_tokens
(
cpi
->
mbmode_cost
,
fc
->
y_mode_prob
[
1
],
vp9_intra_mode_tree
);
vp9_cost_tokens
(
cpi
->
intra_uv_mode_cost
[
KEY_FRAME
],
vp9_kf_uv_mode_prob
[
TM_PRED
],
vp9_intra_mode_tree
);
vp9_cost_tokens
(
cpi
->
intra_uv_mode_cost
[
INTER_FRAME
],
fc
->
uv_mode_prob
[
TM_PRED
],
vp9_intra_mode_tree
);
for
(
i
=
0
;
i
<
INTRA_MODES
;
++
i
)
{
vp9_cost_tokens
(
cpi
->
intra_uv_mode_cost
[
KEY_FRAME
][
i
],
vp9_kf_uv_mode_prob
[
i
],
vp9_intra_mode_tree
);
vp9_cost_tokens
(
cpi
->
intra_uv_mode_cost
[
INTER_FRAME
][
i
],
fc
->
uv_mode_prob
[
i
],
vp9_intra_mode_tree
);
}
for
(
i
=
0
;
i
<
SWITCHABLE_FILTER_CONTEXTS
;
++
i
)
vp9_cost_tokens
(
cpi
->
switchable_interp_costs
[
i
],
...
...
vp9/encoder/vp9_rdopt.c
View file @
b49ac0b1
...
...
@@ -1199,7 +1199,8 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
&
this_distortion
,
&
s
,
&
this_sse
,
bsize
,
best_rd
))
continue
;
this_rate
=
this_rate_tokenonly
+
cpi
->
intra_uv_mode_cost
[
cpi
->
common
.
frame_type
][
mode
];
cpi
->
intra_uv_mode_cost
[
cpi
->
common
.
frame_type
]
[
xd
->
mi
[
0
]
->
mbmi
.
mode
][
mode
];
this_rd
=
RDCOST
(
x
->
rdmult
,
x
->
rddiv
,
this_rate
,
this_distortion
);
if
(
this_rd
<
best_rd
)
{
...
...
@@ -1229,7 +1230,9 @@ static int64_t rd_sbuv_dcpred(const VP9_COMP *cpi, MACROBLOCK *x,
memset
(
x
->
skip_txfm
,
SKIP_TXFM_NONE
,
sizeof
(
x
->
skip_txfm
));
super_block_uvrd
(
cpi
,
x
,
rate_tokenonly
,
distortion
,
skippable
,
&
unused
,
bsize
,
INT64_MAX
);
*
rate
=
*
rate_tokenonly
+
cpi
->
intra_uv_mode_cost
[
cm
->
frame_type
][
DC_PRED
];
*
rate
=
*
rate_tokenonly
+
cpi
->
intra_uv_mode_cost
[
cm
->
frame_type
]
[
x
->
e_mbd
.
mi
[
0
]
->
mbmi
.
mode
][
DC_PRED
];
return
RDCOST
(
x
->
rdmult
,
x
->
rddiv
,
*
rate
,
*
distortion
);
}
...
...
vp9/encoder/x86/vp9_diamond_search_sad_avx.c
deleted
100644 → 0
View file @
bc54f9dc
/*
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <emmintrin.h>
#include <smmintrin.h>
#include "vpx_dsp/vpx_dsp_common.h"
#include "vp9/encoder/vp9_encoder.h"
#include "vpx_ports/mem.h"
#ifdef __GNUC__
# define __likely__(v) __builtin_expect(v, 1)
# define __unlikely__(v) __builtin_expect(v, 0)
#else
# define __likely__(v) (v)
# define __unlikely__(v) (v)
#endif
static INLINE MV_JOINT_TYPE get_mv_joint(const int_mv mv) {
  // Reduced form of the C implementation: it is only valid because
  // x->nmvjointsadcost[1], x->nmvjointsadcost[2] and x->nmvjointsadcost[3]
  // are all equal, so every non-zero vector can share joint index 1.
  if (mv.as_int == 0) {
    return 0;
  }
  return 1;
}
static INLINE int mv_cost(const int_mv mv, const int *joint_cost,
                          int *const comp_cost[2]) {
  // Sum of three table lookups: the joint cost selected by get_mv_joint(),
  // the row cost from comp_cost[0] and the column cost from comp_cost[1].
  const int joint_part = joint_cost[get_mv_joint(mv)];
  const int row_part = comp_cost[0][mv.as_mv.row];
  const int col_part = comp_cost[1][mv.as_mv.col];
  return joint_part + row_part + col_part;
}
static int mvsad_err_cost(const MACROBLOCK *x, const int_mv mv, const MV *ref,
                          int error_per_bit) {
  // Component-wise difference between the candidate vector and the reference.
  const int_mv diff = {
    .as_mv = { mv.as_mv.row - ref->row, mv.as_mv.col - ref->col }
  };
  // Look up the cost of that difference in the SAD cost tables held by x,
  // scale it by error_per_bit, then round off the low 8 bits.
  const int scaled_cost =
      mv_cost(diff, x->nmvjointsadcost, x->nmvsadcost) * error_per_bit;
  return ROUND_POWER_OF_TWO(scaled_cost, 8);
}
/*****************************************************************************
* This function utilises 3 properties of the cost function lookup tables, *
* constructed using 'cal_nmvjointsadcost' and 'cal_nmvsadcosts' in *
* vp9_encoder.c. *
* For the joint cost: *
* - mvjointsadcost[1] == mvjointsadcost[2] == mvjointsadcost[3] *
* For the component costs: *
* - For all i: mvsadcost[0][i] == mvsadcost[1][i] *
* (Equal costs for both components) *
* - For all i: mvsadcost[0][i] == mvsadcost[0][-i] *
* (Cost function is even) *
* If these do not hold, then this function cannot be used without *
* modification, in which case you can revert to using the C implementation, *
* which does not rely on these properties. *
*****************************************************************************/
int
vp9_diamond_search_sad_avx
(
const
MACROBLOCK
*
x
,
const
search_site_config
*
cfg
,
MV
*
ref_mv
,
MV
*
best_mv
,
int
search_param
,
int
sad_per_bit
,
int
*
num00
,
const
vp9_variance_fn_ptr_t
*
fn_ptr
,
const
MV
*
center_mv
)
{
const
int_mv
maxmv
=
{
.
as_mv
=
{
x
->
mv_row_max
,
x
->
mv_col_max
}
};
const
__m128i
v_max_mv_w
=
_mm_set1_epi32
(
maxmv
.
as_int
);
const
int_mv
minmv
=
{
.
as_mv
=
{
x
->
mv_row_min
,
x
->
mv_col_min
}
};
const
__m128i
v_min_mv_w
=
_mm_set1_epi32
(
minmv
.
as_int
);
const
__m128i
v_spb_d
=
_mm_set1_epi32
(
sad_per_bit
);
const
__m128i
v_joint_cost_0_d
=
_mm_set1_epi32
(
x
->
nmvjointsadcost
[
0
]);
const
__m128i
v_joint_cost_1_d
=
_mm_set1_epi32
(
x
->
nmvjointsadcost
[
1
]);
// search_param determines the length of the initial step and hence the number
// of iterations.
// 0 = initial step (MAX_FIRST_STEP) pel
// 1 = (MAX_FIRST_STEP/2) pel,
// 2 = (MAX_FIRST_STEP/4) pel...
const
MV
*
ss_mv
=
&
cfg
->
ss_mv
[
cfg
->
searches_per_step
*
search_param
];
const
intptr_t
*
ss_os
=
&
cfg
->
ss_os
[
cfg
->
searches_per_step
*
search_param
];
const
int
tot_steps
=
cfg
->
total_steps
-
search_param
;
const
int_mv
fcenter_mv
=
{
.
as_mv
=
{
center_mv
->
row
>>
3
,
center_mv
->
col
>>
3
}
};
const
__m128i
vfcmv
=
_mm_set1_epi32
(
fcenter_mv
.
as_int
);
const
int
ref_row
=
clamp
(
ref_mv
->
row
,
minmv
.
as_mv
.
row
,
maxmv
.
as_mv
.
row
);
const
int
ref_col
=
clamp
(
ref_mv
->
col
,
minmv
.
as_mv
.
col
,
maxmv
.
as_mv
.
col
);
int_mv
bmv
=
{
.
as_mv
=
{
ref_row
,
ref_col
}
};
int_mv
new_bmv
=
bmv
;
__m128i
v_bmv_w
=
_mm_set1_epi32
(
bmv
.
as_int
);
const
int
what_stride
=
x
->
plane
[
0
].
src
.
stride
;
const
uint8_t
*
const
what
=
x
->
plane
[
0
].
src
.
buf
;
const
int
in_what_stride
=
x
->
e_mbd
.
plane
[
0
].
pre
[
0
].
stride
;
const
uint8_t
*
const
in_what
=
x
->
e_mbd
.
plane
[
0
].
pre
[
0
].
buf
+
ref_row
*
in_what_stride
+
ref_col
;
// Work out the start point for the search
const
uint8_t
*
best_address
=
in_what
;
const
uint8_t
*
new_best_address
=
best_address
;
#if ARCH_X86_64
__m128i
v_ba_q
=
_mm_set1_epi64x
((
intptr_t
)
best_address
);
#else
__m128i
v_ba_d
=
_mm_set1_epi32
((
intptr_t
)
best_address
);
#endif
unsigned
int
best_sad
;
int
i
;
int
j
;
int
step
;
// Check the starting position
best_sad
=
fn_ptr
->
sdf
(
what
,
what_stride
,
in_what
,
in_what_stride
);
best_sad
+=
mvsad_err_cost
(
x
,
bmv
,
&
fcenter_mv
.
as_mv
,
sad_per_bit
);
*
num00
=
0
;
for
(
i
=
0
,
step
=
0
;
step
<
tot_steps
;
step
++
)
{
for
(
j
=
0
;
j
<
cfg
->
searches_per_step
;
j
+=
4
,
i
+=
4
)
{
__m128i
v_sad_d
;
__m128i
v_cost_d
;
__m128i
v_outside_d
;
__m128i
v_inside_d
;
__m128i
v_diff_mv_w
;
#if ARCH_X86_64
__m128i
v_blocka
[
2
];
#else
__m128i
v_blocka
[
1
];
#endif
// Compute the candidate motion vectors
const
__m128i
v_ss_mv_w
=
_mm_load_si128
((
const
__m128i
*
)
&
ss_mv
[
i
]);
const
__m128i
v_these_mv_w
=
_mm_add_epi16
(
v_bmv_w
,
v_ss_mv_w
);
// Clamp them to the search bounds
__m128i
v_these_mv_clamp_w
=
v_these_mv_w
;
v_these_mv_clamp_w
=
_mm_min_epi16
(
v_these_mv_clamp_w
,
v_max_mv_w
);
v_these_mv_clamp_w
=
_mm_max_epi16
(
v_these_mv_clamp_w
,
v_min_mv_w
);
// The ones that did not change are inside the search area
v_inside_d
=
_mm_cmpeq_epi32
(
v_these_mv_clamp_w
,
v_these_mv_w
);
// If none of them are inside, then move on
if
(
__likely__
(
_mm_test_all_zeros
(
v_inside_d
,
v_inside_d
)))
{
continue
;
}
// The inverse mask indicates which of the MVs are outside
v_outside_d
=
_mm_xor_si128
(
v_inside_d
,
_mm_set1_epi8
(
0xff
));
// Shift right to keep the sign bit clear, we will use this later
// to set the cost to the maximum value.
v_outside_d
=
_mm_srli_epi32
(
v_outside_d
,
1
);
// Compute the difference MV
v_diff_mv_w
=
_mm_sub_epi16
(
v_these_mv_clamp_w
,
vfcmv
);
// We utilise the fact that the cost function is even, and use the
// absolute difference. This allows us to use unsigned indexes later
// and reduces cache pressure somewhat as only a half of the table
// is ever referenced.
v_diff_mv_w
=
_mm_abs_epi16
(
v_diff_mv_w
);
// Compute the SIMD pointer offsets.
{
#if ARCH_X86_64 // sizeof(intptr_t) == 8
// Load the offsets (could use _mm_maskload_ps here, instead of the
// extra 'and' but it's slower that way)
__m128i
v_bo10_q
=
_mm_load_si128
((
const
__m128i
*
)
&
ss_os
[
i
+
0
]);
__m128i
v_bo32_q
=
_mm_load_si128
((
const
__m128i
*
)
&
ss_os
[
i
+
2
]);
// Set the ones falling outside to zero
v_bo10_q
=
_mm_and_si128
(
v_bo10_q
,
_mm_cvtepi32_epi64
(
v_inside_d
));
v_bo32_q
=
_mm_and_si128
(
v_bo32_q
,
_mm_unpackhi_epi32
(
v_inside_d
,
v_inside_d
));
// Compute the candidate addresses
v_blocka
[
0
]
=
_mm_add_epi64
(
v_ba_q
,
v_bo10_q
);
v_blocka
[
1
]
=
_mm_add_epi64
(
v_ba_q
,
v_bo32_q
);
#else // ARCH_X86 // sizeof(intptr_t) == 4
__m128i
v_bo_d
=
_mm_load_si128
((
const
__m128i
*
)
&
ss_os
[
i
]);
v_bo_d
=
_mm_and_si128
(
v_bo_d
,
v_inside_d
);
v_blocka
[
0
]
=
_mm_add_epi32
(
v_ba_d
,
v_bo_d
);
#endif
}
fn_ptr
->
sdx4df
(
what
,
what_stride
,
(
const
uint8_t
**
)
&
v_blocka
[
0
],
in_what_stride
,
(
uint32_t
*
)
&
v_sad_d
);
// Look up the component cost of the residual motion vector
{
const
int32_t
row0
=
_mm_extract_epi16
(
v_diff_mv_w
,
0
);
const
int32_t
col0
=
_mm_extract_epi16
(
v_diff_mv_w
,
1
);
const
int32_t
row1
=
_mm_extract_epi16
(
v_diff_mv_w
,
2
);
const
int32_t
col1
=
_mm_extract_epi16
(
v_diff_mv_w
,
3
);
const
int32_t
row2
=
_mm_extract_epi16
(
v_diff_mv_w
,
4
);
const
int32_t
col2
=
_mm_extract_epi16
(
v_diff_mv_w
,
5
);
const
int32_t
row3
=
_mm_extract_epi16
(
v_diff_mv_w
,
6
);
const
int32_t
col3
=
_mm_extract_epi16
(
v_diff_mv_w
,
7
);
// Note: This is a use case for vpgather in AVX2
const
uint32_t
cost0
=
x
->
nmvsadcost
[
0
][
row0
]
+
x
->
nmvsadcost
[
0
][
col0
];
const
uint32_t
cost1
=
x
->
nmvsadcost
[
0
][
row1
]
+
x
->
nmvsadcost
[
0
][
col1
];
const
uint32_t
cost2
=
x
->
nmvsadcost
[
0
][
row2
]
+
x
->
nmvsadcost
[
0
][
col2
];
const
uint32_t
cost3
=
x
->
nmvsadcost
[
0
][
row3
]
+
x
->
nmvsadcost
[
0
][
col3
];
__m128i
v_cost_10_d
;
__m128i
v_cost_32_d
;
v_cost_10_d
=
_mm_cvtsi32_si128
(
cost0
);
v_cost_10_d
=
_mm_insert_epi32
(
v_cost_10_d
,
cost1
,
1
);
v_cost_32_d
=
_mm_cvtsi32_si128
(
cost2
);
v_cost_32_d
=
_mm_insert_epi32
(
v_cost_32_d
,
cost3
,
1
);
v_cost_d
=
_mm_unpacklo_epi64
(
v_cost_10_d
,
v_cost_32_d
);
}
// Now add in the joint cost
{
const
__m128i
v_sel_d
=
_mm_cmpeq_epi32
(
v_diff_mv_w
,
_mm_setzero_si128
());
const
__m128i
v_joint_cost_d
=
_mm_blendv_epi8
(
v_joint_cost_1_d
,
v_joint_cost_0_d
,
v_sel_d
);
v_cost_d
=
_mm_add_epi32
(
v_cost_d
,
v_joint_cost_d
);
}
// Multiply by sad_per_bit
v_cost_d
=
_mm_mullo_epi32
(
v_cost_d
,
v_spb_d
);
// ROUND_POWER_OF_TWO(v_cost_d, 8)
v_cost_d
=
_mm_add_epi32
(
v_cost_d
,
_mm_set1_epi32
(
0x80
));
v_cost_d
=
_mm_srai_epi32
(
v_cost_d
,
8
);
// Add the cost to the sad
v_sad_d
=
_mm_add_epi32
(
v_sad_d
,
v_cost_d
);
// Make the motion vectors outside the search area have max cost
// by or'ing in the comparison mask, this way the minimum search won't