Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Xiph.Org
aom-rav1e
Commits
f73feedb
Commit
f73feedb
authored
Dec 19, 2015
by
Yaowu Xu
Browse files
Merge branch 'master' into nextgenv2
parents
73301080
b597e3e1
Changes
4
Hide whitespace changes
Inline
Side-by-side
vp9/encoder/vp9_speed_features.c
View file @
f73feedb
...
...
@@ -394,7 +394,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
sf
->
intra_y_mode_bsize_mask
[
i
]
=
INTRA_DC_TM_H_V
;
}
else
{
for
(
i
=
0
;
i
<
BLOCK_SIZES
;
++
i
)
if
(
i
>
=
BLOCK_16X16
)
if
(
i
>
BLOCK_16X16
)
sf
->
intra_y_mode_bsize_mask
[
i
]
=
INTRA_DC
;
else
// Use H and V intra mode for block sizes <= 16X16.
...
...
vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm
View file @
f73feedb
...
...
@@ -79,20 +79,13 @@ SECTION .text
%macro INC_SRC_BY_SRC_STRIDE 0
%if ARCH_X86=1 && CONFIG_PIC=1
lea
srcq
,
[
srcq
+
src_stridemp
*
2
]
add
srcq
,
src_stridemp
add
srcq
,
src_stridemp
%else
lea
srcq
,
[
srcq
+
src_strideq
*
2
]
%endif
%endmacro
%macro INC_SRC_BY_SRC_2STRIDE 0
%if ARCH_X86=1 && CONFIG_PIC=1
lea
srcq
,
[
srcq
+
src_stridemp
*
4
]
%else
lea
srcq
,
[
srcq
+
src_strideq
*
4
]
%endif
%endmacro
%macro SUBPEL_VARIANCE 1-2 0
; W
%define bilin_filter_m bilin_filter_m_sse2
%define filter_idx_shift 5
...
...
@@ -984,8 +977,9 @@ SECTION .text
.x_other_y_other_loop:
movu
m2
,
[
srcq
]
movu
m4
,
[
srcq
+
2
]
movu
m3
,
[
srcq
+
src_strideq
*
2
]
movu
m5
,
[
srcq
+
src_strideq
*
2
+
2
]
INC_SRC_BY_SRC_STRIDE
movu
m3
,
[
srcq
]
movu
m5
,
[
srcq
+
2
]
pmullw
m2
,
filter_x_a
pmullw
m4
,
filter_x_b
paddw
m2
,
filter_rnd
...
...
@@ -1018,7 +1012,7 @@ SECTION .text
SUM_SSE
m0
,
m2
,
m4
,
m3
,
m6
,
m7
mova
m0
,
m5
INC_SRC_BY_SRC_
2
STRIDE
INC_SRC_BY_SRC_STRIDE
lea
ds
tq
,
[
ds
tq
+
ds
t_strideq
*
4
]
%if %2 == 1
; avg
add
secq
,
sec_str
...
...
vpx_dsp/x86/highbd_variance_sse2.c
View file @
f73feedb
...
...
@@ -243,13 +243,18 @@ unsigned int vpx_highbd_12_mse8x8_sse2(const uint8_t *src8, int src_stride,
}
#if CONFIG_USE_X86INC
// The 2 unused parameters are place holders for PIC enabled build.
// These definitions are for functions defined in
// highbd_subpel_variance_impl_sse2.asm
#define DECL(w, opt) \
int vpx_highbd_sub_pixel_variance##w##xh_##opt(const uint16_t *src, \
ptrdiff_t src_stride, \
int x_offset, int y_offset, \
const uint16_t *dst, \
ptrdiff_t dst_stride, \
int height, unsigned int *sse);
int height, \
unsigned int *sse, \
void *unused0, void *unused);
#define DECLS(opt1, opt2) \
DECL(8, opt1); \
DECL(16, opt1)
...
...
@@ -274,7 +279,7 @@ uint32_t vpx_highbd_8_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src8, \
int se = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src, src_stride, \
x_offset, y_offset, \
dst, dst_stride, h, \
&sse); \
&sse
, NULL, NULL
); \
if (w > wf) { \
unsigned int sse2; \
int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 16, \
...
...
@@ -282,19 +287,20 @@ uint32_t vpx_highbd_8_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src8, \
x_offset, y_offset, \
dst + 16, \
dst_stride, \
h, &sse2); \
h, &sse2, \
NULL, NULL); \
se += se2; \
sse += sse2; \
if (w > wf * 2) { \
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 32, src_stride, \
x_offset, y_offset, \
dst + 32, dst_stride, \
h, &sse2); \
h, &sse2
, NULL, NULL
); \
se += se2; \
sse += sse2; \
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
src + 48, src_stride, x_offset, y_offset, \
dst + 48, dst_stride, h, &sse2); \
dst + 48, dst_stride, h, &sse2
, NULL, NULL
); \
se += se2; \
sse += sse2; \
} \
...
...
@@ -312,7 +318,7 @@ uint32_t vpx_highbd_10_sub_pixel_variance##w##x##h##_##opt( \
int se = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src, src_stride, \
x_offset, y_offset, \
dst, dst_stride, \
h, &sse); \
h, &sse
, NULL, NULL
); \
if (w > wf) { \
uint32_t sse2; \
int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 16, \
...
...
@@ -320,20 +326,21 @@ uint32_t vpx_highbd_10_sub_pixel_variance##w##x##h##_##opt( \
x_offset, y_offset, \
dst + 16, \
dst_stride, \
h, &sse2); \
h, &sse2, \
NULL, NULL); \
se += se2; \
sse += sse2; \
if (w > wf * 2) { \
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 32, src_stride, \
x_offset, y_offset, \
dst + 32, dst_stride, \
h, &sse2); \
h, &sse2
, NULL, NULL
); \
se += se2; \
sse += sse2; \
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 48, src_stride, \
x_offset, y_offset, \
dst + 48, dst_stride, \
h, &sse2); \
h, &sse2
, NULL, NULL
); \
se += se2; \
sse += sse2; \
} \
...
...
@@ -359,27 +366,27 @@ uint32_t vpx_highbd_12_sub_pixel_variance##w##x##h##_##opt( \
int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
src + (start_row * src_stride), src_stride, \
x_offset, y_offset, dst + (start_row * dst_stride), \
dst_stride, height, &sse2); \
dst_stride, height, &sse2
, NULL, NULL
); \
se += se2; \
long_sse += sse2; \
if (w > wf) { \
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
src + 16 + (start_row * src_stride), src_stride, \
x_offset, y_offset, dst + 16 + (start_row * dst_stride), \
dst_stride, height, &sse2); \
dst_stride, height, &sse2
, NULL, NULL
); \
se += se2; \
long_sse += sse2; \
if (w > wf * 2) { \
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
src + 32 + (start_row * src_stride), src_stride, \
x_offset, y_offset, dst + 32 + (start_row * dst_stride), \
dst_stride, height, &sse2); \
dst_stride, height, &sse2
, NULL, NULL
); \
se += se2; \
long_sse += sse2; \
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
src + 48 + (start_row * src_stride), src_stride, \
x_offset, y_offset, dst + 48 + (start_row * dst_stride), \
dst_stride, height, &sse2); \
dst_stride, height, &sse2
, NULL, NULL
); \
se += se2; \
long_sse += sse2; \
}\
...
...
@@ -410,6 +417,7 @@ FNS(sse2, sse);
#undef FNS
#undef FN
// The 2 unused parameters are place holders for PIC enabled build.
#define DECL(w, opt) \
int vpx_highbd_sub_pixel_avg_variance##w##xh_##opt(const uint16_t *src, \
ptrdiff_t src_stride, \
...
...
@@ -419,7 +427,8 @@ int vpx_highbd_sub_pixel_avg_variance##w##xh_##opt(const uint16_t *src, \
const uint16_t *sec, \
ptrdiff_t sec_stride, \
int height, \
unsigned int *sse);
unsigned int *sse, \
void *unused0, void *unused);
#define DECLS(opt1) \
DECL(16, opt1) \
DECL(8, opt1)
...
...
@@ -439,23 +448,23 @@ uint32_t vpx_highbd_8_sub_pixel_avg_variance##w##x##h##_##opt( \
uint16_t *sec = CONVERT_TO_SHORTPTR(sec8); \
int se = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
src, src_stride, x_offset, \
y_offset, dst, dst_stride, sec, w, h, &sse); \
y_offset, dst, dst_stride, sec, w, h, &sse
, NULL, NULL
); \
if (w > wf) { \
uint32_t sse2; \
int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
src + 16, src_stride, x_offset, y_offset, \
dst + 16, dst_stride, sec + 16, w, h, &sse2); \
dst + 16, dst_stride, sec + 16, w, h, &sse2
, NULL, NULL
); \
se += se2; \
sse += sse2; \
if (w > wf * 2) { \
se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
src + 32, src_stride, x_offset, y_offset, \
dst + 32, dst_stride, sec + 32, w, h, &sse2); \
dst + 32, dst_stride, sec + 32, w, h, &sse2
, NULL, NULL
); \
se += se2; \
sse += sse2; \
se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
src + 48, src_stride, x_offset, y_offset, \
dst + 48, dst_stride, sec + 48, w, h, &sse2); \
dst + 48, dst_stride, sec + 48, w, h, &sse2
, NULL, NULL
); \
se += se2; \
sse += sse2; \
} \
...
...
@@ -475,14 +484,15 @@ uint32_t vpx_highbd_10_sub_pixel_avg_variance##w##x##h##_##opt( \
int se = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
src, src_stride, x_offset, \
y_offset, dst, dst_stride, \
sec, w, h, &sse); \
sec, w, h, &sse
, NULL, NULL
); \
if (w > wf) { \
uint32_t sse2; \
int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
src + 16, src_stride, \
x_offset, y_offset, \
dst + 16, dst_stride, \
sec + 16, w, h, &sse2); \
sec + 16, w, h, &sse2, \
NULL, NULL); \
se += se2; \
sse += sse2; \
if (w > wf * 2) { \
...
...
@@ -490,14 +500,16 @@ uint32_t vpx_highbd_10_sub_pixel_avg_variance##w##x##h##_##opt( \
src + 32, src_stride, \
x_offset, y_offset, \
dst + 32, dst_stride, \
sec + 32, w, h, &sse2); \
sec + 32, w, h, &sse2, \
NULL, NULL); \
se += se2; \
sse += sse2; \
se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
src + 48, src_stride, \
x_offset, y_offset, \
dst + 48, dst_stride, \
sec + 48, w, h, &sse2); \
sec + 48, w, h, &sse2, \
NULL, NULL); \
se += se2; \
sse += sse2; \
} \
...
...
@@ -525,7 +537,7 @@ uint32_t vpx_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \
int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
src + (start_row * src_stride), src_stride, x_offset, \
y_offset, dst + (start_row * dst_stride), dst_stride, \
sec + (start_row * w), w, height, &sse2); \
sec + (start_row * w), w, height, &sse2
, NULL, NULL
); \
se += se2; \
long_sse += sse2; \
if (w > wf) { \
...
...
@@ -533,7 +545,7 @@ uint32_t vpx_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \
src + 16 + (start_row * src_stride), src_stride, \
x_offset, y_offset, \
dst + 16 + (start_row * dst_stride), dst_stride, \
sec + 16 + (start_row * w), w, height, &sse2); \
sec + 16 + (start_row * w), w, height, &sse2
, NULL, NULL
); \
se += se2; \
long_sse += sse2; \
if (w > wf * 2) { \
...
...
@@ -541,14 +553,14 @@ uint32_t vpx_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \
src + 32 + (start_row * src_stride), src_stride, \
x_offset, y_offset, \
dst + 32 + (start_row * dst_stride), dst_stride, \
sec + 32 + (start_row * w), w, height, &sse2); \
sec + 32 + (start_row * w), w, height, &sse2
, NULL, NULL
); \
se += se2; \
long_sse += sse2; \
se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
src + 48 + (start_row * src_stride), src_stride, \
x_offset, y_offset, \
dst + 48 + (start_row * dst_stride), dst_stride, \
sec + 48 + (start_row * w), w, height, &sse2); \
sec + 48 + (start_row * w), w, height, &sse2
, NULL, NULL
); \
se += se2; \
long_sse += sse2; \
} \
...
...
vpx_dsp/x86/sad_sse2.asm
View file @
f73feedb
...
...
@@ -17,7 +17,7 @@ SECTION .text
%if %3 == 5
cglobal
sad
%
1
x
%
2
,
4
,
%
3
,
5
,
src
,
src_stride
,
ref
,
ref_stride
,
n_rows
%else
; %3 == 7
cglobal
sad
%
1
x
%
2
,
4
,
%
3
,
5
,
src
,
src_stride
,
ref
,
ref_stride
,
\
cglobal
sad
%
1
x
%
2
,
4
,
%
3
,
6
,
src
,
src_stride
,
ref
,
ref_stride
,
\
src_stride3
,
ref_stride3
,
n_rows
%endif
; %3 == 5/7
%else
; avg
...
...
@@ -25,7 +25,7 @@ cglobal sad%1x%2, 4, %3, 5, src, src_stride, ref, ref_stride, \
cglobal
sad
%
1
x
%
2
_avg
,
5
,
1
+
%
3
,
5
,
src
,
src_stride
,
ref
,
ref_stride
,
\
second_pred
,
n_rows
%else
; %3 == 7
cglobal
sad
%
1
x
%
2
_avg
,
5
,
ARCH_X86_64
+
%
3
,
5
,
src
,
src_stride
,
\
cglobal
sad
%
1
x
%
2
_avg
,
5
,
ARCH_X86_64
+
%
3
,
6
,
src
,
src_stride
,
\
ref
,
ref_stride
,
\
second_pred
,
\
src_stride3
,
ref_stride3
...
...
@@ -244,9 +244,9 @@ SAD8XN 4, 1 ; sad8x4_avg_sse2
movd
m2
,
[
srcq
]
movd
m5
,
[
srcq
+
src_strideq
]
movd
m4
,
[
srcq
+
src_strideq
*
2
]
movd
m
6
,
[
srcq
+
src_stride3q
]
movd
m
3
,
[
srcq
+
src_stride3q
]
punpckldq
m2
,
m5
punpckldq
m4
,
m
6
punpckldq
m4
,
m
3
movlhps
m2
,
m4
psadbw
m1
,
m2
lea
refq
,
[
refq
+
ref_strideq
*
4
]
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment