Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
A
aom-rav1e
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Xiph.Org
aom-rav1e
Commits
989c6930
Commit
989c6930
authored
Feb 04, 2016
by
Scott LaVarnway
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Vidyo patch: Optimization for 1-to-2 downsampling and upsampling.
Change-Id: I9cc9780f506e025aea57485a9e21f0835faf173c
parent
27c13712
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
263 additions
and
18 deletions
+263
-18
vp9/common/vp9_rtcd_defs.pl
vp9/common/vp9_rtcd_defs.pl
+9
-0
vp9/encoder/vp9_encoder.c
vp9/encoder/vp9_encoder.c
+42
-18
vp9/encoder/x86/vp9_frame_scale_ssse3.c
vp9/encoder/x86/vp9_frame_scale_ssse3.c
+211
-0
vp9/vp9cx.mk
vp9/vp9cx.mk
+1
-0
No files found.
vp9/common/vp9_rtcd_defs.pl
View file @
989c6930
...
@@ -310,6 +310,15 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
...
@@ -310,6 +310,15 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
}
}
# End vp9_high encoder functions
# End vp9_high encoder functions
#
# frame based scale
#
if
(
vpx_config
("
CONFIG_VP9_HIGHBITDEPTH
")
eq
"
yes
")
{
}
else
{
add_proto
qw/void vp9_scale_and_extend_frame/
,
"
const struct yv12_buffer_config *src, struct yv12_buffer_config *dst
";
specialize
qw/vp9_scale_and_extend_frame ssse3/
;
}
}
}
# end encoder functions
# end encoder functions
1
;
1
;
vp9/encoder/vp9_encoder.c
View file @
989c6930
...
@@ -2612,10 +2612,6 @@ static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src,
...
@@ -2612,10 +2612,6 @@ static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src,
#if CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_VP9_HIGHBITDEPTH
static
void
scale_and_extend_frame
(
const
YV12_BUFFER_CONFIG
*
src
,
static
void
scale_and_extend_frame
(
const
YV12_BUFFER_CONFIG
*
src
,
YV12_BUFFER_CONFIG
*
dst
,
int
bd
)
{
YV12_BUFFER_CONFIG
*
dst
,
int
bd
)
{
#else
static
void
scale_and_extend_frame
(
const
YV12_BUFFER_CONFIG
*
src
,
YV12_BUFFER_CONFIG
*
dst
)
{
#endif // CONFIG_VP9_HIGHBITDEPTH
const
int
src_w
=
src
->
y_crop_width
;
const
int
src_w
=
src
->
y_crop_width
;
const
int
src_h
=
src
->
y_crop_height
;
const
int
src_h
=
src
->
y_crop_height
;
const
int
dst_w
=
dst
->
y_crop_width
;
const
int
dst_w
=
dst
->
y_crop_width
;
...
@@ -2627,19 +2623,18 @@ static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
...
@@ -2627,19 +2623,18 @@ static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
const
InterpKernel
*
const
kernel
=
vp9_filter_kernels
[
EIGHTTAP
];
const
InterpKernel
*
const
kernel
=
vp9_filter_kernels
[
EIGHTTAP
];
int
x
,
y
,
i
;
int
x
,
y
,
i
;
for
(
y
=
0
;
y
<
dst_h
;
y
+=
16
)
{
for
(
x
=
0
;
x
<
dst_w
;
x
+=
16
)
{
for
(
i
=
0
;
i
<
MAX_MB_PLANE
;
++
i
)
{
for
(
i
=
0
;
i
<
MAX_MB_PLANE
;
++
i
)
{
const
int
factor
=
(
i
==
0
||
i
==
3
?
1
:
2
);
const
int
factor
=
(
i
==
0
||
i
==
3
?
1
:
2
);
const
int
x_q4
=
x
*
(
16
/
factor
)
*
src_w
/
dst_w
;
const
int
y_q4
=
y
*
(
16
/
factor
)
*
src_h
/
dst_h
;
const
int
src_stride
=
src_strides
[
i
];
const
int
src_stride
=
src_strides
[
i
];
const
int
dst_stride
=
dst_strides
[
i
];
const
int
dst_stride
=
dst_strides
[
i
];
for
(
y
=
0
;
y
<
dst_h
;
y
+=
16
)
{
const
int
y_q4
=
y
*
(
16
/
factor
)
*
src_h
/
dst_h
;
for
(
x
=
0
;
x
<
dst_w
;
x
+=
16
)
{
const
int
x_q4
=
x
*
(
16
/
factor
)
*
src_w
/
dst_w
;
const
uint8_t
*
src_ptr
=
srcs
[
i
]
+
(
y
/
factor
)
*
src_h
/
dst_h
*
const
uint8_t
*
src_ptr
=
srcs
[
i
]
+
(
y
/
factor
)
*
src_h
/
dst_h
*
src_stride
+
(
x
/
factor
)
*
src_w
/
dst_w
;
src_stride
+
(
x
/
factor
)
*
src_w
/
dst_w
;
uint8_t
*
dst_ptr
=
dsts
[
i
]
+
(
y
/
factor
)
*
dst_stride
+
(
x
/
factor
);
uint8_t
*
dst_ptr
=
dsts
[
i
]
+
(
y
/
factor
)
*
dst_stride
+
(
x
/
factor
);
#if CONFIG_VP9_HIGHBITDEPTH
if
(
src
->
flags
&
YV12_FLAG_HIGHBITDEPTH
)
{
if
(
src
->
flags
&
YV12_FLAG_HIGHBITDEPTH
)
{
vpx_highbd_convolve8
(
src_ptr
,
src_stride
,
dst_ptr
,
dst_stride
,
vpx_highbd_convolve8
(
src_ptr
,
src_stride
,
dst_ptr
,
dst_stride
,
kernel
[
x_q4
&
0xf
],
16
*
src_w
/
dst_w
,
kernel
[
x_q4
&
0xf
],
16
*
src_w
/
dst_w
,
...
@@ -2651,18 +2646,49 @@ static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
...
@@ -2651,18 +2646,49 @@ static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
kernel
[
y_q4
&
0xf
],
16
*
src_h
/
dst_h
,
kernel
[
y_q4
&
0xf
],
16
*
src_h
/
dst_h
,
16
/
factor
,
16
/
factor
);
16
/
factor
,
16
/
factor
);
}
}
}
}
}
vpx_extend_frame_borders
(
dst
);
}
#else
#else
void
vp9_scale_and_extend_frame_c
(
const
YV12_BUFFER_CONFIG
*
src
,
YV12_BUFFER_CONFIG
*
dst
)
{
const
int
src_w
=
src
->
y_crop_width
;
const
int
src_h
=
src
->
y_crop_height
;
const
int
dst_w
=
dst
->
y_crop_width
;
const
int
dst_h
=
dst
->
y_crop_height
;
const
uint8_t
*
const
srcs
[
3
]
=
{
src
->
y_buffer
,
src
->
u_buffer
,
src
->
v_buffer
};
const
int
src_strides
[
3
]
=
{
src
->
y_stride
,
src
->
uv_stride
,
src
->
uv_stride
};
uint8_t
*
const
dsts
[
3
]
=
{
dst
->
y_buffer
,
dst
->
u_buffer
,
dst
->
v_buffer
};
const
int
dst_strides
[
3
]
=
{
dst
->
y_stride
,
dst
->
uv_stride
,
dst
->
uv_stride
};
const
InterpKernel
*
const
kernel
=
vp9_filter_kernels
[
EIGHTTAP
];
int
x
,
y
,
i
;
for
(
i
=
0
;
i
<
MAX_MB_PLANE
;
++
i
)
{
const
int
factor
=
(
i
==
0
||
i
==
3
?
1
:
2
);
const
int
src_stride
=
src_strides
[
i
];
const
int
dst_stride
=
dst_strides
[
i
];
for
(
y
=
0
;
y
<
dst_h
;
y
+=
16
)
{
const
int
y_q4
=
y
*
(
16
/
factor
)
*
src_h
/
dst_h
;
for
(
x
=
0
;
x
<
dst_w
;
x
+=
16
)
{
const
int
x_q4
=
x
*
(
16
/
factor
)
*
src_w
/
dst_w
;
const
uint8_t
*
src_ptr
=
srcs
[
i
]
+
(
y
/
factor
)
*
src_h
/
dst_h
*
src_stride
+
(
x
/
factor
)
*
src_w
/
dst_w
;
uint8_t
*
dst_ptr
=
dsts
[
i
]
+
(
y
/
factor
)
*
dst_stride
+
(
x
/
factor
);
vpx_scaled_2d
(
src_ptr
,
src_stride
,
dst_ptr
,
dst_stride
,
vpx_scaled_2d
(
src_ptr
,
src_stride
,
dst_ptr
,
dst_stride
,
kernel
[
x_q4
&
0xf
],
16
*
src_w
/
dst_w
,
kernel
[
x_q4
&
0xf
],
16
*
src_w
/
dst_w
,
kernel
[
y_q4
&
0xf
],
16
*
src_h
/
dst_h
,
kernel
[
y_q4
&
0xf
],
16
*
src_h
/
dst_h
,
16
/
factor
,
16
/
factor
);
16
/
factor
,
16
/
factor
);
#endif // CONFIG_VP9_HIGHBITDEPTH
}
}
}
}
}
}
vpx_extend_frame_borders
(
dst
);
vpx_extend_frame_borders
(
dst
);
}
}
#endif // CONFIG_VP9_HIGHBITDEPTH
static
int
scale_down
(
VP9_COMP
*
cpi
,
int
q
)
{
static
int
scale_down
(
VP9_COMP
*
cpi
,
int
q
)
{
RATE_CONTROL
*
const
rc
=
&
cpi
->
rc
;
RATE_CONTROL
*
const
rc
=
&
cpi
->
rc
;
...
@@ -2927,7 +2953,7 @@ void vp9_scale_references(VP9_COMP *cpi) {
...
@@ -2927,7 +2953,7 @@ void vp9_scale_references(VP9_COMP *cpi) {
cm
->
subsampling_x
,
cm
->
subsampling_y
,
cm
->
subsampling_x
,
cm
->
subsampling_y
,
VP9_ENC_BORDER_IN_PIXELS
,
cm
->
byte_alignment
,
VP9_ENC_BORDER_IN_PIXELS
,
cm
->
byte_alignment
,
NULL
,
NULL
,
NULL
);
NULL
,
NULL
,
NULL
);
scale_and_extend_frame
(
ref
,
&
new_fb_ptr
->
buf
);
vp9_
scale_and_extend_frame
(
ref
,
&
new_fb_ptr
->
buf
);
cpi
->
scaled_ref_idx
[
ref_frame
-
1
]
=
new_fb
;
cpi
->
scaled_ref_idx
[
ref_frame
-
1
]
=
new_fb
;
alloc_frame_mvs
(
cm
,
new_fb
);
alloc_frame_mvs
(
cm
,
new_fb
);
}
}
...
@@ -3290,7 +3316,6 @@ static void encode_without_recode_loop(VP9_COMP *cpi,
...
@@ -3290,7 +3316,6 @@ static void encode_without_recode_loop(VP9_COMP *cpi,
vpx_clear_system_state
();
vpx_clear_system_state
();
set_frame_size
(
cpi
);
set_frame_size
(
cpi
);
cpi
->
Source
=
vp9_scale_if_required
(
cm
,
cpi
->
Source
=
vp9_scale_if_required
(
cm
,
cpi
->
un_scaled_source
,
cpi
->
un_scaled_source
,
&
cpi
->
scaled_source
,
&
cpi
->
scaled_source
,
...
@@ -3308,7 +3333,6 @@ static void encode_without_recode_loop(VP9_COMP *cpi,
...
@@ -3308,7 +3333,6 @@ static void encode_without_recode_loop(VP9_COMP *cpi,
cpi
->
unscaled_last_source
,
cpi
->
unscaled_last_source
,
&
cpi
->
scaled_last_source
,
&
cpi
->
scaled_last_source
,
(
cpi
->
oxcf
.
pass
==
0
));
(
cpi
->
oxcf
.
pass
==
0
));
vp9_update_noise_estimate
(
cpi
);
vp9_update_noise_estimate
(
cpi
);
if
(
cpi
->
oxcf
.
pass
==
0
&&
if
(
cpi
->
oxcf
.
pass
==
0
&&
...
@@ -3722,7 +3746,7 @@ YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm,
...
@@ -3722,7 +3746,7 @@ YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm,
if
(
use_normative_scaler
&&
if
(
use_normative_scaler
&&
unscaled
->
y_width
<=
(
scaled
->
y_width
<<
1
)
&&
unscaled
->
y_width
<=
(
scaled
->
y_width
<<
1
)
&&
unscaled
->
y_height
<=
(
scaled
->
y_height
<<
1
))
unscaled
->
y_height
<=
(
scaled
->
y_height
<<
1
))
scale_and_extend_frame
(
unscaled
,
scaled
);
vp9_
scale_and_extend_frame
(
unscaled
,
scaled
);
else
else
scale_and_extend_frame_nonnormative
(
unscaled
,
scaled
);
scale_and_extend_frame_nonnormative
(
unscaled
,
scaled
);
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // CONFIG_VP9_HIGHBITDEPTH
...
...
vp9/encoder/x86/vp9_frame_scale_ssse3.c
0 → 100644
View file @
989c6930
/*
* Copyright (c) 2016 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#if defined(_MSC_VER) && _MSC_VER <= 1500
// Need to include math.h before calling tmmintrin.h/intrin.h
// in certain versions of MSVS.
#include <math.h>
#endif
#include <tmmintrin.h> // SSSE3
#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "./vpx_scale_rtcd.h"
#include "vpx_scale/yv12config.h"
extern
void
vp9_scale_and_extend_frame_c
(
const
YV12_BUFFER_CONFIG
*
src
,
YV12_BUFFER_CONFIG
*
dst
);
// Point-samples a plane 2:1 in both directions: output pixel (x, y) is the
// input pixel at (2 * x, 2 * y). No filtering is applied.
//
//   src, src_stride  input plane and its row pitch in bytes
//   dst, dst_stride  output plane and its row pitch in bytes
//   w, h             size of the OUTPUT plane in pixels
void downsample_2_to_1_ssse3(const uint8_t *src, ptrdiff_t src_stride,
                             uint8_t *dst, ptrdiff_t dst_stride, int w,
                             int h) {
  // Keeps the low (even-indexed) byte of every 16-bit lane.
  const __m128i even_bytes = _mm_set1_epi16(0x00FF);
  // Number of output columns that can be produced 16 at a time.
  const int simd_w = w & ~15;
  int row;

  for (row = 0; row < h; ++row) {
    int col = 0;

    // 32 input bytes -> 16 output bytes per iteration.
    while (col < simd_w) {
      const uint8_t *const in = src + col * 2;
      const __m128i lo =
          _mm_and_si128(_mm_loadu_si128((const __m128i *)(in + 0)), even_bytes);
      const __m128i hi =
          _mm_and_si128(_mm_loadu_si128((const __m128i *)(in + 16)), even_bytes);
      // Every lane is <= 255 after masking, so the unsigned saturating pack
      // just narrows the lanes back to bytes.
      _mm_storeu_si128((__m128i *)(dst + col), _mm_packus_epi16(lo, hi));
      col += 16;
    }

    // Scalar tail for the remaining (w % 16) columns.
    while (col < w) {
      dst[col] = src[col * 2];
      ++col;
    }

    src += src_stride * 2;  // skip every other input row
    dst += dst_stride;
  }
}
// Applies the fixed 8-tap kernel {-1, 6, -19, 78, 78, -19, 6, -1} to eight
// groups of samples at once.
//
// Each of a..h points to a vector whose low 8 bytes hold unsigned 8-bit
// samples for one tap position (a = first tap ... h = last tap). For each of
// the 8 byte positions the weighted sum is rounded (+64), shifted right by 7
// and saturated to 0..255. The 8 results are returned in the low 8 bytes of
// the result (and duplicated in the high 8 bytes).
static INLINE __m128i filter(const __m128i *const a, const __m128i *const b,
                             const __m128i *const c, const __m128i *const d,
                             const __m128i *const e, const __m128i *const f,
                             const __m128i *const g, const __m128i *const h) {
  // Coefficient pairs for _mm_maddubs_epi16, one pair per 16-bit lane: the
  // low byte weights the first interleaved sample, the high byte the second.
  const __m128i taps_outer = _mm_set1_epi16(0x06FF);  // bytes { -1, 6 }
  const __m128i taps_inner = _mm_set1_epi16(0x4EED);  // bytes { -19, 78 }
  const __m128i rounding = _mm_set1_epi16(64);

  // Interleave neighbouring taps so each 16-bit lane holds one sample pair.
  // The second half of the kernel mirrors the first, so f/e and h/g are
  // interleaved in swapped order to reuse the same coefficient vectors.
  const __m128i pair_ab = _mm_unpacklo_epi8(*a, *b);
  const __m128i pair_cd = _mm_unpacklo_epi8(*c, *d);
  const __m128i pair_fe = _mm_unpacklo_epi8(*f, *e);
  const __m128i pair_hg = _mm_unpacklo_epi8(*h, *g);

  // Partial sums of four taps each; these stay within int16 range.
  const __m128i sum_first_half =
      _mm_add_epi16(_mm_maddubs_epi16(pair_ab, taps_outer),
                    _mm_maddubs_epi16(pair_cd, taps_inner));
  const __m128i sum_second_half =
      _mm_add_epi16(_mm_maddubs_epi16(pair_fe, taps_inner),
                    _mm_maddubs_epi16(pair_hg, taps_outer));

  // The full 8-tap sum can exceed int16, so saturate instead of wrapping.
  const __m128i sum_all = _mm_adds_epi16(sum_first_half, sum_second_half);
  const __m128i sum_rounded = _mm_adds_epi16(sum_all, rounding);
  const __m128i scaled = _mm_srai_epi16(sum_rounded, 7);

  return _mm_packus_epi16(scaled, scaled);
}
// Horizontally filters one row with filter(): dst[x] is computed from the
// eight consecutive input bytes src[x] .. src[x + 7].
//
// Only whole groups of 8 outputs are produced; when w is not a multiple of 8
// the trailing (w % 8) entries of dst are left untouched.
static void eight_tap_row_ssse3(const uint8_t *src, uint8_t *dst, int w) {
  const int simd_w = w & ~7;
  int col;

  for (col = 0; col < simd_w; col += 8) {
    const uint8_t *const p = src + col;
    // Eight overlapping 8-byte windows, one per filter tap.
    const __m128i t0 = _mm_loadl_epi64((const __m128i *)(p + 0));
    const __m128i t1 = _mm_loadl_epi64((const __m128i *)(p + 1));
    const __m128i t2 = _mm_loadl_epi64((const __m128i *)(p + 2));
    const __m128i t3 = _mm_loadl_epi64((const __m128i *)(p + 3));
    const __m128i t4 = _mm_loadl_epi64((const __m128i *)(p + 4));
    const __m128i t5 = _mm_loadl_epi64((const __m128i *)(p + 5));
    const __m128i t6 = _mm_loadl_epi64((const __m128i *)(p + 6));
    const __m128i t7 = _mm_loadl_epi64((const __m128i *)(p + 7));
    const __m128i out = filter(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);

    _mm_storel_epi64((__m128i *)(dst + col), out);
  }
}
void
upsample_1_to_2_ssse3
(
const
uint8_t
*
src
,
ptrdiff_t
src_stride
,
uint8_t
*
dst
,
ptrdiff_t
dst_stride
,
int
dst_w
,
int
dst_h
)
{
dst_w
/=
2
;
dst_h
/=
2
;
{
DECLARE_ALIGNED
(
16
,
uint8_t
,
tmp
[
1920
*
8
]);
uint8_t
*
tmp0
=
tmp
+
dst_w
*
0
;
uint8_t
*
tmp1
=
tmp
+
dst_w
*
1
;
uint8_t
*
tmp2
=
tmp
+
dst_w
*
2
;
uint8_t
*
tmp3
=
tmp
+
dst_w
*
3
;
uint8_t
*
tmp4
=
tmp
+
dst_w
*
4
;
uint8_t
*
tmp5
=
tmp
+
dst_w
*
5
;
uint8_t
*
tmp6
=
tmp
+
dst_w
*
6
;
uint8_t
*
tmp7
=
tmp
+
dst_w
*
7
;
uint8_t
*
tmp8
=
NULL
;
const
int
max_width
=
dst_w
&
~
7
;
int
y
;
eight_tap_row_ssse3
(
src
-
src_stride
*
3
-
3
,
tmp0
,
dst_w
);
eight_tap_row_ssse3
(
src
-
src_stride
*
2
-
3
,
tmp1
,
dst_w
);
eight_tap_row_ssse3
(
src
-
src_stride
*
1
-
3
,
tmp2
,
dst_w
);
eight_tap_row_ssse3
(
src
+
src_stride
*
0
-
3
,
tmp3
,
dst_w
);
eight_tap_row_ssse3
(
src
+
src_stride
*
1
-
3
,
tmp4
,
dst_w
);
eight_tap_row_ssse3
(
src
+
src_stride
*
2
-
3
,
tmp5
,
dst_w
);
eight_tap_row_ssse3
(
src
+
src_stride
*
3
-
3
,
tmp6
,
dst_w
);
for
(
y
=
0
;
y
<
dst_h
;
y
++
)
{
int
x
;
eight_tap_row_ssse3
(
src
+
src_stride
*
4
-
3
,
tmp7
,
dst_w
);
for
(
x
=
0
;
x
<
max_width
;
x
+=
8
)
{
const
__m128i
A
=
_mm_loadl_epi64
((
const
__m128i
*
)(
src
+
x
));
const
__m128i
B
=
_mm_loadl_epi64
((
const
__m128i
*
)(
tmp3
+
x
));
const
__m128i
AB
=
_mm_unpacklo_epi8
(
A
,
B
);
__m128i
C
,
D
,
CD
;
_mm_storeu_si128
((
__m128i
*
)(
dst
+
x
*
2
),
AB
);
{
const
__m128i
a
=
_mm_loadl_epi64
((
const
__m128i
*
)(
src
+
x
-
src_stride
*
3
));
const
__m128i
b
=
_mm_loadl_epi64
((
const
__m128i
*
)(
src
+
x
-
src_stride
*
2
));
const
__m128i
c
=
_mm_loadl_epi64
((
const
__m128i
*
)(
src
+
x
-
src_stride
*
1
));
const
__m128i
d
=
_mm_loadl_epi64
((
const
__m128i
*
)(
src
+
x
+
src_stride
*
0
));
const
__m128i
e
=
_mm_loadl_epi64
((
const
__m128i
*
)(
src
+
x
+
src_stride
*
1
));
const
__m128i
f
=
_mm_loadl_epi64
((
const
__m128i
*
)(
src
+
x
+
src_stride
*
2
));
const
__m128i
g
=
_mm_loadl_epi64
((
const
__m128i
*
)(
src
+
x
+
src_stride
*
3
));
const
__m128i
h
=
_mm_loadl_epi64
((
const
__m128i
*
)(
src
+
x
+
src_stride
*
4
));
C
=
filter
(
&
a
,
&
b
,
&
c
,
&
d
,
&
e
,
&
f
,
&
g
,
&
h
);
}
{
const
__m128i
a
=
_mm_loadl_epi64
((
const
__m128i
*
)(
tmp0
+
x
));
const
__m128i
b
=
_mm_loadl_epi64
((
const
__m128i
*
)(
tmp1
+
x
));
const
__m128i
c
=
_mm_loadl_epi64
((
const
__m128i
*
)(
tmp2
+
x
));
const
__m128i
d
=
_mm_loadl_epi64
((
const
__m128i
*
)(
tmp3
+
x
));
const
__m128i
e
=
_mm_loadl_epi64
((
const
__m128i
*
)(
tmp4
+
x
));
const
__m128i
f
=
_mm_loadl_epi64
((
const
__m128i
*
)(
tmp5
+
x
));
const
__m128i
g
=
_mm_loadl_epi64
((
const
__m128i
*
)(
tmp6
+
x
));
const
__m128i
h
=
_mm_loadl_epi64
((
const
__m128i
*
)(
tmp7
+
x
));
D
=
filter
(
&
a
,
&
b
,
&
c
,
&
d
,
&
e
,
&
f
,
&
g
,
&
h
);
}
CD
=
_mm_unpacklo_epi8
(
C
,
D
);
_mm_storeu_si128
((
__m128i
*
)(
dst
+
x
*
2
+
dst_stride
),
CD
);
}
src
+=
src_stride
;
dst
+=
dst_stride
*
2
;
tmp8
=
tmp0
;
tmp0
=
tmp1
;
tmp1
=
tmp2
;
tmp2
=
tmp3
;
tmp3
=
tmp4
;
tmp4
=
tmp5
;
tmp5
=
tmp6
;
tmp6
=
tmp7
;
tmp7
=
tmp8
;
}
}
}
// Scales src into dst and extends dst's borders.
//
// Exact 2:1 downscaling and exact 1:2 upscaling are handled by the SSSE3
// helpers in this file; every other case is forwarded to
// vp9_scale_and_extend_frame_c().
//
// The fast paths treat the chroma planes as half the luma size (dst_w / 2 by
// dst_h / 2, truncating), and the upsampler halves its size arguments again
// and only processes columns in groups of 8 with no scalar tail. They are
// therefore only taken when those divisions are exact and the widths line
// up; otherwise trailing rows/columns of dst would never be written before
// the border extension. All remaining sizes use the C implementation.
void vp9_scale_and_extend_frame_ssse3(const YV12_BUFFER_CONFIG *src,
                                      YV12_BUFFER_CONFIG *dst) {
  const int src_w = src->y_crop_width;
  const int src_h = src->y_crop_height;
  const int dst_w = dst->y_crop_width;
  const int dst_h = dst->y_crop_height;
  const int dst_uv_w = dst_w / 2;
  const int dst_uv_h = dst_h / 2;
  const int is_2_to_1 = (dst_w * 2 == src_w) && (dst_h * 2 == src_h);
  const int is_1_to_2 = (dst_w == src_w * 2) && (dst_h == src_h * 2);
  // Downscale: dst_w / 2 and dst_h / 2 must be the full chroma size.
  const int down_ok = is_2_to_1 && (dst_w & 1) == 0 && (dst_h & 1) == 0;
  // Upscale: the chroma call processes dst_w / 4 input columns in groups of
  // 8 (so dst_w % 32 == 0) and dst_h / 4 input rows (so dst_h % 4 == 0).
  // The upsample() scratch buffer supports input widths up to 1920.
  const int up_ok = is_1_to_2 && dst_w / 2 <= 1920 && (dst_w & 31) == 0 &&
                    (dst_h & 3) == 0;

  if (down_ok) {
    downsample_2_to_1_ssse3(src->y_buffer, src->y_stride, dst->y_buffer,
                            dst->y_stride, dst_w, dst_h);
    downsample_2_to_1_ssse3(src->u_buffer, src->uv_stride, dst->u_buffer,
                            dst->uv_stride, dst_uv_w, dst_uv_h);
    downsample_2_to_1_ssse3(src->v_buffer, src->uv_stride, dst->v_buffer,
                            dst->uv_stride, dst_uv_w, dst_uv_h);
    vpx_extend_frame_borders(dst);
  } else if (up_ok) {
    upsample_1_to_2_ssse3(src->y_buffer, src->y_stride, dst->y_buffer,
                          dst->y_stride, dst_w, dst_h);
    upsample_1_to_2_ssse3(src->u_buffer, src->uv_stride, dst->u_buffer,
                          dst->uv_stride, dst_uv_w, dst_uv_h);
    upsample_1_to_2_ssse3(src->v_buffer, src->uv_stride, dst->v_buffer,
                          dst->uv_stride, dst_uv_w, dst_uv_h);
    vpx_extend_frame_borders(dst);
  } else {
    // Unsupported ratio or size: the C version extends the borders itself.
    vp9_scale_and_extend_frame_c(src, dst);
  }
}
vp9/vp9cx.mk
View file @
989c6930
...
@@ -119,6 +119,7 @@ endif
...
@@ -119,6 +119,7 @@ endif
VP9_CX_SRCS-$(HAVE_SSE2)
+=
encoder/x86/vp9_dct_sse2.c
VP9_CX_SRCS-$(HAVE_SSE2)
+=
encoder/x86/vp9_dct_sse2.c
VP9_CX_SRCS-$(HAVE_SSSE3)
+=
encoder/x86/vp9_dct_ssse3.c
VP9_CX_SRCS-$(HAVE_SSSE3)
+=
encoder/x86/vp9_dct_ssse3.c
VP9_CX_SRCS-$(HAVE_SSSE3)
+=
encoder/x86/vp9_frame_scale_ssse3.c
ifeq
($(CONFIG_VP9_TEMPORAL_DENOISING),yes)
ifeq
($(CONFIG_VP9_TEMPORAL_DENOISING),yes)
VP9_CX_SRCS-$(HAVE_SSE2)
+=
encoder/x86/vp9_denoiser_sse2.c
VP9_CX_SRCS-$(HAVE_SSE2)
+=
encoder/x86/vp9_denoiser_sse2.c
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment