Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
Xiph.Org
aom-rav1e
Commits
f3b9b97c
Commit
f3b9b97c
authored
Jan 09, 2014
by
Yunqing Wang
Committed by
Gerrit Code Review
Jan 09, 2014
Browse files
Options
Browse Files
Download
Plain Diff
Merge "SSSE3 convolution optimization"
parents
6d812d6f
511d218c
Changes
3
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
685 additions
and
7 deletions
+685
-7
vp9/common/x86/vp9_asm_stubs.c
vp9/common/x86/vp9_asm_stubs.c
+93
-7
vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c
vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c
+591
-0
vp9/vp9_common.mk
vp9/vp9_common.mk
+1
-0
No files found.
vp9/common/x86/vp9_asm_stubs.c
View file @
f3b9b97c
...
...
@@ -23,20 +23,105 @@ typedef void filter8_1dfunction (
const
short
*
filter
);
#if HAVE_SSSE3
filter8_1dfunction
vp9_filter_block1d16_v8_ssse3
;
filter8_1dfunction
vp9_filter_block1d16_h8_ssse3
;
filter8_1dfunction
vp9_filter_block1d8_v8_ssse3
;
filter8_1dfunction
vp9_filter_block1d8_h8_ssse3
;
filter8_1dfunction
vp9_filter_block1d4_v8_ssse3
;
filter8_1dfunction
vp9_filter_block1d4_h8_ssse3
;
#if (HAVE_SSSE3)
filter8_1dfunction
vp9_filter_block1d16_v8_avg_ssse3
;
filter8_1dfunction
vp9_filter_block1d16_h8_avg_ssse3
;
filter8_1dfunction
vp9_filter_block1d8_v8_avg_ssse3
;
filter8_1dfunction
vp9_filter_block1d8_h8_avg_ssse3
;
filter8_1dfunction
vp9_filter_block1d4_v8_avg_ssse3
;
filter8_1dfunction
vp9_filter_block1d4_h8_avg_ssse3
;
#if (ARCH_X86_64)
/*
 * Forward declarations of the "_intrin_ssse3" 1-D filter kernels. All six
 * share the filter8_1dfunction signature; the vp9_convolve8_horiz_ssse3 and
 * vp9_convolve8_vert_ssse3 wrappers call them as
 * (src, src_stride, dst, dst_stride, h, filter).
 * NOTE(review): judging by the names, 16/8/4 is the strip width handled per
 * call and h8/v8 the horizontal/vertical 8-tap variant -- confirm against
 * the kernel definitions (presumably vp9_subpixel_8t_intrin_ssse3.c).
 */
filter8_1dfunction
vp9_filter_block1d16_v8_intrin_ssse3
;
filter8_1dfunction
vp9_filter_block1d16_h8_intrin_ssse3
;
filter8_1dfunction
vp9_filter_block1d8_v8_intrin_ssse3
;
filter8_1dfunction
vp9_filter_block1d8_h8_intrin_ssse3
;
filter8_1dfunction
vp9_filter_block1d4_v8_intrin_ssse3
;
filter8_1dfunction
vp9_filter_block1d4_h8_intrin_ssse3
;
/*
 * Horizontal convolution wrapper that dispatches to the SSSE3 intrinsic
 * kernels where possible and to the C implementation otherwise.
 *
 * The intrinsic path is taken only when x_step_q4 is 16 and the fourth tap
 * (filter_x[3]) is not 128. On that path the block is consumed in strips of
 * 16, then 8, then 4: after each kernel call src and dst advance by the
 * strip width and w shrinks by the same amount. Whatever width is left over
 * (all of it when the intrinsic path is not taken) goes to
 * vp9_convolve8_horiz_c with the original filter and step arguments.
 *
 * NOTE(review): the previous comment described the tap check as ensuring
 * the filter "can be compressed to int16_t"; the taps are already int16_t,
 * so a narrower packing inside the kernels is presumably meant -- confirm.
 */
void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
                               const int16_t *filter_x, int x_step_q4,
                               const int16_t *filter_y, int y_step_q4,
                               int w, int h) {
  /* && short-circuits, so filter_x[3] is read only when the step is 16. */
  const int kernel_ok = (x_step_q4 == 16) && (filter_x[3] != 128);

  if (kernel_ok) {
    for (; w >= 16; w -= 16, src += 16, dst += 16) {
      vp9_filter_block1d16_h8_intrin_ssse3(src, src_stride, dst, dst_stride,
                                           h, filter_x);
    }
    for (; w >= 8; w -= 8, src += 8, dst += 8) {
      vp9_filter_block1d8_h8_intrin_ssse3(src, src_stride, dst, dst_stride,
                                          h, filter_x);
    }
    for (; w >= 4; w -= 4, src += 4, dst += 4) {
      vp9_filter_block1d4_h8_intrin_ssse3(src, src_stride, dst, dst_stride,
                                          h, filter_x);
    }
  }

  /* Remaining width is handled by the C implementation. */
  if (w != 0) {
    vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride,
                          filter_x, x_step_q4, filter_y, y_step_q4,
                          w, h);
  }
}
/*
 * Vertical convolution wrapper that dispatches to the SSSE3 intrinsic
 * kernels where possible and to the C implementation otherwise.
 *
 * The intrinsic path is taken only when y_step_q4 is 16 and the fourth tap
 * (filter_y[3]) is not 128. On that path the block is consumed in strips of
 * 16, then 8, then 4: each kernel is handed src - src_stride * 3 as its
 * source pointer, after which src and dst advance by the strip width and w
 * shrinks by the same amount. Whatever width is left over (all of it when
 * the intrinsic path is not taken) goes to vp9_convolve8_vert_c with the
 * un-offset src and the original filter and step arguments.
 */
void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
                              uint8_t *dst, ptrdiff_t dst_stride,
                              const int16_t *filter_x, int x_step_q4,
                              const int16_t *filter_y, int y_step_q4,
                              int w, int h) {
  /* && short-circuits, so filter_y[3] is read only when the step is 16. */
  const int kernel_ok = (y_step_q4 == 16) && (filter_y[3] != 128);

  if (kernel_ok) {
    for (; w >= 16; w -= 16, src += 16, dst += 16) {
      vp9_filter_block1d16_v8_intrin_ssse3(src - src_stride * 3, src_stride,
                                           dst, dst_stride, h, filter_y);
    }
    for (; w >= 8; w -= 8, src += 8, dst += 8) {
      vp9_filter_block1d8_v8_intrin_ssse3(src - src_stride * 3, src_stride,
                                          dst, dst_stride, h, filter_y);
    }
    for (; w >= 4; w -= 4, src += 4, dst += 4) {
      vp9_filter_block1d4_v8_intrin_ssse3(src - src_stride * 3, src_stride,
                                          dst, dst_stride, h, filter_y);
    }
  }

  /* Remaining width is handled by the C implementation. */
  if (w != 0) {
    vp9_convolve8_vert_c(src, src_stride, dst, dst_stride,
                         filter_x, x_step_q4, filter_y, y_step_q4,
                         w, h);
  }
}
#else
filter8_1dfunction
vp9_filter_block1d16_v8_ssse3
;
filter8_1dfunction
vp9_filter_block1d16_h8_ssse3
;
filter8_1dfunction
vp9_filter_block1d8_v8_ssse3
;
filter8_1dfunction
vp9_filter_block1d8_h8_ssse3
;
filter8_1dfunction
vp9_filter_block1d4_v8_ssse3
;
filter8_1dfunction
vp9_filter_block1d4_h8_ssse3
;
void
vp9_convolve8_horiz_ssse3
(
const
uint8_t
*
src
,
ptrdiff_t
src_stride
,
uint8_t
*
dst
,
ptrdiff_t
dst_stride
,
const
int16_t
*
filter_x
,
int
x_step_q4
,
...
...
@@ -113,6 +198,7 @@ void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
w
,
h
);
}
}
#endif
void
vp9_convolve8_avg_horiz_ssse3
(
const
uint8_t
*
src
,
ptrdiff_t
src_stride
,
uint8_t
*
dst
,
ptrdiff_t
dst_stride
,
...
...
vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c
0 → 100644
View file @
f3b9b97c
This diff is collapsed.
Click to expand it.
vp9/vp9_common.mk
View file @
f3b9b97c
...
...
@@ -74,6 +74,7 @@ VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.c
VP9_COMMON_SRCS-$(HAVE_MMX)
+=
common/x86/vp9_loopfilter_mmx.asm
VP9_COMMON_SRCS-$(HAVE_SSE2)
+=
common/x86/vp9_subpixel_8t_sse2.asm
VP9_COMMON_SRCS-$(HAVE_SSSE3)
+=
common/x86/vp9_subpixel_8t_ssse3.asm
VP9_COMMON_SRCS-$(HAVE_SSSE3)
+=
common/x86/vp9_subpixel_8t_intrin_ssse3.c
ifeq
($(CONFIG_VP9_POSTPROC),yes)
VP9_COMMON_SRCS-$(HAVE_MMX)
+=
common/x86/vp9_postproc_mmx.asm
VP9_COMMON_SRCS-$(HAVE_SSE2)
+=
common/x86/vp9_postproc_sse2.asm
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment