Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
Xiph.Org
aom-rav1e
Commits
2ab7b9a6
Commit
2ab7b9a6
authored
May 27, 2016
by
Linfeng Zhang
Committed by
Gerrit Code Review
May 27, 2016
Browse files
Options
Browse Files
Download
Plain Diff
Merge "Upgrade fwht4x4_mmx() to fwht4x4_sse2() for vp9 and vp10."
parents
0ba9b299
af7fb17c
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
183 additions
and
18 deletions
+183
-18
test/fdct4x4_test.cc
test/fdct4x4_test.cc
+2
-10
vp10/common/vp10_rtcd_defs.pl
vp10/common/vp10_rtcd_defs.pl
+2
-2
vp10/encoder/x86/dct_intrin_sse2.c
vp10/encoder/x86/dct_intrin_sse2.c
+0
-0
vp10/encoder/x86/dct_sse2.asm
vp10/encoder/x86/dct_sse2.asm
+86
-0
vp10/vp10cx.mk
vp10/vp10cx.mk
+2
-2
vp9/common/vp9_rtcd_defs.pl
vp9/common/vp9_rtcd_defs.pl
+2
-2
vp9/encoder/x86/vp9_dct_intrin_sse2.c
vp9/encoder/x86/vp9_dct_intrin_sse2.c
+0
-0
vp9/encoder/x86/vp9_dct_sse2.asm
vp9/encoder/x86/vp9_dct_sse2.asm
+87
-0
vp9/vp9cx.mk
vp9/vp9cx.mk
+2
-2
No files found.
test/fdct4x4_test.cc
View file @
2ab7b9a6
...
...
@@ -487,19 +487,11 @@ INSTANTIATE_TEST_CASE_P(
make_tuple
(
&
vp9_fht4x4_c
,
&
vp9_iht4x4_16_add_neon
,
3
,
VPX_BITS_8
)));
#endif // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if CONFIG_USE_X86INC && HAVE_MMX && !CONFIG_VP9_HIGHBITDEPTH && \
!CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
MMX
,
Trans4x4WHT
,
::
testing
::
Values
(
make_tuple
(
&
vp9_fwht4x4_mmx
,
&
vpx_iwht4x4_16_add_c
,
0
,
VPX_BITS_8
)));
#endif
#if CONFIG_USE_X86INC && HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && \
!CONFIG_EMULATE_HARDWARE
#if CONFIG_USE_X86INC && HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P
(
SSE2
,
Trans4x4WHT
,
::
testing
::
Values
(
make_tuple
(
&
vp9_fwht4x4_sse2
,
&
vpx_iwht4x4_16_add_c
,
0
,
VPX_BITS_8
),
make_tuple
(
&
vp9_fwht4x4_c
,
&
vpx_iwht4x4_16_add_sse2
,
0
,
VPX_BITS_8
)));
#endif
...
...
vp10/common/vp10_rtcd_defs.pl
View file @
2ab7b9a6
...
...
@@ -398,7 +398,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize
qw/vp10_fht16x16 sse2/
;
add_proto
qw/void vp10_fwht4x4/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/vp10_fwht4x4/
,
"
$
mmx
_x86inc
";
specialize
qw/vp10_fwht4x4/
,
"
$
sse2
_x86inc
";
}
else
{
add_proto
qw/void vp10_fht4x4/
,
"
const int16_t *input, tran_low_t *output, int stride, int tx_type
";
specialize
qw/vp10_fht4x4 sse2 msa/
;
...
...
@@ -410,7 +410,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize
qw/vp10_fht16x16 sse2 msa/
;
add_proto
qw/void vp10_fwht4x4/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/vp10_fwht4x4 msa/
,
"
$
mmx
_x86inc
";
specialize
qw/vp10_fwht4x4 msa/
,
"
$
sse2
_x86inc
";
}
# Inverse transform
...
...
vp10/encoder/x86/dct_sse2.c
→
vp10/encoder/x86/dct_
intrin_
sse2.c
View file @
2ab7b9a6
File moved
vp10/encoder/x86/dct_
mmx
.asm
→
vp10/encoder/x86/dct_
sse2
.asm
View file @
2ab7b9a6
;
; Copyright (c) 201
4
The WebM project authors. All Rights Reserved.
; Copyright (c) 201
6
The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
...
...
@@ -32,22 +32,18 @@ SECTION .text
%endmacro
%macro TRANSPOSE_4X4 0
movq
m4
,
m0
movq
m5
,
m2
punpcklwd
m4
,
m1
punpckhwd
m0
,
m1
punpcklwd
m5
,
m3
punpckhwd
m2
,
m3
movq
m1
,
m4
movq
m3
,
m0
punpckldq
m1
,
m5
punpckhdq
m4
,
m5
punpckldq
m3
,
m2
punpckhdq
m0
,
m2
SWAP
2
,
3
,
0
,
1
,
4
; 00 01 02 03
; 10 11 12 13
; 20 21 22 23
; 30 31 32 33
punpcklwd
m0
,
m1
; 00 10 01 11 02 12 03 13
punpcklwd
m2
,
m3
; 20 30 21 31 22 32 23 33
mova
m1
,
m0
punpckldq
m0
,
m2
; 00 10 20 30 01 11 21 31
punpckhdq
m1
,
m2
; 02 12 22 32 03 13 23 33
%endmacro
INIT_MM
X
mmx
INIT_
X
MM
ss
e2
cglobal
fwht4x4
,
3
,
4
,
8
,
input
,
output
,
stride
lea
r3q
,
[
inputq
+
strideq
*
4
]
movq
m0
,
[
inputq
]
;a1
...
...
@@ -57,48 +53,34 @@ cglobal fwht4x4, 3, 4, 8, input, output, stride
TRANSFORM_COLS
TRANSPOSE_4X4
SWAP
1
,
2
psrldq
m1
,
m0
,
8
psrldq
m3
,
m2
,
8
TRANSFORM_COLS
TRANSPOSE_4X4
psllw
m0
,
2
psllw
m1
,
2
psllw
m2
,
2
psllw
m3
,
2
%if CONFIG_VP9_HIGHBITDEPTH
pxor
m4
,
m4
pxor
m5
,
m5
pcmpgtw
m4
,
m0
pcmpgtw
m5
,
m1
movq
m6
,
m0
movq
m7
,
m1
punpcklwd
m0
,
m4
punpcklwd
m1
,
m5
punpckhwd
m6
,
m4
punpckhwd
m7
,
m5
movq
[
outputq
],
m0
movq
[
outputq
+
8
],
m6
movq
[
outputq
+
16
],
m1
movq
[
outputq
+
24
],
m7
pxor
m4
,
m4
pxor
m5
,
m5
pcmpgtw
m4
,
m2
pcmpgtw
m5
,
m3
movq
m6
,
m2
movq
m7
,
m3
punpcklwd
m2
,
m4
punpcklwd
m3
,
m5
punpckhwd
m6
,
m4
punpckhwd
m7
,
m5
movq
[
outputq
+
32
],
m2
movq
[
outputq
+
40
],
m6
movq
[
outputq
+
48
],
m3
movq
[
outputq
+
56
],
m7
; sign extension
mova
m2
,
m0
mova
m3
,
m1
punpcklwd
m0
,
m0
punpcklwd
m1
,
m1
punpckhwd
m2
,
m2
punpckhwd
m3
,
m3
psrad
m0
,
16
psrad
m1
,
16
psrad
m2
,
16
psrad
m3
,
16
mova
[
outputq
],
m0
mova
[
outputq
+
16
],
m2
mova
[
outputq
+
32
],
m1
mova
[
outputq
+
48
],
m3
%else
movq
[
outputq
],
m0
movq
[
outputq
+
8
],
m1
movq
[
outputq
+
16
],
m2
movq
[
outputq
+
24
],
m3
mova
[
outputq
],
m0
mova
[
outputq
+
16
],
m1
%endif
RET
vp10/vp10cx.mk
View file @
2ab7b9a6
...
...
@@ -93,7 +93,7 @@ VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/highbd_block_error_intrin_sse2.c
endif
ifeq
($(CONFIG_USE_X86INC),yes)
VP10_CX_SRCS-$(HAVE_
MMX
)
+=
encoder/x86/dct_
mmx
.asm
VP10_CX_SRCS-$(HAVE_
SSE2
)
+=
encoder/x86/dct_
sse2
.asm
VP10_CX_SRCS-$(HAVE_SSE2)
+=
encoder/x86/error_sse2.asm
endif
...
...
@@ -103,7 +103,7 @@ VP10_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3_x86_64.asm
endif
endif
VP10_CX_SRCS-$(HAVE_SSE2)
+=
encoder/x86/dct_sse2.c
VP10_CX_SRCS-$(HAVE_SSE2)
+=
encoder/x86/dct_
intrin_
sse2.c
VP10_CX_SRCS-$(HAVE_SSSE3)
+=
encoder/x86/dct_ssse3.c
ifeq
($(CONFIG_VP9_TEMPORAL_DENOISING),yes)
...
...
vp9/common/vp9_rtcd_defs.pl
View file @
2ab7b9a6
...
...
@@ -245,7 +245,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize
qw/vp9_fht16x16 sse2/
;
add_proto
qw/void vp9_fwht4x4/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/vp9_fwht4x4/
,
"
$
mmx
_x86inc
";
specialize
qw/vp9_fwht4x4/
,
"
$
sse2
_x86inc
";
}
else
{
add_proto
qw/void vp9_fht4x4/
,
"
const int16_t *input, tran_low_t *output, int stride, int tx_type
";
specialize
qw/vp9_fht4x4 sse2 msa/
;
...
...
@@ -257,7 +257,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize
qw/vp9_fht16x16 sse2 msa/
;
add_proto
qw/void vp9_fwht4x4/
,
"
const int16_t *input, tran_low_t *output, int stride
";
specialize
qw/vp9_fwht4x4 msa/
,
"
$
mmx
_x86inc
";
specialize
qw/vp9_fwht4x4 msa/
,
"
$
sse2
_x86inc
";
}
#
...
...
vp9/encoder/x86/vp9_dct_sse2.c
→
vp9/encoder/x86/vp9_dct_
intrin_
sse2.c
View file @
2ab7b9a6
File moved
vp9/encoder/x86/vp9_dct_
mmx
.asm
→
vp9/encoder/x86/vp9_dct_
sse2
.asm
View file @
2ab7b9a6
;
; Copyright (c) 201
4
The WebM project authors. All Rights Reserved.
; Copyright (c) 201
6
The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
...
...
@@ -32,23 +32,20 @@ SECTION .text
%endmacro
%macro TRANSPOSE_4X4 0
movq
m4
,
m0
movq
m5
,
m2
punpcklwd
m4
,
m1
punpckhwd
m0
,
m1
punpcklwd
m5
,
m3
punpckhwd
m2
,
m3
movq
m1
,
m4
movq
m3
,
m0
punpckldq
m1
,
m5
punpckhdq
m4
,
m5
punpckldq
m3
,
m2
punpckhdq
m0
,
m2
SWAP
2
,
3
,
0
,
1
,
4
; 00 01 02 03
; 10 11 12 13
; 20 21 22 23
; 30 31 32 33
punpcklwd
m0
,
m1
; 00 10 01 11 02 12 03 13
punpcklwd
m2
,
m3
; 20 30 21 31 22 32 23 33
mova
m1
,
m0
punpckldq
m0
,
m2
; 00 10 20 30 01 11 21 31
punpckhdq
m1
,
m2
; 02 12 22 32 03 13 23 33
%endmacro
INIT_MM
X
mmx
INIT_
X
MM
ss
e2
cglobal
fwht4x4
,
3
,
4
,
8
,
input
,
output
,
stride
; TODO(linfeng): The duplication with vp10 should be resolved.
lea
r3q
,
[
inputq
+
strideq
*
4
]
movq
m0
,
[
inputq
]
;a1
movq
m1
,
[
inputq
+
strideq
*
2
]
;b1
...
...
@@ -57,48 +54,34 @@ cglobal fwht4x4, 3, 4, 8, input, output, stride
TRANSFORM_COLS
TRANSPOSE_4X4
SWAP
1
,
2
psrldq
m1
,
m0
,
8
psrldq
m3
,
m2
,
8
TRANSFORM_COLS
TRANSPOSE_4X4
psllw
m0
,
2
psllw
m1
,
2
psllw
m2
,
2
psllw
m3
,
2
%if CONFIG_VP9_HIGHBITDEPTH
pxor
m4
,
m4
pxor
m5
,
m5
pcmpgtw
m4
,
m0
pcmpgtw
m5
,
m1
movq
m6
,
m0
movq
m7
,
m1
punpcklwd
m0
,
m4
punpcklwd
m1
,
m5
punpckhwd
m6
,
m4
punpckhwd
m7
,
m5
movq
[
outputq
],
m0
movq
[
outputq
+
8
],
m6
movq
[
outputq
+
16
],
m1
movq
[
outputq
+
24
],
m7
pxor
m4
,
m4
pxor
m5
,
m5
pcmpgtw
m4
,
m2
pcmpgtw
m5
,
m3
movq
m6
,
m2
movq
m7
,
m3
punpcklwd
m2
,
m4
punpcklwd
m3
,
m5
punpckhwd
m6
,
m4
punpckhwd
m7
,
m5
movq
[
outputq
+
32
],
m2
movq
[
outputq
+
40
],
m6
movq
[
outputq
+
48
],
m3
movq
[
outputq
+
56
],
m7
; sign extension
mova
m2
,
m0
mova
m3
,
m1
punpcklwd
m0
,
m0
punpcklwd
m1
,
m1
punpckhwd
m2
,
m2
punpckhwd
m3
,
m3
psrad
m0
,
16
psrad
m1
,
16
psrad
m2
,
16
psrad
m3
,
16
mova
[
outputq
],
m0
mova
[
outputq
+
16
],
m2
mova
[
outputq
+
32
],
m1
mova
[
outputq
+
48
],
m3
%else
movq
[
outputq
],
m0
movq
[
outputq
+
8
],
m1
movq
[
outputq
+
16
],
m2
movq
[
outputq
+
24
],
m3
mova
[
outputq
],
m0
mova
[
outputq
+
16
],
m1
%endif
RET
vp9/vp9cx.mk
View file @
2ab7b9a6
...
...
@@ -101,7 +101,7 @@ VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_block_error_intrin_sse2.c
endif
ifeq
($(CONFIG_USE_X86INC),yes)
VP9_CX_SRCS-$(HAVE_
MMX
)
+=
encoder/x86/vp9_dct_
mmx
.asm
VP9_CX_SRCS-$(HAVE_
SSE2
)
+=
encoder/x86/vp9_dct_
sse2
.asm
ifeq
($(CONFIG_VP9_HIGHBITDEPTH),yes)
VP9_CX_SRCS-$(HAVE_SSE2)
+=
encoder/x86/vp9_highbd_error_sse2.asm
VP9_CX_SRCS-$(HAVE_AVX)
+=
encoder/x86/vp9_highbd_error_avx.asm
...
...
@@ -116,7 +116,7 @@ VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_quantize_ssse3_x86_64.asm
endif
endif
VP9_CX_SRCS-$(HAVE_SSE2)
+=
encoder/x86/vp9_dct_sse2.c
VP9_CX_SRCS-$(HAVE_SSE2)
+=
encoder/x86/vp9_dct_
intrin_
sse2.c
VP9_CX_SRCS-$(HAVE_SSSE3)
+=
encoder/x86/vp9_dct_ssse3.c
ifneq
($(CONFIG_VP9_HIGHBITDEPTH),yes)
VP9_CX_SRCS-$(HAVE_SSSE3)
+=
encoder/x86/vp9_frame_scale_ssse3.c
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment