Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Guillaume Martres
aom-rav1e
Commits
d289deb0
Commit
d289deb0
authored
May 06, 2014
by
Jingning Han
Committed by
Gerrit Code Review
May 06, 2014
Browse files
Merge "SSSE3 implementation of full inverse 8x8 2D-DCT"
parents
5504b3c2
52ae97b6
Changes
4
Hide whitespace changes
Inline
Side-by-side
vp9/common/vp9_rtcd_defs.pl
View file @
d289deb0
...
...
@@ -310,7 +310,7 @@ add_proto qw/void vp9_idct8x8_1_add/, "const int16_t *input, uint8_t *dest, int
specialize
qw/vp9_idct8x8_1_add sse2 neon dspr2/
;
add_proto
qw/void vp9_idct8x8_64_add/
,
"
const int16_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct8x8_64_add sse2 neon dspr2/
;
specialize
qw/vp9_idct8x8_64_add sse2 neon dspr2/
,
"
$ssse3_x86_64
"
;
add_proto
qw/void vp9_idct8x8_10_add/
,
"
const int16_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vp9_idct8x8_10_add sse2 neon dspr2/
;
...
...
vp9/common/x86/vp9_idct_ssse3.asm
0 → 100644
View file @
d289deb0
;
; Copyright (c) 2014 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
%include "third_party/x86inc/x86inc.asm"
; This file provides an SSSE3 version of the inverse transform. Some of
; the functions were originally derived from the FFmpeg project.
; Note that the current version applies to x86 64-bit only.
; ---------------------------------------------------------------------------
; Read-only vector constants shared by the macros and the function below.
; ---------------------------------------------------------------------------
SECTION_RODATA

; 8 x int16, each 23170 (= 2 * 11585). Loaded into m12 and used as the
; pmulhrsw multiplier in IDCT8_1D.
pw_11585x2: times 8 dw 23170

; 4 x int32, each 8192 (= 1 << 13). Loaded into m8 and passed as the
; rounding term that MUL_ADD_2X adds before its arithmetic ">> 14".
pd_8192:    times 4 dd 8192

; 8 x int16, each 16 (= 1 << 4). Loaded into m11 and added by
; ADD_STORE_8P_2X before its arithmetic ">> 5".
pw_16:      times 8 dw 16
; TRANSFORM_COEFFS a, b
;
; Emits two 8-word (16-byte) tables for the coefficient pair (a, b):
;   pw_<a>_<b>  : {  a, b } repeated four times
;   pw_m<b>_<a> : { -b, a } repeated four times
; The labels are built from the literal argument text, so the same numbers
; must be used when referring to the tables (BUTTERFLY_4X does this).
%macro TRANSFORM_COEFFS 2
pw_%1_%2:   times 4 dw  %1, %2
pw_m%2_%1:  times 4 dw -%2, %1
%endmacro
; Coefficient tables for the three BUTTERFLY_4X invocations in IDCT8_1D.
TRANSFORM_COEFFS  6270, 15137
TRANSFORM_COEFFS  3196, 16069
TRANSFORM_COEFFS 13623,  9102

SECTION .text

; The code below uses registers m8..m12, so it is only assembled for
; x86-64 builds. The matching %endif is at the end of the file.
%if ARCH_X86_64
; SUM_SUB a, b, tmp
;
; Word-wise sum and difference of two registers:
;   m<tmp> = m<a> - m<b>
;   m<a>   = m<a> + m<b>
; and then the names <b> and <tmp> are exchanged with SWAP, so that on exit
;   m<a> = a + b,  m<b> = a - b,  m<tmp> = the old value of b.
; The subtraction has to come first because it reads m<a> before the
; addition overwrites it.
%macro SUM_SUB 3
  psubw  m%3, m%1, m%2
  paddw  m%1, m%2
  SWAP    %2, %3
%endmacro
; Butterfly helper.
;
; MUL_ADD_2X dst1, dst2, src, round, coefs1, coefs2
;
;   m<dst1> = (pmaddwd(m<src>, coefs1) + round) >> 14
;   m<dst2> = (pmaddwd(m<src>, coefs2) + round) >> 14
;
; dst1/dst2/src are register numbers; round, coefs1 and coefs2 are used
; verbatim as instruction operands (a register or a memory reference).
; The shift is arithmetic, on 32-bit lanes.
;
; dst2 may be the same register as src (BUTTERFLY_4X relies on this): the
; first pmaddwd reads src before the second one overwrites it. dst1 must
; differ from src.
%macro MUL_ADD_2X 6
  pmaddwd  m%1, m%3, %5
  pmaddwd  m%2, m%3, %6
  paddd    m%1, %4
  paddd    m%2, %4
  psrad    m%1, 14
  psrad    m%2, 14
%endmacro
; BUTTERFLY_4X dst1, dst2, coef1, coef2, round, tmp1, tmp2
;
; With a = m<dst1>, b = m<dst2> on entry (8 x int16 each), computes per lane
;   m<dst1> = (a * coef1 - b * coef2 + round) >> 14
;   m<dst2> = (a * coef2 + b * coef1 + round) >> 14
; and packs the 32-bit results back to int16 with signed saturation.
;
; coef1/coef2 are the literal numbers of a table pair created by
; "TRANSFORM_COEFFS coef1, coef2"; round is passed through to MUL_ADD_2X.
; m<tmp1> and m<tmp2> are overwritten.
%macro BUTTERFLY_4X 7
  ; Upper four lanes: interleave (b, a) word pairs into tmp1.
  punpckhwd  m%6, m%2, m%1
  MUL_ADD_2X  %7,  %6,  %6,  %5, [pw_m%4_%3], [pw_%3_%4]

  ; Lower four lanes: interleave in place into dst2.
  punpcklwd  m%2, m%1
  MUL_ADD_2X  %1,  %2,  %2,  %5, [pw_m%4_%3], [pw_%3_%4]

  ; Rejoin low and high halves as 8 x int16.
  packssdw   m%1, m%7
  packssdw   m%2, m%6
%endmacro
; Matrix transpose building block.
;
; INTERLEAVE_2X size, a, b, tmp
;
; size is the punpck suffix (wd, dq or qdq). Produces
;   m<a>   = punpckl<size>(a, b)
;   m<tmp> = punpckh<size>(a, b)
; and then exchanges the names <b> and <tmp>, so on exit m<b> holds the
; high interleave and m<tmp> holds the old value of b.
%macro INTERLEAVE_2X 4
  punpckh%1  m%4, m%2, m%3
  punpckl%1  m%2, m%3
  SWAP        %3, %4
%endmacro
; TRANSPOSE8X8 r0, r1, r2, r3, r4, r5, r6, r7, tmp
;
; Transposes the 8x8 block of 16-bit values held in the eight row registers
; (arguments 1-8), using argument 9 as a scratch register. Three rounds of
; interleaving (16-bit, 32-bit, 64-bit granularity) are followed by two
; register renames that put the rows back in order.
%macro TRANSPOSE8X8 9
  ; Round 1: interleave 16-bit elements of adjacent rows.
  INTERLEAVE_2X  wd, %1, %2, %9
  INTERLEAVE_2X  wd, %3, %4, %9
  INTERLEAVE_2X  wd, %5, %6, %9
  INTERLEAVE_2X  wd, %7, %8, %9

  ; Round 2: interleave 32-bit pairs.
  INTERLEAVE_2X  dq, %1, %3, %9
  INTERLEAVE_2X  dq, %2, %4, %9
  INTERLEAVE_2X  dq, %5, %7, %9
  INTERLEAVE_2X  dq, %6, %8, %9

  ; Round 3: interleave 64-bit halves.
  INTERLEAVE_2X  qdq, %1, %5, %9
  INTERLEAVE_2X  qdq, %3, %7, %9
  INTERLEAVE_2X  qdq, %2, %6, %9
  INTERLEAVE_2X  qdq, %4, %8, %9

  ; Final renames (no instructions emitted).
  SWAP  %2, %5
  SWAP  %4, %7
%endmacro
; IDCT8_1D
;
; One 1-D pass of the 8-point inverse transform over m0..m7 (8 x int16
; each), in place.
;
; Expects on entry:
;   m8  = rounding constant handed to BUTTERFLY_4X (the caller loads pd_8192)
;   m12 = pmulhrsw multiplier (the caller loads pw_11585x2, i.e. 23170, so
;         pmulhrsw yields round(x * 11585 / 2^14))
; Uses m9 and m10 as scratch.
;
; The statement order matters: SUM_SUB and SWAP rename registers as they
; go, so each line refers to whatever the names mean at that point.
%macro IDCT8_1D 0
  SUM_SUB          0,    4,     9
  BUTTERFLY_4X     2,    6,  6270, 15137,  m8,  9,  10
  pmulhrsw        m0,  m12
  pmulhrsw        m4,  m12
  BUTTERFLY_4X     1,    7,  3196, 16069,  m8,  9,  10
  BUTTERFLY_4X     5,    3, 13623,  9102,  m8,  9,  10

  SUM_SUB          1,    5,     9
  SUM_SUB          7,    3,     9
  SUM_SUB          0,    6,     9
  SUM_SUB          4,    2,     9
  SUM_SUB          3,    5,     9
  pmulhrsw        m3,  m12
  pmulhrsw        m5,  m12

  SUM_SUB          0,    7,     9
  SUM_SUB          4,    3,     9
  SUM_SUB          2,    5,     9
  SUM_SUB          6,    1,     9

  ; Rename so the outputs end up in m0..m7 in the order the caller expects.
  SWAP             3,    6
  SWAP             1,    4
%endmacro
; ADD_STORE_8P_2X src1, src2, tmp1, tmp2, zero
;
; Adds two rows of eight residual values to two rows of eight destination
; bytes and stores the result:
;   row at [outputq]           uses m<src1>
;   row at [outputq + strideq] uses m<src2>
;
; Each residual is first rounded as (x + m11) >> 5 (arithmetic shift; the
; caller loads pw_16 into m11). The destination bytes are widened to 16 bits
; by interleaving with m<zero>, summed with the residual, and packed back to
; bytes with unsigned saturation.
;
; m<zero> must be all zeroes. m<src1>, m<src2>, m<tmp1> and m<tmp2> are
; overwritten.
%macro ADD_STORE_8P_2X 5
  ; Round and scale the residuals.
  paddw      m%1, m11
  paddw      m%2, m11
  psraw      m%1, 5
  psraw      m%2, 5

  ; Load 8 destination bytes per row and widen them to words.
  movh       m%3, [outputq]
  movh       m%4, [outputq + strideq]
  punpcklbw  m%3, m%5
  punpcklbw  m%4, m%5

  ; Reconstruct, clamp to 0..255 and write back.
  paddw      m%3, m%1
  paddw      m%4, m%2
  packuswb   m%3, m%5
  packuswb   m%4, m%5
  movh       [outputq], m%3
  movh       [outputq + strideq], m%4
%endmacro
INIT_XMM ssse3
;-----------------------------------------------------------------------------
; Full inverse 8x8 2-D transform, added to the destination block.
;
; The rtcd definition for this routine is
;   void vp9_idct8x8_64_add(const int16_t *input, uint8_t *dest,
;                           int dest_stride)
;
; In:    inputq  = 64 int16 coefficients, read as eight 16-byte rows with
;                  mova (NOTE(review): requires 16-byte alignment - confirm
;                  against callers)
;        outputq = top-left byte of the 8x8 destination block
;        stride  = distance in bytes between destination rows (C int)
; Uses:  r3 = 2 * stride; m0-m7 = data rows; m8 = pd_8192; m9/m10 = scratch;
;        m11 = pw_16; m12 = pw_11585x2 during the transform, zero afterwards.
;
; Only r0-r3 are used, so four general-purpose registers are requested.
;-----------------------------------------------------------------------------
cglobal idct8x8_64_add, 3, 4, 13, input, output, stride
  ; stride arrives as a 32-bit int; the upper half of its register is not
  ; guaranteed by the calling convention, so widen it before using strideq
  ; in address arithmetic.
  movsxd   strideq, strided

  mova     m8, [pd_8192]
  mova    m11, [pw_16]
  mova    m12, [pw_11585x2]

  lea      r3, [2 * strideq]            ; r3 = byte offset of two rows

  ; Load the eight coefficient rows.
  mova     m0, [inputq +   0]
  mova     m1, [inputq +  16]
  mova     m2, [inputq +  32]
  mova     m3, [inputq +  48]
  mova     m4, [inputq +  64]
  mova     m5, [inputq +  80]
  mova     m6, [inputq +  96]
  mova     m7, [inputq + 112]

  ; Two identical passes: transpose, then 1-D transform.
  TRANSPOSE8X8  0, 1, 2, 3, 4, 5, 6, 7, 9
  IDCT8_1D
  TRANSPOSE8X8  0, 1, 2, 3, 4, 5, 6, 7, 9
  IDCT8_1D

  ; m12 is no longer needed as a multiplier; reuse it as the zero register
  ; for ADD_STORE_8P_2X.
  pxor    m12, m12

  ; Add to the destination and store, two rows at a time.
  ADD_STORE_8P_2X  0, 1, 9, 10, 12
  lea     outputq, [outputq + r3]
  ADD_STORE_8P_2X  2, 3, 9, 10, 12
  lea     outputq, [outputq + r3]
  ADD_STORE_8P_2X  4, 5, 9, 10, 12
  lea     outputq, [outputq + r3]
  ADD_STORE_8P_2X  6, 7, 9, 10, 12

  RET
%endif
vp9/encoder/x86/vp9_dct_ssse3.asm
View file @
d289deb0
...
...
@@ -10,7 +10,7 @@
%include "third_party/x86inc/x86inc.asm"
; This file provides SSSE3 version of the forward transformation. Part
; of the macro definitions are originally derived from ffmpeg project.
; of the macro definitions are originally derived from
the
ffmpeg project.
; The current version applies to x86 64-bit only.
SECTION
_RODATA
...
...
vp9/vp9_common.mk
View file @
d289deb0
...
...
@@ -120,6 +120,10 @@ VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_mblpf_vert_loopfilter_ds
VP9_COMMON_SRCS-$(HAVE_SSE2)
+=
common/x86/vp9_idct_intrin_sse2.c
ifeq
($(ARCH_X86_64), yes)
VP9_COMMON_SRCS-$(HAVE_SSSE3)
+=
common/x86/vp9_idct_ssse3.asm
endif
VP9_COMMON_SRCS-$(HAVE_NEON)
+=
common/arm/neon/vp9_convolve_neon.c
VP9_COMMON_SRCS-$(HAVE_NEON)
+=
common/arm/neon/vp9_idct16x16_neon.c
VP9_COMMON_SRCS-$(HAVE_NEON)
+=
common/arm/neon/vp9_loopfilter_16_neon.c
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment