Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
Xiph.Org
aom-rav1e
Commits
85b764fe
Commit
85b764fe
authored
Feb 04, 2016
by
Adrian Grange
Committed by
Gerrit Code Review
Feb 04, 2016
Browse files
Options
Browse Files
Download
Plain Diff
Merge "Get optimized inv_txfm func work with HBD build"
parents
90989934
bacba875
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
118 additions
and
12 deletions
+118
-12
vpx_dsp/vpx_dsp_rtcd_defs.pl
vpx_dsp/vpx_dsp_rtcd_defs.pl
+8
-7
vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm
vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm
+103
-4
vpx_dsp/x86/inv_wht_sse2.asm
vpx_dsp/x86/inv_wht_sse2.asm
+7
-1
No files found.
vpx_dsp/vpx_dsp_rtcd_defs.pl
View file @
85b764fe
...
...
@@ -699,7 +699,7 @@ if (vpx_config("CONFIG_VPX_HIGHBITDEPTH") eq "yes") {
specialize
qw/vpx_iwht4x4_1_add/
;
add_proto
qw/void vpx_iwht4x4_16_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vpx_iwht4x4_16_add/
;
specialize
qw/vpx_iwht4x4_16_add/
,
"
$sse2_x86inc
"
;
add_proto
qw/void vpx_highbd_idct4x4_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int bd
";
specialize
qw/vpx_highbd_idct4x4_1_add/
;
...
...
@@ -762,7 +762,7 @@ if (vpx_config("CONFIG_VPX_HIGHBITDEPTH") eq "yes") {
add_proto
qw/void vpx_idct32x32_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vpx_idct32x32_1_add/
;
add_proto
qw/void vpx_highbd_idct4x4_16_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int bd
";
specialize
qw/vpx_highbd_idct4x4_16_add/
;
...
...
@@ -785,10 +785,10 @@ if (vpx_config("CONFIG_VPX_HIGHBITDEPTH") eq "yes") {
specialize
qw/vpx_idct4x4_1_add sse2/
;
add_proto
qw/void vpx_idct8x8_64_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vpx_idct8x8_64_add sse2/
;
specialize
qw/vpx_idct8x8_64_add sse2/
,
"
$ssse3_x86_64_x86inc
"
;
add_proto
qw/void vpx_idct8x8_12_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vpx_idct8x8_12_add sse2/
;
specialize
qw/vpx_idct8x8_12_add sse2/
,
"
$ssse3_x86_64_x86inc
"
;
add_proto
qw/void vpx_idct8x8_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vpx_idct8x8_1_add sse2/
;
...
...
@@ -803,14 +803,15 @@ if (vpx_config("CONFIG_VPX_HIGHBITDEPTH") eq "yes") {
specialize
qw/vpx_idct16x16_1_add sse2/
;
add_proto
qw/void vpx_idct32x32_1024_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vpx_idct32x32_1024_add sse2/
;
specialize
qw/vpx_idct32x32_1024_add sse2/
,
"
$ssse3_x86_64_x86inc
"
;
add_proto
qw/void vpx_idct32x32_135_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vpx_idct32x32_135_add sse2/
;
specialize
qw/vpx_idct32x32_135_add sse2/
,
"
$ssse3_x86_64_x86inc
";
# Need to add 135 eob idct32x32 implementations.
$vpx_idct32x32_135_add_sse2
=
vpx_idct32x32_1024_add_sse2
;
add_proto
qw/void vpx_idct32x32_34_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vpx_idct32x32_34_add sse2/
;
specialize
qw/vpx_idct32x32_34_add sse2/
,
"
$ssse3_x86_64_x86inc
"
;
add_proto
qw/void vpx_idct32x32_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vpx_idct32x32_1_add sse2/
;
...
...
vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm
View file @
85b764fe
...
...
@@ -220,7 +220,24 @@ cglobal idct8x8_64_add, 3, 5, 13, input, output, stride
mova
m12
,
[
pw_11585x2
]
lea
r3
,
[
2
*
strideq
]
%if CONFIG_VPX_HIGHBITDEPTH
mova
m0
,
[
inputq
+
0
]
packssdw
m0
,
[
inputq
+
16
]
mova
m1
,
[
inputq
+
32
]
packssdw
m1
,
[
inputq
+
48
]
mova
m2
,
[
inputq
+
64
]
packssdw
m2
,
[
inputq
+
80
]
mova
m3
,
[
inputq
+
96
]
packssdw
m3
,
[
inputq
+
112
]
mova
m4
,
[
inputq
+
128
]
packssdw
m4
,
[
inputq
+
144
]
mova
m5
,
[
inputq
+
160
]
packssdw
m5
,
[
inputq
+
176
]
mova
m6
,
[
inputq
+
192
]
packssdw
m6
,
[
inputq
+
208
]
mova
m7
,
[
inputq
+
224
]
packssdw
m7
,
[
inputq
+
240
]
%else
mova
m0
,
[
inputq
+
0
]
mova
m1
,
[
inputq
+
16
]
mova
m2
,
[
inputq
+
32
]
...
...
@@ -229,7 +246,7 @@ cglobal idct8x8_64_add, 3, 5, 13, input, output, stride
mova
m5
,
[
inputq
+
80
]
mova
m6
,
[
inputq
+
96
]
mova
m7
,
[
inputq
+
112
]
%endif
TRANSPOSE8X8
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
9
IDCT8_1D
TRANSPOSE8X8
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
9
...
...
@@ -254,10 +271,21 @@ cglobal idct8x8_12_add, 3, 5, 13, input, output, stride
lea
r3
,
[
2
*
strideq
]
%if CONFIG_VPX_HIGHBITDEPTH
mova
m0
,
[
inputq
+
0
]
packssdw
m0
,
[
inputq
+
16
]
mova
m1
,
[
inputq
+
32
]
packssdw
m1
,
[
inputq
+
48
]
mova
m2
,
[
inputq
+
64
]
packssdw
m2
,
[
inputq
+
80
]
mova
m3
,
[
inputq
+
96
]
packssdw
m3
,
[
inputq
+
112
]
%else
mova
m0
,
[
inputq
+
0
]
mova
m1
,
[
inputq
+
16
]
mova
m2
,
[
inputq
+
32
]
mova
m3
,
[
inputq
+
48
]
%endif
punpcklwd
m0
,
m1
punpcklwd
m2
,
m3
...
...
@@ -765,6 +793,24 @@ idct32x32_34:
lea
r4
,
[
rsp
+
transposed_in
]
idct32x32_34_transpose:
%if CONFIG_VPX_HIGHBITDEPTH
mova
m0
,
[
r3
+
0
]
packssdw
m0
,
[
r3
+
16
]
mova
m1
,
[
r3
+
32
*
4
]
packssdw
m1
,
[
r3
+
32
*
4
+
16
]
mova
m2
,
[
r3
+
32
*
8
]
packssdw
m2
,
[
r3
+
32
*
8
+
16
]
mova
m3
,
[
r3
+
32
*
12
]
packssdw
m3
,
[
r3
+
32
*
12
+
16
]
mova
m4
,
[
r3
+
32
*
16
]
packssdw
m4
,
[
r3
+
32
*
16
+
16
]
mova
m5
,
[
r3
+
32
*
20
]
packssdw
m5
,
[
r3
+
32
*
20
+
16
]
mova
m6
,
[
r3
+
32
*
24
]
packssdw
m6
,
[
r3
+
32
*
24
+
16
]
mova
m7
,
[
r3
+
32
*
28
]
packssdw
m7
,
[
r3
+
32
*
28
+
16
]
%else
mova
m0
,
[
r3
+
0
]
mova
m1
,
[
r3
+
16
*
4
]
mova
m2
,
[
r3
+
16
*
8
]
...
...
@@ -773,6 +819,7 @@ idct32x32_34_transpose:
mova
m5
,
[
r3
+
16
*
20
]
mova
m6
,
[
r3
+
16
*
24
]
mova
m7
,
[
r3
+
16
*
28
]
%endif
TRANSPOSE8X8
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
9
...
...
@@ -1176,6 +1223,24 @@ idct32x32_135:
mov
r7
,
2
idct32x32_135_transpose:
%if CONFIG_VPX_HIGHBITDEPTH
mova
m0
,
[
r3
+
0
]
packssdw
m0
,
[
r3
+
16
]
mova
m1
,
[
r3
+
32
*
4
]
packssdw
m1
,
[
r3
+
32
*
4
+
16
]
mova
m2
,
[
r3
+
32
*
8
]
packssdw
m2
,
[
r3
+
32
*
8
+
16
]
mova
m3
,
[
r3
+
32
*
12
]
packssdw
m3
,
[
r3
+
32
*
12
+
16
]
mova
m4
,
[
r3
+
32
*
16
]
packssdw
m4
,
[
r3
+
32
*
16
+
16
]
mova
m5
,
[
r3
+
32
*
20
]
packssdw
m5
,
[
r3
+
32
*
20
+
16
]
mova
m6
,
[
r3
+
32
*
24
]
packssdw
m6
,
[
r3
+
32
*
24
+
16
]
mova
m7
,
[
r3
+
32
*
28
]
packssdw
m7
,
[
r3
+
32
*
28
+
16
]
%else
mova
m0
,
[
r3
+
0
]
mova
m1
,
[
r3
+
16
*
4
]
mova
m2
,
[
r3
+
16
*
8
]
...
...
@@ -1184,7 +1249,7 @@ idct32x32_135_transpose:
mova
m5
,
[
r3
+
16
*
20
]
mova
m6
,
[
r3
+
16
*
24
]
mova
m7
,
[
r3
+
16
*
28
]
%endif
TRANSPOSE8X8
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
9
mova
[
r4
+
0
],
m0
...
...
@@ -1196,14 +1261,22 @@ idct32x32_135_transpose:
mova
[
r4
+
16
*
6
],
m6
mova
[
r4
+
16
*
7
],
m7
%if CONFIG_VPX_HIGHBITDEPTH
add
r3
,
32
%else
add
r3
,
16
%endif
add
r4
,
16
*
8
dec
r7
jne
idct32x32_135_transpose
IDCT32X32_135
16
*
0
,
16
*
32
,
16
*
64
,
16
*
96
lea
stp
,
[
stp
+
16
*
8
]
%if CONFIG_VPX_HIGHBITDEPTH
lea
inputq
,
[
inputq
+
32
*
32
]
%else
lea
inputq
,
[
inputq
+
16
*
32
]
%endif
dec
r6
jnz
idct32x32_135
...
...
@@ -1614,6 +1687,24 @@ idct32x32_1024:
mov
r7
,
4
idct32x32_1024_transpose:
%if CONFIG_VPX_HIGHBITDEPTH
mova
m0
,
[
r3
+
0
]
packssdw
m0
,
[
r3
+
16
]
mova
m1
,
[
r3
+
32
*
4
]
packssdw
m1
,
[
r3
+
32
*
4
+
16
]
mova
m2
,
[
r3
+
32
*
8
]
packssdw
m2
,
[
r3
+
32
*
8
+
16
]
mova
m3
,
[
r3
+
32
*
12
]
packssdw
m3
,
[
r3
+
32
*
12
+
16
]
mova
m4
,
[
r3
+
32
*
16
]
packssdw
m4
,
[
r3
+
32
*
16
+
16
]
mova
m5
,
[
r3
+
32
*
20
]
packssdw
m5
,
[
r3
+
32
*
20
+
16
]
mova
m6
,
[
r3
+
32
*
24
]
packssdw
m6
,
[
r3
+
32
*
24
+
16
]
mova
m7
,
[
r3
+
32
*
28
]
packssdw
m7
,
[
r3
+
32
*
28
+
16
]
%else
mova
m0
,
[
r3
+
0
]
mova
m1
,
[
r3
+
16
*
4
]
mova
m2
,
[
r3
+
16
*
8
]
...
...
@@ -1622,6 +1713,7 @@ idct32x32_1024_transpose:
mova
m5
,
[
r3
+
16
*
20
]
mova
m6
,
[
r3
+
16
*
24
]
mova
m7
,
[
r3
+
16
*
28
]
%endif
TRANSPOSE8X8
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
9
...
...
@@ -1633,8 +1725,11 @@ idct32x32_1024_transpose:
mova
[
r4
+
16
*
5
],
m5
mova
[
r4
+
16
*
6
],
m6
mova
[
r4
+
16
*
7
],
m7
%if CONFIG_VPX_HIGHBITDEPTH
add
r3
,
32
%else
add
r3
,
16
%endif
add
r4
,
16
*
8
dec
r7
jne
idct32x32_1024_transpose
...
...
@@ -1642,7 +1737,11 @@ idct32x32_1024_transpose:
IDCT32X32_1024
16
*
0
,
16
*
32
,
16
*
64
,
16
*
96
lea
stp
,
[
stp
+
16
*
8
]
%if CONFIG_VPX_HIGHBITDEPTH
lea
inputq
,
[
inputq
+
32
*
32
]
%else
lea
inputq
,
[
inputq
+
16
*
32
]
%endif
dec
r6
jnz
idct32x32_1024
...
...
vpx_dsp/x86/inv_wht_sse2.asm
View file @
85b764fe
...
...
@@ -82,9 +82,15 @@ SECTION .text
INIT_XMM
sse2
cglobal
iwht4x4_16_add
,
3
,
3
,
7
,
input
,
output
,
stride
%if CONFIG_VPX_HIGHBITDEPTH
mova
m0
,
[
inputq
+
0
]
packssdw
m0
,
[
inputq
+
16
]
mova
m1
,
[
inputq
+
32
]
packssdw
m1
,
[
inputq
+
48
]
%else
mova
m0
,
[
inputq
+
0
]
mova
m1
,
[
inputq
+
16
]
%endif
psraw
m0
,
2
psraw
m1
,
2
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment