Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Guillaume Martres
aom-rav1e
Commits
85b764fe
Commit
85b764fe
authored
Feb 04, 2016
by
Adrian Grange
Committed by
Gerrit Code Review
Feb 04, 2016
Browse files
Merge "Get optimized inv_txfm func work with HBD build"
parents
90989934
bacba875
Changes
3
Hide whitespace changes
Inline
Side-by-side
vpx_dsp/vpx_dsp_rtcd_defs.pl
View file @
85b764fe
...
...
@@ -699,7 +699,7 @@ if (vpx_config("CONFIG_VPX_HIGHBITDEPTH") eq "yes") {
specialize
qw/vpx_iwht4x4_1_add/
;
add_proto
qw/void vpx_iwht4x4_16_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vpx_iwht4x4_16_add/
;
specialize
qw/vpx_iwht4x4_16_add/
,
"
$sse2_x86inc
"
;
add_proto
qw/void vpx_highbd_idct4x4_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int bd
";
specialize
qw/vpx_highbd_idct4x4_1_add/
;
...
...
@@ -762,7 +762,7 @@ if (vpx_config("CONFIG_VPX_HIGHBITDEPTH") eq "yes") {
add_proto
qw/void vpx_idct32x32_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vpx_idct32x32_1_add/
;
add_proto
qw/void vpx_highbd_idct4x4_16_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride, int bd
";
specialize
qw/vpx_highbd_idct4x4_16_add/
;
...
...
@@ -785,10 +785,10 @@ if (vpx_config("CONFIG_VPX_HIGHBITDEPTH") eq "yes") {
specialize
qw/vpx_idct4x4_1_add sse2/
;
add_proto
qw/void vpx_idct8x8_64_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vpx_idct8x8_64_add sse2/
;
specialize
qw/vpx_idct8x8_64_add sse2/
,
"
$ssse3_x86_64_x86inc
"
;
add_proto
qw/void vpx_idct8x8_12_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vpx_idct8x8_12_add sse2/
;
specialize
qw/vpx_idct8x8_12_add sse2/
,
"
$ssse3_x86_64_x86inc
"
;
add_proto
qw/void vpx_idct8x8_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vpx_idct8x8_1_add sse2/
;
...
...
@@ -803,14 +803,15 @@ if (vpx_config("CONFIG_VPX_HIGHBITDEPTH") eq "yes") {
specialize
qw/vpx_idct16x16_1_add sse2/
;
add_proto
qw/void vpx_idct32x32_1024_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vpx_idct32x32_1024_add sse2/
;
specialize
qw/vpx_idct32x32_1024_add sse2/
,
"
$ssse3_x86_64_x86inc
"
;
add_proto
qw/void vpx_idct32x32_135_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vpx_idct32x32_135_add sse2/
;
specialize
qw/vpx_idct32x32_135_add sse2/
,
"
$ssse3_x86_64_x86inc
";
# Need to add 135 eob idct32x32 implementations.
$vpx_idct32x32_135_add_sse2
=
vpx_idct32x32_1024_add_sse2
;
add_proto
qw/void vpx_idct32x32_34_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vpx_idct32x32_34_add sse2/
;
specialize
qw/vpx_idct32x32_34_add sse2/
,
"
$ssse3_x86_64_x86inc
"
;
add_proto
qw/void vpx_idct32x32_1_add/
,
"
const tran_low_t *input, uint8_t *dest, int dest_stride
";
specialize
qw/vpx_idct32x32_1_add sse2/
;
...
...
vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm
View file @
85b764fe
...
...
@@ -220,7 +220,24 @@ cglobal idct8x8_64_add, 3, 5, 13, input, output, stride
mova
m12
,
[
pw_11585x2
]
lea
r3
,
[
2
*
strideq
]
%if CONFIG_VPX_HIGHBITDEPTH
mova
m0
,
[
inputq
+
0
]
packssdw
m0
,
[
inputq
+
16
]
mova
m1
,
[
inputq
+
32
]
packssdw
m1
,
[
inputq
+
48
]
mova
m2
,
[
inputq
+
64
]
packssdw
m2
,
[
inputq
+
80
]
mova
m3
,
[
inputq
+
96
]
packssdw
m3
,
[
inputq
+
112
]
mova
m4
,
[
inputq
+
128
]
packssdw
m4
,
[
inputq
+
144
]
mova
m5
,
[
inputq
+
160
]
packssdw
m5
,
[
inputq
+
176
]
mova
m6
,
[
inputq
+
192
]
packssdw
m6
,
[
inputq
+
208
]
mova
m7
,
[
inputq
+
224
]
packssdw
m7
,
[
inputq
+
240
]
%else
mova
m0
,
[
inputq
+
0
]
mova
m1
,
[
inputq
+
16
]
mova
m2
,
[
inputq
+
32
]
...
...
@@ -229,7 +246,7 @@ cglobal idct8x8_64_add, 3, 5, 13, input, output, stride
mova
m5
,
[
inputq
+
80
]
mova
m6
,
[
inputq
+
96
]
mova
m7
,
[
inputq
+
112
]
%endif
TRANSPOSE8X8
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
9
IDCT8_1D
TRANSPOSE8X8
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
9
...
...
@@ -254,10 +271,21 @@ cglobal idct8x8_12_add, 3, 5, 13, input, output, stride
lea
r3
,
[
2
*
strideq
]
%if CONFIG_VPX_HIGHBITDEPTH
mova
m0
,
[
inputq
+
0
]
packssdw
m0
,
[
inputq
+
16
]
mova
m1
,
[
inputq
+
32
]
packssdw
m1
,
[
inputq
+
48
]
mova
m2
,
[
inputq
+
64
]
packssdw
m2
,
[
inputq
+
80
]
mova
m3
,
[
inputq
+
96
]
packssdw
m3
,
[
inputq
+
112
]
%else
mova
m0
,
[
inputq
+
0
]
mova
m1
,
[
inputq
+
16
]
mova
m2
,
[
inputq
+
32
]
mova
m3
,
[
inputq
+
48
]
%endif
punpcklwd
m0
,
m1
punpcklwd
m2
,
m3
...
...
@@ -765,6 +793,24 @@ idct32x32_34:
lea
r4
,
[
rsp
+
transposed_in
]
idct32x32_34_transpose:
%if CONFIG_VPX_HIGHBITDEPTH
mova
m0
,
[
r3
+
0
]
packssdw
m0
,
[
r3
+
16
]
mova
m1
,
[
r3
+
32
*
4
]
packssdw
m1
,
[
r3
+
32
*
4
+
16
]
mova
m2
,
[
r3
+
32
*
8
]
packssdw
m2
,
[
r3
+
32
*
8
+
16
]
mova
m3
,
[
r3
+
32
*
12
]
packssdw
m3
,
[
r3
+
32
*
12
+
16
]
mova
m4
,
[
r3
+
32
*
16
]
packssdw
m4
,
[
r3
+
32
*
16
+
16
]
mova
m5
,
[
r3
+
32
*
20
]
packssdw
m5
,
[
r3
+
32
*
20
+
16
]
mova
m6
,
[
r3
+
32
*
24
]
packssdw
m6
,
[
r3
+
32
*
24
+
16
]
mova
m7
,
[
r3
+
32
*
28
]
packssdw
m7
,
[
r3
+
32
*
28
+
16
]
%else
mova
m0
,
[
r3
+
0
]
mova
m1
,
[
r3
+
16
*
4
]
mova
m2
,
[
r3
+
16
*
8
]
...
...
@@ -773,6 +819,7 @@ idct32x32_34_transpose:
mova
m5
,
[
r3
+
16
*
20
]
mova
m6
,
[
r3
+
16
*
24
]
mova
m7
,
[
r3
+
16
*
28
]
%endif
TRANSPOSE8X8
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
9
...
...
@@ -1176,6 +1223,24 @@ idct32x32_135:
mov
r7
,
2
idct32x32_135_transpose:
%if CONFIG_VPX_HIGHBITDEPTH
mova
m0
,
[
r3
+
0
]
packssdw
m0
,
[
r3
+
16
]
mova
m1
,
[
r3
+
32
*
4
]
packssdw
m1
,
[
r3
+
32
*
4
+
16
]
mova
m2
,
[
r3
+
32
*
8
]
packssdw
m2
,
[
r3
+
32
*
8
+
16
]
mova
m3
,
[
r3
+
32
*
12
]
packssdw
m3
,
[
r3
+
32
*
12
+
16
]
mova
m4
,
[
r3
+
32
*
16
]
packssdw
m4
,
[
r3
+
32
*
16
+
16
]
mova
m5
,
[
r3
+
32
*
20
]
packssdw
m5
,
[
r3
+
32
*
20
+
16
]
mova
m6
,
[
r3
+
32
*
24
]
packssdw
m6
,
[
r3
+
32
*
24
+
16
]
mova
m7
,
[
r3
+
32
*
28
]
packssdw
m7
,
[
r3
+
32
*
28
+
16
]
%else
mova
m0
,
[
r3
+
0
]
mova
m1
,
[
r3
+
16
*
4
]
mova
m2
,
[
r3
+
16
*
8
]
...
...
@@ -1184,7 +1249,7 @@ idct32x32_135_transpose:
mova
m5
,
[
r3
+
16
*
20
]
mova
m6
,
[
r3
+
16
*
24
]
mova
m7
,
[
r3
+
16
*
28
]
%endif
TRANSPOSE8X8
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
9
mova
[
r4
+
0
],
m0
...
...
@@ -1196,14 +1261,22 @@ idct32x32_135_transpose:
mova
[
r4
+
16
*
6
],
m6
mova
[
r4
+
16
*
7
],
m7
%if CONFIG_VPX_HIGHBITDEPTH
add
r3
,
32
%else
add
r3
,
16
%endif
add
r4
,
16
*
8
dec
r7
jne
idct32x32_135_transpose
IDCT32X32_135
16
*
0
,
16
*
32
,
16
*
64
,
16
*
96
lea
stp
,
[
stp
+
16
*
8
]
%if CONFIG_VPX_HIGHBITDEPTH
lea
inputq
,
[
inputq
+
32
*
32
]
%else
lea
inputq
,
[
inputq
+
16
*
32
]
%endif
dec
r6
jnz
idct32x32_135
...
...
@@ -1614,6 +1687,24 @@ idct32x32_1024:
mov
r7
,
4
idct32x32_1024_transpose:
%if CONFIG_VPX_HIGHBITDEPTH
mova
m0
,
[
r3
+
0
]
packssdw
m0
,
[
r3
+
16
]
mova
m1
,
[
r3
+
32
*
4
]
packssdw
m1
,
[
r3
+
32
*
4
+
16
]
mova
m2
,
[
r3
+
32
*
8
]
packssdw
m2
,
[
r3
+
32
*
8
+
16
]
mova
m3
,
[
r3
+
32
*
12
]
packssdw
m3
,
[
r3
+
32
*
12
+
16
]
mova
m4
,
[
r3
+
32
*
16
]
packssdw
m4
,
[
r3
+
32
*
16
+
16
]
mova
m5
,
[
r3
+
32
*
20
]
packssdw
m5
,
[
r3
+
32
*
20
+
16
]
mova
m6
,
[
r3
+
32
*
24
]
packssdw
m6
,
[
r3
+
32
*
24
+
16
]
mova
m7
,
[
r3
+
32
*
28
]
packssdw
m7
,
[
r3
+
32
*
28
+
16
]
%else
mova
m0
,
[
r3
+
0
]
mova
m1
,
[
r3
+
16
*
4
]
mova
m2
,
[
r3
+
16
*
8
]
...
...
@@ -1622,6 +1713,7 @@ idct32x32_1024_transpose:
mova
m5
,
[
r3
+
16
*
20
]
mova
m6
,
[
r3
+
16
*
24
]
mova
m7
,
[
r3
+
16
*
28
]
%endif
TRANSPOSE8X8
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
9
...
...
@@ -1633,8 +1725,11 @@ idct32x32_1024_transpose:
mova
[
r4
+
16
*
5
],
m5
mova
[
r4
+
16
*
6
],
m6
mova
[
r4
+
16
*
7
],
m7
%if CONFIG_VPX_HIGHBITDEPTH
add
r3
,
32
%else
add
r3
,
16
%endif
add
r4
,
16
*
8
dec
r7
jne
idct32x32_1024_transpose
...
...
@@ -1642,7 +1737,11 @@ idct32x32_1024_transpose:
IDCT32X32_1024
16
*
0
,
16
*
32
,
16
*
64
,
16
*
96
lea
stp
,
[
stp
+
16
*
8
]
%if CONFIG_VPX_HIGHBITDEPTH
lea
inputq
,
[
inputq
+
32
*
32
]
%else
lea
inputq
,
[
inputq
+
16
*
32
]
%endif
dec
r6
jnz
idct32x32_1024
...
...
vpx_dsp/x86/inv_wht_sse2.asm
View file @
85b764fe
...
...
@@ -82,9 +82,15 @@ SECTION .text
INIT_XMM
ss
e2
cglobal
iwht4x4_16_add
,
3
,
3
,
7
,
input
,
output
,
stride
%if CONFIG_VPX_HIGHBITDEPTH
mova
m0
,
[
inputq
+
0
]
packssdw
m0
,
[
inputq
+
16
]
mova
m1
,
[
inputq
+
32
]
packssdw
m1
,
[
inputq
+
48
]
%else
mova
m0
,
[
inputq
+
0
]
mova
m1
,
[
inputq
+
16
]
%endif
psraw
m0
,
2
psraw
m1
,
2
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment