Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Xiph.Org
aom-rav1e
Commits
94b0c679
Commit
94b0c679
authored
Jul 24, 2013
by
Ronald S. Bultje
Browse files
d45 intra prediction SSSE3 optimizations.
Change-Id: Ie48035ff4f93c41f8a9b3023e6444fd10432d8fb
parent
7817d322
Changes
2
Hide whitespace changes
Inline
Side-by-side
vp9/common/vp9_rtcd_defs.sh
View file @
94b0c679
...
...
@@ -47,7 +47,7 @@ prototype void vp9_d27_predictor_4x4 "uint8_t *ypred_ptr, ptrdiff_t y_stride, ui
specialize vp9_d27_predictor_4x4
prototype void vp9_d45_predictor_4x4
"uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
specialize vp9_d45_predictor_4x4
specialize vp9_d45_predictor_4x4
ssse3
prototype void vp9_d63_predictor_4x4
"uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
specialize vp9_d63_predictor_4x4
...
...
@@ -86,7 +86,7 @@ prototype void vp9_d27_predictor_8x8 "uint8_t *ypred_ptr, ptrdiff_t y_stride, ui
specialize vp9_d27_predictor_8x8
prototype void vp9_d45_predictor_8x8
"uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
specialize vp9_d45_predictor_8x8
specialize vp9_d45_predictor_8x8
ssse3
prototype void vp9_d63_predictor_8x8
"uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
specialize vp9_d63_predictor_8x8
...
...
@@ -125,7 +125,7 @@ prototype void vp9_d27_predictor_16x16 "uint8_t *ypred_ptr, ptrdiff_t y_stride,
specialize vp9_d27_predictor_16x16
prototype void vp9_d45_predictor_16x16
"uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
specialize vp9_d45_predictor_16x16
specialize vp9_d45_predictor_16x16
ssse3
prototype void vp9_d63_predictor_16x16
"uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
specialize vp9_d63_predictor_16x16
...
...
@@ -164,7 +164,7 @@ prototype void vp9_d27_predictor_32x32 "uint8_t *ypred_ptr, ptrdiff_t y_stride,
specialize vp9_d27_predictor_32x32
prototype void vp9_d45_predictor_32x32
"uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
specialize vp9_d45_predictor_32x32
specialize vp9_d45_predictor_32x32
ssse3
prototype void vp9_d63_predictor_32x32
"uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
specialize vp9_d63_predictor_32x32
...
...
vp9/common/x86/vp9_intrapred_ssse3.asm
View file @
94b0c679
...
...
@@ -10,6 +10,31 @@
%include "third_party/x86inc/x86inc.asm"
SECTION_RODATA

; ---------------------------------------------------------------------------
; Splat constants.
; pb_1 is the low-bit mask the d45 predictors use to turn pavgb's round-up
; average into a round-down average. The other three are not referenced by
; the d45 code visible in this hunk.
; ---------------------------------------------------------------------------
pb_1:   times 16 db 1
pw_2:   times  8 dw 2
pb_7m1: times  8 db 7, -1
pb_15:  times 16 db 15

; ---------------------------------------------------------------------------
; pshufb control masks. The hex digits in each label spell out the source
; byte index selected for each destination byte.
; ---------------------------------------------------------------------------

; 8-byte source: identity / shift-by-1 / shift-by-2 with the last byte (7)
; replicated. sh_b23456777 is padded with zeros to a full 16 bytes.
sh_b01234577: db 0, 1, 2, 3, 4, 5, 7, 7
sh_b12345677: db 1, 2, 3, 4, 5, 6, 7, 7
sh_b23456777: db 2, 3, 4, 5, 6, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0

; 16-byte masks that only ever select from the low 8 source bytes.
sh_b0123456777777777: db 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7
sh_b1234567777777777: db 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
sh_b2345677777777777: db 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7

; "b2w" masks: every selected index is followed by -1. As a pshufb control
; byte, -1 has its high bit set and produces a zero byte, so each selected
; byte lands in the low half of a word. Not referenced by the code visible
; in this hunk.
sh_b2w01234577: db  0, -1,  1, -1,  2, -1,  3, -1,  4, -1,  5, -1,  7, -1,  7, -1
sh_b2w12345677: db  1, -1,  2, -1,  3, -1,  4, -1,  5, -1,  6, -1,  7, -1,  7, -1
sh_b2w23456777: db  2, -1,  3, -1,  4, -1,  5, -1,  6, -1,  7, -1,  7, -1,  7, -1
sh_b2w01234567: db  0, -1,  1, -1,  2, -1,  3, -1,  4, -1,  5, -1,  6, -1,  7, -1
sh_b2w12345678: db  1, -1,  2, -1,  3, -1,  4, -1,  5, -1,  6, -1,  7, -1,  8, -1
sh_b2w23456789: db  2, -1,  3, -1,  4, -1,  5, -1,  6, -1,  7, -1,  8, -1,  9, -1
sh_b2w89abcdef: db  8, -1,  9, -1, 10, -1, 11, -1, 12, -1, 13, -1, 14, -1, 15, -1
sh_b2w9abcdeff: db  9, -1, 10, -1, 11, -1, 12, -1, 13, -1, 14, -1, 15, -1, 15, -1
sh_b2wabcdefff: db 10, -1, 11, -1, 12, -1, 13, -1, 14, -1, 15, -1, 15, -1, 15, -1

; 16-byte source: shift-by-1 / shift-by-2 with the last byte (15) replicated.
sh_b123456789abcdeff: db 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15
sh_b23456789abcdefff: db 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15, 15
SECTION
.text
INIT_MMX
ss
se3
...
...
@@ -85,3 +110,182 @@ cglobal h_predictor_32x32, 2, 4, 3, dst, stride, line, left
inc
lineq
jnz
.loop
REP_RET
;-----------------------------------------------------------------------------
; d45_predictor_4x4(dst, stride, above)
;
; Reads 8 bytes a[0..7] from above and builds one 8-byte filtered row
;   f[i] = (a[i] + 2*a[i+1] + a[i+2] + 2) >> 2   for i = 0..5
;   f[6] = f[7] = a[7]                            (last byte replicated)
; Output row r (r = 0..3) is the 4 bytes f[r..r+3].
;
; Uses MMX registers m0-m3; dstq is advanced by 2*stride.
;-----------------------------------------------------------------------------
INIT_MMX ssse3
cglobal d45_predictor_4x4, 3, 3, 4, dst, stride, above
  movq                m0, [aboveq]
  pshufb              m1, m0, [sh_b01234577]    ; m1 = a[i]   (byte 6 -> a[7])
  pshufb              m2, m0, [sh_b23456777]    ; m2 = a[i+2] (clamped to a[7])
  pshufb              m0, [sh_b12345677]        ; m0 = a[i+1] (clamped to a[7])

  ; pavgb rounds up, so subtract the lost low bit ((x ^ y) & 1) to get
  ; floor((a[i] + a[i+2]) / 2); averaging that with a[i+1] yields f[i].
  pavgb               m3, m2, m1
  pxor                m2, m1
  pand                m2, [pb_1]
  psubb               m3, m2
  pavgb               m0, m3

  ; Each row is the previous one shifted down by one byte.
  movd      [dstq        ], m0                  ; row 0
  psrlq               m0, 8
  movd      [dstq+strideq], m0                  ; row 1
  psrlq               m0, 8
  lea               dstq, [dstq+strideq*2]
  movd      [dstq        ], m0                  ; row 2
  psrlq               m0, 8
  movd      [dstq+strideq], m0                  ; row 3
  RET
;-----------------------------------------------------------------------------
; d45_predictor_8x8(dst, stride, above)
;
; Reads 8 bytes a[0..7] from above; indices past 7 are treated as a[7] via
; the replicating shuffle masks. Builds the filtered row
;   f[i] = (a[i] + 2*a[i+1] + a[i+2] + 2) >> 2
; and writes 8 rows, where each row is the previous one shifted down by one
; byte with the last byte replicated.
;
; Uses MMX registers m0-m3. After `above` has been loaded, its register is
; renamed to stride3 (= 3 * stride). Only the 3 GPRs reserved by cglobal are
; named; no fourth register is declared because none is reserved or used.
; dstq is advanced by 4*stride.
;-----------------------------------------------------------------------------
INIT_MMX ssse3
cglobal d45_predictor_8x8, 3, 3, 4, dst, stride, above
  movq                m0, [aboveq]
  mova                m1, [sh_b12345677]        ; shift-by-1 mask, kept for row stepping
  DEFINE_ARGS dst, stride, stride3
  lea           stride3q, [strideq*3]
  pshufb              m2, m0, [sh_b23456777]    ; m2 = a[i+2]

  ; pavgb rounds up; subtracting ((x ^ y) & 1) gives floor((a[i]+a[i+2])/2).
  pavgb               m3, m2, m0
  pxor                m2, m0
  pshufb              m0, m1                    ; m0 = a[i+1]
  pand                m2, [pb_1]
  psubb               m3, m2
  pavgb               m0, m3                    ; m0 = f[0..7]

  ; store 4 lines
  movq  [dstq          ], m0
  pshufb              m0, m1
  movq  [dstq+strideq  ], m0
  pshufb              m0, m1
  movq  [dstq+strideq*2], m0
  pshufb              m0, m1
  movq  [dstq+stride3q ], m0
  pshufb              m0, m1
  lea               dstq, [dstq+strideq*4]

  ; store next 4 lines
  movq  [dstq          ], m0
  pshufb              m0, m1
  movq  [dstq+strideq  ], m0
  pshufb              m0, m1
  movq  [dstq+strideq*2], m0
  pshufb              m0, m1
  movq  [dstq+stride3q ], m0
  RET
;-----------------------------------------------------------------------------
; d45_predictor_16x16(dst, stride, above)
;
; Reads 16 bytes a[0..15] from above; indices past 15 are treated as a[15]
; via the replicating shuffle masks. Builds the filtered row
;   f[i] = (a[i] + 2*a[i+1] + a[i+2] + 2) >> 2
; and writes 16 rows, each the previous row shifted down by one byte with
; the last byte replicated.
;
; Uses XMM registers m0-m3 and 5 GPRs. After `above` has been loaded, its
; register is renamed to stride3 (= 3 * stride). dst8 tracks dst + 8*stride.
; NOTE(review): the load from above and the full-row stores use mova, the
; aligned move - presumably above, dst and stride are 16-byte aligned;
; confirm against callers.
;-----------------------------------------------------------------------------
INIT_XMM ssse3
cglobal d45_predictor_16x16, 3, 5, 4, dst, stride, above, dst8, line
  mova                   m0, [aboveq]
  mova                   m1, [sh_b123456789abcdeff]   ; shift-by-1 mask
  DEFINE_ARGS dst, stride, stride3, dst8, line
  lea                 dst8q, [dstq+strideq*8]
  lea              stride3q, [strideq*3]
  pshufb                 m2, m0, [sh_b23456789abcdefff] ; m2 = a[i+2]

  ; pavgb rounds up; subtracting ((x ^ y) & 1) gives floor((a[i]+a[i+2])/2).
  pavgb                  m3, m2, m0
  pxor                   m2, m0
  pshufb                 m0, m1                       ; m0 = a[i+1]
  pand                   m2, [pb_1]
  psubb                  m3, m2
  pavgb                  m0, m3                       ; m0 = f[0..15]

  ; Two passes of 4 rows. Each step writes a full row k through dst and
  ; the high 8 bytes of the same vector to the left half of row k+8
  ; through dst8.
  mov                 lined, 2
.loop:
  mova   [dstq           ], m0
  movhps [dst8q          ], m0
  pshufb                 m0, m1
  mova   [dstq +strideq  ], m0
  movhps [dst8q+strideq  ], m0
  pshufb                 m0, m1
  mova   [dstq +strideq*2], m0
  movhps [dst8q+strideq*2], m0
  pshufb                 m0, m1
  mova   [dstq +stride3q ], m0
  movhps [dst8q+stride3q ], m0
  pshufb                 m0, m1
  lea                 dst8q, [dst8q+strideq*4]
  lea                  dstq, [dstq+strideq*4]
  dec                 lined
  jnz .loop

  ; bottom-right 8x8 block: dstq now points at row 8, and after 8 shifts
  ; the high half of m0 holds only the replicated last byte.
  movhps [dstq           +8], m0
  movhps [dstq+strideq   +8], m0
  movhps [dstq+strideq*2 +8], m0
  movhps [dstq+stride3q  +8], m0
  lea                  dstq, [dstq+strideq*4]
  movhps [dstq           +8], m0
  movhps [dstq+strideq   +8], m0
  movhps [dstq+strideq*2 +8], m0
  movhps [dstq+stride3q  +8], m0
  RET
;-----------------------------------------------------------------------------
; d45_predictor_32x32(dst, stride, above)
;
; Reads 32 bytes a[0..31] from above; indices past 31 are treated as a[31]
; via the replicating shuffle masks. Builds the filtered row
;   f[i] = (a[i] + 2*a[i+1] + a[i+2] + 2) >> 2
; held as two vectors (low 16 bytes / high 16 bytes), and writes 32 rows,
; each the previous row shifted down by one byte with the last byte
; replicated.
;
; Uses XMM registers m0-m6 and 5 GPRs. After `above` has been loaded, its
; register is renamed to stride3 (= 3 * stride). dst16 tracks dst + 16*stride.
; NOTE(review): all loads/stores use mova, the aligned move - presumably
; above, dst and stride are 16-byte aligned; confirm against callers.
;-----------------------------------------------------------------------------
INIT_XMM ssse3
cglobal d45_predictor_32x32, 3, 5, 7, dst, stride, above, dst16, line
  mova                   m0, [aboveq]                 ; a[0..15]
  mova                   m4, [aboveq+16]              ; a[16..31]
  DEFINE_ARGS dst, stride, stride3, dst16, line
  lea              stride3q, [strideq*3]
  lea                dst16q, [dstq  +strideq*8]
  lea                dst16q, [dst16q+strideq*8]       ; dst16 = dst + 16*stride
  mova                   m1, [sh_b123456789abcdeff]   ; shift-by-1 mask

  ; High half: floor-average of a[i] and a[i+2], then average with a[i+1].
  pshufb                 m2, m4, [sh_b23456789abcdefff]
  pavgb                  m3, m2, m4
  pxor                   m2, m4
  palignr                m5, m4, m0, 1                ; low half a[i+1]
  palignr                m6, m4, m0, 2                ; low half a[i+2]
  pshufb                 m4, m1
  pand                   m2, [pb_1]
  psubb                  m3, m2
  pavgb                  m4, m3                       ; m4 = f[16..31]

  ; Low half: same filter, with the neighbours taken across the 16-byte seam.
  pavgb                  m3, m0, m6
  pxor                   m0, m6
  pand                   m0, [pb_1]
  psubb                  m3, m0
  pavgb                  m5, m3                       ; m5 = f[0..15]

  ; Four passes of 4 rows. Each step writes row k in full through dst and
  ; reuses the high vector as the left half of row k+16 through dst16.
  ; The low vector alternates between m5 and m3 as it is shifted.
  mov                 lined, 4
.loop:
  mova [dstq               ], m5
  mova [dstq            +16], m4
  mova [dst16q             ], m4
  palignr                m3, m4, m5, 1
  pshufb                 m4, m1
  mova [dstq+strideq       ], m3
  mova [dstq+strideq    +16], m4
  mova [dst16q+strideq     ], m4
  palignr                m5, m4, m3, 1
  pshufb                 m4, m1
  mova [dstq+strideq*2     ], m5
  mova [dstq+strideq*2  +16], m4
  mova [dst16q+strideq*2   ], m4
  palignr                m3, m4, m5, 1
  pshufb                 m4, m1
  mova [dstq+stride3q      ], m3
  mova [dstq+stride3q   +16], m4
  mova [dst16q+stride3q    ], m4
  palignr                m5, m4, m3, 1
  pshufb                 m4, m1
  lea                  dstq, [dstq+strideq*4]
  lea                dst16q, [dst16q+strideq*4]
  dec                 lined
  jnz .loop

  ; Right half of rows 16..31: dstq now points at row 16, and after 16
  ; shifts m4 holds only the replicated last byte. Three groups of four
  ; rows followed by a pointer bump, then a final group of four.
%rep 3
  mova [dstq            +16], m4
  mova [dstq+strideq    +16], m4
  mova [dstq+strideq*2  +16], m4
  mova [dstq+stride3q   +16], m4
  lea                  dstq, [dstq+strideq*4]
%endrep
  mova [dstq            +16], m4
  mova [dstq+strideq    +16], m4
  mova [dstq+strideq*2  +16], m4
  mova [dstq+stride3q   +16], m4
  RET
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment