Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Xiph.Org
aom-rav1e
Commits
cfaee9f7
Commit
cfaee9f7
authored
Feb 28, 2011
by
Yunqing Wang
Committed by
Code Review
Feb 28, 2011
Browse files
Merge "Add prefetch before variance calculation"
parents
3e6d476a
d96ba65a
Changes
1
Hide whitespace changes
Inline
Side-by-side
vp8/encoder/x86/variance_impl_sse2.asm
View file @
cfaee9f7
...
@@ -85,10 +85,9 @@ sym(vp8_get16x16var_sse2):
...
@@ -85,10 +85,9 @@ sym(vp8_get16x16var_sse2):
push
rbp
push
rbp
mov
rbp
,
rsp
mov
rbp
,
rsp
SHADOW_ARGS_TO_STACK
6
SHADOW_ARGS_TO_STACK
6
GET_GOT
rbx
push
rbx
push
rsi
push
rsi
push
rdi
push
rdi
sub
rsp
,
16
; end prolog
; end prolog
mov
rsi
,
arg
(
0
)
;[src_ptr]
mov
rsi
,
arg
(
0
)
;[src_ptr]
...
@@ -97,6 +96,29 @@ sym(vp8_get16x16var_sse2):
...
@@ -97,6 +96,29 @@ sym(vp8_get16x16var_sse2):
movsxd
rax
,
DWORD
PTR
arg
(
1
)
;[source_stride]
movsxd
rax
,
DWORD
PTR
arg
(
1
)
;[source_stride]
movsxd
rdx
,
DWORD
PTR
arg
(
3
)
;[recon_stride]
movsxd
rdx
,
DWORD
PTR
arg
(
3
)
;[recon_stride]
; Prefetch data
lea
rcx
,
[
rax
+
rax
*
2
]
prefetcht0
[
rsi
]
prefetcht0
[
rsi
+
rax
]
prefetcht0
[
rsi
+
rax
*
2
]
prefetcht0
[
rsi
+
rcx
]
lea
rbx
,
[
rsi
+
rax
*
4
]
prefetcht0
[
rbx
]
prefetcht0
[
rbx
+
rax
]
prefetcht0
[
rbx
+
rax
*
2
]
prefetcht0
[
rbx
+
rcx
]
lea
rcx
,
[
rdx
+
rdx
*
2
]
prefetcht0
[
rdi
]
prefetcht0
[
rdi
+
rdx
]
prefetcht0
[
rdi
+
rdx
*
2
]
prefetcht0
[
rdi
+
rcx
]
lea
rbx
,
[
rdi
+
rdx
*
4
]
prefetcht0
[
rbx
]
prefetcht0
[
rbx
+
rdx
]
prefetcht0
[
rbx
+
rdx
*
2
]
prefetcht0
[
rbx
+
rcx
]
pxor
xmm0
,
xmm0
; clear xmm0 for unpack
pxor
xmm0
,
xmm0
; clear xmm0 for unpack
pxor
xmm7
,
xmm7
; clear xmm7 for accumulating diffs
pxor
xmm7
,
xmm7
; clear xmm7 for accumulating diffs
...
@@ -107,6 +129,9 @@ var16loop:
...
@@ -107,6 +129,9 @@ var16loop:
movdqu
xmm1
,
XMMWORD
PTR
[
rsi
]
movdqu
xmm1
,
XMMWORD
PTR
[
rsi
]
movdqu
xmm2
,
XMMWORD
PTR
[
rdi
]
movdqu
xmm2
,
XMMWORD
PTR
[
rdi
]
prefetcht0
[
rsi
+
rax
*
8
]
prefetcht0
[
rdi
+
rdx
*
8
]
movdqa
xmm3
,
xmm1
movdqa
xmm3
,
xmm1
movdqa
xmm4
,
xmm2
movdqa
xmm4
,
xmm2
...
@@ -178,10 +203,9 @@ var16loop:
...
@@ -178,10 +203,9 @@ var16loop:
; begin epilog
; begin epilog
add
rsp
,
16
pop
rdi
pop
rdi
pop
rsi
pop
rsi
RESTORE_GOT
pop
rbx
UNSHADOW_ARGS
UNSHADOW_ARGS
pop
rbp
pop
rbp
ret
ret
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment