Commit cfaee9f7 authored by Yunqing Wang, committed by Code Review

Merge "Add prefetch before variance calculation"

parents 3e6d476a d96ba65a
...@@ -85,10 +85,9 @@ sym(vp8_get16x16var_sse2): ...@@ -85,10 +85,9 @@ sym(vp8_get16x16var_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 6 SHADOW_ARGS_TO_STACK 6
GET_GOT rbx push rbx
push rsi push rsi
push rdi push rdi
sub rsp, 16
; end prolog ; end prolog
mov rsi, arg(0) ;[src_ptr] mov rsi, arg(0) ;[src_ptr]
...@@ -97,6 +96,29 @@ sym(vp8_get16x16var_sse2): ...@@ -97,6 +96,29 @@ sym(vp8_get16x16var_sse2):
movsxd rax, DWORD PTR arg(1) ;[source_stride] movsxd rax, DWORD PTR arg(1) ;[source_stride]
movsxd rdx, DWORD PTR arg(3) ;[recon_stride] movsxd rdx, DWORD PTR arg(3) ;[recon_stride]
; Prefetch data
lea rcx, [rax+rax*2]
prefetcht0 [rsi]
prefetcht0 [rsi+rax]
prefetcht0 [rsi+rax*2]
prefetcht0 [rsi+rcx]
lea rbx, [rsi+rax*4]
prefetcht0 [rbx]
prefetcht0 [rbx+rax]
prefetcht0 [rbx+rax*2]
prefetcht0 [rbx+rcx]
lea rcx, [rdx+rdx*2]
prefetcht0 [rdi]
prefetcht0 [rdi+rdx]
prefetcht0 [rdi+rdx*2]
prefetcht0 [rdi+rcx]
lea rbx, [rdi+rdx*4]
prefetcht0 [rbx]
prefetcht0 [rbx+rdx]
prefetcht0 [rbx+rdx*2]
prefetcht0 [rbx+rcx]
pxor xmm0, xmm0 ; clear xmm0 for unpack pxor xmm0, xmm0 ; clear xmm0 for unpack
pxor xmm7, xmm7 ; clear xmm7 for accumulating diffs pxor xmm7, xmm7 ; clear xmm7 for accumulating diffs
...@@ -107,6 +129,9 @@ var16loop: ...@@ -107,6 +129,9 @@ var16loop:
movdqu xmm1, XMMWORD PTR [rsi] movdqu xmm1, XMMWORD PTR [rsi]
movdqu xmm2, XMMWORD PTR [rdi] movdqu xmm2, XMMWORD PTR [rdi]
prefetcht0 [rsi+rax*8]
prefetcht0 [rdi+rdx*8]
movdqa xmm3, xmm1 movdqa xmm3, xmm1
movdqa xmm4, xmm2 movdqa xmm4, xmm2
...@@ -178,10 +203,9 @@ var16loop: ...@@ -178,10 +203,9 @@ var16loop:
; begin epilog ; begin epilog
add rsp, 16
pop rdi pop rdi
pop rsi pop rsi
RESTORE_GOT pop rbx
UNSHADOW_ARGS UNSHADOW_ARGS
pop rbp pop rbp
ret ret
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment