Commit 4a2b684e authored by Johann

modify SAVE_XMM for potential 64bit use

The Win64 ABI requires saving and restoring xmm6:xmm15. Currently,
SAVE_XMM and RESTORE_XMM only allow for saving xmm6:xmm7. Allow
specifying the highest register used and whether the stack is unaligned.

Change-Id: Ica5699622ffe3346d3a486f48eef0206c51cf867
parent a9b465c5
...@@ -102,7 +102,7 @@ sym(idct_dequant_full_2x_sse2): ...@@ -102,7 +102,7 @@ sym(idct_dequant_full_2x_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 7 SHADOW_ARGS_TO_STACK 7
SAVE_XMM SAVE_XMM 7
GET_GOT rbx GET_GOT rbx
push rsi push rsi
push rdi push rdi
...@@ -443,7 +443,7 @@ sym(idct_dequant_dc_full_2x_sse2): ...@@ -443,7 +443,7 @@ sym(idct_dequant_dc_full_2x_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 7 SHADOW_ARGS_TO_STACK 7
SAVE_XMM SAVE_XMM 7
GET_GOT rbx GET_GOT rbx
push rsi push rsi
push rdi push rdi
......
...@@ -17,7 +17,7 @@ sym(vp8_short_inv_walsh4x4_sse2): ...@@ -17,7 +17,7 @@ sym(vp8_short_inv_walsh4x4_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 2 SHADOW_ARGS_TO_STACK 2
SAVE_XMM SAVE_XMM 6
push rsi push rsi
push rdi push rdi
; end prolog ; end prolog
...@@ -41,7 +41,7 @@ sym(vp8_short_inv_walsh4x4_sse2): ...@@ -41,7 +41,7 @@ sym(vp8_short_inv_walsh4x4_sse2):
movdqa xmm4, xmm0 movdqa xmm4, xmm0
punpcklqdq xmm0, xmm3 ;d1 a1 punpcklqdq xmm0, xmm3 ;d1 a1
punpckhqdq xmm4, xmm3 ;c1 b1 punpckhqdq xmm4, xmm3 ;c1 b1
movd xmm7, eax movd xmm6, eax
movdqa xmm1, xmm4 ;c1 b1 movdqa xmm1, xmm4 ;c1 b1
paddw xmm4, xmm0 ;dl+cl a1+b1 aka op[4] op[0] paddw xmm4, xmm0 ;dl+cl a1+b1 aka op[4] op[0]
...@@ -66,7 +66,7 @@ sym(vp8_short_inv_walsh4x4_sse2): ...@@ -66,7 +66,7 @@ sym(vp8_short_inv_walsh4x4_sse2):
pshufd xmm2, xmm1, 4eh ;ip[8] ip[12] pshufd xmm2, xmm1, 4eh ;ip[8] ip[12]
movdqa xmm3, xmm4 ;ip[4] ip[0] movdqa xmm3, xmm4 ;ip[4] ip[0]
pshufd xmm7, xmm7, 0 ;03 03 03 03 03 03 03 03 pshufd xmm6, xmm6, 0 ;03 03 03 03 03 03 03 03
paddw xmm4, xmm2 ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1 paddw xmm4, xmm2 ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1
psubw xmm3, xmm2 ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1 psubw xmm3, xmm2 ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1
...@@ -90,8 +90,8 @@ sym(vp8_short_inv_walsh4x4_sse2): ...@@ -90,8 +90,8 @@ sym(vp8_short_inv_walsh4x4_sse2):
punpcklwd xmm5, xmm0 ; 31 21 11 01 30 20 10 00 punpcklwd xmm5, xmm0 ; 31 21 11 01 30 20 10 00
punpckhwd xmm1, xmm0 ; 33 23 13 03 32 22 12 02 punpckhwd xmm1, xmm0 ; 33 23 13 03 32 22 12 02
;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
paddw xmm5, xmm7 paddw xmm5, xmm6
paddw xmm1, xmm7 paddw xmm1, xmm6
psraw xmm5, 3 psraw xmm5, 3
psraw xmm1, 3 psraw xmm1, 3
......
...@@ -288,7 +288,7 @@ sym(vp8_loop_filter_horizontal_edge_sse2): ...@@ -288,7 +288,7 @@ sym(vp8_loop_filter_horizontal_edge_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 6 SHADOW_ARGS_TO_STACK 6
SAVE_XMM SAVE_XMM 7
GET_GOT rbx GET_GOT rbx
push rsi push rsi
push rdi push rdi
...@@ -338,7 +338,7 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2): ...@@ -338,7 +338,7 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 6 SHADOW_ARGS_TO_STACK 6
SAVE_XMM SAVE_XMM 7
GET_GOT rbx GET_GOT rbx
push rsi push rsi
push rdi push rdi
...@@ -584,7 +584,7 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2): ...@@ -584,7 +584,7 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 6 SHADOW_ARGS_TO_STACK 6
SAVE_XMM SAVE_XMM 7
GET_GOT rbx GET_GOT rbx
push rsi push rsi
push rdi push rdi
...@@ -634,7 +634,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2): ...@@ -634,7 +634,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 6 SHADOW_ARGS_TO_STACK 6
SAVE_XMM SAVE_XMM 7
GET_GOT rbx GET_GOT rbx
push rsi push rsi
push rdi push rdi
...@@ -1024,7 +1024,7 @@ sym(vp8_loop_filter_vertical_edge_sse2): ...@@ -1024,7 +1024,7 @@ sym(vp8_loop_filter_vertical_edge_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 6 SHADOW_ARGS_TO_STACK 6
SAVE_XMM SAVE_XMM 7
GET_GOT rbx GET_GOT rbx
push rsi push rsi
push rdi push rdi
...@@ -1091,7 +1091,7 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2): ...@@ -1091,7 +1091,7 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 6 SHADOW_ARGS_TO_STACK 6
SAVE_XMM SAVE_XMM 7
GET_GOT rbx GET_GOT rbx
push rsi push rsi
push rdi push rdi
...@@ -1249,7 +1249,7 @@ sym(vp8_mbloop_filter_vertical_edge_sse2): ...@@ -1249,7 +1249,7 @@ sym(vp8_mbloop_filter_vertical_edge_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 6 SHADOW_ARGS_TO_STACK 6
SAVE_XMM SAVE_XMM 7
GET_GOT rbx GET_GOT rbx
push rsi push rsi
push rdi push rdi
...@@ -1318,7 +1318,7 @@ sym(vp8_mbloop_filter_vertical_edge_uv_sse2): ...@@ -1318,7 +1318,7 @@ sym(vp8_mbloop_filter_vertical_edge_uv_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 6 SHADOW_ARGS_TO_STACK 6
SAVE_XMM SAVE_XMM 7
GET_GOT rbx GET_GOT rbx
push rsi push rsi
push rdi push rdi
...@@ -1386,7 +1386,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2): ...@@ -1386,7 +1386,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 6 SHADOW_ARGS_TO_STACK 6
SAVE_XMM SAVE_XMM 7
GET_GOT rbx GET_GOT rbx
push rsi push rsi
push rdi push rdi
...@@ -1503,7 +1503,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2): ...@@ -1503,7 +1503,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
push rbp ; save old base pointer value. push rbp ; save old base pointer value.
mov rbp, rsp ; set new base pointer value. mov rbp, rsp ; set new base pointer value.
SHADOW_ARGS_TO_STACK 6 SHADOW_ARGS_TO_STACK 6
SAVE_XMM SAVE_XMM 7
GET_GOT rbx ; save callee-saved reg GET_GOT rbx ; save callee-saved reg
push rsi push rsi
push rdi push rdi
......
...@@ -26,7 +26,7 @@ sym(vp8_post_proc_down_and_across_xmm): ...@@ -26,7 +26,7 @@ sym(vp8_post_proc_down_and_across_xmm):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 7 SHADOW_ARGS_TO_STACK 7
SAVE_XMM SAVE_XMM 7
GET_GOT rbx GET_GOT rbx
push rsi push rsi
push rdi push rdi
...@@ -256,7 +256,7 @@ sym(vp8_mbpost_proc_down_xmm): ...@@ -256,7 +256,7 @@ sym(vp8_mbpost_proc_down_xmm):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 5 SHADOW_ARGS_TO_STACK 5
SAVE_XMM SAVE_XMM 7
GET_GOT rbx GET_GOT rbx
push rsi push rsi
push rdi push rdi
...@@ -456,7 +456,7 @@ sym(vp8_mbpost_proc_across_ip_xmm): ...@@ -456,7 +456,7 @@ sym(vp8_mbpost_proc_across_ip_xmm):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 5 SHADOW_ARGS_TO_STACK 5
SAVE_XMM SAVE_XMM 7
GET_GOT rbx GET_GOT rbx
push rsi push rsi
push rdi push rdi
......
...@@ -67,7 +67,7 @@ sym(vp8_recon4b_sse2): ...@@ -67,7 +67,7 @@ sym(vp8_recon4b_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 4 SHADOW_ARGS_TO_STACK 4
SAVE_XMM SAVE_XMM 7
push rsi push rsi
push rdi push rdi
; end prolog ; end prolog
......
...@@ -37,7 +37,7 @@ sym(vp8_filter_block1d8_h6_sse2): ...@@ -37,7 +37,7 @@ sym(vp8_filter_block1d8_h6_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 7 SHADOW_ARGS_TO_STACK 7
SAVE_XMM SAVE_XMM 7
GET_GOT rbx GET_GOT rbx
push rsi push rsi
push rdi push rdi
...@@ -157,7 +157,7 @@ sym(vp8_filter_block1d16_h6_sse2): ...@@ -157,7 +157,7 @@ sym(vp8_filter_block1d16_h6_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 7 SHADOW_ARGS_TO_STACK 7
SAVE_XMM SAVE_XMM 7
GET_GOT rbx GET_GOT rbx
push rsi push rsi
push rdi push rdi
...@@ -333,7 +333,7 @@ sym(vp8_filter_block1d8_v6_sse2): ...@@ -333,7 +333,7 @@ sym(vp8_filter_block1d8_v6_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 8 SHADOW_ARGS_TO_STACK 8
SAVE_XMM SAVE_XMM 7
GET_GOT rbx GET_GOT rbx
push rsi push rsi
push rdi push rdi
...@@ -428,7 +428,7 @@ sym(vp8_filter_block1d16_v6_sse2): ...@@ -428,7 +428,7 @@ sym(vp8_filter_block1d16_v6_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 8 SHADOW_ARGS_TO_STACK 8
SAVE_XMM SAVE_XMM 7
GET_GOT rbx GET_GOT rbx
push rsi push rsi
push rdi push rdi
...@@ -538,7 +538,7 @@ sym(vp8_filter_block1d8_h6_only_sse2): ...@@ -538,7 +538,7 @@ sym(vp8_filter_block1d8_h6_only_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 6 SHADOW_ARGS_TO_STACK 6
SAVE_XMM SAVE_XMM 7
GET_GOT rbx GET_GOT rbx
push rsi push rsi
push rdi push rdi
...@@ -651,7 +651,7 @@ sym(vp8_filter_block1d16_h6_only_sse2): ...@@ -651,7 +651,7 @@ sym(vp8_filter_block1d16_h6_only_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 6 SHADOW_ARGS_TO_STACK 6
SAVE_XMM SAVE_XMM 7
GET_GOT rbx GET_GOT rbx
push rsi push rsi
push rdi push rdi
...@@ -816,7 +816,7 @@ sym(vp8_filter_block1d8_v6_only_sse2): ...@@ -816,7 +816,7 @@ sym(vp8_filter_block1d8_v6_only_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 6 SHADOW_ARGS_TO_STACK 6
SAVE_XMM SAVE_XMM 7
GET_GOT rbx GET_GOT rbx
push rsi push rsi
push rdi push rdi
...@@ -908,7 +908,6 @@ sym(vp8_unpack_block1d16_h6_sse2): ...@@ -908,7 +908,6 @@ sym(vp8_unpack_block1d16_h6_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 5 SHADOW_ARGS_TO_STACK 5
;SAVE_XMM ;xmm6, xmm7 are not used here.
GET_GOT rbx GET_GOT rbx
push rsi push rsi
push rdi push rdi
...@@ -948,7 +947,6 @@ unpack_block1d16_h6_sse2_rowloop: ...@@ -948,7 +947,6 @@ unpack_block1d16_h6_sse2_rowloop:
pop rdi pop rdi
pop rsi pop rsi
RESTORE_GOT RESTORE_GOT
;RESTORE_XMM
UNSHADOW_ARGS UNSHADOW_ARGS
pop rbp pop rbp
ret ret
...@@ -969,7 +967,7 @@ sym(vp8_bilinear_predict16x16_sse2): ...@@ -969,7 +967,7 @@ sym(vp8_bilinear_predict16x16_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 6 SHADOW_ARGS_TO_STACK 6
SAVE_XMM SAVE_XMM 7
GET_GOT rbx GET_GOT rbx
push rsi push rsi
push rdi push rdi
...@@ -1238,7 +1236,7 @@ sym(vp8_bilinear_predict8x8_sse2): ...@@ -1238,7 +1236,7 @@ sym(vp8_bilinear_predict8x8_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 6 SHADOW_ARGS_TO_STACK 6
SAVE_XMM SAVE_XMM 7
GET_GOT rbx GET_GOT rbx
push rsi push rsi
push rdi push rdi
......
...@@ -39,7 +39,7 @@ sym(vp8_filter_block1d8_h6_ssse3): ...@@ -39,7 +39,7 @@ sym(vp8_filter_block1d8_h6_ssse3):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 6 SHADOW_ARGS_TO_STACK 6
SAVE_XMM SAVE_XMM 7
GET_GOT rbx GET_GOT rbx
push rsi push rsi
push rdi push rdi
...@@ -182,7 +182,7 @@ sym(vp8_filter_block1d16_h6_ssse3): ...@@ -182,7 +182,7 @@ sym(vp8_filter_block1d16_h6_ssse3):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 6 SHADOW_ARGS_TO_STACK 6
SAVE_XMM SAVE_XMM 7
GET_GOT rbx GET_GOT rbx
push rsi push rsi
push rdi push rdi
...@@ -289,7 +289,7 @@ sym(vp8_filter_block1d4_h6_ssse3): ...@@ -289,7 +289,7 @@ sym(vp8_filter_block1d4_h6_ssse3):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 6 SHADOW_ARGS_TO_STACK 6
SAVE_XMM SAVE_XMM 7
GET_GOT rbx GET_GOT rbx
push rsi push rsi
push rdi push rdi
...@@ -418,7 +418,7 @@ sym(vp8_filter_block1d16_v6_ssse3): ...@@ -418,7 +418,7 @@ sym(vp8_filter_block1d16_v6_ssse3):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 6 SHADOW_ARGS_TO_STACK 6
SAVE_XMM SAVE_XMM 7
GET_GOT rbx GET_GOT rbx
push rsi push rsi
push rdi push rdi
...@@ -606,7 +606,7 @@ sym(vp8_filter_block1d8_v6_ssse3): ...@@ -606,7 +606,7 @@ sym(vp8_filter_block1d8_v6_ssse3):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 6 SHADOW_ARGS_TO_STACK 6
SAVE_XMM SAVE_XMM 7
GET_GOT rbx GET_GOT rbx
push rsi push rsi
push rdi push rdi
...@@ -819,7 +819,6 @@ vp8_filter_block1d4_v6_ssse3_loop: ...@@ -819,7 +819,6 @@ vp8_filter_block1d4_v6_ssse3_loop:
pop rdi pop rdi
pop rsi pop rsi
RESTORE_GOT RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS UNSHADOW_ARGS
pop rbp pop rbp
ret ret
...@@ -886,7 +885,7 @@ sym(vp8_bilinear_predict16x16_ssse3): ...@@ -886,7 +885,7 @@ sym(vp8_bilinear_predict16x16_ssse3):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 6 SHADOW_ARGS_TO_STACK 6
SAVE_XMM SAVE_XMM 7
GET_GOT rbx GET_GOT rbx
push rsi push rsi
push rdi push rdi
...@@ -1149,7 +1148,7 @@ sym(vp8_bilinear_predict8x8_ssse3): ...@@ -1149,7 +1148,7 @@ sym(vp8_bilinear_predict8x8_ssse3):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 6 SHADOW_ARGS_TO_STACK 6
SAVE_XMM SAVE_XMM 7
GET_GOT rbx GET_GOT rbx
push rsi push rsi
push rdi push rdi
......
...@@ -33,7 +33,7 @@ ...@@ -33,7 +33,7 @@
%define input rcx %define input rcx
%define output rdx %define output rdx
%define pitch r8 %define pitch r8
SAVE_XMM SAVE_XMM 7, u
%else %else
%define input rdi %define input rdi
%define output rsi %define output rsi
......
...@@ -208,7 +208,7 @@ sym(vp8_mbblock_error_xmm_impl): ...@@ -208,7 +208,7 @@ sym(vp8_mbblock_error_xmm_impl):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 3 SHADOW_ARGS_TO_STACK 3
SAVE_XMM ; 6 SAVE_XMM 6
push rsi push rsi
push rdi push rdi
; end prolog ; end prolog
......
...@@ -17,7 +17,7 @@ sym(vp8_short_walsh4x4_sse2): ...@@ -17,7 +17,7 @@ sym(vp8_short_walsh4x4_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 3 SHADOW_ARGS_TO_STACK 3
SAVE_XMM SAVE_XMM 7
GET_GOT rbx GET_GOT rbx
push rsi push rsi
push rdi push rdi
......
...@@ -20,7 +20,7 @@ global sym(vp8_regular_quantize_b_sse2) ...@@ -20,7 +20,7 @@ global sym(vp8_regular_quantize_b_sse2)
sym(vp8_regular_quantize_b_sse2): sym(vp8_regular_quantize_b_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SAVE_XMM SAVE_XMM 7
GET_GOT rbx GET_GOT rbx
%if ABI_IS_32BIT %if ABI_IS_32BIT
......
...@@ -21,7 +21,7 @@ sym(vp8_sad16x16_wmt): ...@@ -21,7 +21,7 @@ sym(vp8_sad16x16_wmt):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 4 SHADOW_ARGS_TO_STACK 4
SAVE_XMM ; 6 SAVE_XMM 6
push rsi push rsi
push rdi push rdi
; end prolog ; end prolog
......
...@@ -33,15 +33,15 @@ ...@@ -33,15 +33,15 @@
movsxd rdx, dword ptr arg(3) ; ref_stride movsxd rdx, dword ptr arg(3) ; ref_stride
%else %else
%ifidn __OUTPUT_FORMAT__,x64 %ifidn __OUTPUT_FORMAT__,x64
SAVE_XMM 7, u
%define src_ptr rcx %define src_ptr rcx
%define src_stride rdx %define src_stride rdx
%define ref_ptr r8 %define ref_ptr r8
%define ref_stride r9 %define ref_stride r9
%define end_ptr r10 %define end_ptr r10
%define ret_var r11 %define ret_var r11
%define result_ptr [rsp+40+4*8] %define result_ptr [rsp+xmm_stack_space+8+4*8]
%define max_err [rsp+40+4*8] %define max_err [rsp+xmm_stack_space+8+4*8]
SAVE_XMM
%else %else
%define src_ptr rdi %define src_ptr rdi
%define src_stride rsi %define src_stride rsi
...@@ -108,6 +108,7 @@ ...@@ -108,6 +108,7 @@
xchg rbx, rax xchg rbx, rax
%else %else
%ifidn __OUTPUT_FORMAT__,x64 %ifidn __OUTPUT_FORMAT__,x64
SAVE_XMM 7, u
%define src_ptr rcx %define src_ptr rcx
%define src_stride rdx %define src_stride rdx
%define r0_ptr rsi %define r0_ptr rsi
...@@ -115,8 +116,7 @@ ...@@ -115,8 +116,7 @@
%define r2_ptr r11 %define r2_ptr r11
%define r3_ptr r8 %define r3_ptr r8
%define ref_stride r9 %define ref_stride r9
%define result_ptr [rsp+48+4*8] %define result_ptr [rsp+xmm_stack_space+16+4*8]
SAVE_XMM
push rsi push rsi
LOAD_X4_ADDRESSES r8, r0_ptr, r1_ptr, r2_ptr, r3_ptr LOAD_X4_ADDRESSES r8, r0_ptr, r1_ptr, r2_ptr, r3_ptr
......
...@@ -157,7 +157,7 @@ sym(vp8_sad16x16x3_ssse3): ...@@ -157,7 +157,7 @@ sym(vp8_sad16x16x3_ssse3):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 5 SHADOW_ARGS_TO_STACK 5
SAVE_XMM SAVE_XMM 7
push rsi push rsi
push rdi push rdi
push rcx push rcx
...@@ -270,7 +270,7 @@ sym(vp8_sad16x8x3_ssse3): ...@@ -270,7 +270,7 @@ sym(vp8_sad16x8x3_ssse3):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 5 SHADOW_ARGS_TO_STACK 5
SAVE_XMM SAVE_XMM 7
push rsi push rsi
push rdi push rdi
push rcx push rcx
......
...@@ -66,7 +66,7 @@ sym(vp8_ssim_parms_16x16_sse3): ...@@ -66,7 +66,7 @@ sym(vp8_ssim_parms_16x16_sse3):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 9 SHADOW_ARGS_TO_STACK 9
SAVE_XMM SAVE_XMM 15
push rsi push rsi
push rdi push rdi
; end prolog ; end prolog
...@@ -156,7 +156,7 @@ sym(vp8_ssim_parms_8x8_sse3): ...@@ -156,7 +156,7 @@ sym(vp8_ssim_parms_8x8_sse3):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 9 SHADOW_ARGS_TO_STACK 9
SAVE_XMM SAVE_XMM 15
push rsi push rsi
push rdi push rdi
; end prolog