Commit 63ea8705, authored by Makoto Kato and committed by John Koleszar

some XMM registers are non-volatile on windows x64 ABI

XMM6 through XMM15 are non-volatile (callee-saved) in the Windows x64 ABI,
so any function that modifies these registers has to save and restore them.

Change-Id: I4676309f1350af25c8a35f0c81b1f0499ab99076
parent 8389f196
......@@ -17,6 +17,7 @@ sym(vp8_short_inv_walsh4x4_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 2
SAVE_XMM
push rsi
push rdi
; end prolog
......@@ -101,6 +102,7 @@ sym(vp8_short_inv_walsh4x4_sse2):
; begin epilog
pop rdi
pop rsi
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
......
......@@ -26,6 +26,7 @@ sym(vp8_loop_filter_horizontal_edge_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
......@@ -212,6 +213,7 @@ sym(vp8_loop_filter_horizontal_edge_sse2):
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
......@@ -231,6 +233,7 @@ sym(vp8_loop_filter_vertical_edge_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
......@@ -652,6 +655,7 @@ sym(vp8_loop_filter_vertical_edge_sse2):
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
......@@ -671,6 +675,7 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
......@@ -1002,6 +1007,7 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2):
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
......@@ -1021,6 +1027,7 @@ sym(vp8_mbloop_filter_vertical_edge_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
......@@ -1564,6 +1571,7 @@ sym(vp8_mbloop_filter_vertical_edge_sse2):
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
......@@ -1583,6 +1591,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
......@@ -1679,6 +1688,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
......@@ -1698,6 +1708,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
push rbp ; save old base pointer value.
mov rbp, rsp ; set new base pointer value.
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
GET_GOT rbx ; save callee-saved reg
push rsi
push rdi
......@@ -1942,6 +1953,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
......
......@@ -26,6 +26,7 @@ sym(vp8_post_proc_down_and_across_xmm):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
......@@ -240,6 +241,7 @@ acrossnextcol:
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
......@@ -254,6 +256,7 @@ sym(vp8_mbpost_proc_down_xmm):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
......@@ -439,6 +442,7 @@ loop_row:
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
......@@ -452,6 +456,7 @@ sym(vp8_mbpost_proc_across_ip_xmm):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
......@@ -612,6 +617,7 @@ nextcol4:
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
......
......@@ -67,6 +67,7 @@ sym(vp8_recon4b_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
SAVE_XMM
push rsi
push rdi
; end prolog
......@@ -119,6 +120,7 @@ sym(vp8_recon4b_sse2):
; begin epilog
pop rdi
pop rsi
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
......
......@@ -37,6 +37,7 @@ sym(vp8_filter_block1d8_h6_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
......@@ -129,6 +130,7 @@ filter_block1d8_h6_rowloop:
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
......@@ -155,6 +157,7 @@ sym(vp8_filter_block1d16_h6_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
......@@ -304,6 +307,7 @@ filter_block1d16_h6_sse2_rowloop:
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
......@@ -329,6 +333,7 @@ sym(vp8_filter_block1d8_v6_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 8
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
......@@ -397,6 +402,7 @@ vp8_filter_block1d8_v6_sse2_loop:
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
......@@ -510,6 +516,7 @@ vp8_filter_block1d16_v6_sse2_loop:
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
......@@ -641,6 +648,7 @@ sym(vp8_filter_block1d16_h6_only_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
......@@ -876,6 +884,7 @@ vp8_filter_block1d8_v6_only_sse2_loop:
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
......@@ -894,6 +903,7 @@ sym(vp8_unpack_block1d16_h6_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
......@@ -933,6 +943,7 @@ unpack_block1d16_h6_sse2_rowloop:
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
......@@ -953,6 +964,7 @@ sym(vp8_bilinear_predict16x16_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
......
......@@ -215,6 +215,25 @@
%define UNSHADOW_ARGS mov rsp, rbp
%endif
; Win64 ABI: XMM6-XMM15 are callee-saved. Only XMM6 and XMM7 are preserved
; here (per the original note, these are the ones libvpx uses).
;
; SAVE_XMM    - reserves 32 bytes below rsp and spills xmm6/xmm7 into them
;               with aligned stores, so rsp has to be 16-byte aligned at the
;               point where the macro is expanded.
; RESTORE_XMM - reloads xmm6/xmm7 from [rsp] and releases the 32 bytes; rsp
;               must be back at the value SAVE_XMM left it at.
;
; For every output format other than x64 both macros expand to nothing.
%ifnidn __OUTPUT_FORMAT__,x64
%macro SAVE_XMM 0
%endmacro
%macro RESTORE_XMM 0
%endmacro
%else
%macro SAVE_XMM 0
    sub     rsp, 32                         ; room for two 16-byte registers
    movdqa  XMMWORD PTR [rsp+16], xmm7
    movdqa  XMMWORD PTR [rsp], xmm6
%endmacro
%macro RESTORE_XMM 0
    movdqa  xmm7, XMMWORD PTR [rsp+16]
    movdqa  xmm6, XMMWORD PTR [rsp]
    add     rsp, 32                         ; release the save area
%endmacro
%endif
; Name of the rodata section
;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment