Commit 683b5a31 authored by James Zern
Browse files

vpx_subpixel_8t_ssse3: fix reg counts/access

Fixes the build on Windows x64. Previously 'heightq', i.e., the full 64-bit
register, was accessed where only the 32-bit value was needed. Because the
value comes from a stack variable, the upper 32 bits were undefined.

Also bump the register/xmm counts: users of SETUP_LOCAL_VARS touch xmm13 in
64-bit builds, and filter_block1d16_v* uses one extra temporary variable.

Change-Id: I9c768c0b2047481d1d3b11c2e16b2f8de6eb0d80
parent a3df343c
......@@ -104,7 +104,7 @@ cglobal filter_block1d4_%1, 6, 6+(ARCH_X86_64*2), 11, LOCAL_VARS_SIZE, \
%define k0k1k4k5 m8
%define k2k3k6k7 m9
%define krd m10
%define orig_height r7
%define orig_height r7d
mova krd, [GLOBAL(pw_64)]
pshuflw k0k1k4k5, m4, 0b ;k0_k1
pshufhw k0k1k4k5, k0k1k4k5, 10101010b ;k0_k1_k4_k5
......@@ -131,8 +131,8 @@ cglobal filter_block1d4_%1, 6, 6+(ARCH_X86_64*2), 11, LOCAL_VARS_SIZE, \
mova k2k3k6k7, m7
mova krd, m1
%endif
mov orig_height, heightq
shr heightq, 1
mov orig_height, heightd
shr heightd, 1
.loop:
;Do two rows at once
movh m0, [srcq - 3]
......@@ -200,12 +200,12 @@ cglobal filter_block1d4_%1, 6, 6+(ARCH_X86_64*2), 11, LOCAL_VARS_SIZE, \
lea dstq, [dstq + 2 * dstrideq ]
prefetcht0 [srcq + 2 * sstrideq - 3]
dec heightq
dec heightd
jnz .loop
; Do last row if output_height is odd
mov heightq, orig_height
and heightq, 1
mov heightd, orig_height
and heightd, 1
je .done
movh m0, [srcq - 3] ; load src
......@@ -254,17 +254,17 @@ cglobal filter_block1d4_%1, 6, 6+(ARCH_X86_64*2), 11, LOCAL_VARS_SIZE, \
;-------------------------------------------------------------------------------
%macro SUBPIX_HFILTER8 1
cglobal filter_block1d8_%1, 6, 6+(ARCH_X86_64*1), 13, LOCAL_VARS_SIZE, \
cglobal filter_block1d8_%1, 6, 6+(ARCH_X86_64*1), 14, LOCAL_VARS_SIZE, \
src, sstride, dst, dstride, height, filter
mova m4, [filterq]
SETUP_LOCAL_VARS
%if ARCH_X86_64
%define orig_height r7
%define orig_height r7d
%else
%define orig_height heightmp
%endif
mov orig_height, heightq
shr heightq, 1
mov orig_height, heightd
shr heightd, 1
.loop:
movh m0, [srcq - 3]
......@@ -336,12 +336,12 @@ cglobal filter_block1d8_%1, 6, 6+(ARCH_X86_64*1), 13, LOCAL_VARS_SIZE, \
lea srcq, [srcq + sstrideq ]
lea dstq, [dstq + 2 * dstrideq ]
prefetcht0 [srcq + 2 * sstrideq - 3]
dec heightq
dec heightd
jnz .loop
;Do last row if output_height is odd
mov heightq, orig_height
and heightq, 1
mov heightd, orig_height
and heightd, 1
je .done
movh m0, [srcq - 3]
......@@ -361,7 +361,7 @@ cglobal filter_block1d8_%1, 6, 6+(ARCH_X86_64*1), 13, LOCAL_VARS_SIZE, \
;-------------------------------------------------------------------------------
%macro SUBPIX_HFILTER16 1
cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*0), 13, LOCAL_VARS_SIZE, \
cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*0), 14, LOCAL_VARS_SIZE, \
src, sstride, dst, dstride, height, filter
mova m4, [filterq]
SETUP_LOCAL_VARS
......@@ -427,7 +427,7 @@ cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*0), 13, LOCAL_VARS_SIZE, \
lea srcq, [srcq + sstrideq]
mova [dstq], m0
lea dstq, [dstq + dstrideq]
dec heightq
dec heightd
jnz .loop
RET
%endm
......@@ -527,11 +527,11 @@ cglobal filter_block1d%2_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \
%endif
movx [dstq], m1
add dstq, dst_stride
sub heightq, 2
cmp heightq, 1
sub heightd, 2
cmp heightd, 1
jg .loop
cmp heightq, 0
cmp heightd, 0
je .done
movx m0, [srcq ] ;A
......@@ -570,7 +570,7 @@ cglobal filter_block1d%2_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \
;-------------------------------------------------------------------------------
%macro SUBPIX_VFILTER16 1
cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*2), 13, LOCAL_VARS_SIZE, \
cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \
src, sstride, dst, dstride, height, filter
mova m4, [filterq]
......@@ -655,7 +655,7 @@ cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*2), 13, LOCAL_VARS_SIZE, \
%endif
movh [dstq + 8], m3
add dstq, dst_stride
dec heightq
dec heightd
jnz .loop
RET
%endm
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment