Commit 5cdc3a4c authored by Jan Kratochvil, committed by John Koleszar
Browse files

nasm: address labels with 'rel label' instead of 'wrt rip'

nasm does not support `label wrt rip'; it requires `rel label' instead. The
`rel label' form is still fully compatible with yasm.

Provide nasm compatibility. With yasm, this patch causes no binary change on
{x86_64,i686}-fedora13-linux-gnu. The few longer opcodes that nasm emits on
{x86_64,i686}-fedora13-linux-gnu have been checked as safe.

Change-Id: I488773a4e930a56e43b0cc72d867ee5291215f50
parent e114f699
......@@ -58,11 +58,11 @@ sym(vp8_short_idct4x4llm_mmx):
movq mm5, mm1
paddw mm2, mm0 ; a1 =0+2
pmulhw mm5, [x_s1sqr2 GLOBAL] ;
pmulhw mm5, [GLOBAL(x_s1sqr2)] ;
paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2)
movq mm7, mm3 ;
pmulhw mm7, [x_c1sqr2less1 GLOBAL] ;
pmulhw mm7, [GLOBAL(x_c1sqr2less1)] ;
paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2)
psubw mm7, mm5 ; c1
......@@ -70,10 +70,10 @@ sym(vp8_short_idct4x4llm_mmx):
movq mm5, mm1
movq mm4, mm3
pmulhw mm5, [x_c1sqr2less1 GLOBAL]
pmulhw mm5, [GLOBAL(x_c1sqr2less1)]
paddw mm5, mm1
pmulhw mm3, [x_s1sqr2 GLOBAL]
pmulhw mm3, [GLOBAL(x_s1sqr2)]
paddw mm3, mm4
paddw mm3, mm5 ; d1
......@@ -113,11 +113,11 @@ sym(vp8_short_idct4x4llm_mmx):
movq mm5, mm1
paddw mm2, mm0 ; a1 =0+2
pmulhw mm5, [x_s1sqr2 GLOBAL] ;
pmulhw mm5, [GLOBAL(x_s1sqr2)] ;
paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2)
movq mm7, mm3 ;
pmulhw mm7, [x_c1sqr2less1 GLOBAL] ;
pmulhw mm7, [GLOBAL(x_c1sqr2less1)] ;
paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2)
psubw mm7, mm5 ; c1
......@@ -125,16 +125,16 @@ sym(vp8_short_idct4x4llm_mmx):
movq mm5, mm1
movq mm4, mm3
pmulhw mm5, [x_c1sqr2less1 GLOBAL]
pmulhw mm5, [GLOBAL(x_c1sqr2less1)]
paddw mm5, mm1
pmulhw mm3, [x_s1sqr2 GLOBAL]
pmulhw mm3, [GLOBAL(x_s1sqr2)]
paddw mm3, mm4
paddw mm3, mm5 ; d1
paddw mm0, [fours GLOBAL]
paddw mm0, [GLOBAL(fours)]
paddw mm2, [fours GLOBAL]
paddw mm2, [GLOBAL(fours)]
movq mm6, mm2 ; a1
movq mm4, mm0 ; b1
......@@ -196,7 +196,7 @@ sym(vp8_short_idct4x4llm_1_mmx):
mov rax, arg(0) ;input
movd mm0, [rax]
paddw mm0, [fours GLOBAL]
paddw mm0, [GLOBAL(fours)]
mov rdx, arg(1) ;output
psraw mm0, 3
......@@ -239,7 +239,7 @@ sym(vp8_dc_only_idct_add_mmx):
movd mm5, arg(0) ;input_dc
paddw mm5, [fours GLOBAL]
paddw mm5, [GLOBAL(fours)]
psraw mm5, 3
......
......@@ -51,7 +51,7 @@ sym(idct_dequant_0_2x_sse2):
pshufhw xmm4, xmm4, 00000000b
mov rax, arg(2) ; pre
paddw xmm4, [fours GLOBAL]
paddw xmm4, [GLOBAL(fours)]
movsxd rcx, dword ptr arg(5) ; blk_stride
psraw xmm4, 3
......@@ -160,11 +160,11 @@ sym(idct_dequant_full_2x_sse2):
movdqa xmm5, xmm1
paddw xmm2, xmm0 ; a1 = 0+2
pmulhw xmm5, [x_s1sqr2 GLOBAL]
pmulhw xmm5, [GLOBAL(x_s1sqr2)]
paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2)
movdqa xmm7, xmm3
pmulhw xmm7, [x_c1sqr2less1 GLOBAL]
pmulhw xmm7, [GLOBAL(x_c1sqr2less1)]
paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2)
psubw xmm7, xmm5 ; c1
......@@ -172,10 +172,10 @@ sym(idct_dequant_full_2x_sse2):
movdqa xmm5, xmm1
movdqa xmm4, xmm3
pmulhw xmm5, [x_c1sqr2less1 GLOBAL]
pmulhw xmm5, [GLOBAL(x_c1sqr2less1)]
paddw xmm5, xmm1
pmulhw xmm3, [x_s1sqr2 GLOBAL]
pmulhw xmm3, [GLOBAL(x_s1sqr2)]
paddw xmm3, xmm4
paddw xmm3, xmm5 ; d1
......@@ -229,11 +229,11 @@ sym(idct_dequant_full_2x_sse2):
movdqa xmm5, xmm1
paddw xmm2, xmm0 ; a1 = 0+2
pmulhw xmm5, [x_s1sqr2 GLOBAL]
pmulhw xmm5, [GLOBAL(x_s1sqr2)]
paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2)
movdqa xmm7, xmm3
pmulhw xmm7, [x_c1sqr2less1 GLOBAL]
pmulhw xmm7, [GLOBAL(x_c1sqr2less1)]
paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2)
psubw xmm7, xmm5 ; c1
......@@ -241,16 +241,16 @@ sym(idct_dequant_full_2x_sse2):
movdqa xmm5, xmm1
movdqa xmm4, xmm3
pmulhw xmm5, [x_c1sqr2less1 GLOBAL]
pmulhw xmm5, [GLOBAL(x_c1sqr2less1)]
paddw xmm5, xmm1
pmulhw xmm3, [x_s1sqr2 GLOBAL]
pmulhw xmm3, [GLOBAL(x_s1sqr2)]
paddw xmm3, xmm4
paddw xmm3, xmm5 ; d1
paddw xmm0, [fours GLOBAL]
paddw xmm0, [GLOBAL(fours)]
paddw xmm2, [fours GLOBAL]
paddw xmm2, [GLOBAL(fours)]
movdqa xmm6, xmm2 ; a1
movdqa xmm4, xmm0 ; b1
......@@ -394,7 +394,7 @@ sym(idct_dequant_dc_0_2x_sse2):
punpckldq xmm4, xmm4
; Rounding to dequant and downshift
paddw xmm4, [fours GLOBAL]
paddw xmm4, [GLOBAL(fours)]
psraw xmm4, 3
; Predict buffer needs to be expanded from bytes to words
......@@ -505,11 +505,11 @@ sym(idct_dequant_dc_full_2x_sse2):
movdqa xmm5, xmm1
paddw xmm2, xmm0 ; a1 = 0+2
pmulhw xmm5, [x_s1sqr2 GLOBAL]
pmulhw xmm5, [GLOBAL(x_s1sqr2)]
paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2)
movdqa xmm7, xmm3
pmulhw xmm7, [x_c1sqr2less1 GLOBAL]
pmulhw xmm7, [GLOBAL(x_c1sqr2less1)]
paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2)
psubw xmm7, xmm5 ; c1
......@@ -517,10 +517,10 @@ sym(idct_dequant_dc_full_2x_sse2):
movdqa xmm5, xmm1
movdqa xmm4, xmm3
pmulhw xmm5, [x_c1sqr2less1 GLOBAL]
pmulhw xmm5, [GLOBAL(x_c1sqr2less1)]
paddw xmm5, xmm1
pmulhw xmm3, [x_s1sqr2 GLOBAL]
pmulhw xmm3, [GLOBAL(x_s1sqr2)]
paddw xmm3, xmm4
paddw xmm3, xmm5 ; d1
......@@ -574,11 +574,11 @@ sym(idct_dequant_dc_full_2x_sse2):
movdqa xmm5, xmm1
paddw xmm2, xmm0 ; a1 = 0+2
pmulhw xmm5, [x_s1sqr2 GLOBAL]
pmulhw xmm5, [GLOBAL(x_s1sqr2)]
paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2)
movdqa xmm7, xmm3
pmulhw xmm7, [x_c1sqr2less1 GLOBAL]
pmulhw xmm7, [GLOBAL(x_c1sqr2less1)]
paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2)
psubw xmm7, xmm5 ; c1
......@@ -586,16 +586,16 @@ sym(idct_dequant_dc_full_2x_sse2):
movdqa xmm5, xmm1
movdqa xmm4, xmm3
pmulhw xmm5, [x_c1sqr2less1 GLOBAL]
pmulhw xmm5, [GLOBAL(x_c1sqr2less1)]
paddw xmm5, xmm1
pmulhw xmm3, [x_s1sqr2 GLOBAL]
pmulhw xmm3, [GLOBAL(x_s1sqr2)]
paddw xmm3, xmm4
paddw xmm3, xmm5 ; d1
paddw xmm0, [fours GLOBAL]
paddw xmm0, [GLOBAL(fours)]
paddw xmm2, [fours GLOBAL]
paddw xmm2, [GLOBAL(fours)]
movdqa xmm6, xmm2 ; a1
movdqa xmm4, xmm0 ; b1
......
This diff is collapsed.
......@@ -126,7 +126,7 @@
movdqa xmm4, XMMWORD PTR [rdx] ; flimit
movdqa xmm3, xmm0 ; q0
pand xmm2, [tfe GLOBAL] ; set lsb of each byte to zero
pand xmm2, [GLOBAL(tfe)] ; set lsb of each byte to zero
mov rdx, arg(4) ; hev get thresh
......@@ -182,14 +182,14 @@
movdqa xmm0, [rdx+32] ; q0
%endif
pxor xmm2, [t80 GLOBAL] ; p1 offset to convert to signed values
pxor xmm7, [t80 GLOBAL] ; q1 offset to convert to signed values
pxor xmm2, [GLOBAL(t80)] ; p1 offset to convert to signed values
pxor xmm7, [GLOBAL(t80)] ; q1 offset to convert to signed values
psubsb xmm2, xmm7 ; p1 - q1
pxor xmm6, [t80 GLOBAL] ; offset to convert to signed values
pxor xmm6, [GLOBAL(t80)] ; offset to convert to signed values
pand xmm2, xmm4 ; high var mask (hvm)(p1 - q1)
pxor xmm0, [t80 GLOBAL] ; offset to convert to signed values
pxor xmm0, [GLOBAL(t80)] ; offset to convert to signed values
movdqa xmm3, xmm0 ; q0
psubsb xmm0, xmm6 ; q0 - p0
......@@ -204,8 +204,8 @@
movdqa xmm2, xmm1
paddsb xmm1, [t4 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 4
paddsb xmm2, [t3 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 3
paddsb xmm1, [GLOBAL(t4)] ; 3* (q0 - p0) + hvm(p1 - q1) + 4
paddsb xmm2, [GLOBAL(t3)] ; 3* (q0 - p0) + hvm(p1 - q1) + 3
punpckhbw xmm5, xmm2 ; axbxcxdx
punpcklbw xmm2, xmm2 ; exfxgxhx
......@@ -223,9 +223,9 @@
movdqa xmm5, xmm0 ; save results
packsswb xmm0, xmm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3
paddsw xmm5, [ones GLOBAL]
paddsw xmm5, [GLOBAL(ones)]
paddsw xmm1, [ones GLOBAL]
paddsw xmm1, [GLOBAL(ones)]
psraw xmm5, 1 ; partial shifted one more time for 2nd tap
psraw xmm1, 1 ; partial shifted one more time for 2nd tap
......@@ -241,18 +241,18 @@
movdqa xmm1, [rdx] ; p1
%endif
pandn xmm4, xmm5 ; high edge variance additive
pxor xmm6, [t80 GLOBAL] ; unoffset
pxor xmm6, [GLOBAL(t80)] ; unoffset
pxor xmm1, [t80 GLOBAL] ; reoffset
pxor xmm1, [GLOBAL(t80)] ; reoffset
psubsb xmm3, xmm0 ; q0-= q0 add
paddsb xmm1, xmm4 ; p1+= p1 add
pxor xmm3, [t80 GLOBAL] ; unoffset
pxor xmm3, [GLOBAL(t80)] ; unoffset
pxor xmm1, [t80 GLOBAL] ; unoffset
pxor xmm1, [GLOBAL(t80)] ; unoffset
psubsb xmm7, xmm4 ; q1-= q1 add
pxor xmm7, [t80 GLOBAL] ; unoffset
pxor xmm7, [GLOBAL(t80)] ; unoffset
%if %1 == 0
lea rsi, [rsi + rcx*2]
lea rdi, [rdi + rcx*2]
......@@ -401,10 +401,10 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
movdqa xmm0, [rdx+64] ; q0
%endif
pxor xmm2, [t80 GLOBAL] ; p1 offset to convert to signed values
pxor xmm7, [t80 GLOBAL] ; q1 offset to convert to signed values
pxor xmm6, [t80 GLOBAL] ; offset to convert to signed values
pxor xmm0, [t80 GLOBAL] ; offset to convert to signed values
pxor xmm2, [GLOBAL(t80)] ; p1 offset to convert to signed values
pxor xmm7, [GLOBAL(t80)] ; q1 offset to convert to signed values
pxor xmm6, [GLOBAL(t80)] ; offset to convert to signed values
pxor xmm0, [GLOBAL(t80)] ; offset to convert to signed values
psubsb xmm2, xmm7 ; p1 - q1
movdqa xmm3, xmm0 ; q0
......@@ -431,14 +431,14 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
movdqa xmm5, xmm2
punpckhbw xmm1, xmm4 ; Filter 2 (lo)
paddsb xmm5, [t3 GLOBAL] ; vp8_signed_char_clamp(Filter2 + 3)
paddsb xmm5, [GLOBAL(t3)] ; vp8_signed_char_clamp(Filter2 + 3)
pmulhw xmm1, [s9 GLOBAL] ; Filter 2 (lo) * 9
pmulhw xmm1, [GLOBAL(s9)] ; Filter 2 (lo) * 9
pmulhw xmm0, [s9 GLOBAL] ; Filter 2 (hi) * 9
pmulhw xmm0, [GLOBAL(s9)] ; Filter 2 (hi) * 9
punpckhbw xmm7, xmm5 ; axbxcxdx
paddsb xmm2, [t4 GLOBAL] ; vp8_signed_char_clamp(Filter2 + 4)
paddsb xmm2, [GLOBAL(t4)] ; vp8_signed_char_clamp(Filter2 + 4)
punpcklbw xmm5, xmm5 ; exfxgxhx
psraw xmm7, 11 ; sign extended shift right by 3
......@@ -462,9 +462,9 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
movdqa xmm5, xmm0
movdqa xmm2, xmm5
paddw xmm0, [s63 GLOBAL] ; Filter 2 (hi) * 9 + 63
paddw xmm0, [GLOBAL(s63)] ; Filter 2 (hi) * 9 + 63
paddw xmm1, [s63 GLOBAL] ; Filter 2 (lo) * 9 + 63
paddw xmm1, [GLOBAL(s63)] ; Filter 2 (lo) * 9 + 63
paddw xmm5, xmm5 ; Filter 2 (hi) * 18
paddw xmm7, xmm7 ; Filter 2 (lo) * 18
......@@ -510,26 +510,26 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
movdqa xmm7, XMMWORD PTR [rdx+16] ; p2
%endif
pxor xmm3, [t80 GLOBAL] ; *oq0 = sq^0x80
pxor xmm6, [t80 GLOBAL] ; *oq0 = sp^0x80
pxor xmm3, [GLOBAL(t80)] ; *oq0 = sq^0x80
pxor xmm6, [GLOBAL(t80)] ; *oq0 = sp^0x80
pxor xmm1, [t80 GLOBAL]
pxor xmm4, [t80 GLOBAL]
pxor xmm1, [GLOBAL(t80)]
pxor xmm4, [GLOBAL(t80)]
psubsb xmm1, xmm2 ; sq = vp8_signed_char_clamp(qs1 - u2)
paddsb xmm4, xmm2 ; sp = vp8_signed_char_clamp(ps1 - u2)
pxor xmm1, [t80 GLOBAL] ; *oq1 = sq^0x80;
pxor xmm4, [t80 GLOBAL] ; *op1 = sp^0x80;
pxor xmm1, [GLOBAL(t80)] ; *oq1 = sq^0x80;
pxor xmm4, [GLOBAL(t80)] ; *op1 = sp^0x80;
pxor xmm7, [t80 GLOBAL]
pxor xmm5, [t80 GLOBAL]
pxor xmm7, [GLOBAL(t80)]
pxor xmm5, [GLOBAL(t80)]
paddsb xmm7, xmm0 ; sp = vp8_signed_char_clamp(ps2 - u)
psubsb xmm5, xmm0 ; sq = vp8_signed_char_clamp(qs2 - u)
pxor xmm7, [t80 GLOBAL] ; *op2 = sp^0x80;
pxor xmm5, [t80 GLOBAL] ; *oq2 = sq^0x80;
pxor xmm7, [GLOBAL(t80)] ; *op2 = sp^0x80;
pxor xmm5, [GLOBAL(t80)] ; *oq2 = sq^0x80;
%if %1 == 0
lea rsi, [rsi+rcx*2]
......@@ -915,7 +915,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
por xmm5, xmm1 ; abs(p1-q1)
movdqa xmm1, xmm3 ; p0
pand xmm5, [tfe GLOBAL] ; set lsb of each byte to zero
pand xmm5, [GLOBAL(tfe)] ; set lsb of each byte to zero
psubusb xmm1, xmm6 ; p0-q0
psrlw xmm5, 1 ; abs(p1-q1)/2
......@@ -1415,7 +1415,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
psubusb xmm0, xmm1 ; q1-=p1
psubusb xmm1, xmm4 ; p1-=q1
por xmm1, xmm0 ; abs(p1-q1)
pand xmm1, [tfe GLOBAL] ; set lsb of each byte to zero
pand xmm1, [GLOBAL(tfe)] ; set lsb of each byte to zero
psrlw xmm1, 1 ; abs(p1-q1)/2
movdqu xmm5, [rsi+rax] ; p0
......@@ -1433,12 +1433,12 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
pcmpeqb xmm5, xmm3
; start work on filters
pxor xmm2, [t80 GLOBAL] ; p1 offset to convert to signed values
pxor xmm7, [t80 GLOBAL] ; q1 offset to convert to signed values
pxor xmm2, [GLOBAL(t80)] ; p1 offset to convert to signed values
pxor xmm7, [GLOBAL(t80)] ; q1 offset to convert to signed values
psubsb xmm2, xmm7 ; p1 - q1
pxor xmm6, [t80 GLOBAL] ; offset to convert to signed values
pxor xmm0, [t80 GLOBAL] ; offset to convert to signed values
pxor xmm6, [GLOBAL(t80)] ; offset to convert to signed values
pxor xmm0, [GLOBAL(t80)] ; offset to convert to signed values
movdqa xmm3, xmm0 ; q0
psubsb xmm0, xmm6 ; q0 - p0
paddsb xmm2, xmm0 ; p1 - q1 + 1 * (q0 - p0)
......@@ -1447,7 +1447,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
pand xmm5, xmm2 ; mask filter values we don't care about
; do + 4 side
paddsb xmm5, [t4 GLOBAL] ; 3* (q0 - p0) + (p1 - q1) + 4
paddsb xmm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4
movdqa xmm0, xmm5 ; get a copy of filters
psllw xmm0, 8 ; shift left 8
......@@ -1460,11 +1460,11 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
por xmm0, xmm1 ; put the two together to get result
psubsb xmm3, xmm0 ; q0-= q0 add
pxor xmm3, [t80 GLOBAL] ; unoffset
pxor xmm3, [GLOBAL(t80)] ; unoffset
movdqu [rsi], xmm3 ; write back
; now do +3 side
psubsb xmm5, [t1s GLOBAL] ; +3 instead of +4
psubsb xmm5, [GLOBAL(t1s)] ; +3 instead of +4
movdqa xmm0, xmm5 ; get a copy of filters
psllw xmm0, 8 ; shift left 8
......@@ -1476,7 +1476,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
paddsb xmm6, xmm0 ; p0+= p0 add
pxor xmm6, [t80 GLOBAL] ; unoffset
pxor xmm6, [GLOBAL(t80)] ; unoffset
movdqu [rsi+rax], xmm6 ; write back
; begin epilog
......@@ -1596,7 +1596,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
psubusb xmm7, xmm0 ; q1-=p1
psubusb xmm6, xmm3 ; p1-=q1
por xmm6, xmm7 ; abs(p1-q1)
pand xmm6, [tfe GLOBAL] ; set lsb of each byte to zero
pand xmm6, [GLOBAL(tfe)] ; set lsb of each byte to zero
psrlw xmm6, 1 ; abs(p1-q1)/2
movdqa xmm5, xmm1 ; p0
......@@ -1622,16 +1622,16 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
movdqa t0, xmm0
movdqa t1, xmm3
pxor xmm0, [t80 GLOBAL] ; p1 offset to convert to signed values
pxor xmm3, [t80 GLOBAL] ; q1 offset to convert to signed values
pxor xmm0, [GLOBAL(t80)] ; p1 offset to convert to signed values
pxor xmm3, [GLOBAL(t80)] ; q1 offset to convert to signed values
psubsb xmm0, xmm3 ; p1 - q1
movdqa xmm6, xmm1 ; p0
movdqa xmm7, xmm2 ; q0
pxor xmm6, [t80 GLOBAL] ; offset to convert to signed values
pxor xmm6, [GLOBAL(t80)] ; offset to convert to signed values
pxor xmm7, [t80 GLOBAL] ; offset to convert to signed values
pxor xmm7, [GLOBAL(t80)] ; offset to convert to signed values
movdqa xmm3, xmm7 ; offseted ; q0
psubsb xmm7, xmm6 ; q0 - p0
......@@ -1643,7 +1643,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
pand xmm5, xmm0 ; mask filter values we don't care about
paddsb xmm5, [t4 GLOBAL] ; 3* (q0 - p0) + (p1 - q1) + 4
paddsb xmm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4
movdqa xmm0, xmm5 ; get a copy of filters
psllw xmm0, 8 ; shift left 8
......@@ -1658,10 +1658,10 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
por xmm0, xmm7 ; put the two together to get result
psubsb xmm3, xmm0 ; q0-= q0sz add
pxor xmm3, [t80 GLOBAL] ; unoffset q0
pxor xmm3, [GLOBAL(t80)] ; unoffset q0
; now do +3 side
psubsb xmm5, [t1s GLOBAL] ; +3 instead of +4
psubsb xmm5, [GLOBAL(t1s)] ; +3 instead of +4
movdqa xmm0, xmm5 ; get a copy of filters
psllw xmm0, 8 ; shift left 8
......@@ -1674,7 +1674,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
por xmm0, xmm5 ; put the two together to get result
paddsb xmm6, xmm0 ; p0+= p0 add
pxor xmm6, [t80 GLOBAL] ; unoffset p0
pxor xmm6, [GLOBAL(t80)] ; unoffset p0
movdqa xmm0, t0 ; p1
movdqa xmm4, t1 ; q1
......
......@@ -37,16 +37,16 @@ sym(vp8_post_proc_down_and_across_mmx):
%if ABI_IS_32BIT=1 && CONFIG_PIC=1
; move the global rd onto the stack, since we don't have enough registers
; to do PIC addressing
movq mm0, [rd GLOBAL]
movq mm0, [GLOBAL(rd)]
sub rsp, 8
movq [rsp], mm0
%define RD [rsp]
%else
%define RD [rd GLOBAL]
%define RD [GLOBAL(rd)]
%endif
push rbx
lea rbx, [Blur GLOBAL]
lea rbx, [GLOBAL(Blur)]
movd mm2, dword ptr arg(6) ;flimit
punpcklwd mm2, mm2
punpckldq mm2, mm2
......@@ -286,7 +286,7 @@ sym(vp8_mbpost_proc_down_mmx):
%define flimit2 [rsp+128]
%if ABI_IS_32BIT=0
lea r8, [sym(vp8_rv) GLOBAL]
lea r8, [GLOBAL(sym(vp8_rv))]
%endif
;rows +=8;
......@@ -404,7 +404,7 @@ loop_row:
and rcx, 127
%if ABI_IS_32BIT=1 && CONFIG_PIC=1
push rax
lea rax, [sym(vp8_rv) GLOBAL]
lea rax, [GLOBAL(sym(vp8_rv))]
movq mm4, [rax + rcx*2] ;vp8_rv[rcx*2]
pop rax
%elif ABI_IS_32BIT=0
......
......@@ -36,12 +36,12 @@ sym(vp8_post_proc_down_and_across_xmm):
ALIGN_STACK 16, rax
; move the global rd onto the stack, since we don't have enough registers
; to do PIC addressing
movdqa xmm0, [rd42 GLOBAL]
movdqa xmm0, [GLOBAL(rd42)]
sub rsp, 16
movdqa [rsp], xmm0
%define RD42 [rsp]
%else
%define RD42 [rd42 GLOBAL]
%define RD42 [GLOBAL(rd42)]
%endif
......@@ -275,7 +275,7 @@ sym(vp8_mbpost_proc_down_xmm):
%define flimit4 [rsp+128]
%if ABI_IS_32BIT=0
lea r8, [sym(vp8_rv) GLOBAL]
lea r8, [GLOBAL(sym(vp8_rv))]
%endif
;rows +=8;
......@@ -393,7 +393,7 @@ loop_row:
and rcx, 127
%if ABI_IS_32BIT=1 && CONFIG_PIC=1
push rax
lea rax, [sym(vp8_rv) GLOBAL]
lea rax, [GLOBAL(sym(vp8_rv))]
movdqu xmm4, [rax + rcx*2] ;vp8_rv[rcx*2]
pop rax
%elif ABI_IS_32BIT=0
......@@ -579,7 +579,7 @@ nextcol4:
punpcklwd xmm1, xmm0
paddd xmm1, xmm6
paddd xmm1, [four8s GLOBAL]
paddd xmm1, [GLOBAL(four8s)]
psrad xmm1, 4
packssdw xmm1, xmm0
......
......@@ -84,7 +84,7 @@ nextrow:
pmullw mm5, [rdx] ; mm5 *= kernel 5 modifiers
paddsw mm3, mm5 ; mm3 += mm5
paddsw mm3, [rd GLOBAL] ; mm3 += round value
paddsw mm3, [GLOBAL(rd)] ; mm3 += round value
psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128
packuswb mm3, mm0 ; pack and unpack to saturate
punpcklbw mm3, mm0 ;
......@@ -136,7 +136,7 @@ sym(vp8_filter_block1d_v6_mmx):
push rdi
; end prolog
movq mm5, [rd GLOBAL]
movq mm5, [GLOBAL(rd)]
push rbx
mov rbx, arg(6) ;vp8_filter
movq mm1, [rbx + 16] ; do both the negative taps first!!!
......@@ -225,7 +225,7 @@ sym(vp8_filter_block1dc_v6_mmx):
push rdi
; end prolog
movq mm5, [rd GLOBAL]
movq mm5, [GLOBAL(rd)]
push rbx
mov rbx, arg(7) ;vp8_filter