; subtract_sse2.asm
;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"

;-----------------------------------------------------------------------------
; void vp8_subtract_b_sse2_impl(unsigned char *z,  int src_stride,
;                               short *diff, unsigned char *Predictor,
;                               int pitch);
;
; Writes a 4x4 block of 16-bit differences: diff = z - Predictor.
;   z          source pixels; consecutive rows are src_stride bytes apart
;   Predictor  prediction pixels; consecutive rows are pitch bytes apart
;   diff       output; consecutive rows are pitch shorts (2*pitch bytes) apart
;
; Each row is 4 bytes wide: the bytes are zero-extended to words and
; subtracted with psubw, so the result is an exact signed 16-bit difference.
;
; Scratch registers: rax, rcx, rdx, mm0, mm1, mm7 (rsi/rdi are saved and
; restored).
; NOTE(review): the routine uses MMX registers and does not execute emms;
; presumably the caller clears the FPU/MMX state afterwards - confirm.
;-----------------------------------------------------------------------------
global sym(vp8_subtract_b_sse2_impl)
sym(vp8_subtract_b_sse2_impl):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    GET_GOT     rbx
    push rsi
    push rdi
    ; end prolog

        mov     rdi,        arg(2) ;diff
        mov     rax,        arg(3) ;Predictor
        mov     rsi,        arg(0) ;z
        movsxd  rdx,        dword ptr arg(1);src_stride;
        movsxd  rcx,        dword ptr arg(4);pitch
        pxor    mm7,        mm7             ; zero, used to widen bytes to words

        ; row 0
        movd    mm0,        [rsi]
        movd    mm1,        [rax]
        punpcklbw   mm0,    mm7
        punpcklbw   mm1,    mm7
        psubw   mm0,        mm1
        movq    MMWORD PTR [rdi],      mm0

        ; row 1
        movd    mm0,        [rsi+rdx]
        movd    mm1,        [rax+rcx]
        punpcklbw   mm0,    mm7
        punpcklbw   mm1,    mm7
        psubw   mm0,        mm1
        movq    MMWORD PTR [rdi+rcx*2], mm0

        ; row 2
        movd    mm0,        [rsi+rdx*2]
        movd    mm1,        [rax+rcx*2]
        punpcklbw   mm0,    mm7
        punpcklbw   mm1,    mm7
        psubw   mm0,        mm1
        movq    MMWORD PTR [rdi+rcx*4], mm0

        ; row 3: x86 addressing has no *3 scale, so advance the source by two
        ; rows and turn rcx into 3*pitch before forming the addresses.
        lea     rsi,        [rsi+rdx*2]
        lea     rcx,        [rcx+rcx*2]

        movd    mm0,        [rsi+rdx]
        movd    mm1,        [rax+rcx]
        punpcklbw   mm0,    mm7
        punpcklbw   mm1,    mm7
        psubw   mm0,        mm1
        movq    MMWORD PTR [rdi+rcx*2], mm0

    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------------
; void vp8_subtract_mby_sse2(short *diff, unsigned char *src,
;                            unsigned char *pred, int stride)
;
; Writes 16 rows of 16 signed 16-bit differences: diff = src - pred.
;   src     source pixels; consecutive rows are stride bytes apart
;   pred    prediction pixels; rows are packed 16 bytes apart
;   diff    output; rows are packed 16 shorts (32 bytes) apart
;
; All three buffers are accessed with movdqa, so every row address touched
; must be 16-byte aligned.
;
; Method: psubb yields the low byte of each difference; a signed byte compare
; of the operands (after flipping their top bit with t80, which turns an
; unsigned ordering into a signed one) yields 0xFF where pred > src, i.e. where
; the difference is negative.  Interleaving the two gives the sign-extended
; 16-bit result.
;
; Only xmm0-xmm2 are used, so no callee-saved XMM register (xmm6 and up on
; Win64) is modified.  Scratch general registers: rax, rcx, rdx.
;-----------------------------------------------------------------------------
global sym(vp8_subtract_mby_sse2)
sym(vp8_subtract_mby_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    GET_GOT     rbx
    push rsi
    push rdi
    ; end prolog

            mov         rsi,            arg(1) ;src
            mov         rdi,            arg(0) ;diff

            mov         rax,            arg(2) ;pred
            movsxd      rdx,            dword ptr arg(3) ;stride

            mov         rcx,            8      ; do two lines at one time

submby_loop:
            ; ---- first row of the pair ----
            movdqa      xmm0,           XMMWORD PTR [rsi]   ; src
            movdqa      xmm1,           XMMWORD PTR [rax]   ; pred

            movdqa      xmm2,           xmm0
            psubb       xmm0,           xmm1            ; low byte of src - pred

            pxor        xmm1,           [GLOBAL(t80)]   ;convert to signed values
            pxor        xmm2,           [GLOBAL(t80)]
            pcmpgtb     xmm1,           xmm2            ; 0xFF where pred > src (sign byte)

            movdqa      xmm2,           xmm0
            punpcklbw   xmm0,           xmm1            ; low 8 results:  low byte | sign byte
            punpckhbw   xmm2,           xmm1            ; high 8 results: low byte | sign byte

            movdqa      XMMWORD PTR [rdi],      xmm0
            movdqa      XMMWORD PTR [rdi +16],  xmm2

            ; ---- second row of the pair (xmm0-xmm2 are free again) ----
            movdqa      xmm0,           XMMWORD PTR [rsi + rdx]   ; src
            movdqa      xmm1,           XMMWORD PTR [rax + 16]    ; pred

            movdqa      xmm2,           xmm0
            psubb       xmm0,           xmm1            ; low byte of src - pred

            pxor        xmm1,           [GLOBAL(t80)]   ;convert to signed values
            pxor        xmm2,           [GLOBAL(t80)]
            pcmpgtb     xmm1,           xmm2            ; 0xFF where pred > src (sign byte)

            movdqa      xmm2,           xmm0
            punpcklbw   xmm0,           xmm1            ; low 8 results
            punpckhbw   xmm2,           xmm1            ; high 8 results

            movdqa      XMMWORD PTR [rdi +32],  xmm0
            movdqa      XMMWORD PTR [rdi +48],  xmm2

            ; advance two rows in every buffer
            add         rdi,            64
            add         rax,            32
            lea         rsi,            [rsi+rdx*2]

            sub         rcx,            1
            jnz         submby_loop

    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------------
; SUBTRACT_MBUV_ROWPAIR src_row0, src_row1, pred, diff
;
; Private helper for vp8_subtract_mbuv_sse2.  Each argument is an address
; expression (without brackets).  Loads two 8-pixel source rows and 16 packed
; prediction bytes, and stores 16 signed 16-bit differences (src - pred) at
; diff and diff + 16.  The pred and diff addresses must be 16-byte aligned
; (movdqa); the source rows are read with movq and need no alignment.
; Clobbers xmm0, xmm1, xmm2.
;-----------------------------------------------------------------------------
%macro SUBTRACT_MBUV_ROWPAIR 4
            movq       xmm0,    MMWORD PTR [%1]   ; src, first row
            movq       xmm2,    MMWORD PTR [%2]   ; src, second row
            movdqa     xmm1,    XMMWORD PTR [%3]  ; pred, both rows
            punpcklqdq xmm0,    xmm2              ; pack the two rows together

            movdqa     xmm2,    xmm0
            psubb      xmm0,    xmm1            ; subtraction with sign missed

            pxor       xmm1,    [GLOBAL(t80)]   ;convert to signed values
            pxor       xmm2,    [GLOBAL(t80)]
            pcmpgtb    xmm1,    xmm2            ; obtain sign information

            movdqa     xmm2,    xmm0
            punpcklbw  xmm0,    xmm1            ; put sign back to subtraction
            punpckhbw  xmm2,    xmm1            ; put sign back to subtraction

            movdqa     XMMWORD PTR [%4],        xmm0
            movdqa     XMMWORD PTR [%4 + 16],   xmm2
%endmacro

;-----------------------------------------------------------------------------
; void vp8_subtract_mbuv_sse2(short *diff, unsigned char *usrc,
;                             unsigned char *vsrc, unsigned char *pred,
;                             int stride)
;
; Writes the chroma differences of a macroblock: two 8x8 blocks (U then V) of
; signed 16-bit values src - pred.
;   usrc, vsrc  source planes; consecutive rows are stride bytes apart
;   pred        prediction buffer; U is read from pred + 256 and V from
;               pred + 320, each as 8 packed rows of 8 bytes
;   diff        output buffer; U is written at diff + 256 shorts and V at
;               diff + 320 shorts, each as 8 packed rows of 8 shorts
;
; Scratch registers: rax, rcx, rdx, xmm0-xmm2 (rsi/rdi are saved and restored).
;-----------------------------------------------------------------------------
global sym(vp8_subtract_mbuv_sse2)
sym(vp8_subtract_mbuv_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    GET_GOT     rbx
    push rsi
    push rdi
    ; end prolog

            mov     rdi,        arg(0) ;diff
            mov     rax,        arg(3) ;pred
            mov     rsi,        arg(1) ;z = usrc
            add     rdi,        256*2  ;diff = diff + 256 (shorts)
            add     rax,        256    ;Predictor = pred + 256
            movsxd  rdx,        dword ptr arg(4) ;stride;
            lea     rcx,        [rdx + rdx*2]    ;rcx = 3 * stride

            ;u
            SUBTRACT_MBUV_ROWPAIR rsi,         rsi+rdx, rax,    rdi      ;line 0 1
            SUBTRACT_MBUV_ROWPAIR rsi+rdx*2,   rsi+rcx, rax+16, rdi+32   ;line 2 3

            lea        rsi,     [rsi + rdx*4]

            SUBTRACT_MBUV_ROWPAIR rsi,         rsi+rdx, rax+32, rdi+64   ;line 4 5
            SUBTRACT_MBUV_ROWPAIR rsi+rdx*2,   rsi+rcx, rax+48, rdi+96   ;line 6 7

            ;v
            mov     rsi,        arg(2) ;z = vsrc
            add     rdi,        64*2  ;diff = diff + 320 (shorts)
            add     rax,        64    ;Predictor = pred + 320

            SUBTRACT_MBUV_ROWPAIR rsi,         rsi+rdx, rax,    rdi      ;line 0 1
            SUBTRACT_MBUV_ROWPAIR rsi+rdx*2,   rsi+rcx, rax+16, rdi+32   ;line 2 3

            lea        rsi,     [rsi + rdx*4]

            SUBTRACT_MBUV_ROWPAIR rsi,         rsi+rdx, rax+32, rdi+64   ;line 4 5
            SUBTRACT_MBUV_ROWPAIR rsi+rdx*2,   rsi+rcx, rax+48, rdi+96   ;line 6 7

    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret

; Read-only constants.
SECTION_RODATA
; t80 is used directly as a memory operand of pxor in the routines above,
; so it is kept 16-byte aligned.
align 16
; 16 bytes of 0x80: XOR-ing a byte with 0x80 flips its top bit, which lets the
; signed compare pcmpgtb order values the way an unsigned compare would.
t80:
    times 16 db 0x80