; aom_convolve_copy_neon_asm.asm
;
; Copyright (c) 2016, Alliance for Open Media. All rights reserved
;
; This source code is subject to the terms of the BSD 2 Clause License and
; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
; was not distributed with this source code in the LICENSE file, you can
; obtain it at www.aomedia.org/license/software. If the Alliance for Open
; Media Patent License 1.0 was not distributed with this source code in the
; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
;

;

    ; Make the routine's symbol visible to the linker so C code can call it.
    EXPORT  |aom_convolve_copy_neon|

    ; Assemble what follows as 32-bit ARM (A32) instructions, and record the
    ; 8-byte stack-alignment build attributes (required from callers, kept for
    ; callees) for the linker.
    ARM
    REQUIRE8
    PRESERVE8

    ; Read-only code section. The alignment argument is a power-of-two
    ; exponent, so 2 places the section on a 4-byte boundary.
    AREA ||.text||, CODE, READONLY, ALIGN=2

;------------------------------------------------------------------------------
; aom_convolve_copy_neon
;
; Copies a rectangular block of bytes, row by row, from a strided source to a
; strided destination. No filtering is performed; the block width selects one
; of five copy loops.
;
; Inputs (as used by the code below):
;   r0          source address; advanced by r1 after every row
;   r1          source stride in bytes
;   r2          destination address; advanced by r3 after every row
;   r3          destination stride in bytes
;   [sp, #16]   w, block width in bytes   (offset relative to sp on entry)
;   [sp, #20]   h, number of rows         (offset relative to sp on entry)
;   The four stack words below w are never read here; presumably they are the
;   filter arguments of a shared convolve prototype -- confirm against the C
;   declaration.
;
; Width dispatch:
;   w > 32 -> 64-byte rows    w == 32 -> 32-byte rows    8 < w < 32 -> 16-byte
;   rows    w == 8 -> 8-byte rows    w < 8 -> 4-byte rows
;
; Caveats visible in the code:
;   * Every loop tests the row counter after copying, so at least one pass is
;     executed even when h <= 0.
;   * The 32/16/8 paths copy two rows per pass; an odd h copies h + 1 rows.
;   * The NEON stores carry alignment qualifiers: every destination row address
;     must be 16-byte aligned on the 64/32/16 paths and 8-byte aligned on the
;     8 path. An address that violates the qualifier is expected to fault.
;
; Clobbers: r0-r3, r12, q0-q3, flags. r4 and r5 are saved and restored; lr is
; saved on entry and popped straight into pc to return.
;------------------------------------------------------------------------------
|aom_convolve_copy_neon| PROC
    push                {r4-r5, lr}         ; 12 bytes; lr is reused as scratch in copy64
    ldrd                r4, r5, [sp, #28]   ; r4 = w, r5 = h (12 saved + 16 bytes of unread args)

    cmp                 r4, #32
    bgt                 copy64              ; w > 32
    beq                 copy32              ; w == 32
    cmp                 r4, #8
    bgt                 copy16              ; 8 < w < 32
    beq                 copy8               ; w == 8
    b                   copy4               ; w < 8

; ---- 64-byte rows, one row per pass -----------------------------------------
; A row is moved as two 32-byte halves. The first half uses writeback (+32),
; so the second half advances by (stride - 32) to land on the next row.
copy64
    sub                 lr, r1, #32         ; lr = source stride - 32
    sub                 r3, r3, #32         ; r3 = destination stride - 32
copy64_h
    pld                 [r0, r1, lsl #1]    ; prefetch the source two rows ahead
    vld1.8              {q0-q1}, [r0]!
    vld1.8              {q2-q3}, [r0], lr
    vst1.8              {q0-q1}, [r2@128]!
    vst1.8              {q2-q3}, [r2@128], r3
    subs                r5, r5, #1
    bgt                 copy64_h
    pop                 {r4-r5, pc}

; ---- 32-byte rows, two rows per pass ----------------------------------------
copy32
    pld                 [r0, r1, lsl #1]    ; prefetch the source two rows ahead
    vld1.8              {q0-q1}, [r0], r1
    pld                 [r0, r1, lsl #1]
    vld1.8              {q2-q3}, [r0], r1
    vst1.8              {q0-q1}, [r2@128], r3
    vst1.8              {q2-q3}, [r2@128], r3
    subs                r5, r5, #2          ; two rows consumed
    bgt                 copy32
    pop                 {r4-r5, pc}

; ---- 16-byte rows, two rows per pass ----------------------------------------
copy16
    pld                 [r0, r1, lsl #1]    ; prefetch the source two rows ahead
    vld1.8              {q0}, [r0], r1
    pld                 [r0, r1, lsl #1]
    vld1.8              {q1}, [r0], r1
    vst1.8              {q0}, [r2@128], r3
    vst1.8              {q1}, [r2@128], r3
    subs                r5, r5, #2          ; two rows consumed
    bgt                 copy16
    pop                 {r4-r5, pc}

; ---- 8-byte rows, two rows per pass -----------------------------------------
copy8
    pld                 [r0, r1, lsl #1]    ; prefetch the source two rows ahead
    vld1.8              {d0}, [r0], r1
    pld                 [r0, r1, lsl #1]
    vld1.8              {d2}, [r0], r1
    vst1.8              {d0}, [r2@64], r3
    vst1.8              {d2}, [r2@64], r3
    subs                r5, r5, #2          ; two rows consumed
    bgt                 copy8
    pop                 {r4-r5, pc}

; ---- 4-byte rows, one row per pass ------------------------------------------
; Plain word load/store through r12; no NEON registers are involved.
copy4
    ldr                 r12, [r0], r1
    str                 r12, [r2], r3
    subs                r5, r5, #1
    bgt                 copy4
    pop                 {r4-r5, pc}
    ENDP

    ; Marks the end of the source file for the assembler.
    END