dequant_idct_neon.asm 3.38 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


Johann's avatar
Johann committed
12
    ; Make the routine's entry label visible to the linker.
    EXPORT  |vp8_dequant_idct_add_neon|
John Koleszar's avatar
John Koleszar committed
13
14
15
16
17
    ; Assemble the code that follows as ARM (32-bit) instructions.
    ARM
    ; Build attributes: this code requires, and preserves, 8-byte
    ; alignment of the stack.
    REQUIRE8
    PRESERVE8

    ; Read-only code section, aligned to 2^2 = 4 bytes.
    AREA ||.text||, CODE, READONLY, ALIGN=2
Johann's avatar
Johann committed
18
19
;void vp8_dequant_idct_add_neon(short *input, short *dq, unsigned char *pred,
;                               unsigned char *dest, int pitch, int stride)
; r0    short *input,
; r1    short *dq,
; r2    unsigned char *pred
; r3    unsigned char *dest
; sp    int pitch
; sp+4  int stride

;-----------------------------------------------------------------------
; vp8_dequant_idct_add_neon
;
; Multiplies 16 coefficients by their dequant factors (input[i] * dq[i]),
; runs a two-pass 4x4 inverse DCT on the products, adds the result to
; four 4-byte rows read from pred, stores the saturated bytes as four
; 4-byte rows at dest, and overwrites the 32 bytes at input with zero.
;
; In:    r0 = input, r1 = dq, r2 = pred, r3 = dest
;        [sp]     = pitch  (byte step between pred rows)
;        [sp, #4] = stride (byte step between dest rows)
; Out:   none
; Clobb: r1, r2, r3, r12, q0-q3, q14, q15
;        d8-d15 (q4-q7) are used as scratch but are saved and restored
;        here because AAPCS makes them callee-saved.
; Stack: 64 bytes for the d8-d15 save area; sp stays 8-byte aligned.
;-----------------------------------------------------------------------
|vp8_dequant_idct_add_neon| PROC
    vpush           {d8-d15}                ; save callee-saved q4-q7

    vld1.16         {q3, q4}, [r0]          ; 16 coefficients
    vld1.16         {q5, q6}, [r1]          ; 16 dequant factors

    ; The vpush above lowered sp by 64 bytes, so the stack arguments are
    ; now 64 bytes further from sp than they were at entry.
    ldr             r1, [sp, #64]           ; pitch
    vld1.32         {d14[0]}, [r2], r1      ; pred row 0
    vld1.32         {d14[1]}, [r2], r1      ; pred row 1
    vld1.32         {d15[0]}, [r2], r1      ; pred row 2
    vld1.32         {d15[1]}, [r2]          ; pred row 3

    ldr             r1, [sp, #68]           ; stride

    ldr             r12, _CONSTANTS_        ; r12 = &cospi8sqrt2minus1

    vmul.i16        q1, q3, q5              ;input for the 4x4 idct
    vmul.i16        q2, q4, q6

; ---- 4x4 inverse DCT, first pass (inlined short_idct4x4llm) ----
    vld1.16         {d0}, [r12]             ; d0[0] = 0x4e7b, d0[2] = 0x8a8c
    vswp            d3, d4                  ;q2(vp[4] vp[12])

    ; vqdmulh returns (2*a*b) >> 16; the shift right by 1 below undoes
    ; the doubling, and adding q2 back yields x + ((x * k) >> 16).
    ; For d0[2], 0x8a8c is negative as a signed 16-bit value
    ; (35468 - 65536), so the add-back gives (x * 35468) >> 16.
    vqdmulh.s16     q3, q2, d0[2]
    vqdmulh.s16     q4, q2, d0[0]

    vqadd.s16       d12, d2, d3             ;a1
    vqsub.s16       d13, d2, d3             ;b1

    vshr.s16        q3, q3, #1
    vshr.s16        q4, q4, #1

    vqadd.s16       q3, q3, q2
    vqadd.s16       q4, q4, q2

    vqsub.s16       d10, d6, d9             ;c1
    vqadd.s16       d11, d7, d8             ;d1

    vqadd.s16       d2, d12, d11            ; a1 + d1
    vqadd.s16       d3, d13, d10            ; b1 + c1
    vqsub.s16       d4, d13, d10            ; b1 - c1
    vqsub.s16       d5, d12, d11            ; a1 - d1

    ; Transpose the 4x4 block of 16-bit values held in d2-d5.
    vtrn.32         d2, d4
    vtrn.32         d3, d5
    vtrn.16         d2, d3
    vtrn.16         d4, d5

; memset(input, 0, 32) -- 32bytes
    ; The zero registers are prepared here and stored further down,
    ; interleaved with the second pass.
    vmov.i16        q14, #0

; ---- 4x4 inverse DCT, second pass ----
    vswp            d3, d4
    vqdmulh.s16     q3, q2, d0[2]
    vqdmulh.s16     q4, q2, d0[0]

    vqadd.s16       d12, d2, d3             ;a1
    vqsub.s16       d13, d2, d3             ;b1

    vmov            q15, q14

    vshr.s16        q3, q3, #1
    vshr.s16        q4, q4, #1

    vqadd.s16       q3, q3, q2
    vqadd.s16       q4, q4, q2

    vqsub.s16       d10, d6, d9             ;c1
    vqadd.s16       d11, d7, d8             ;d1

    vqadd.s16       d2, d12, d11            ; a1 + d1
    vqadd.s16       d3, d13, d10            ; b1 + c1
    vqsub.s16       d4, d13, d10            ; b1 - c1
    vqsub.s16       d5, d12, d11            ; a1 - d1

    vst1.16         {q14, q15}, [r0]        ; zero the 32 bytes at input

    ; Rounding shift: (x + 4) >> 3
    vrshr.s16       d2, d2, #3
    vrshr.s16       d3, d3, #3
    vrshr.s16       d4, d4, #3
    vrshr.s16       d5, d5, #3

    ; Transpose again before combining with the pred rows.
    vtrn.32         d2, d4
    vtrn.32         d3, d5
    vtrn.16         d2, d3
    vtrn.16         d4, d5

    ; Widen the pred bytes to 16 bits and add them to the idct output.
    vaddw.u8        q1, q1, d14
    vaddw.u8        q2, q2, d15

    ; Narrow back to bytes, saturating to the range 0..255.
    vqmovun.s16     d0, q1
    vqmovun.s16     d1, q2

    vst1.32         {d0[0]}, [r3], r1       ; dest row 0
    vst1.32         {d0[1]}, [r3], r1       ; dest row 1
    vst1.32         {d1[0]}, [r3], r1       ; dest row 2
    vst1.32         {d1[1]}, [r3]           ; dest row 3

    vpop            {d8-d15}                ; restore callee-saved q4-q7
    bx              lr

    ENDP           ; |vp8_dequant_idct_add_neon|
John Koleszar's avatar
John Koleszar committed
124

Johann's avatar
Johann committed
125
126
127
128
; Constant Pool
; _CONSTANTS_ holds the address of cospi8sqrt2minus1. The routine above
; fetches that address with "ldr r12, _CONSTANTS_" and then loads 8 bytes
; from it with "vld1.16 {d0}, [r12]", so the two data words below must
; stay adjacent and in this order.
; Each word carries the same 16-bit value in both halves:
;   0x4e7b = 20091; 20091 / 65536 ~= 0.3066 = sqrt(2) * cos(pi/8) - 1
;   0x8a8c = 35468; 35468 / 65536 ~= 0.5412 = sqrt(2) * sin(pi/8)
; 0x8a8c is negative when read as a signed 16-bit lane; the routine
; compensates by adding the operand back after its multiply.
_CONSTANTS_       DCD cospi8sqrt2minus1
cospi8sqrt2minus1 DCD 0x4e7b4e7b
sinpi8sqrt2       DCD 0x8a8c8a8c
John Koleszar's avatar
John Koleszar committed
129
130

    END