; vp8_variance_halfpixvar16x16_v_armv6.asm
;
;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    ; Make the routine's entry symbol visible to the linker.
    EXPORT  |vp8_variance_halfpixvar16x16_v_armv6|

    ; Assemble what follows as ARM (32-bit) instructions.
    ARM
    ; REQUIRE8: this file requires an 8-byte aligned stack.
    REQUIRE8
    ; PRESERVE8: this file keeps the stack 8-byte aligned.
    PRESERVE8

    ; Read-only code section; ALIGN=2 requests 2^2 = 4-byte alignment.
    AREA ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------
; vp8_variance_halfpixvar16x16_v_armv6
;
; Variance of a 16x16 block against a reference block, where the source
; is first interpolated half a pixel vertically: each source pixel is
; replaced by the rounded average of itself and the pixel one row below.
;
; In:
;   r0    unsigned char *src_ptr
;   r1    int source_stride
;   r2    unsigned char *ref_ptr
;   r3    int  recon_stride       (row stride applied to ref_ptr)
;   stack unsigned int *sse       (first stacked argument)
; Out:
;   r0    sse - ((sum * sum) >> 8)
;   *sse  sum of squared differences over all 256 pixels
;
; Register roles inside the loop:
;   r4-r7  scratch
;   r8     sum, running signed sum of (interpolated - ref)
;   r9     address of the source row below the current one
;   r10    0x80808080
;   r11    sse, running sum of squared differences
;   r12    rows left to process
;   lr     constant zero
;
; r4-r12 and lr are saved on entry and restored on return.
; 17 source rows are read (the row below each of the 16 block rows is
; needed for the average) and 16 reference rows.
; NOTE(review): pixels are fetched with word loads straight from the
; caller's pointers; presumably the target allows unaligned LDR or the
; callers pass suitably aligned rows - confirm.
;-----------------------------------------------------------------------
|vp8_variance_halfpixvar16x16_v_armv6| PROC

    stmfd   sp!, {r4-r12, lr}   ; 10 registers = 40 bytes pushed

    pld     [r0, r1, lsl #0]    ; prefetch src_ptr + source_stride
    pld     [r2, r3, lsl #0]    ; prefetch ref_ptr + recon_stride

    mov     r8, #0              ; initialize sum = 0
    ldr     r10, c80808080      ; 0x80 in every byte, used by the averaging
    mov     r11, #0             ; initialize sse = 0
    mov     r12, #16            ; set loop counter to 16 (=block height)
    mov     lr, #0              ; constant zero
loop
    add     r9, r0, r1          ; r9 = source row below the current one

    ; 1st 4 pixels
    ldr     r4, [r0, #0]        ; load 4 src pixels
    ldr     r6, [r9, #0]        ; load 4 src pixels from next row
    ldr     r5, [r2, #0]        ; load 4 ref pixels

    ; bilinear interpolation: per byte, (a + b + 1) >> 1
    mvn     r6, r6              ; b -> 255 - b
    uhsub8  r4, r4, r6          ; (a - (255 - b)) >> 1, per byte
    eor     r4, r4, r10         ; flip bit 7 of each byte (adds 128 mod 256)

    ; usub8 sets the per-byte GE flags; sel keeps a result byte where its
    ; GE flag is set and takes the zero byte from lr otherwise.
    usub8   r6, r4, r5          ; calculate difference
    pld     [r0, r1, lsl #1]    ; prefetch src_ptr + 2 * source_stride
    sel     r7, r6, lr          ; select bytes with positive difference
    usub8   r6, r5, r4          ; calculate difference with reversed operands
    pld     [r2, r3, lsl #1]    ; prefetch ref_ptr + 2 * recon_stride
    sel     r6, r6, lr          ; select bytes with negative difference

    ; calculate partial sums
    usad8   r4, r7, lr          ; calculate sum of positive differences
    usad8   r5, r6, lr          ; calculate sum of negative differences
    orr     r6, r6, r7          ; absolute differences of all 4 pixels

    ; calculate total sum (plain add/sub: the flags are not used here)
    add     r8, r8, r4          ; add positive differences to sum
    sub     r8, r8, r5          ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6              ; bytes 0 and 2 (two pixels) to halfwords
    uxtb16  r7, r6, ror #8      ; bytes 1 and 3 (two pixels) to halfwords
    smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)

    ; 2nd 4 pixels
    ldr     r4, [r0, #4]        ; load 4 src pixels
    ldr     r6, [r9, #4]        ; load 4 src pixels from next row
    ldr     r5, [r2, #4]        ; load 4 ref pixels

    ; bilinear interpolation: per byte, (a + b + 1) >> 1
    mvn     r6, r6
    uhsub8  r4, r4, r6
    eor     r4, r4, r10

    ; second half of the previous group's sse update
    smlad   r11, r7, r7, r11    ; dual signed multiply, add and accumulate (2)

    usub8   r6, r4, r5          ; calculate difference
    sel     r7, r6, lr          ; select bytes with positive difference
    usub8   r6, r5, r4          ; calculate difference with reversed operands
    sel     r6, r6, lr          ; select bytes with negative difference

    ; calculate partial sums
    usad8   r4, r7, lr          ; calculate sum of positive differences
    usad8   r5, r6, lr          ; calculate sum of negative differences
    orr     r6, r6, r7          ; absolute differences of all 4 pixels

    ; calculate total sum
    add     r8, r8, r4          ; add positive differences to sum
    sub     r8, r8, r5          ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6              ; bytes 0 and 2 (two pixels) to halfwords
    uxtb16  r7, r6, ror #8      ; bytes 1 and 3 (two pixels) to halfwords
    smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)

    ; 3rd 4 pixels
    ldr     r4, [r0, #8]        ; load 4 src pixels
    ldr     r6, [r9, #8]        ; load 4 src pixels from next row
    ldr     r5, [r2, #8]        ; load 4 ref pixels

    ; bilinear interpolation: per byte, (a + b + 1) >> 1
    mvn     r6, r6
    uhsub8  r4, r4, r6
    eor     r4, r4, r10

    ; second half of the previous group's sse update
    smlad   r11, r7, r7, r11    ; dual signed multiply, add and accumulate (2)

    usub8   r6, r4, r5          ; calculate difference
    sel     r7, r6, lr          ; select bytes with positive difference
    usub8   r6, r5, r4          ; calculate difference with reversed operands
    sel     r6, r6, lr          ; select bytes with negative difference

    ; calculate partial sums
    usad8   r4, r7, lr          ; calculate sum of positive differences
    usad8   r5, r6, lr          ; calculate sum of negative differences
    orr     r6, r6, r7          ; absolute differences of all 4 pixels

    ; calculate total sum
    add     r8, r8, r4          ; add positive differences to sum
    sub     r8, r8, r5          ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6              ; bytes 0 and 2 (two pixels) to halfwords
    uxtb16  r7, r6, ror #8      ; bytes 1 and 3 (two pixels) to halfwords
    smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)

    ; 4th 4 pixels
    ldr     r4, [r0, #12]       ; load 4 src pixels
    ldr     r6, [r9, #12]       ; load 4 src pixels from next row
    ldr     r5, [r2, #12]       ; load 4 ref pixels

    ; bilinear interpolation: per byte, (a + b + 1) >> 1
    mvn     r6, r6
    uhsub8  r4, r4, r6
    eor     r4, r4, r10

    ; second half of the previous group's sse update
    smlad   r11, r7, r7, r11    ; dual signed multiply, add and accumulate (2)

    ; all loads for this row are done, so the row pointers advance here
    usub8   r6, r4, r5          ; calculate difference
    add     r0, r0, r1          ; set src_ptr to next row
    sel     r7, r6, lr          ; select bytes with positive difference
    usub8   r6, r5, r4          ; calculate difference with reversed operands
    add     r2, r2, r3          ; set ref_ptr to next row
    sel     r6, r6, lr          ; select bytes with negative difference

    ; calculate partial sums
    usad8   r4, r7, lr          ; calculate sum of positive differences
    usad8   r5, r6, lr          ; calculate sum of negative differences
    orr     r6, r6, r7          ; absolute differences of all 4 pixels

    ; calculate total sum
    add     r8, r8, r4          ; add positive differences to sum
    sub     r8, r8, r5          ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6              ; bytes 0 and 2 (two pixels) to halfwords
    uxtb16  r7, r6, ror #8      ; bytes 1 and 3 (two pixels) to halfwords
    smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)
    smlad   r11, r7, r7, r11    ; dual signed multiply, add and accumulate (2)

    subs    r12, r12, #1        ; one row done; sets Z after the last row
    bne     loop

    ; return stuff
    ldr     r6, [sp, #40]       ; get address of sse (above the 40 saved bytes)
    mul     r0, r8, r8          ; sum * sum
    str     r11, [r6]           ; store sse
    ; |sum| <= 255 * 256, so sum * sum < 2^32 but may exceed 2^31 - 1:
    ; the shift has to be logical (unsigned), not arithmetic.
    sub     r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8))

    ldmfd   sp!, {r4-r12, pc}   ; restore registers and return

    ENDP

; Literal word holding 0x80 in every byte. The routine in this file loads
; it with "ldr r10, c80808080" and XORs it into each interpolated word.
c80808080
    DCD     0x80808080

    END