;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_decode_mb_tokens_v6|

    AREA    |.text|, CODE, READONLY  ; name this block of code

    INCLUDE vpx_asm_offsets.asm

;; stack frame layout: byte offsets from sp once the prologue has
;; reserved l_stacksize bytes
l_qcoeff    EQU     0       ; qcoeff pointer for the current block
l_i         EQU     4       ; current block index
l_type      EQU     8       ; current block type
l_stop      EQU     12      ; block index at which the current pass ends
l_c         EQU     16      ; coefficient position inside the block
l_l_ptr     EQU     20      ; address of the left context byte in use
l_a_ptr     EQU     24      ; address of the above context byte in use
l_bc        EQU     28      ; bool decoder state pointer
l_coef_ptr  EQU     32      ; coef_probs base for the current type
l_stacksize EQU     64      ; total bytes reserved for the frame


;; constant offsets -- these should be created at build time
c_block2above_offset         EQU 25     ; block2above[] sits 25 bytes after block2left[]
c_entropy_nodes              EQU 11
c_dct_eob_token              EQU 11

;-----------------------------------------------------------------------
; vp8_decode_mb_tokens_v6
;
; Walks the blocks of a macroblock, decoding coefficient tokens with the
; boolean decoder and storing the resulting values into qcoeff. For each
; block it also records eob[i] and updates the left/above context bytes.
;
; In:   r0 = detoken state (accessed through the detok_* offsets)
;       r1 = type of the first pass; type 1 starts at block 24, any
;            other value starts at block 0 with stop = 16
; Out:  r0 = 0
;       bool decoder state (buffer pointer, count, value, range) is
;       written back through the pointer saved in l_bc
; Saves r4-r11 and lr on entry; returns by popping pc.
;
; Register roles that live across the loops:
;   r4  = bool decoder value        r5 = bool decoder count
;   r6  = bool decoder range        r8 = bitstream byte pointer
;   r9  = detoken state
;   r7  = type on entry to BLOCK_LOOP, then c (coefficient position)
;   r10 = coef_probs base on entry to BLOCK_LOOP (scratch afterwards)
;   r11 = block index i on entry to BLOCK_LOOP (scratch afterwards)
;   r0  = Prob, r1 = t (tree index / token), lr = coef tree ptr or v
;-----------------------------------------------------------------------
|vp8_decode_mb_tokens_v6| PROC
    stmdb       sp!, {r4 - r11, lr}
    sub         sp, sp, #l_stacksize
    mov         r7, r1                      ; type
    mov         r9, r0                      ; detoken

    ldr         r1, [r9, #detok_current_bc]
    ldr         r0, [r9, #detok_qcoeff_start_ptr]
    mov         r11, #0                     ; i
    mov         r3, #16                     ; stop

    cmp         r7, #1                      ; type ?= 1
    addeq       r11, r11, #24               ; i = 24
    addeq       r3, r3, #8                  ; stop = 24 (loop test runs after i++, so block 24 is decoded once)
    addeq       r0, r0, #3, 24              ; qcoeff_ptr += 0x300 bytes (24 blocks * 32 bytes)

    str         r0, [sp, #l_qcoeff]
    str         r11, [sp, #l_i]
    str         r7, [sp, #l_type]
    str         r3, [sp, #l_stop]
    str         r1, [sp, #l_bc]

    add         lr, r9, r7, lsl #2          ; detoken + type*4

    ldr         r8, [r1, #bool_decoder_user_buffer]

    ldr         r10, [lr, #detok_coef_probs] ; coef_probs[type]
    ldr         r5, [r1, #bool_decoder_count]
    ldr         r6, [r1, #bool_decoder_range]
    ldr         r4, [r1, #bool_decoder_value]

    str         r10, [sp, #l_coef_ptr]

; Entry invariants: r7 = type, r10 = coef_probs, r11 = i
BLOCK_LOOP
    ldr         r3, [r9, #detok_ptr_block2leftabove]
    ldr         r1, [r9, #detok_L]
    ldr         r2, [r9, #detok_A]
    ldrb        r12, [r3, r11]!             ; block2left[i]
    ldrb        r3, [r3, #c_block2above_offset]; block2above[i]

    cmp         r7, #0                      ; c = !type
    moveq       r7, #1
    movne       r7, #0

    ldrb        r0, [r1, r12]!              ; *(L += block2left[i])
    ldrb        r3, [r2, r3]!               ; *(A += block2above[i])
    mov         lr, #c_entropy_nodes        ; ENTROPY_NODES = 11

; VP8_COMBINEENTROPYCONTEXTS(t, *a, *l) => t = ((*a) != 0) + ((*l) != 0)
    cmp         r0, #0                      ; *l ?= 0
    movne       r0, #1
    cmp         r3, #0                      ; *a ?= 0
    addne       r0, r0, #1                  ; t

    str         r1, [sp, #l_l_ptr]          ; save &l
    str         r2, [sp, #l_a_ptr]          ; save &a
    smlabb      r0, r0, lr, r10             ; Prob = coef_probs + (t * ENTROPY_NODES)
    mov         r1, #0                      ; t = 0
    str         r7, [sp, #l_c]

    ;align 4
COEFF_LOOP
    ldr         r3, [r9, #detok_ptr_coef_bands_x]
    ldr         lr, [r9, #detok_coef_tree_ptr]
    ;STALL
    ldrb        r3, [r3, r7]                ; coef_bands_x[c]
    ;STALL
    ;STALL
    add         r0, r0, r3                  ; Prob += coef_bands_x[c]

; Walk the coefficient token tree: one bool decode per iteration.
get_token_loop
    ldrb        r2, [r0, +r1, asr #1]       ; Prob[t >> 1]
    mov         r3, r6, lsl #8              ; range << 8
    sub         r3, r3, #256                ; (range << 8) - (1 << 8)
    mov         r10, #1                     ; 1

    smlawb      r2, r3, r2, r10             ; split = 1 + (((range-1) * probability) >> 8)

    ldrb        r12, [r8]                   ; load cx data byte in stall slot : r8 = bufptr
    ;++

    subs        r3, r4, r2, lsl #24         ; value-(split<<24): used later to calculate shift for NORMALIZE
    addhs       r1, r1, #1                  ; t += 1
    movhs       r4, r3                      ; value -= bigsplit (split << 24)
    subhs       r2, r6, r2                  ; range -= split
 ;   movlo       r6, r2                      ; range = split (not needed: r2 already holds the new range in both cases)

    ldrsb     r1, [lr, r1]                  ; t = onyx_coef_tree_ptr[t]

; NORMALIZE
    clz         r3, r2                      ; vp8dx_bitreader_norm[range] + 24
    sub         r3, r3, #24                 ; vp8dx_bitreader_norm[range]
    subs        r5, r5, r3                  ; count -= shift
    mov         r6, r2, lsl r3              ; range <<= shift
    mov         r4, r4, lsl r3              ; value <<= shift

; if count <= 0, += BR_COUNT; value |= *bufptr++ << (BR_COUNT-count); BR_COUNT = 8, but need to upshift values by +16
    addle         r5, r5, #8                ; count += 8
    rsble         r3, r5, #24               ; 24 - count
    addle         r8, r8, #1                ; bufptr++
    orrle         r4, r4, r12, lsl r3       ; value |= (byte read above) << (24 - count)

    cmp         r1, #0                      ; t ?= 0
    bgt         get_token_loop              ; while (t > 0)

    cmn         r1, #c_dct_eob_token        ; if(t == -DCT_EOB_TOKEN)
    beq         END_OF_BLOCK                ; break

    rsb         lr, r1, #0                  ; v = -t;

    cmp         lr, #4                      ; if(v > FOUR_TOKEN)
    ble         SKIP_EXTRABITS

    ldr         r3, [r9, #detok_teb_base_ptr]
    mov         r11, #1                     ; constant 1: split bias and the bit added to v (clobbers i; it is reloaded from l_i later)
    add         r7, r3, lr, lsl #4          ; teb_ptr = detok_teb_base_ptr + (v << 4) (clobbers c; it is reloaded from l_c later)

    ldrsh       lr, [r7, #tokenextrabits_min_val] ; v = teb_ptr->min_val
    ldrsh       r0, [r7, #tokenextrabits_length] ; bits_count = teb_ptr->Length

; Decode the extra bits of the token, from bit bits_count down to bit 0.
extrabits_loop
    add         r3, r0, r7                  ; &teb_ptr->Probs[bits_count]

    ldrb        r2, [r3, #4]                ; probability. NOTE(review): +4 is presumably the offset of Probs[] in the struct -- confirm
    mov         r3, r6, lsl #8              ; range << 8
    sub         r3, r3, #256                ; (range << 8) - (1 << 8)

    smlawb      r2, r3, r2, r11             ; split = 1 +  (((range-1) * probability) >> 8)

    ldrb        r12, [r8]                   ; *bufptr
    ;++

    subs        r10, r4, r2, lsl #24        ; value - (split<<24)
    movhs       r4, r10                     ; value = value - (split << 24)
    subhs       r2, r6, r2                  ; range = range - split
    addhs       lr, lr, r11, lsl r0         ; v += ((UINT16)1<<bits_count)

; NORMALIZE
    clz         r3, r2                      ; leading zeros in the new range
    sub         r3, r3, #24                 ; don't count first 3 bytes
    subs        r5, r5, r3                  ; count -= shift
    mov         r6, r2, lsl r3              ; range = range << shift
    mov         r4, r4, lsl r3              ; value <<= shift

    addle       r5, r5, #8                  ; count += BR_COUNT
    addle       r8, r8, #1                  ; bufptr++
    rsble       r3, r5, #24                 ; 24 - count
    orrle       r4, r4, r12, lsl r3         ; value |= *bufptr << (24 - count)

    subs        r0, r0, #1                  ; bits_count --
    bpl         extrabits_loop


SKIP_EXTRABITS
    ldr         r11, [sp, #l_qcoeff]
    ldr         r0, [sp, #l_coef_ptr]       ; Prob = coef_probs

    cmp         r1, #0                      ; check for nonzero token - if (t)
    beq         SKIP_EOB_CHECK              ; if t is zero, we will skip the eob table check

; Sign bit: decoded with probability one half.
    add         r3, r6, #1                  ; range + 1
    mov         r2, r3, lsr #1              ; split = (range + 1) >> 1

    subs        r3, r4, r2, lsl #24         ; value - (split<<24)
    movhs       r4, r3                      ; value -= (split << 24)
    subhs       r2, r6, r2                  ; range -= split
    mvnhs       r3, lr                      ; ~v
    addhs       lr, r3, #1                  ; v = (v ^ -1) + 1, i.e. v = -v

; NORMALIZE
    clz         r3, r2                      ; leading 0s in the new range
    sub         r3, r3, #24                 ; shift
    subs        r5, r5, r3                  ; count -= shift
    mov         r6, r2, lsl r3              ; range <<= shift
    mov         r4, r4, lsl r3              ; value <<= shift
    ldrleb      r2, [r8], #1                ; *(bufptr++)
    addle       r5, r5, #8                  ; count += 8
    rsble       r3, r5, #24                 ; 24 - count
    orrle       r4, r4, r2, lsl r3          ; value |= *bufptr << (24 - count)

    add         r0, r0, #c_entropy_nodes    ; Prob += ENTROPY_NODES (11)

    cmn         r1, #1                      ; t < -ONE_TOKEN

    addlt       r0, r0, #c_entropy_nodes    ; Prob += ENTROPY_NODES (11)

    mvn         r1, #1                      ; t = -2, so the "t += 2" below restarts the tree at t = 0

SKIP_EOB_CHECK
    ldr         r7, [sp, #l_c]              ; c
    ldr         r3, [r9, #detok_scan]
    add         r1, r1, #2                  ; t += 2 (a zero token gives t = 2)
    cmp         r7, #15                     ; compare pre-increment c; flags are consumed by the blt below

    ldr         r3, [r3, +r7, lsl #2]       ; scan[c] this needs pre-inc c value
    add         r7, r7, #1                  ; c++
    add         r3, r11, r3, lsl #1         ; qcoeff + scan[c]

    str         r7, [sp, #l_c]              ; store c
    strh        lr, [r3]                    ; qcoef_ptr[scan[c]] = v

    blt         COEFF_LOOP                  ; loop while the position just written was < 15

    sub         r7, r7, #1                  ; if(t != -DCT_EOB_TOKEN) --c : undo the final c++

END_OF_BLOCK
    ldr         r3, [sp, #l_type]           ; type
    ldr         r10, [sp, #l_coef_ptr]      ; coef_ptr
    ldr         r0, [sp, #l_qcoeff]         ; qcoeff
    ldr         r11, [sp, #l_i]             ; i
    ldr         r12, [sp, #l_stop]          ; stop

    cmp         r3, #0                      ; type ?= 0
    moveq       r1, #1
    movne       r1, #0
    add         r3, r11, r9                 ; detok + i

    cmp         r7, r1                      ; c ?= !type
    strb        r7, [r3, #detok_eob]        ; eob[i] = c

    ldr         r7, [sp, #l_l_ptr]          ; l
    ldr         r2, [sp, #l_a_ptr]          ; a
    movne       r3, #1                      ; t = (c != !type)
    moveq       r3, #0

    add         r0, r0, #32                 ; qcoeff += 32 bytes (16 coefficients * 2)
    add         r11, r11, #1                ; i++
    strb        r3, [r7]                    ; *l = t
    strb        r3, [r2]                    ; *a = t
    str         r0, [sp, #l_qcoeff]         ; qcoeff
    str         r11, [sp, #l_i]             ; i

    cmp         r11, r12                    ; i < stop
    ldr         r7, [sp, #l_type]           ; type

    blt         BLOCK_LOOP

    cmp         r11, #25                    ; i ?= 25
    bne         ln2_decode_mb_to

; Block 24 is done: restart at block 0 with type 0 for blocks 0..15.
    ldr         r12, [r9, #detok_qcoeff_start_ptr]
    ldr         r10, [r9, #detok_coef_probs]
    mov         r7, #0                      ; type = 0
    mov         r11, #0                     ; i = 0; BLOCK_LOOP indexes block2left/above with r11
                                            ; without reloading it, so it must not stay at 25
    mov         r3, #16                     ; stop = 16
    str         r12, [sp, #l_qcoeff]        ; qcoeff_ptr = qcoeff_start_ptr
    str         r7, [sp, #l_i]
    str         r7, [sp, #l_type]
    str         r3, [sp, #l_stop]

    str         r10, [sp, #l_coef_ptr]      ; coef_probs = coef_probs[type=0]

    b           BLOCK_LOOP

ln2_decode_mb_to
    cmp         r11, #16                    ; i ?= 16
    bne         ln1_decode_mb_to

; Blocks 0..15 are done: continue with type 2 for blocks 16..23.
    mov         r10, #detok_coef_probs
    add         r10, r10, #2*4              ; offset of coef_probs[2]
    ldr         r10, [r9, r10]              ; detok + detok_coef_probs[type]

    mov         r7, #2                      ; type = 2
    mov         r3, #24                     ; stop = 24

    str         r7, [sp, #l_type]
    str         r3, [sp, #l_stop]

    str         r10, [sp, #l_coef_ptr]      ; coef_probs = coef_probs[type]
    b           BLOCK_LOOP

; All passes done: write the bool decoder state back and return 0.
ln1_decode_mb_to
    ldr         r2, [sp, #l_bc]
    mov         r0, #0
    nop

    str         r8, [r2, #bool_decoder_user_buffer]
    str         r5, [r2, #bool_decoder_count]
    str         r4, [r2, #bool_decoder_value]
    str         r6, [r2, #bool_decoder_range]

    add         sp, sp, #l_stacksize
    ldmia       sp!, {r4 - r11, pc}

    ENDP  ; |vp8_decode_mb_tokens_v6|

    END