Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
A
aom-rav1e
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Xiph.Org
aom-rav1e
Commits
a522be29
Commit
a522be29
authored
Aug 19, 2010
by
Johann
Committed by
Code Review
Aug 19, 2010
Browse files
Options
Browse Files
Download
Plain Diff
Merge "fix armv6 simpleloop filter"
parents
6ea5bb85
467a0b99
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
47 additions
and
68 deletions
+47
-68
vp8/common/arm/armv6/simpleloopfilter_v6.asm
vp8/common/arm/armv6/simpleloopfilter_v6.asm
+47
-68
No files found.
vp8/common/arm/armv6/simpleloopfilter_v6.asm
View file @
a522be29
...
...
@@ -63,23 +63,22 @@ pstep RN r1
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
stmdb
sp
!
,
{
r4
-
r11
,
lr
}
sub
src
,
src
,
pstep
,
lsl
#
1
; move src pointer down by 2 lines
ldr
r12
,
[
r3
]
; limit
ldr
r3
,
[
src
,
-
pstep
,
lsl
#
1
]
; p1
ldr
r
12
,
[
r3
],
#
4
; limit
ldr
r
3
,
[
src
],
pstep
; p1
ldr
r
9
,
[
sp
,
#
40
]
; count for 8-in-parallel
ldr
r
4
,
[
src
,
-
pstep
]
; p0
ldr
r9
,
[
sp
,
#
36
]
; count for 8-in-parallel
ldr
r4
,
[
src
],
pstep
; p0
ldr
r7
,
[
r2
],
#
4
; flimit
ldr
r5
,
[
src
],
pstep
; q0
ldr
r7
,
[
r2
]
; flimit
ldr
r5
,
[
src
]
; q0
ldr
r2
,
c0x80808080
ldr
r6
,
[
src
]
; q1
ldr
r6
,
[
src
,
pstep
]
; q1
uadd8
r7
,
r7
,
r7
; flimit * 2
mov
r9
,
r9
,
lsl
#
1
;
4-in-parallel
mov
r9
,
r9
,
lsl
#
1
;
double the count. we're doing 4 at a time
uadd8
r12
,
r7
,
r12
; flimit * 2 + limit
mov
lr
,
#
0
|
simple_hnext8
|
; vp8_simple_filter_mask() function
...
...
@@ -89,22 +88,19 @@ pstep RN r1
uqsub8
r10
,
r4
,
r5
; p0 - q0
uqsub8
r11
,
r5
,
r4
; q0 - p0
orr
r8
,
r8
,
r7
; abs(p1 - q1)
ldr
lr
,
c0x7F7F7F7F
; 01111111 mask
orr
r10
,
r10
,
r11
; abs(p0 - q0)
and
r8
,
lr
,
r8
,
lsr
#
1
; abs(p1 - q1) / 2
uhadd8
r8
,
r8
,
lr
; abs(p1 - q1) >> 1
uqadd8
r10
,
r10
,
r10
; abs(p0 - q0) * 2
mvn
lr
,
#
0
; lr == -1
; STALL waiting on r10
uqadd8
r10
,
r10
,
r8
; abs(p0 - q0)*2 + abs(p1 - q1)/2
; STALL waiting on r10 :(
uqsub8
r10
,
r10
,
r12
; compare to flimit
mov
r8
,
#
0
usub8
r10
,
r8
,
r10
; use usub8 instead of ssub8
; STALL (maybe?) when are flags set? :/
sel
r10
,
lr
,
r8
; filter mask: lr
; STALL waiting on r10
mvn
r8
,
#
0
uqsub8
r10
,
r10
,
r12
; compare to flimit. need to do this twice because uqsub8 doesn't set GE flags
; and usub8 doesn't saturate
usub8
r10
,
lr
,
r10
; set GE flags for each byte
sel
r10
,
r8
,
lr
; filter mask: F or 0
cmp
r10
,
#
0
beq
simple_hskip_filter
; skip filtering
beq
simple_hskip_filter
; skip filtering
if we're &ing with 0s. would just write out the same values
;vp8_simple_filter() function
...
...
@@ -113,55 +109,45 @@ pstep RN r1
eor
r4
,
r4
,
r2
; p0 offset to convert to a signed value
eor
r5
,
r5
,
r2
; q0 offset to convert to a signed value
qsub8
r3
,
r3
,
r6
; vp8_filter (r3) = vp8_signed_char_clamp(p1-q1)
qsub8
r6
,
r5
,
r4
; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( q0 - p0))
qsub8
r3
,
r3
,
r6
; vp8_signed_char_clamp(p1-q1)
qsub8
r6
,
r5
,
r4
; vp8_signed_char_clamp(q0-p0)
qadd8
r3
,
r3
,
r6
; += q0-p0
qadd8
r3
,
r3
,
r6
; += q0-p0
qadd8
r3
,
r3
,
r6
; p1-q1 + 3*(q0-p0))
and
r3
,
r3
,
r10
; &= mask
qadd8
r3
,
r3
,
r6
ldr
r8
,
c0x03030303
; r8 = 3
qadd8
r3
,
r3
,
r6
ldr
r7
,
c0x04040404
qadd8
r3
,
r3
,
r6
and
r3
,
r3
,
lr
; vp8_filter &= mask;
ldr
r8
,
c0x03030303
;save bottom 3 bits so that we round one side +4 and the other +3
qadd8
r7
,
r3
,
r7
; Filter1 (r7) = vp8_signed_char_clamp(vp8_filter+4)
qadd8
r8
,
r3
,
r8
; Filter2 (r8) = vp8_signed_char_clamp(vp8_filter+3)
qadd8
r3
,
r3
,
r7
; Filter1 (r3) = vp8_signed_char_clamp(vp8_filter+4)
mov
r
7
,
#
0
shadd8
r
8
,
r8
,
r7
; Filter2 >>=
3
shadd8
r
3
,
r3
,
r7
; Filter1 >>=
3
shadd8
r
8
,
r8
,
r7
shadd8
r
3
,
r3
,
r7
shadd8
r
8
,
r8
,
r7
; r8: Filter2
shadd8
r
3
,
r3
,
r7
; r7: filter1
mov
r
3
,
#
0
shadd8
r
7
,
r7
,
r
3
shadd8
r
8
,
r8
,
r
3
shadd8
r
7
,
r7
,
r3
shadd8
r
8
,
r8
,
r3
shadd8
r
7
,
r7
,
r3
; Filter1 >>= 3
shadd8
r
8
,
r8
,
r3
; Filter2 >>= 3
;calculate output
sub
src
,
src
,
pstep
,
lsl
#
1
qsub8
r5
,
r5
,
r7
; u = vp8_signed_char_clamp(q0 - Filter1)
qadd8
r4
,
r4
,
r8
; u = vp8_signed_char_clamp(p0 + Filter2)
qsub8
r5
,
r5
,
r3
; u = vp8_signed_char_clamp(q0 - Filter1)
eor
r4
,
r4
,
r2
; *op0 = u^0x80
str
r4
,
[
src
],
pstep
; store op0 result
eor
r5
,
r5
,
r2
; *oq0 = u^0x80
str
r5
,
[
src
],
pstep
; store oq0 result
str
r5
,
[
src
]
; store oq0 result
eor
r4
,
r4
,
r2
; *op0 = u^0x80
str
r4
,
[
src
,
-
pstep
]
; store op0 result
|
simple_hskip_filter
|
add
src
,
src
,
#
4
sub
src
,
src
,
pstep
sub
src
,
src
,
pstep
,
lsl
#
1
subs
r9
,
r9
,
#
1
addne
src
,
src
,
#
4
; next row
;pld [src]
;pld [src, pstep]
;pld [src, pstep, lsl #1]
ldrne
r3
,
[
src
],
pstep
; p1
ldrne
r4
,
[
src
],
pstep
; p0
ldrne
r5
,
[
src
],
pstep
; q0
ldrne
r6
,
[
src
]
; q1
ldrne
r3
,
[
src
,
-
pstep
,
lsl
#
1
]
; p1
ldrne
r4
,
[
src
,
-
pstep
]
; p0
ldrne
r5
,
[
src
]
; q0
ldrne
r6
,
[
src
,
pstep
]
; q1
bne
simple_hnext8
...
...
@@ -174,9 +160,9 @@ pstep RN r1
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
stmdb
sp
!
,
{
r4
-
r11
,
lr
}
ldr
r12
,
[
r2
]
,
#
4
; r12: flimit
ldr
r12
,
[
r2
]
; r12: flimit
ldr
r2
,
c0x80808080
ldr
r7
,
[
r3
]
,
#
4
; limit
ldr
r7
,
[
r3
]
; limit
; load source data to r7, r8, r9, r10
ldrh
r3
,
[
src
,
#
-
2
]
...
...
@@ -213,16 +199,15 @@ pstep RN r1
uqsub8
r10
,
r5
,
r4
; q0 - p0
orr
r7
,
r7
,
r8
; abs(p1 - q1)
orr
r9
,
r9
,
r10
; abs(p0 - q0)
ldr
lr
,
c0x7F7F7F7F
; 0111 1111 mask
uqadd8
r9
,
r9
,
r9
; abs(p0 - q0) * 2
and
r7
,
lr
,
r7
,
lsr
#
1
; abs(p1 - q1) / 2
mov
r8
,
#
0
uqadd8
r9
,
r9
,
r9
; abs(p0 - q0) * 2
uhadd8
r7
,
r7
,
r8
; abs(p1 - q1) / 2
uqadd8
r7
,
r7
,
r9
; abs(p0 - q0)*2 + abs(p1 - q1)/2
mvn
r10
,
#
0
; r10 == -1
uqsub8
r7
,
r7
,
r12
; compare to flimit
usub8
r7
,
r8
,
r7
sel
r7
,
r10
,
r8
; filter mask: lr
sel
lr
,
r10
,
r8
; filter mask
cmp
lr
,
#
0
beq
simple_vskip_filter
; skip filtering
...
...
@@ -286,10 +271,6 @@ pstep RN r1
|
simple_vskip_filter
|
subs
r11
,
r11
,
#
1
;pld [src]
;pld [src, pstep]
;pld [src, pstep, lsl #1]
; load source data to r7, r8, r9, r10
ldrneh
r3
,
[
src
,
#
-
2
]
ldrneh
r4
,
[
src
],
pstep
...
...
@@ -316,7 +297,5 @@ pstep RN r1
c0x80808080
DCD
0x80808080
c0x03030303
DCD
0x03030303
c0x04040404
DCD
0x04040404
c0x01010101
DCD
0x01010101
c0x7F7F7F7F
DCD
0x7F7F7F7F
END
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment