Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
Xiph.Org
aom-rav1e
Commits
3c755577
Commit
3c755577
authored
Nov 17, 2011
by
Scott LaVarnway
Committed by
Gerrit Code Review
Nov 17, 2011
Browse files
Options
Browse Files
Download
Plain Diff
Merge "Added predictor stride argument(s) to subtract functions"
parents
120a4640
edd98b73
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
374 additions
and
662 deletions
+374
-662
vp8/encoder/arm/armv6/vp8_subtract_armv6.asm
vp8/encoder/arm/armv6/vp8_subtract_armv6.asm
+35
-28
vp8/encoder/arm/neon/subtract_neon.asm
vp8/encoder/arm/neon/subtract_neon.asm
+84
-70
vp8/encoder/encodeintra.c
vp8/encoder/encodeintra.c
+4
-2
vp8/encoder/encodemb.c
vp8/encoder/encodemb.c
+18
-13
vp8/encoder/encodemb.h
vp8/encoder/encodemb.h
+4
-2
vp8/encoder/rdopt.c
vp8/encoder/rdopt.c
+7
-5
vp8/encoder/x86/subtract_mmx.asm
vp8/encoder/x86/subtract_mmx.asm
+102
-311
vp8/encoder/x86/subtract_sse2.asm
vp8/encoder/x86/subtract_sse2.asm
+120
-231
No files found.
vp8/encoder/arm/armv6/vp8_subtract_armv6.asm
View file @
3c755577
...
@@ -72,22 +72,23 @@ loop_block
...
@@ -72,22 +72,23 @@ loop_block
; r0 short *diff
; r0 short *diff
; r1 unsigned char *usrc
; r1 unsigned char *usrc
; r2 unsigned char *vsrc
; r2 unsigned char *vsrc
; r3 unsigned char *pred
; r3 int src_stride
; stack int stride
; sp unsigned char *upred
; sp unsigned char *vpred
; sp int pred_stride
|
vp8_subtract_mbuv_armv6
|
PROC
|
vp8_subtract_mbuv_armv6
|
PROC
stmfd
sp
!
,
{
r4
-
r1
2
,
lr
}
stmfd
sp
!
,
{
r4
-
r1
1
}
add
r0
,
r0
,
#
512
; set *diff point to Cb
add
r0
,
r0
,
#
512
; set *diff point to Cb
add
r3
,
r3
,
#
256
; set *pred point to Cb
mov
r4
,
#
8
; loop count
mov
r4
,
#
8
; loop count
ldr
r5
,
[
sp
,
#
40
]
; stride
ldr
r5
,
[
sp
,
#
32
]
; upred
ldr
r12
,
[
sp
,
#
40
]
; pred_stride
; Subtract U block
; Subtract U block
loop_u
loop_u
ldr
r6
,
[
r1
]
; src
(A)
ldr
r6
,
[
r1
]
;
u
src (A)
ldr
r7
,
[
r
3
],
#
4
; pred
(A)
ldr
r7
,
[
r
5
]
;
u
pred (A)
uxtb16
r8
,
r6
; [s2 | s0] (A)
uxtb16
r8
,
r6
; [s2 | s0] (A)
uxtb16
r9
,
r7
; [p2 | p0] (A)
uxtb16
r9
,
r7
; [p2 | p0] (A)
...
@@ -97,8 +98,8 @@ loop_u
...
@@ -97,8 +98,8 @@ loop_u
usub16
r6
,
r8
,
r9
; [d2 | d0] (A)
usub16
r6
,
r8
,
r9
; [d2 | d0] (A)
usub16
r7
,
r10
,
r11
; [d3 | d1] (A)
usub16
r7
,
r10
,
r11
; [d3 | d1] (A)
ldr
r10
,
[
r1
,
#
4
]
; src
(B)
ldr
r10
,
[
r1
,
#
4
]
;
u
src (B)
ldr
r11
,
[
r
3
]
,
#
4
; pred
(B)
ldr
r11
,
[
r
5
,
#
4
]
;
u
pred (B)
pkhbt
r8
,
r6
,
r7
,
lsl
#
16
; [d1 | d0] (A)
pkhbt
r8
,
r6
,
r7
,
lsl
#
16
; [d1 | d0] (A)
pkhtb
r9
,
r7
,
r6
,
asr
#
16
; [d3 | d2] (A)
pkhtb
r9
,
r7
,
r6
,
asr
#
16
; [d3 | d2] (A)
...
@@ -114,7 +115,8 @@ loop_u
...
@@ -114,7 +115,8 @@ loop_u
usub16
r6
,
r8
,
r9
; [d2 | d0] (B)
usub16
r6
,
r8
,
r9
; [d2 | d0] (B)
usub16
r7
,
r10
,
r11
; [d3 | d1] (B)
usub16
r7
,
r10
,
r11
; [d3 | d1] (B)
add
r1
,
r1
,
r5
; update usrc pointer
add
r1
,
r1
,
r3
; update usrc pointer
add
r5
,
r5
,
r12
; update upred pointer
pkhbt
r8
,
r6
,
r7
,
lsl
#
16
; [d1 | d0] (B)
pkhbt
r8
,
r6
,
r7
,
lsl
#
16
; [d1 | d0] (B)
pkhtb
r9
,
r7
,
r6
,
asr
#
16
; [d3 | d2] (B)
pkhtb
r9
,
r7
,
r6
,
asr
#
16
; [d3 | d2] (B)
...
@@ -125,12 +127,13 @@ loop_u
...
@@ -125,12 +127,13 @@ loop_u
bne
loop_u
bne
loop_u
ldr
r5
,
[
sp
,
#
36
]
; vpred
mov
r4
,
#
8
; loop count
mov
r4
,
#
8
; loop count
; Subtract V block
; Subtract V block
loop_v
loop_v
ldr
r6
,
[
r2
]
; src
(A)
ldr
r6
,
[
r2
]
;
v
src (A)
ldr
r7
,
[
r
3
],
#
4
; pred
(A)
ldr
r7
,
[
r
5
]
;
v
pred (A)
uxtb16
r8
,
r6
; [s2 | s0] (A)
uxtb16
r8
,
r6
; [s2 | s0] (A)
uxtb16
r9
,
r7
; [p2 | p0] (A)
uxtb16
r9
,
r7
; [p2 | p0] (A)
...
@@ -140,8 +143,8 @@ loop_v
...
@@ -140,8 +143,8 @@ loop_v
usub16
r6
,
r8
,
r9
; [d2 | d0] (A)
usub16
r6
,
r8
,
r9
; [d2 | d0] (A)
usub16
r7
,
r10
,
r11
; [d3 | d1] (A)
usub16
r7
,
r10
,
r11
; [d3 | d1] (A)
ldr
r10
,
[
r2
,
#
4
]
; src
(B)
ldr
r10
,
[
r2
,
#
4
]
;
v
src (B)
ldr
r11
,
[
r
3
]
,
#
4
; pred
(B)
ldr
r11
,
[
r
5
,
#
4
]
;
v
pred (B)
pkhbt
r8
,
r6
,
r7
,
lsl
#
16
; [d1 | d0] (A)
pkhbt
r8
,
r6
,
r7
,
lsl
#
16
; [d1 | d0] (A)
pkhtb
r9
,
r7
,
r6
,
asr
#
16
; [d3 | d2] (A)
pkhtb
r9
,
r7
,
r6
,
asr
#
16
; [d3 | d2] (A)
...
@@ -157,7 +160,8 @@ loop_v
...
@@ -157,7 +160,8 @@ loop_v
usub16
r6
,
r8
,
r9
; [d2 | d0] (B)
usub16
r6
,
r8
,
r9
; [d2 | d0] (B)
usub16
r7
,
r10
,
r11
; [d3 | d1] (B)
usub16
r7
,
r10
,
r11
; [d3 | d1] (B)
add
r2
,
r2
,
r5
; update vsrc pointer
add
r2
,
r2
,
r3
; update vsrc pointer
add
r5
,
r5
,
r12
; update vpred pointer
pkhbt
r8
,
r6
,
r7
,
lsl
#
16
; [d1 | d0] (B)
pkhbt
r8
,
r6
,
r7
,
lsl
#
16
; [d1 | d0] (B)
pkhtb
r9
,
r7
,
r6
,
asr
#
16
; [d3 | d2] (B)
pkhtb
r9
,
r7
,
r6
,
asr
#
16
; [d3 | d2] (B)
...
@@ -168,23 +172,25 @@ loop_v
...
@@ -168,23 +172,25 @@ loop_v
bne
loop_v
bne
loop_v
ldmfd
sp
!
,
{
r4
-
r12
,
pc
}
ldmfd
sp
!
,
{
r4
-
r11
}
bx
lr
ENDP
ENDP
; r0 short *diff
; r0 short *diff
; r1 unsigned char *src
; r1 unsigned char *src
; r2 unsigned char *pred
; r2 int src_stride
; r3 int stride
; r3 unsigned char *pred
; sp int pred_stride
|
vp8_subtract_mby_armv6
|
PROC
|
vp8_subtract_mby_armv6
|
PROC
stmfd
sp
!
,
{
r4
-
r11
}
stmfd
sp
!
,
{
r4
-
r11
}
ldr
r12
,
[
sp
,
#
32
]
; pred_stride
mov
r4
,
#
16
mov
r4
,
#
16
loop
loop
ldr
r6
,
[
r1
]
; src (A)
ldr
r6
,
[
r1
]
; src (A)
ldr
r7
,
[
r
2
],
#
4
; pred (A)
ldr
r7
,
[
r
3
]
; pred (A)
uxtb16
r8
,
r6
; [s2 | s0] (A)
uxtb16
r8
,
r6
; [s2 | s0] (A)
uxtb16
r9
,
r7
; [p2 | p0] (A)
uxtb16
r9
,
r7
; [p2 | p0] (A)
...
@@ -195,7 +201,7 @@ loop
...
@@ -195,7 +201,7 @@ loop
usub16
r7
,
r10
,
r11
; [d3 | d1] (A)
usub16
r7
,
r10
,
r11
; [d3 | d1] (A)
ldr
r10
,
[
r1
,
#
4
]
; src (B)
ldr
r10
,
[
r1
,
#
4
]
; src (B)
ldr
r11
,
[
r
2
]
,
#
4
; pred (B)
ldr
r11
,
[
r
3
,
#
4
]
; pred (B)
pkhbt
r8
,
r6
,
r7
,
lsl
#
16
; [d1 | d0] (A)
pkhbt
r8
,
r6
,
r7
,
lsl
#
16
; [d1 | d0] (A)
pkhtb
r9
,
r7
,
r6
,
asr
#
16
; [d3 | d2] (A)
pkhtb
r9
,
r7
,
r6
,
asr
#
16
; [d3 | d2] (A)
...
@@ -212,7 +218,7 @@ loop
...
@@ -212,7 +218,7 @@ loop
usub16
r7
,
r10
,
r11
; [d3 | d1] (B)
usub16
r7
,
r10
,
r11
; [d3 | d1] (B)
ldr
r10
,
[
r1
,
#
8
]
; src (C)
ldr
r10
,
[
r1
,
#
8
]
; src (C)
ldr
r11
,
[
r
2
]
,
#
4
; pred (C)
ldr
r11
,
[
r
3
,
#
8
]
; pred (C)
pkhbt
r8
,
r6
,
r7
,
lsl
#
16
; [d1 | d0] (B)
pkhbt
r8
,
r6
,
r7
,
lsl
#
16
; [d1 | d0] (B)
pkhtb
r9
,
r7
,
r6
,
asr
#
16
; [d3 | d2] (B)
pkhtb
r9
,
r7
,
r6
,
asr
#
16
; [d3 | d2] (B)
...
@@ -229,10 +235,10 @@ loop
...
@@ -229,10 +235,10 @@ loop
usub16
r7
,
r10
,
r11
; [d3 | d1] (C)
usub16
r7
,
r10
,
r11
; [d3 | d1] (C)
ldr
r10
,
[
r1
,
#
12
]
; src (D)
ldr
r10
,
[
r1
,
#
12
]
; src (D)
ldr
r11
,
[
r
2
]
,
#
4
; pred (D)
ldr
r11
,
[
r
3
,
#
12
]
; pred (D)
pkhbt
r8
,
r6
,
r7
,
lsl
#
16
; [d1 | d0] (C)
pkhbt
r8
,
r6
,
r7
,
lsl
#
16
; [d1 | d0] (C)
pkhtb
r9
,
r7
,
r6
,
asr
#
16
; [d3 | d2] (C)
pkhtb
r9
,
r7
,
r6
,
asr
#
16
; [d3 | d2] (C)
str
r8
,
[
r0
],
#
4
; diff (C)
str
r8
,
[
r0
],
#
4
; diff (C)
uxtb16
r8
,
r10
; [s2 | s0] (D)
uxtb16
r8
,
r10
; [s2 | s0] (D)
...
@@ -245,7 +251,8 @@ loop
...
@@ -245,7 +251,8 @@ loop
usub16
r6
,
r8
,
r9
; [d2 | d0] (D)
usub16
r6
,
r8
,
r9
; [d2 | d0] (D)
usub16
r7
,
r10
,
r11
; [d3 | d1] (D)
usub16
r7
,
r10
,
r11
; [d3 | d1] (D)
add
r1
,
r1
,
r3
; update src pointer
add
r1
,
r1
,
r2
; update src pointer
add
r3
,
r3
,
r12
; update pred pointer
pkhbt
r8
,
r6
,
r7
,
lsl
#
16
; [d1 | d0] (D)
pkhbt
r8
,
r6
,
r7
,
lsl
#
16
; [d1 | d0] (D)
pkhtb
r9
,
r7
,
r6
,
asr
#
16
; [d3 | d2] (D)
pkhtb
r9
,
r7
,
r6
,
asr
#
16
; [d3 | d2] (D)
...
@@ -257,7 +264,7 @@ loop
...
@@ -257,7 +264,7 @@ loop
bne
loop
bne
loop
ldmfd
sp
!
,
{
r4
-
r11
}
ldmfd
sp
!
,
{
r4
-
r11
}
mov
pc
,
lr
bx
lr
ENDP
ENDP
...
...
vp8/encoder/arm/neon/subtract_neon.asm
View file @
3c755577
...
@@ -61,19 +61,24 @@
...
@@ -61,19 +61,24 @@
;==========================================
;==========================================
;void vp8_subtract_mby_neon(short *diff, unsigned char *src, unsigned char *pred, int stride)
;void vp8_subtract_mby_neon(short *diff, unsigned char *src, int src_stride,
; unsigned char *pred, int pred_stride)
|
vp8_subtract_mby_neon
|
PROC
|
vp8_subtract_mby_neon
|
PROC
push
{
r4
-
r7
}
mov
r12
,
#
4
mov
r12
,
#
4
ldr
r4
,
[
sp
,
#
16
]
; pred_stride
mov
r6
,
#
32
; "diff" stride x2
add
r5
,
r0
,
#
16
; second diff pointer
subtract_mby_loop
subtract_mby_loop
vld1.8
{
q0
}
,
[
r1
],
r
3
;load src
vld1.8
{
q0
}
,
[
r1
],
r
2
;load src
vld1.8
{
q1
}
,
[
r
2
]
!
;load pred
vld1.8
{
q1
}
,
[
r
3
],
r4
;load pred
vld1.8
{
q2
}
,
[
r1
],
r
3
vld1.8
{
q2
}
,
[
r1
],
r
2
vld1.8
{
q3
}
,
[
r
2
]
!
vld1.8
{
q3
}
,
[
r
3
],
r4
vld1.8
{
q4
}
,
[
r1
],
r
3
vld1.8
{
q4
}
,
[
r1
],
r
2
vld1.8
{
q5
}
,
[
r
2
]
!
vld1.8
{
q5
}
,
[
r
3
],
r4
vld1.8
{
q6
}
,
[
r1
],
r
3
vld1.8
{
q6
}
,
[
r1
],
r
2
vld1.8
{
q7
}
,
[
r
2
]
!
vld1.8
{
q7
}
,
[
r
3
],
r4
vsubl.u8
q8
,
d0
,
d2
vsubl.u8
q8
,
d0
,
d2
vsubl.u8
q9
,
d1
,
d3
vsubl.u8
q9
,
d1
,
d3
...
@@ -84,46 +89,53 @@ subtract_mby_loop
...
@@ -84,46 +89,53 @@ subtract_mby_loop
vsubl.u8
q14
,
d12
,
d14
vsubl.u8
q14
,
d12
,
d14
vsubl.u8
q15
,
d13
,
d15
vsubl.u8
q15
,
d13
,
d15
vst1.16
{
q8
}
,
[
r0
]
!
;store diff
vst1.16
{
q8
}
,
[
r0
]
,
r6
;store diff
vst1.16
{
q9
}
,
[
r
0
]
!
vst1.16
{
q9
}
,
[
r
5
],
r6
vst1.16
{
q10
}
,
[
r0
]
!
vst1.16
{
q10
}
,
[
r0
]
,
r6
vst1.16
{
q11
}
,
[
r
0
]
!
vst1.16
{
q11
}
,
[
r
5
],
r6
vst1.16
{
q12
}
,
[
r0
]
!
vst1.16
{
q12
}
,
[
r0
]
,
r6
vst1.16
{
q13
}
,
[
r
0
]
!
vst1.16
{
q13
}
,
[
r
5
],
r6
vst1.16
{
q14
}
,
[
r0
]
!
vst1.16
{
q14
}
,
[
r0
]
,
r6
vst1.16
{
q15
}
,
[
r
0
]
!
vst1.16
{
q15
}
,
[
r
5
],
r6
subs
r12
,
r12
,
#
1
subs
r12
,
r12
,
#
1
bne
subtract_mby_loop
bne
subtract_mby_loop
pop
{
r4
-
r7
}
bx
lr
bx
lr
ENDP
ENDP
;=================================
;=================================
;void vp8_subtract_mbuv_neon(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride)
;void vp8_subtract_mbuv_neon(short *diff, unsigned char *usrc, unsigned char *vsrc,
; int src_stride, unsigned char *upred,
; unsigned char *vpred, int pred_stride)
|
vp8_subtract_mbuv_neon
|
PROC
|
vp8_subtract_mbuv_neon
|
PROC
ldr
r12
,
[
sp
]
push
{
r4
-
r7
}
ldr
r4
,
[
sp
,
#
16
]
; upred
ldr
r5
,
[
sp
,
#
20
]
; vpred
ldr
r6
,
[
sp
,
#
24
]
; pred_stride
add
r0
,
r0
,
#
512
; short *udiff = diff + 256;
mov
r12
,
#
32
; "diff" stride x2
add
r7
,
r0
,
#
16
; second diff pointer
;u
;u
add
r0
,
r0
,
#
512
; short *udiff = diff + 256;
vld1.8
{
d0
}
,
[
r1
],
r3
;load usrc
add
r3
,
r3
,
#
256
; unsigned char *upred = pred + 256;
vld1.8
{
d1
}
,
[
r4
],
r6
;load upred
vld1.8
{
d2
}
,
[
r1
],
r3
vld1.8
{
d0
}
,
[
r1
],
r12
;load src
vld1.8
{
d3
}
,
[
r4
],
r6
vld1.8
{
d1
}
,
[
r3
]
!
;load pred
vld1.8
{
d4
}
,
[
r1
],
r3
vld1.8
{
d2
}
,
[
r1
],
r12
vld1.8
{
d5
}
,
[
r4
],
r6
vld1.8
{
d3
}
,
[
r3
]
!
vld1.8
{
d6
}
,
[
r1
],
r3
vld1.8
{
d4
}
,
[
r1
],
r12
vld1.8
{
d7
}
,
[
r4
],
r6
vld1.8
{
d5
}
,
[
r3
]
!
vld1.8
{
d8
}
,
[
r1
],
r3
vld1.8
{
d6
}
,
[
r1
],
r12
vld1.8
{
d9
}
,
[
r4
],
r6
vld1.8
{
d7
}
,
[
r3
]
!
vld1.8
{
d10
}
,
[
r1
],
r3
vld1.8
{
d8
}
,
[
r1
],
r12
vld1.8
{
d11
}
,
[
r4
],
r6
vld1.8
{
d9
}
,
[
r3
]
!
vld1.8
{
d12
}
,
[
r1
],
r3
vld1.8
{
d10
}
,
[
r1
],
r12
vld1.8
{
d13
}
,
[
r4
],
r6
vld1.8
{
d11
}
,
[
r3
]
!
vld1.8
{
d14
}
,
[
r1
],
r3
vld1.8
{
d12
}
,
[
r1
],
r12
vld1.8
{
d15
}
,
[
r4
],
r6
vld1.8
{
d13
}
,
[
r3
]
!
vld1.8
{
d14
}
,
[
r1
],
r12
vld1.8
{
d15
}
,
[
r3
]
!
vsubl.u8
q8
,
d0
,
d1
vsubl.u8
q8
,
d0
,
d1
vsubl.u8
q9
,
d2
,
d3
vsubl.u8
q9
,
d2
,
d3
...
@@ -134,32 +146,32 @@ subtract_mby_loop
...
@@ -134,32 +146,32 @@ subtract_mby_loop
vsubl.u8
q14
,
d12
,
d13
vsubl.u8
q14
,
d12
,
d13
vsubl.u8
q15
,
d14
,
d15
vsubl.u8
q15
,
d14
,
d15
vst1.16
{
q8
}
,
[
r0
]
!
;store diff
vst1.16
{
q8
}
,
[
r0
]
,
r12
;store diff
vst1.16
{
q9
}
,
[
r
0
]
!
vst1.16
{
q9
}
,
[
r
7
],
r12
vst1.16
{
q10
}
,
[
r0
]
!
vst1.16
{
q10
}
,
[
r0
]
,
r12
vst1.16
{
q11
}
,
[
r
0
]
!
vst1.16
{
q11
}
,
[
r
7
],
r12
vst1.16
{
q12
}
,
[
r0
]
!
vst1.16
{
q12
}
,
[
r0
]
,
r12
vst1.16
{
q13
}
,
[
r
0
]
!
vst1.16
{
q13
}
,
[
r
7
],
r12
vst1.16
{
q14
}
,
[
r0
]
!
vst1.16
{
q14
}
,
[
r0
]
,
r12
vst1.16
{
q15
}
,
[
r
0
]
!
vst1.16
{
q15
}
,
[
r
7
],
r12
;v
;v
vld1.8
{
d0
}
,
[
r2
],
r
12
;load src
vld1.8
{
d0
}
,
[
r2
],
r
3
;load
v
src
vld1.8
{
d1
}
,
[
r
3
]
!
;load pred
vld1.8
{
d1
}
,
[
r
5
],
r6
;load
v
pred
vld1.8
{
d2
}
,
[
r2
],
r
12
vld1.8
{
d2
}
,
[
r2
],
r
3
vld1.8
{
d3
}
,
[
r
3
]
!
vld1.8
{
d3
}
,
[
r
5
],
r6
vld1.8
{
d4
}
,
[
r2
],
r
12
vld1.8
{
d4
}
,
[
r2
],
r
3
vld1.8
{
d5
}
,
[
r
3
]
!
vld1.8
{
d5
}
,
[
r
5
],
r6
vld1.8
{
d6
}
,
[
r2
],
r
12
vld1.8
{
d6
}
,
[
r2
],
r
3
vld1.8
{
d7
}
,
[
r
3
]
!
vld1.8
{
d7
}
,
[
r
5
],
r6
vld1.8
{
d8
}
,
[
r2
],
r
12
vld1.8
{
d8
}
,
[
r2
],
r
3
vld1.8
{
d9
}
,
[
r
3
]
!
vld1.8
{
d9
}
,
[
r
5
],
r6
vld1.8
{
d10
}
,
[
r2
],
r
12
vld1.8
{
d10
}
,
[
r2
],
r
3
vld1.8
{
d11
}
,
[
r
3
]
!
vld1.8
{
d11
}
,
[
r
5
],
r6
vld1.8
{
d12
}
,
[
r2
],
r
12
vld1.8
{
d12
}
,
[
r2
],
r
3
vld1.8
{
d13
}
,
[
r
3
]
!
vld1.8
{
d13
}
,
[
r
5
],
r6
vld1.8
{
d14
}
,
[
r2
],
r
12
vld1.8
{
d14
}
,
[
r2
],
r
3
vld1.8
{
d15
}
,
[
r
3
]
!
vld1.8
{
d15
}
,
[
r
5
],
r6
vsubl.u8
q8
,
d0
,
d1
vsubl.u8
q8
,
d0
,
d1
vsubl.u8
q9
,
d2
,
d3
vsubl.u8
q9
,
d2
,
d3
...
@@ -170,16 +182,18 @@ subtract_mby_loop
...
@@ -170,16 +182,18 @@ subtract_mby_loop
vsubl.u8
q14
,
d12
,
d13
vsubl.u8
q14
,
d12
,
d13
vsubl.u8
q15
,
d14
,
d15
vsubl.u8
q15
,
d14
,
d15
vst1.16
{
q8
}
,
[
r0
]
!
;store diff
vst1.16
{
q8
}
,
[
r0
]
,
r12
;store diff
vst1.16
{
q9
}
,
[
r
0
]
!
vst1.16
{
q9
}
,
[
r
7
],
r12
vst1.16
{
q10
}
,
[
r0
]
!
vst1.16
{
q10
}
,
[
r0
]
,
r12
vst1.16
{
q11
}
,
[
r
0
]
!
vst1.16
{
q11
}
,
[
r
7
],
r12
vst1.16
{
q12
}
,
[
r0
]
!
vst1.16
{
q12
}
,
[
r0
]
,
r12
vst1.16
{
q13
}
,
[
r
0
]
!
vst1.16
{
q13
}
,
[
r
7
],
r12
vst1.16
{
q14
}
,
[
r0
]
!
vst1.16
{
q14
}
,
[
r0
]
,
r12
vst1.16
{
q15
}
,
[
r
0
]
!
vst1.16
{
q15
}
,
[
r
7
],
r12
pop
{
r4
-
r7
}
bx
lr
bx
lr
ENDP
ENDP
END
END
vp8/encoder/encodeintra.c
View file @
3c755577
...
@@ -100,7 +100,7 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
...
@@ -100,7 +100,7 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
RECON_INVOKE
(
&
rtcd
->
common
->
recon
,
build_intra_predictors_mby
)(
&
x
->
e_mbd
);
RECON_INVOKE
(
&
rtcd
->
common
->
recon
,
build_intra_predictors_mby
)(
&
x
->
e_mbd
);
ENCODEMB_INVOKE
(
&
rtcd
->
encodemb
,
submby
)(
x
->
src_diff
,
*
(
b
->
base_src
),
ENCODEMB_INVOKE
(
&
rtcd
->
encodemb
,
submby
)(
x
->
src_diff
,
*
(
b
->
base_src
),
x
->
e_mbd
.
predictor
,
b
->
src_stride
);
b
->
src_stride
,
x
->
e_mbd
.
predictor
,
16
);
vp8_transform_intra_mby
(
x
);
vp8_transform_intra_mby
(
x
);
...
@@ -115,7 +115,9 @@ void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
...
@@ -115,7 +115,9 @@ void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
{
{
RECON_INVOKE
(
&
rtcd
->
common
->
recon
,
build_intra_predictors_mbuv
)(
&
x
->
e_mbd
);
RECON_INVOKE
(
&
rtcd
->
common
->
recon
,
build_intra_predictors_mbuv
)(
&
x
->
e_mbd
);
ENCODEMB_INVOKE
(
&
rtcd
->
encodemb
,
submbuv
)(
x
->
src_diff
,
x
->
src
.
u_buffer
,
x
->
src
.
v_buffer
,
x
->
e_mbd
.
predictor
,
x
->
src
.
uv_stride
);
ENCODEMB_INVOKE
(
&
rtcd
->
encodemb
,
submbuv
)(
x
->
src_diff
,
x
->
src
.
u_buffer
,
x
->
src
.
v_buffer
,
x
->
src
.
uv_stride
,
&
x
->
e_mbd
.
predictor
[
256
],
&
x
->
e_mbd
.
predictor
[
320
],
8
);
vp8_transform_mbuv
(
x
);
vp8_transform_mbuv
(
x
);
...
...
vp8/encoder/encodemb.c
View file @
3c755577
...
@@ -48,12 +48,12 @@ void vp8_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch)
...
@@ -48,12 +48,12 @@ void vp8_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch)
}
}
}
}
void
vp8_subtract_mbuv_c
(
short
*
diff
,
unsigned
char
*
usrc
,
unsigned
char
*
vsrc
,
unsigned
char
*
pred
,
int
stride
)
void
vp8_subtract_mbuv_c
(
short
*
diff
,
unsigned
char
*
usrc
,
unsigned
char
*
vsrc
,
int
src_stride
,
unsigned
char
*
upred
,
unsigned
char
*
vpred
,
int
pred_stride
)
{
{
short
*
udiff
=
diff
+
256
;
short
*
udiff
=
diff
+
256
;
short
*
vdiff
=
diff
+
320
;
short
*
vdiff
=
diff
+
320
;
unsigned
char
*
upred
=
pred
+
256
;
unsigned
char
*
vpred
=
pred
+
320
;
int
r
,
c
;
int
r
,
c
;
...
@@ -65,8 +65,8 @@ void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc,
...
@@ -65,8 +65,8 @@ void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc,
}
}
udiff
+=
8
;
udiff
+=
8
;
upred
+=
8
;
upred
+=
pred_stride
;
usrc
+=
stride
;
usrc
+=
src_
stride
;
}
}
for
(
r
=
0
;
r
<
8
;
r
++
)
for
(
r
=
0
;
r
<
8
;
r
++
)
...
@@ -77,12 +77,13 @@ void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc,
...
@@ -77,12 +77,13 @@ void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc,
}
}
vdiff
+=
8
;
vdiff
+=
8
;
vpred
+=
8
;
vpred
+=
pred_stride
;
vsrc
+=
stride
;
vsrc
+=
src_
stride
;
}
}
}
}
void
vp8_subtract_mby_c
(
short
*
diff
,
unsigned
char
*
src
,
unsigned
char
*
pred
,
int
stride
)
void
vp8_subtract_mby_c
(
short
*
diff
,
unsigned
char
*
src
,
int
src_stride
,
unsigned
char
*
pred
,
int
pred_stride
)
{
{
int
r
,
c
;
int
r
,
c
;
...
@@ -94,8 +95,8 @@ void vp8_subtract_mby_c(short *diff, unsigned char *src, unsigned char *pred, in
...
@@ -94,8 +95,8 @@ void vp8_subtract_mby_c(short *diff, unsigned char *src, unsigned char *pred, in
}
}
diff
+=
16
;
diff
+=
16
;
pred
+=
16
;
pred
+=
pred_stride
;
src
+=
stride
;
src
+=
src_
stride
;
}
}
}
}
...
@@ -103,8 +104,11 @@ static void vp8_subtract_mb(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
...
@@ -103,8 +104,11 @@ static void vp8_subtract_mb(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
{
{
BLOCK
*
b
=
&
x
->
block
[
0
];
BLOCK
*
b
=
&
x
->
block
[
0
];
ENCODEMB_INVOKE
(
&
rtcd
->
encodemb
,
submby
)(
x
->
src_diff
,
*
(
b
->
base_src
),
x
->
e_mbd
.
predictor
,
b
->
src_stride
);
ENCODEMB_INVOKE
(
&
rtcd
->
encodemb
,
submby
)(
x
->
src_diff
,
*
(
b
->
base_src
),
ENCODEMB_INVOKE
(
&
rtcd
->
encodemb
,
submbuv
)(
x
->
src_diff
,
x
->
src
.
u_buffer
,
x
->
src
.
v_buffer
,
x
->
e_mbd
.
predictor
,
x
->
src
.
uv_stride
);
b
->
src_stride
,
x
->
e_mbd
.
predictor
,
16
);
ENCODEMB_INVOKE
(
&
rtcd
->
encodemb
,
submbuv
)(
x
->
src_diff
,
x
->
src
.
u_buffer
,
x
->
src
.
v_buffer
,
x
->
src
.
uv_stride
,
&
x
->
e_mbd
.
predictor
[
256
],
&
x
->
e_mbd
.
predictor
[
320
],
8
);
}
}
static
void
build_dcblock
(
MACROBLOCK
*
x
)
static
void
build_dcblock
(
MACROBLOCK
*
x
)
...
@@ -641,7 +645,8 @@ void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
...
@@ -641,7 +645,8 @@ void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
vp8_build_inter16x16_predictors_mby
(
&
x
->
e_mbd
);
vp8_build_inter16x16_predictors_mby
(
&
x
->
e_mbd
);