Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
A
aom-rav1e
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Xiph.Org
aom-rav1e
Commits
3c755577
Commit
3c755577
authored
Nov 17, 2011
by
Scott LaVarnway
Committed by
Gerrit Code Review
Nov 17, 2011
Browse files
Options
Browse Files
Download
Plain Diff
Merge "Added predictor stride argument(s) to subtract functions"
parents
120a4640
edd98b73
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
374 additions
and
662 deletions
+374
-662
vp8/encoder/arm/armv6/vp8_subtract_armv6.asm
vp8/encoder/arm/armv6/vp8_subtract_armv6.asm
+35
-28
vp8/encoder/arm/neon/subtract_neon.asm
vp8/encoder/arm/neon/subtract_neon.asm
+84
-70
vp8/encoder/encodeintra.c
vp8/encoder/encodeintra.c
+4
-2
vp8/encoder/encodemb.c
vp8/encoder/encodemb.c
+18
-13
vp8/encoder/encodemb.h
vp8/encoder/encodemb.h
+4
-2
vp8/encoder/rdopt.c
vp8/encoder/rdopt.c
+7
-5
vp8/encoder/x86/subtract_mmx.asm
vp8/encoder/x86/subtract_mmx.asm
+102
-311
vp8/encoder/x86/subtract_sse2.asm
vp8/encoder/x86/subtract_sse2.asm
+120
-231
No files found.
vp8/encoder/arm/armv6/vp8_subtract_armv6.asm
View file @
3c755577
...
...
@@ -72,22 +72,23 @@ loop_block
; r0 short *diff
; r1 unsigned char *usrc
; r2 unsigned char *vsrc
; r3 unsigned char *pred
; stack int stride
; r3 int src_stride
; sp unsigned char *upred
; sp unsigned char *vpred
; sp int pred_stride
|
vp8_subtract_mbuv_armv6
|
PROC
stmfd
sp
!
,
{
r4
-
r1
2
,
lr
}
stmfd
sp
!
,
{
r4
-
r1
1
}
add
r0
,
r0
,
#
512
; set *diff point to Cb
add
r3
,
r3
,
#
256
; set *pred point to Cb
mov
r4
,
#
8
; loop count
ldr
r5
,
[
sp
,
#
40
]
; stride
ldr
r5
,
[
sp
,
#
32
]
; upred
ldr
r12
,
[
sp
,
#
40
]
; pred_stride
; Subtract U block
loop_u
ldr
r6
,
[
r1
]
;
src
(A)
ldr
r7
,
[
r
3
],
#
4
; pred
(A)
ldr
r6
,
[
r1
]
;
usrc
(A)
ldr
r7
,
[
r
5
]
; upred
(A)
uxtb16
r8
,
r6
; [s2 | s0] (A)
uxtb16
r9
,
r7
; [p2 | p0] (A)
...
...
@@ -97,8 +98,8 @@ loop_u
usub16
r6
,
r8
,
r9
; [d2 | d0] (A)
usub16
r7
,
r10
,
r11
; [d3 | d1] (A)
ldr
r10
,
[
r1
,
#
4
]
;
src
(B)
ldr
r11
,
[
r
3
],
#
4
; pred
(B)
ldr
r10
,
[
r1
,
#
4
]
;
usrc
(B)
ldr
r11
,
[
r
5
,
#
4
]
; upred
(B)
pkhbt
r8
,
r6
,
r7
,
lsl
#
16
; [d1 | d0] (A)
pkhtb
r9
,
r7
,
r6
,
asr
#
16
; [d3 | d2] (A)
...
...
@@ -114,7 +115,8 @@ loop_u
usub16
r6
,
r8
,
r9
; [d2 | d0] (B)
usub16
r7
,
r10
,
r11
; [d3 | d1] (B)
add
r1
,
r1
,
r5
; update usrc pointer
add
r1
,
r1
,
r3
; update usrc pointer
add
r5
,
r5
,
r12
; update upred pointer
pkhbt
r8
,
r6
,
r7
,
lsl
#
16
; [d1 | d0] (B)
pkhtb
r9
,
r7
,
r6
,
asr
#
16
; [d3 | d2] (B)
...
...
@@ -125,12 +127,13 @@ loop_u
bne
loop_u
ldr
r5
,
[
sp
,
#
36
]
; vpred
mov
r4
,
#
8
; loop count
; Subtract V block
loop_v
ldr
r6
,
[
r2
]
;
src
(A)
ldr
r7
,
[
r
3
],
#
4
; pred
(A)
ldr
r6
,
[
r2
]
;
vsrc
(A)
ldr
r7
,
[
r
5
]
; vpred
(A)
uxtb16
r8
,
r6
; [s2 | s0] (A)
uxtb16
r9
,
r7
; [p2 | p0] (A)
...
...
@@ -140,8 +143,8 @@ loop_v
usub16
r6
,
r8
,
r9
; [d2 | d0] (A)
usub16
r7
,
r10
,
r11
; [d3 | d1] (A)
ldr
r10
,
[
r2
,
#
4
]
;
src
(B)
ldr
r11
,
[
r
3
],
#
4
; pred
(B)
ldr
r10
,
[
r2
,
#
4
]
;
vsrc
(B)
ldr
r11
,
[
r
5
,
#
4
]
; vpred
(B)
pkhbt
r8
,
r6
,
r7
,
lsl
#
16
; [d1 | d0] (A)
pkhtb
r9
,
r7
,
r6
,
asr
#
16
; [d3 | d2] (A)
...
...
@@ -157,7 +160,8 @@ loop_v
usub16
r6
,
r8
,
r9
; [d2 | d0] (B)
usub16
r7
,
r10
,
r11
; [d3 | d1] (B)
add
r2
,
r2
,
r5
; update vsrc pointer
add
r2
,
r2
,
r3
; update vsrc pointer
add
r5
,
r5
,
r12
; update vpred pointer
pkhbt
r8
,
r6
,
r7
,
lsl
#
16
; [d1 | d0] (B)
pkhtb
r9
,
r7
,
r6
,
asr
#
16
; [d3 | d2] (B)
...
...
@@ -168,23 +172,25 @@ loop_v
bne
loop_v
ldmfd
sp
!
,
{
r4
-
r12
,
pc
}
ldmfd
sp
!
,
{
r4
-
r11
}
bx
lr
ENDP
; r0 short *diff
; r1 unsigned char *src
; r2 unsigned char *pred
; r3 int stride
; r2 int src_stride
; r3 unsigned char *pred
; sp int pred_stride
|
vp8_subtract_mby_armv6
|
PROC
stmfd
sp
!
,
{
r4
-
r11
}
ldr
r12
,
[
sp
,
#
32
]
; pred_stride
mov
r4
,
#
16
loop
ldr
r6
,
[
r1
]
; src (A)
ldr
r7
,
[
r
2
],
#
4
; pred (A)
ldr
r7
,
[
r
3
]
; pred (A)
uxtb16
r8
,
r6
; [s2 | s0] (A)
uxtb16
r9
,
r7
; [p2 | p0] (A)
...
...
@@ -195,7 +201,7 @@ loop
usub16
r7
,
r10
,
r11
; [d3 | d1] (A)
ldr
r10
,
[
r1
,
#
4
]
; src (B)
ldr
r11
,
[
r
2
],
#
4
; pred (B)
ldr
r11
,
[
r
3
,
#
4
]
; pred (B)
pkhbt
r8
,
r6
,
r7
,
lsl
#
16
; [d1 | d0] (A)
pkhtb
r9
,
r7
,
r6
,
asr
#
16
; [d3 | d2] (A)
...
...
@@ -212,7 +218,7 @@ loop
usub16
r7
,
r10
,
r11
; [d3 | d1] (B)
ldr
r10
,
[
r1
,
#
8
]
; src (C)
ldr
r11
,
[
r
2
],
#
4
; pred (C)
ldr
r11
,
[
r
3
,
#
8
]
; pred (C)
pkhbt
r8
,
r6
,
r7
,
lsl
#
16
; [d1 | d0] (B)
pkhtb
r9
,
r7
,
r6
,
asr
#
16
; [d3 | d2] (B)
...
...
@@ -229,10 +235,10 @@ loop
usub16
r7
,
r10
,
r11
; [d3 | d1] (C)
ldr
r10
,
[
r1
,
#
12
]
; src (D)
ldr
r11
,
[
r
2
],
#
4
; pred (D)
ldr
r11
,
[
r
3
,
#
12
]
; pred (D)
pkhbt
r8
,
r6
,
r7
,
lsl
#
16
; [d1 | d0] (C)
pkhtb
r9
,
r7
,
r6
,
asr
#
16
; [d3 | d2] (C)
pkhbt
r8
,
r6
,
r7
,
lsl
#
16
; [d1 | d0] (C)
pkhtb
r9
,
r7
,
r6
,
asr
#
16
; [d3 | d2] (C)
str
r8
,
[
r0
],
#
4
; diff (C)
uxtb16
r8
,
r10
; [s2 | s0] (D)
...
...
@@ -245,7 +251,8 @@ loop
usub16
r6
,
r8
,
r9
; [d2 | d0] (D)
usub16
r7
,
r10
,
r11
; [d3 | d1] (D)
add
r1
,
r1
,
r3
; update src pointer
add
r1
,
r1
,
r2
; update src pointer
add
r3
,
r3
,
r12
; update pred pointer
pkhbt
r8
,
r6
,
r7
,
lsl
#
16
; [d1 | d0] (D)
pkhtb
r9
,
r7
,
r6
,
asr
#
16
; [d3 | d2] (D)
...
...
@@ -257,7 +264,7 @@ loop
bne
loop
ldmfd
sp
!
,
{
r4
-
r11
}
mov
pc
,
lr
bx
lr
ENDP
...
...
vp8/encoder/arm/neon/subtract_neon.asm
View file @
3c755577
...
...
@@ -61,19 +61,24 @@
;==========================================
;void vp8_subtract_mby_neon(short *diff, unsigned char *src, unsigned char *pred, int stride)
;void vp8_subtract_mby_neon(short *diff, unsigned char *src, int src_stride
; unsigned char *pred, int pred_stride)
|
vp8_subtract_mby_neon
|
PROC
push
{
r4
-
r7
}
mov
r12
,
#
4
ldr
r4
,
[
sp
,
#
16
]
; pred_stride
mov
r6
,
#
32
; "diff" stride x2
add
r5
,
r0
,
#
16
; second diff pointer
subtract_mby_loop
vld1.8
{
q0
}
,
[
r1
],
r
3
;load src
vld1.8
{
q1
}
,
[
r
2
]
!
;load pred
vld1.8
{
q2
}
,
[
r1
],
r
3
vld1.8
{
q3
}
,
[
r
2
]
!
vld1.8
{
q4
}
,
[
r1
],
r
3
vld1.8
{
q5
}
,
[
r
2
]
!
vld1.8
{
q6
}
,
[
r1
],
r
3
vld1.8
{
q7
}
,
[
r
2
]
!
vld1.8
{
q0
}
,
[
r1
],
r
2
;load src
vld1.8
{
q1
}
,
[
r
3
],
r4
;load pred
vld1.8
{
q2
}
,
[
r1
],
r
2
vld1.8
{
q3
}
,
[
r
3
],
r4
vld1.8
{
q4
}
,
[
r1
],
r
2
vld1.8
{
q5
}
,
[
r
3
],
r4
vld1.8
{
q6
}
,
[
r1
],
r
2
vld1.8
{
q7
}
,
[
r
3
],
r4
vsubl.u8
q8
,
d0
,
d2
vsubl.u8
q9
,
d1
,
d3
...
...
@@ -84,46 +89,53 @@ subtract_mby_loop
vsubl.u8
q14
,
d12
,
d14
vsubl.u8
q15
,
d13
,
d15
vst1.16
{
q8
}
,
[
r0
]
!
;store diff
vst1.16
{
q9
}
,
[
r
0
]
!
vst1.16
{
q10
}
,
[
r0
]
!
vst1.16
{
q11
}
,
[
r
0
]
!
vst1.16
{
q12
}
,
[
r0
]
!
vst1.16
{
q13
}
,
[
r
0
]
!
vst1.16
{
q14
}
,
[
r0
]
!
vst1.16
{
q15
}
,
[
r
0
]
!
vst1.16
{
q8
}
,
[
r0
]
,
r6
;store diff
vst1.16
{
q9
}
,
[
r
5
],
r6
vst1.16
{
q10
}
,
[
r0
]
,
r6
vst1.16
{
q11
}
,
[
r
5
],
r6
vst1.16
{
q12
}
,
[
r0
]
,
r6
vst1.16
{
q13
}
,
[
r
5
],
r6
vst1.16
{
q14
}
,
[
r0
]
,
r6
vst1.16
{
q15
}
,
[
r
5
],
r6
subs
r12
,
r12
,
#
1
bne
subtract_mby_loop
pop
{
r4
-
r7
}
bx
lr
ENDP
;=================================
;void vp8_subtract_mbuv_neon(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride)
;void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc,
; int src_stride, unsigned char *upred,
; unsigned char *vpred, int pred_stride)
|
vp8_subtract_mbuv_neon
|
PROC
ldr
r12
,
[
sp
]
push
{
r4
-
r7
}
ldr
r4
,
[
sp
,
#
16
]
; upred
ldr
r5
,
[
sp
,
#
20
]
; vpred
ldr
r6
,
[
sp
,
#
24
]
; pred_stride
add
r0
,
r0
,
#
512
; short *udiff = diff + 256;
mov
r12
,
#
32
; "diff" stride x2
add
r7
,
r0
,
#
16
; second diff pointer
;u
add
r0
,
r0
,
#
512
; short *udiff = diff + 256;
add
r3
,
r3
,
#
256
; unsigned char *upred = pred + 256;
vld1.8
{
d0
}
,
[
r1
],
r12
;load src
vld1.8
{
d1
}
,
[
r3
]
!
;load pred
vld1.8
{
d2
}
,
[
r1
],
r12
vld1.8
{
d3
}
,
[
r3
]
!
vld1.8
{
d4
}
,
[
r1
],
r12
vld1.8
{
d5
}
,
[
r3
]
!
vld1.8
{
d6
}
,
[
r1
],
r12
vld1.8
{
d7
}
,
[
r3
]
!
vld1.8
{
d8
}
,
[
r1
],
r12
vld1.8
{
d9
}
,
[
r3
]
!
vld1.8
{
d10
}
,
[
r1
],
r12
vld1.8
{
d11
}
,
[
r3
]
!
vld1.8
{
d12
}
,
[
r1
],
r12
vld1.8
{
d13
}
,
[
r3
]
!
vld1.8
{
d14
}
,
[
r1
],
r12
vld1.8
{
d15
}
,
[
r3
]
!
vld1.8
{
d0
}
,
[
r1
],
r3
;load usrc
vld1.8
{
d1
}
,
[
r4
],
r6
;load upred
vld1.8
{
d2
}
,
[
r1
],
r3
vld1.8
{
d3
}
,
[
r4
],
r6
vld1.8
{
d4
}
,
[
r1
],
r3
vld1.8
{
d5
}
,
[
r4
],
r6
vld1.8
{
d6
}
,
[
r1
],
r3
vld1.8
{
d7
}
,
[
r4
],
r6
vld1.8
{
d8
}
,
[
r1
],
r3
vld1.8
{
d9
}
,
[
r4
],
r6
vld1.8
{
d10
}
,
[
r1
],
r3
vld1.8
{
d11
}
,
[
r4
],
r6
vld1.8
{
d12
}
,
[
r1
],
r3
vld1.8
{
d13
}
,
[
r4
],
r6
vld1.8
{
d14
}
,
[
r1
],
r3
vld1.8
{
d15
}
,
[
r4
],
r6
vsubl.u8
q8
,
d0
,
d1
vsubl.u8
q9
,
d2
,
d3
...
...
@@ -134,32 +146,32 @@ subtract_mby_loop
vsubl.u8
q14
,
d12
,
d13
vsubl.u8
q15
,
d14
,
d15
vst1.16
{
q8
}
,
[
r0
]
!
;store diff
vst1.16
{
q9
}
,
[
r
0
]
!
vst1.16
{
q10
}
,
[
r0
]
!
vst1.16
{
q11
}
,
[
r
0
]
!
vst1.16
{
q12
}
,
[
r0
]
!
vst1.16
{
q13
}
,
[
r
0
]
!
vst1.16
{
q14
}
,
[
r0
]
!
vst1.16
{
q15
}
,
[
r
0
]
!
vst1.16
{
q8
}
,
[
r0
]
,
r12
;store diff
vst1.16
{
q9
}
,
[
r
7
],
r12
vst1.16
{
q10
}
,
[
r0
]
,
r12
vst1.16
{
q11
}
,
[
r
7
],
r12
vst1.16
{
q12
}
,
[
r0
]
,
r12
vst1.16
{
q13
}
,
[
r
7
],
r12
vst1.16
{
q14
}
,
[
r0
]
,
r12
vst1.16
{
q15
}
,
[
r
7
],
r12
;v
vld1.8
{
d0
}
,
[
r2
],
r
12
;load
src
vld1.8
{
d1
}
,
[
r
3
]
!
;load
pred
vld1.8
{
d2
}
,
[
r2
],
r
12
vld1.8
{
d3
}
,
[
r
3
]
!
vld1.8
{
d4
}
,
[
r2
],
r
12
vld1.8
{
d5
}
,
[
r
3
]
!
vld1.8
{
d6
}
,
[
r2
],
r
12
vld1.8
{
d7
}
,
[
r
3
]
!
vld1.8
{
d8
}
,
[
r2
],
r
12
vld1.8
{
d9
}
,
[
r
3
]
!
vld1.8
{
d10
}
,
[
r2
],
r
12
vld1.8
{
d11
}
,
[
r
3
]
!
vld1.8
{
d12
}
,
[
r2
],
r
12
vld1.8
{
d13
}
,
[
r
3
]
!
vld1.8
{
d14
}
,
[
r2
],
r
12
vld1.8
{
d15
}
,
[
r
3
]
!
vld1.8
{
d0
}
,
[
r2
],
r
3
;load v
src
vld1.8
{
d1
}
,
[
r
5
],
r6
;load v
pred
vld1.8
{
d2
}
,
[
r2
],
r
3
vld1.8
{
d3
}
,
[
r
5
],
r6
vld1.8
{
d4
}
,
[
r2
],
r
3
vld1.8
{
d5
}
,
[
r
5
],
r6
vld1.8
{
d6
}
,
[
r2
],
r
3
vld1.8
{
d7
}
,
[
r
5
],
r6
vld1.8
{
d8
}
,
[
r2
],
r
3
vld1.8
{
d9
}
,
[
r
5
],
r6
vld1.8
{
d10
}
,
[
r2
],
r
3
vld1.8
{
d11
}
,
[
r
5
],
r6
vld1.8
{
d12
}
,
[
r2
],
r
3
vld1.8
{
d13
}
,
[
r
5
],
r6
vld1.8
{
d14
}
,
[
r2
],
r
3
vld1.8
{
d15
}
,
[
r
5
],
r6
vsubl.u8
q8
,
d0
,
d1
vsubl.u8
q9
,
d2
,
d3
...
...
@@ -170,16 +182,18 @@ subtract_mby_loop
vsubl.u8
q14
,
d12
,
d13
vsubl.u8
q15
,
d14
,
d15
vst1.16
{
q8
}
,
[
r0
]
!
;store diff
vst1.16
{
q9
}
,
[
r
0
]
!
vst1.16
{
q10
}
,
[
r0
]
!
vst1.16
{
q11
}
,
[
r
0
]
!
vst1.16
{
q12
}
,
[
r0
]
!
vst1.16
{
q13
}
,
[
r
0
]
!
vst1.16
{
q14
}
,
[
r0
]
!
vst1.16
{
q15
}
,
[
r
0
]
!
vst1.16
{
q8
}
,
[
r0
]
,
r12
;store diff
vst1.16
{
q9
}
,
[
r
7
],
r12
vst1.16
{
q10
}
,
[
r0
]
,
r12
vst1.16
{
q11
}
,
[
r
7
],
r12
vst1.16
{
q12
}
,
[
r0
]
,
r12
vst1.16
{
q13
}
,
[
r
7
],
r12
vst1.16
{
q14
}
,
[
r0
]
,
r12
vst1.16
{
q15
}
,
[
r
7
],
r12
pop
{
r4
-
r7
}
bx
lr
ENDP
END
vp8/encoder/encodeintra.c
View file @
3c755577
...
...
@@ -100,7 +100,7 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
RECON_INVOKE
(
&
rtcd
->
common
->
recon
,
build_intra_predictors_mby
)(
&
x
->
e_mbd
);
ENCODEMB_INVOKE
(
&
rtcd
->
encodemb
,
submby
)(
x
->
src_diff
,
*
(
b
->
base_src
),
x
->
e_mbd
.
predictor
,
b
->
src_stride
);
b
->
src_stride
,
x
->
e_mbd
.
predictor
,
16
);
vp8_transform_intra_mby
(
x
);
...
...
@@ -115,7 +115,9 @@ void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
{
RECON_INVOKE
(
&
rtcd
->
common
->
recon
,
build_intra_predictors_mbuv
)(
&
x
->
e_mbd
);
ENCODEMB_INVOKE
(
&
rtcd
->
encodemb
,
submbuv
)(
x
->
src_diff
,
x
->
src
.
u_buffer
,
x
->
src
.
v_buffer
,
x
->
e_mbd
.
predictor
,
x
->
src
.
uv_stride
);
ENCODEMB_INVOKE
(
&
rtcd
->
encodemb
,
submbuv
)(
x
->
src_diff
,
x
->
src
.
u_buffer
,
x
->
src
.
v_buffer
,
x
->
src
.
uv_stride
,
&
x
->
e_mbd
.
predictor
[
256
],
&
x
->
e_mbd
.
predictor
[
320
],
8
);
vp8_transform_mbuv
(
x
);
...
...
vp8/encoder/encodemb.c
View file @
3c755577
...
...
@@ -48,12 +48,12 @@ void vp8_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch)
}
}
void
vp8_subtract_mbuv_c
(
short
*
diff
,
unsigned
char
*
usrc
,
unsigned
char
*
vsrc
,
unsigned
char
*
pred
,
int
stride
)
void
vp8_subtract_mbuv_c
(
short
*
diff
,
unsigned
char
*
usrc
,
unsigned
char
*
vsrc
,
int
src_stride
,
unsigned
char
*
upred
,
unsigned
char
*
vpred
,
int
pred_stride
)
{
short
*
udiff
=
diff
+
256
;
short
*
vdiff
=
diff
+
320
;
unsigned
char
*
upred
=
pred
+
256
;
unsigned
char
*
vpred
=
pred
+
320
;
int
r
,
c
;
...
...
@@ -65,8 +65,8 @@ void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc,
}
udiff
+=
8
;
upred
+=
8
;
usrc
+=
stride
;
upred
+=
pred_stride
;
usrc
+=
s
rc_s
tride
;
}
for
(
r
=
0
;
r
<
8
;
r
++
)
...
...
@@ -77,12 +77,13 @@ void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc,
}
vdiff
+=
8
;
vpred
+=
8
;
vsrc
+=
stride
;
vpred
+=
pred_stride
;
vsrc
+=
s
rc_s
tride
;
}
}
void
vp8_subtract_mby_c
(
short
*
diff
,
unsigned
char
*
src
,
unsigned
char
*
pred
,
int
stride
)
void
vp8_subtract_mby_c
(
short
*
diff
,
unsigned
char
*
src
,
int
src_stride
,
unsigned
char
*
pred
,
int
pred_stride
)
{
int
r
,
c
;
...
...
@@ -94,8 +95,8 @@ void vp8_subtract_mby_c(short *diff, unsigned char *src, unsigned char *pred, in
}
diff
+=
16
;
pred
+=
16
;
src
+=
stride
;
pred
+=
pred_stride
;
src
+=
s
rc_s
tride
;
}
}
...
...
@@ -103,8 +104,11 @@ static void vp8_subtract_mb(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
{
BLOCK
*
b
=
&
x
->
block
[
0
];
ENCODEMB_INVOKE
(
&
rtcd
->
encodemb
,
submby
)(
x
->
src_diff
,
*
(
b
->
base_src
),
x
->
e_mbd
.
predictor
,
b
->
src_stride
);
ENCODEMB_INVOKE
(
&
rtcd
->
encodemb
,
submbuv
)(
x
->
src_diff
,
x
->
src
.
u_buffer
,
x
->
src
.
v_buffer
,
x
->
e_mbd
.
predictor
,
x
->
src
.
uv_stride
);
ENCODEMB_INVOKE
(
&
rtcd
->
encodemb
,
submby
)(
x
->
src_diff
,
*
(
b
->
base_src
),
b
->
src_stride
,
x
->
e_mbd
.
predictor
,
16
);
ENCODEMB_INVOKE
(
&
rtcd
->
encodemb
,
submbuv
)(
x
->
src_diff
,
x
->
src
.
u_buffer
,
x
->
src
.
v_buffer
,
x
->
src
.
uv_stride
,
&
x
->
e_mbd
.
predictor
[
256
],
&
x
->
e_mbd
.
predictor
[
320
],
8
);
}
static
void
build_dcblock
(
MACROBLOCK
*
x
)
...
...
@@ -641,7 +645,8 @@ void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
vp8_build_inter16x16_predictors_mby
(
&
x
->
e_mbd
);
ENCODEMB_INVOKE
(
&
rtcd
->
encodemb
,
submby
)(
x
->
src_diff
,
*
(
b
->
base_src
),
x
->
e_mbd
.
predictor
,
b
->
src_stride
);
ENCODEMB_INVOKE
(
&
rtcd
->
encodemb
,
submby
)(
x
->
src_diff
,
*
(
b
->
base_src
),
b
->
src_stride
,
x
->
e_mbd
.
predictor
,
16
);
transform_mby
(
x
);
...
...
vp8/encoder/encodemb.h
View file @
3c755577
...
...
@@ -28,11 +28,13 @@
void (sym)(BLOCK *be,BLOCKD *bd, int pitch)
#define prototype_submby(sym) \
void (sym)(short *diff, unsigned char *src, unsigned char *pred, int stride)
void (sym)(short *diff, unsigned char *src, int src_stride, \
unsigned char *pred, int pred_stride)
#define prototype_submbuv(sym) \
void (sym)(short *diff, unsigned char *usrc, unsigned char *vsrc,\
unsigned char *pred, int stride)
int src_stride, unsigned char *upred, unsigned char *vpred,\
int pred_stride)
#if ARCH_X86 || ARCH_X86_64
#include "x86/encodemb_x86.h"
...
...
vp8/encoder/rdopt.c
View file @
3c755577
...
...
@@ -552,7 +552,7 @@ static void macro_block_yrd( MACROBLOCK *mb,
int
d
;
ENCODEMB_INVOKE
(
rtcd
,
submby
)(
mb
->
src_diff
,
*
(
mb
->
block
[
0
].
base_src
),
mb
->
e_mbd
.
predictor
,
mb
->
block
[
0
].
src_stride
);
mb
->
block
[
0
].
src_stride
,
mb
->
e_mbd
.
predictor
,
16
);
// Fdct and building the 2nd order block
for
(
beptr
=
mb
->
block
;
beptr
<
mb
->
block
+
16
;
beptr
+=
2
)
...
...
@@ -800,7 +800,8 @@ static int rd_inter16x16_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
{
vp8_build_inter16x16_predictors_mbuv
(
&
x
->
e_mbd
);
ENCODEMB_INVOKE
(
IF_RTCD
(
&
cpi
->
rtcd
.
encodemb
),
submbuv
)(
x
->
src_diff
,
x
->
src
.
u_buffer
,
x
->
src
.
v_buffer
,
x
->
e_mbd
.
predictor
,
x
->
src
.
uv_stride
);
x
->
src
.
u_buffer
,
x
->
src
.
v_buffer
,
x
->
src
.
uv_stride
,
&
x
->
e_mbd
.
predictor
[
256
],
&
x
->
e_mbd
.
predictor
[
320
],
8
);
vp8_transform_mbuv
(
x
);
vp8_quantize_mbuv
(
x
);
...
...
@@ -816,7 +817,8 @@ static int rd_inter4x4_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
{
vp8_build_inter4x4_predictors_mbuv
(
&
x
->
e_mbd
);
ENCODEMB_INVOKE
(
IF_RTCD
(
&
cpi
->
rtcd
.
encodemb
),
submbuv
)(
x
->
src_diff
,
x
->
src
.
u_buffer
,
x
->
src
.
v_buffer
,
x
->
e_mbd
.
predictor
,
x
->
src
.
uv_stride
);
x
->
src
.
u_buffer
,
x
->
src
.
v_buffer
,
x
->
src
.
uv_stride
,
&
x
->
e_mbd
.
predictor
[
256
],
&
x
->
e_mbd
.
predictor
[
320
],
8
);
vp8_transform_mbuv
(
x
);
vp8_quantize_mbuv
(
x
);
...
...
@@ -845,8 +847,8 @@ static void rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int
RECON_INVOKE
(
&
cpi
->
rtcd
.
common
->
recon
,
build_intra_predictors_mbuv
)
(
&
x
->
e_mbd
);
ENCODEMB_INVOKE
(
IF_RTCD
(
&
cpi
->
rtcd
.
encodemb
),
submbuv
)(
x
->
src_diff
,
x
->
src
.
u_buffer
,
x
->
src
.
v_buffer
,
x
->
e_mbd
.
predictor
,
x
->
src
.
uv_stride
);
x
->
src
.
u_buffer
,
x
->
src
.
v_buffer
,
x
->
src
.
uv_stride
,
&
x
->
e_mbd
.
predictor
[
256
],
&
x
->
e_mbd
.
predictor
[
320
],
8
);
vp8_transform_mbuv
(
x
);
vp8_quantize_mbuv
(
x
);
...
...
vp8/encoder/x86/subtract_mmx.asm
View file @
3c755577
...
...
@@ -73,74 +73,71 @@ sym(vp8_subtract_b_mmx_impl):
pop
rbp
ret
;void vp8_subtract_mby_mmx(short *diff, unsigned char *src, unsigned char *pred, int stride)
;void vp8_subtract_mby_mmx(short *diff, unsigned char *src, int src_stride,
;unsigned char *pred, int pred_stride)
global
sym
(
vp8_subtract_mby_mmx
)
sym
(
vp8_subtract_mby_mmx
):
push
rbp
mov
rbp
,
rsp
SHADOW_ARGS_TO_STACK
4
SHADOW_ARGS_TO_STACK
5
push
rsi
push
rdi
; end prolog
mov
rdi
,
arg
(
0
)
;diff
mov
rsi
,
arg
(
1
)
;src
movsxd
rdx
,
dword
ptr
arg
(
2
)
;src_stride
mov
rax
,
arg
(
3
)
;pred
push
rbx
movsxd
rbx
,
dword
ptr
arg
(
4
)
;pred_stride
mov
rsi
,
arg
(
1
)
;src
mov
rdi
,
arg
(
0
)
;diff
mov
rax
,
arg
(
2
)
;pred
movsxd
rdx
,
dword
ptr
arg
(
3
)
;stride
pxor
mm0
,
mm0
mov
rcx
,
16
mov
rcx
,
16
pxor
mm0
,
mm0
.submby_loop:
movq
mm1
,
[
rsi
]
movq
mm3
,
[
rax
]
movq
mm1
,
[
rsi
]
movq
mm3
,
[
rax
]
movq
mm2
,
mm1
movq
mm4
,
mm3
punpcklbw
mm1
,
mm0
punpcklbw
mm3
,
mm0
punpckhbw
mm2
,
mm0
punpckhbw
mm4
,
mm0
psubw
mm1
,
mm3
psubw
mm2
,
mm4
movq
mm2
,
mm1
movq
mm4
,
mm3
movq
[
rdi
],
mm1
movq
[
rdi
+
8
],
mm2
punpcklbw
mm1
,
mm0
punpcklbw
mm3
,
mm0
punpckhbw
mm2
,
mm0
punpckhbw
mm4
,
mm0
movq
mm1
,
[
rsi
+
8
]
movq
mm3
,
[
rax
+
8
]
psubw
mm1
,
mm3
psubw
mm2
,
mm4
movq
mm2
,
mm1
movq
mm4
,
mm3
movq
[
rdi
],
mm1
movq
[
rdi
+
8
],
mm2
punpcklbw
mm1
,
mm0
punpcklbw
mm3
,
mm0
movq
mm1
,
[
rsi
+
8
]
movq
mm3
,
[
rax
+
8
]
punpckhbw
mm2
,
mm0
punpckhbw
mm4
,
mm0
movq
mm2
,
mm1
movq
mm4
,
mm3
psubw
mm1
,
mm3
psubw
mm2
,
mm4
punpcklbw
mm1
,
mm0
punpcklbw
mm3
,
mm0