Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
Xiph.Org
aom-rav1e
Commits
3c755577
Commit
3c755577
authored
Nov 17, 2011
by
Scott LaVarnway
Committed by
Gerrit Code Review
Nov 17, 2011
Browse files
Options
Browse Files
Download
Plain Diff
Merge "Added predictor stride argument(s) to subtract functions"
parents
120a4640
edd98b73
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
374 additions
and
662 deletions
+374
-662
vp8/encoder/arm/armv6/vp8_subtract_armv6.asm
vp8/encoder/arm/armv6/vp8_subtract_armv6.asm
+35
-28
vp8/encoder/arm/neon/subtract_neon.asm
vp8/encoder/arm/neon/subtract_neon.asm
+84
-70
vp8/encoder/encodeintra.c
vp8/encoder/encodeintra.c
+4
-2
vp8/encoder/encodemb.c
vp8/encoder/encodemb.c
+18
-13
vp8/encoder/encodemb.h
vp8/encoder/encodemb.h
+4
-2
vp8/encoder/rdopt.c
vp8/encoder/rdopt.c
+7
-5
vp8/encoder/x86/subtract_mmx.asm
vp8/encoder/x86/subtract_mmx.asm
+102
-311
vp8/encoder/x86/subtract_sse2.asm
vp8/encoder/x86/subtract_sse2.asm
+120
-231
No files found.
vp8/encoder/arm/armv6/vp8_subtract_armv6.asm
View file @
3c755577
...
...
@@ -72,22 +72,23 @@ loop_block
; r0 short *diff
; r1 unsigned char *usrc
; r2 unsigned char *vsrc
; r3 unsigned char *pred
; stack int stride
; r3 int src_stride
; sp unsigned char *upred
; sp unsigned char *vpred
; sp int pred_stride
|
vp8_subtract_mbuv_armv6
|
PROC
stmfd
sp
!
,
{
r4
-
r1
2
,
lr
}
stmfd
sp
!
,
{
r4
-
r1
1
}
add
r0
,
r0
,
#
512
; set *diff point to Cb
add
r3
,
r3
,
#
256
; set *pred point to Cb
mov
r4
,
#
8
; loop count
ldr
r5
,
[
sp
,
#
40
]
; stride
ldr
r5
,
[
sp
,
#
32
]
; upred
ldr
r12
,
[
sp
,
#
40
]
; pred_stride
; Subtract U block
loop_u
ldr
r6
,
[
r1
]
; src
(A)
ldr
r7
,
[
r
3
],
#
4
; pred
(A)
ldr
r6
,
[
r1
]
;
u
src (A)
ldr
r7
,
[
r
5
]
;
u
pred (A)
uxtb16
r8
,
r6
; [s2 | s0] (A)
uxtb16
r9
,
r7
; [p2 | p0] (A)
...
...
@@ -97,8 +98,8 @@ loop_u
usub16
r6
,
r8
,
r9
; [d2 | d0] (A)
usub16
r7
,
r10
,
r11
; [d3 | d1] (A)
ldr
r10
,
[
r1
,
#
4
]
; src
(B)
ldr
r11
,
[
r
3
]
,
#
4
; pred
(B)
ldr
r10
,
[
r1
,
#
4
]
;
u
src (B)
ldr
r11
,
[
r
5
,
#
4
]
;
u
pred (B)
pkhbt
r8
,
r6
,
r7
,
lsl
#
16
; [d1 | d0] (A)
pkhtb
r9
,
r7
,
r6
,
asr
#
16
; [d3 | d2] (A)
...
...
@@ -114,7 +115,8 @@ loop_u
usub16
r6
,
r8
,
r9
; [d2 | d0] (B)
usub16
r7
,
r10
,
r11
; [d3 | d1] (B)
add
r1
,
r1
,
r5
; update usrc pointer
add
r1
,
r1
,
r3
; update usrc pointer
add
r5
,
r5
,
r12
; update upred pointer
pkhbt
r8
,
r6
,
r7
,
lsl
#
16
; [d1 | d0] (B)
pkhtb
r9
,
r7
,
r6
,
asr
#
16
; [d3 | d2] (B)
...
...
@@ -125,12 +127,13 @@ loop_u
bne
loop_u
ldr
r5
,
[
sp
,
#
36
]
; vpred
mov
r4
,
#
8
; loop count
; Subtract V block
loop_v
ldr
r6
,
[
r2
]
; src
(A)
ldr
r7
,
[
r
3
],
#
4
; pred
(A)
ldr
r6
,
[
r2
]
;
v
src (A)
ldr
r7
,
[
r
5
]
;
v
pred (A)
uxtb16
r8
,
r6
; [s2 | s0] (A)
uxtb16
r9
,
r7
; [p2 | p0] (A)
...
...
@@ -140,8 +143,8 @@ loop_v
usub16
r6
,
r8
,
r9
; [d2 | d0] (A)
usub16
r7
,
r10
,
r11
; [d3 | d1] (A)
ldr
r10
,
[
r2
,
#
4
]
; src
(B)
ldr
r11
,
[
r
3
]
,
#
4
; pred
(B)
ldr
r10
,
[
r2
,
#
4
]
;
v
src (B)
ldr
r11
,
[
r
5
,
#
4
]
;
v
pred (B)
pkhbt
r8
,
r6
,
r7
,
lsl
#
16
; [d1 | d0] (A)
pkhtb
r9
,
r7
,
r6
,
asr
#
16
; [d3 | d2] (A)
...
...
@@ -157,7 +160,8 @@ loop_v
usub16
r6
,
r8
,
r9
; [d2 | d0] (B)
usub16
r7
,
r10
,
r11
; [d3 | d1] (B)
add
r2
,
r2
,
r5
; update vsrc pointer
add
r2
,
r2
,
r3
; update vsrc pointer
add
r5
,
r5
,
r12
; update vpred pointer
pkhbt
r8
,
r6
,
r7
,
lsl
#
16
; [d1 | d0] (B)
pkhtb
r9
,
r7
,
r6
,
asr
#
16
; [d3 | d2] (B)
...
...
@@ -168,23 +172,25 @@ loop_v
bne
loop_v
ldmfd
sp
!
,
{
r4
-
r12
,
pc
}
ldmfd
sp
!
,
{
r4
-
r11
}
bx
lr
ENDP
; r0 short *diff
; r1 unsigned char *src
; r2 unsigned char *pred
; r3 int stride
; r2 int src_stride
; r3 unsigned char *pred
; sp int pred_stride
|
vp8_subtract_mby_armv6
|
PROC
stmfd
sp
!
,
{
r4
-
r11
}
ldr
r12
,
[
sp
,
#
32
]
; pred_stride
mov
r4
,
#
16
loop
ldr
r6
,
[
r1
]
; src (A)
ldr
r7
,
[
r
2
],
#
4
; pred (A)
ldr
r7
,
[
r
3
]
; pred (A)
uxtb16
r8
,
r6
; [s2 | s0] (A)
uxtb16
r9
,
r7
; [p2 | p0] (A)
...
...
@@ -195,7 +201,7 @@ loop
usub16
r7
,
r10
,
r11
; [d3 | d1] (A)
ldr
r10
,
[
r1
,
#
4
]
; src (B)
ldr
r11
,
[
r
2
]
,
#
4
; pred (B)
ldr
r11
,
[
r
3
,
#
4
]
; pred (B)
pkhbt
r8
,
r6
,
r7
,
lsl
#
16
; [d1 | d0] (A)
pkhtb
r9
,
r7
,
r6
,
asr
#
16
; [d3 | d2] (A)
...
...
@@ -212,7 +218,7 @@ loop
usub16
r7
,
r10
,
r11
; [d3 | d1] (B)
ldr
r10
,
[
r1
,
#
8
]
; src (C)
ldr
r11
,
[
r
2
]
,
#
4
; pred (C)
ldr
r11
,
[
r
3
,
#
8
]
; pred (C)
pkhbt
r8
,
r6
,
r7
,
lsl
#
16
; [d1 | d0] (B)
pkhtb
r9
,
r7
,
r6
,
asr
#
16
; [d3 | d2] (B)
...
...
@@ -229,7 +235,7 @@ loop
usub16
r7
,
r10
,
r11
; [d3 | d1] (C)
ldr
r10
,
[
r1
,
#
12
]
; src (D)
ldr
r11
,
[
r
2
]
,
#
4
; pred (D)
ldr
r11
,
[
r
3
,
#
12
]
; pred (D)
pkhbt
r8
,
r6
,
r7
,
lsl
#
16
; [d1 | d0] (C)
pkhtb
r9
,
r7
,
r6
,
asr
#
16
; [d3 | d2] (C)
...
...
@@ -245,7 +251,8 @@ loop
usub16
r6
,
r8
,
r9
; [d2 | d0] (D)
usub16
r7
,
r10
,
r11
; [d3 | d1] (D)
add
r1
,
r1
,
r3
; update src pointer
add
r1
,
r1
,
r2
; update src pointer
add
r3
,
r3
,
r12
; update pred pointer
pkhbt
r8
,
r6
,
r7
,
lsl
#
16
; [d1 | d0] (D)
pkhtb
r9
,
r7
,
r6
,
asr
#
16
; [d3 | d2] (D)
...
...
@@ -257,7 +264,7 @@ loop
bne
loop
ldmfd
sp
!
,
{
r4
-
r11
}
mov
pc
,
lr
bx
lr
ENDP
...
...
vp8/encoder/arm/neon/subtract_neon.asm
View file @
3c755577
...
...
@@ -61,19 +61,24 @@
;==========================================
;void vp8_subtract_mby_neon(short *diff, unsigned char *src, unsigned char *pred, int stride)
;void vp8_subtract_mby_neon(short *diff, unsigned char *src, int src_stride
; unsigned char *pred, int pred_stride)
|
vp8_subtract_mby_neon
|
PROC
push
{
r4
-
r7
}
mov
r12
,
#
4
ldr
r4
,
[
sp
,
#
16
]
; pred_stride
mov
r6
,
#
32
; "diff" stride x2
add
r5
,
r0
,
#
16
; second diff pointer
subtract_mby_loop
vld1.8
{
q0
}
,
[
r1
],
r
3
;load src
vld1.8
{
q1
}
,
[
r
2
]
!
;load pred
vld1.8
{
q2
}
,
[
r1
],
r
3
vld1.8
{
q3
}
,
[
r
2
]
!
vld1.8
{
q4
}
,
[
r1
],
r
3
vld1.8
{
q5
}
,
[
r
2
]
!
vld1.8
{
q6
}
,
[
r1
],
r
3
vld1.8
{
q7
}
,
[
r
2
]
!
vld1.8
{
q0
}
,
[
r1
],
r
2
;load src
vld1.8
{
q1
}
,
[
r
3
],
r4
;load pred
vld1.8
{
q2
}
,
[
r1
],
r
2
vld1.8
{
q3
}
,
[
r
3
],
r4
vld1.8
{
q4
}
,
[
r1
],
r
2
vld1.8
{
q5
}
,
[
r
3
],
r4
vld1.8
{
q6
}
,
[
r1
],
r
2
vld1.8
{
q7
}
,
[
r
3
],
r4
vsubl.u8
q8
,
d0
,
d2
vsubl.u8
q9
,
d1
,
d3
...
...
@@ -84,46 +89,53 @@ subtract_mby_loop
vsubl.u8
q14
,
d12
,
d14
vsubl.u8
q15
,
d13
,
d15
vst1.16
{
q8
}
,
[
r0
]
!
;store diff
vst1.16
{
q9
}
,
[
r
0
]
!
vst1.16
{
q10
}
,
[
r0
]
!
vst1.16
{
q11
}
,
[
r
0
]
!
vst1.16
{
q12
}
,
[
r0
]
!
vst1.16
{
q13
}
,
[
r
0
]
!
vst1.16
{
q14
}
,
[
r0
]
!
vst1.16
{
q15
}
,
[
r
0
]
!
vst1.16
{
q8
}
,
[
r0
]
,
r6
;store diff
vst1.16
{
q9
}
,
[
r
5
],
r6
vst1.16
{
q10
}
,
[
r0
]
,
r6
vst1.16
{
q11
}
,
[
r
5
],
r6
vst1.16
{
q12
}
,
[
r0
]
,
r6
vst1.16
{
q13
}
,
[
r
5
],
r6
vst1.16
{
q14
}
,
[
r0
]
,
r6
vst1.16
{
q15
}
,
[
r
5
],
r6
subs
r12
,
r12
,
#
1
bne
subtract_mby_loop
pop
{
r4
-
r7
}
bx
lr
ENDP
;=================================
;void vp8_subtract_mbuv_neon(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride)
;void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc,
; int src_stride, unsigned char *upred,
; unsigned char *vpred, int pred_stride)
|
vp8_subtract_mbuv_neon
|
PROC
ldr
r12
,
[
sp
]
push
{
r4
-
r7
}
ldr
r4
,
[
sp
,
#
16
]
; upred
ldr
r5
,
[
sp
,
#
20
]
; vpred
ldr
r6
,
[
sp
,
#
24
]
; pred_stride
add
r0
,
r0
,
#
512
; short *udiff = diff + 256;
mov
r12
,
#
32
; "diff" stride x2
add
r7
,
r0
,
#
16
; second diff pointer
;u
add
r0
,
r0
,
#
512
; short *udiff = diff + 256;
add
r3
,
r3
,
#
256
; unsigned char *upred = pred + 256;
vld1.8
{
d0
}
,
[
r1
],
r12
;load src
vld1.8
{
d1
}
,
[
r3
]
!
;load pred
vld1.8
{
d2
}
,
[
r1
],
r12
vld1.8
{
d3
}
,
[
r3
]
!
vld1.8
{
d4
}
,
[
r1
],
r12
vld1.8
{
d5
}
,
[
r3
]
!
vld1.8
{
d6
}
,
[
r1
],
r12
vld1.8
{
d7
}
,
[
r3
]
!
vld1.8
{
d8
}
,
[
r1
],
r12
vld1.8
{
d9
}
,
[
r3
]
!
vld1.8
{
d10
}
,
[
r1
],
r12
vld1.8
{
d11
}
,
[
r3
]
!
vld1.8
{
d12
}
,
[
r1
],
r12
vld1.8
{
d13
}
,
[
r3
]
!
vld1.8
{
d14
}
,
[
r1
],
r12
vld1.8
{
d15
}
,
[
r3
]
!
vld1.8
{
d0
}
,
[
r1
],
r3
;load usrc
vld1.8
{
d1
}
,
[
r4
],
r6
;load upred
vld1.8
{
d2
}
,
[
r1
],
r3
vld1.8
{
d3
}
,
[
r4
],
r6
vld1.8
{
d4
}
,
[
r1
],
r3
vld1.8
{
d5
}
,
[
r4
],
r6
vld1.8
{
d6
}
,
[
r1
],
r3
vld1.8
{
d7
}
,
[
r4
],
r6
vld1.8
{
d8
}
,
[
r1
],
r3
vld1.8
{
d9
}
,
[
r4
],
r6
vld1.8
{
d10
}
,
[
r1
],
r3
vld1.8
{
d11
}
,
[
r4
],
r6
vld1.8
{
d12
}
,
[
r1
],
r3
vld1.8
{
d13
}
,
[
r4
],
r6
vld1.8
{
d14
}
,
[
r1
],
r3
vld1.8
{
d15
}
,
[
r4
],
r6
vsubl.u8
q8
,
d0
,
d1
vsubl.u8
q9
,
d2
,
d3
...
...
@@ -134,32 +146,32 @@ subtract_mby_loop
vsubl.u8
q14
,
d12
,
d13
vsubl.u8
q15
,
d14
,
d15
vst1.16
{
q8
}
,
[
r0
]
!
;store diff
vst1.16
{
q9
}
,
[
r
0
]
!
vst1.16
{
q10
}
,
[
r0
]
!
vst1.16
{
q11
}
,
[
r
0
]
!
vst1.16
{
q12
}
,
[
r0
]
!
vst1.16
{
q13
}
,
[
r
0
]
!
vst1.16
{
q14
}
,
[
r0
]
!
vst1.16
{
q15
}
,
[
r
0
]
!
vst1.16
{
q8
}
,
[
r0
]
,
r12
;store diff
vst1.16
{
q9
}
,
[
r
7
],
r12
vst1.16
{
q10
}
,
[
r0
]
,
r12
vst1.16
{
q11
}
,
[
r
7
],
r12
vst1.16
{
q12
}
,
[
r0
]
,
r12
vst1.16
{
q13
}
,
[
r
7
],
r12
vst1.16
{
q14
}
,
[
r0
]
,
r12
vst1.16
{
q15
}
,
[
r
7
],
r12
;v
vld1.8
{
d0
}
,
[
r2
],
r
12
;load src
vld1.8
{
d1
}
,
[
r
3
]
!
;load pred
vld1.8
{
d2
}
,
[
r2
],
r
12
vld1.8
{
d3
}
,
[
r
3
]
!
vld1.8
{
d4
}
,
[
r2
],
r
12
vld1.8
{
d5
}
,
[
r
3
]
!
vld1.8
{
d6
}
,
[
r2
],
r
12
vld1.8
{
d7
}
,
[
r
3
]
!
vld1.8
{
d8
}
,
[
r2
],
r
12
vld1.8
{
d9
}
,
[
r
3
]
!
vld1.8
{
d10
}
,
[
r2
],
r
12
vld1.8
{
d11
}
,
[
r
3
]
!
vld1.8
{
d12
}
,
[
r2
],
r
12
vld1.8
{
d13
}
,
[
r
3
]
!
vld1.8
{
d14
}
,
[
r2
],
r
12
vld1.8
{
d15
}
,
[
r
3
]
!
vld1.8
{
d0
}
,
[
r2
],
r
3
;load
v
src
vld1.8
{
d1
}
,
[
r
5
],
r6
;load
v
pred
vld1.8
{
d2
}
,
[
r2
],
r
3
vld1.8
{
d3
}
,
[
r
5
],
r6
vld1.8
{
d4
}
,
[
r2
],
r
3
vld1.8
{
d5
}
,
[
r
5
],
r6
vld1.8
{
d6
}
,
[
r2
],
r
3
vld1.8
{
d7
}
,
[
r
5
],
r6
vld1.8
{
d8
}
,
[
r2
],
r
3
vld1.8
{
d9
}
,
[
r
5
],
r6
vld1.8
{
d10
}
,
[
r2
],
r
3
vld1.8
{
d11
}
,
[
r
5
],
r6
vld1.8
{
d12
}
,
[
r2
],
r
3
vld1.8
{
d13
}
,
[
r
5
],
r6
vld1.8
{
d14
}
,
[
r2
],
r
3
vld1.8
{
d15
}
,
[
r
5
],
r6
vsubl.u8
q8
,
d0
,
d1
vsubl.u8
q9
,
d2
,
d3
...
...
@@ -170,16 +182,18 @@ subtract_mby_loop
vsubl.u8
q14
,
d12
,
d13
vsubl.u8
q15
,
d14
,
d15
vst1.16
{
q8
}
,
[
r0
]
!
;store diff
vst1.16
{
q9
}
,
[
r
0
]
!
vst1.16
{
q10
}
,
[
r0
]
!
vst1.16
{
q11
}
,
[
r
0
]
!
vst1.16
{
q12
}
,
[
r0
]
!
vst1.16
{
q13
}
,
[
r
0
]
!
vst1.16
{
q14
}
,
[
r0
]
!
vst1.16
{
q15
}
,
[
r
0
]
!
vst1.16
{
q8
}
,
[
r0
]
,
r12
;store diff
vst1.16
{
q9
}
,
[
r
7
],
r12
vst1.16
{
q10
}
,
[
r0
]
,
r12
vst1.16
{
q11
}
,
[
r
7
],
r12
vst1.16
{
q12
}
,
[
r0
]
,
r12
vst1.16
{
q13
}
,
[
r
7
],
r12
vst1.16
{
q14
}
,
[
r0
]
,
r12
vst1.16
{
q15
}
,
[
r
7
],
r12
pop
{
r4
-
r7
}
bx
lr
ENDP
END
vp8/encoder/encodeintra.c
View file @
3c755577
...
...
@@ -100,7 +100,7 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
RECON_INVOKE
(
&
rtcd
->
common
->
recon
,
build_intra_predictors_mby
)(
&
x
->
e_mbd
);
ENCODEMB_INVOKE
(
&
rtcd
->
encodemb
,
submby
)(
x
->
src_diff
,
*
(
b
->
base_src
),
x
->
e_mbd
.
predictor
,
b
->
src_stride
);
b
->
src_stride
,
x
->
e_mbd
.
predictor
,
16
);
vp8_transform_intra_mby
(
x
);
...
...
@@ -115,7 +115,9 @@ void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
{
RECON_INVOKE
(
&
rtcd
->
common
->
recon
,
build_intra_predictors_mbuv
)(
&
x
->
e_mbd
);
ENCODEMB_INVOKE
(
&
rtcd
->
encodemb
,
submbuv
)(
x
->
src_diff
,
x
->
src
.
u_buffer
,
x
->
src
.
v_buffer
,
x
->
e_mbd
.
predictor
,
x
->
src
.
uv_stride
);
ENCODEMB_INVOKE
(
&
rtcd
->
encodemb
,
submbuv
)(
x
->
src_diff
,
x
->
src
.
u_buffer
,
x
->
src
.
v_buffer
,
x
->
src
.
uv_stride
,
&
x
->
e_mbd
.
predictor
[
256
],
&
x
->
e_mbd
.
predictor
[
320
],
8
);
vp8_transform_mbuv
(
x
);
...
...
vp8/encoder/encodemb.c
View file @
3c755577
...
...
@@ -48,12 +48,12 @@ void vp8_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch)
}
}
void
vp8_subtract_mbuv_c
(
short
*
diff
,
unsigned
char
*
usrc
,
unsigned
char
*
vsrc
,
unsigned
char
*
pred
,
int
stride
)
void
vp8_subtract_mbuv_c
(
short
*
diff
,
unsigned
char
*
usrc
,
unsigned
char
*
vsrc
,
int
src_stride
,
unsigned
char
*
upred
,
unsigned
char
*
vpred
,
int
pred_stride
)
{
short
*
udiff
=
diff
+
256
;
short
*
vdiff
=
diff
+
320
;
unsigned
char
*
upred
=
pred
+
256
;
unsigned
char
*
vpred
=
pred
+
320
;
int
r
,
c
;
...
...
@@ -65,8 +65,8 @@ void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc,
}
udiff
+=
8
;
upred
+=
8
;
usrc
+=
stride
;
upred
+=
pred_stride
;
usrc
+=
src_
stride
;
}
for
(
r
=
0
;
r
<
8
;
r
++
)
...
...
@@ -77,12 +77,13 @@ void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc,
}
vdiff
+=
8
;
vpred
+=
8
;
vsrc
+=
stride
;
vpred
+=
pred_stride
;
vsrc
+=
src_
stride
;
}
}
void
vp8_subtract_mby_c
(
short
*
diff
,
unsigned
char
*
src
,
unsigned
char
*
pred
,
int
stride
)
void
vp8_subtract_mby_c
(
short
*
diff
,
unsigned
char
*
src
,
int
src_stride
,
unsigned
char
*
pred
,
int
pred_stride
)
{
int
r
,
c
;
...
...
@@ -94,8 +95,8 @@ void vp8_subtract_mby_c(short *diff, unsigned char *src, unsigned char *pred, in
}
diff
+=
16
;
pred
+=
16
;
src
+=
stride
;
pred
+=
pred_stride
;
src
+=
src_
stride
;
}
}
...
...
@@ -103,8 +104,11 @@ static void vp8_subtract_mb(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
{
BLOCK
*
b
=
&
x
->
block
[
0
];
ENCODEMB_INVOKE
(
&
rtcd
->
encodemb
,
submby
)(
x
->
src_diff
,
*
(
b
->
base_src
),
x
->
e_mbd
.
predictor
,
b
->
src_stride
);
ENCODEMB_INVOKE
(
&
rtcd
->
encodemb
,
submbuv
)(
x
->
src_diff
,
x
->
src
.
u_buffer
,
x
->
src
.
v_buffer
,
x
->
e_mbd
.
predictor
,
x
->
src
.
uv_stride
);
ENCODEMB_INVOKE
(
&
rtcd
->
encodemb
,
submby
)(
x
->
src_diff
,
*
(
b
->
base_src
),
b
->
src_stride
,
x
->
e_mbd
.
predictor
,
16
);
ENCODEMB_INVOKE
(
&
rtcd
->
encodemb
,
submbuv
)(
x
->
src_diff
,
x
->
src
.
u_buffer
,
x
->
src
.
v_buffer
,
x
->
src
.
uv_stride
,
&
x
->
e_mbd
.
predictor
[
256
],
&
x
->
e_mbd
.
predictor
[
320
],
8
);
}
static
void
build_dcblock
(
MACROBLOCK
*
x
)
...
...
@@ -641,7 +645,8 @@ void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
vp8_build_inter16x16_predictors_mby
(
&
x
->
e_mbd
);
ENCODEMB_INVOKE
(
&
rtcd
->
encodemb
,
submby
)(
x
->
src_diff
,
*
(
b
->
base_src
),
x
->
e_mbd
.
predictor
,
b
->
src_stride
);
ENCODEMB_INVOKE
(
&
rtcd
->
encodemb
,
submby
)(
x
->
src_diff
,
*
(
b
->
base_src
),
b
->
src_stride
,
x
->
e_mbd
.
predictor
,
16
);
transform_mby
(
x
);
...
...
vp8/encoder/encodemb.h
View file @
3c755577
...
...
@@ -28,11 +28,13 @@
void (sym)(BLOCK *be,BLOCKD *bd, int pitch)
#define prototype_submby(sym) \
void (sym)(short *diff, unsigned char *src, unsigned char *pred, int stride)
void (sym)(short *diff, unsigned char *src, int src_stride, \
unsigned char *pred, int pred_stride)
#define prototype_submbuv(sym) \
void (sym)(short *diff, unsigned char *usrc, unsigned char *vsrc,\
unsigned char *pred, int stride)
int src_stride, unsigned char *upred, unsigned char *vpred,\
int pred_stride)
#if ARCH_X86 || ARCH_X86_64
#include "x86/encodemb_x86.h"
...
...
vp8/encoder/rdopt.c
View file @
3c755577
...
...
@@ -552,7 +552,7 @@ static void macro_block_yrd( MACROBLOCK *mb,
int
d
;
ENCODEMB_INVOKE
(
rtcd
,
submby
)(
mb
->
src_diff
,
*
(
mb
->
block
[
0
].
base_src
),
mb
->
e_mbd
.
predictor
,
mb
->
block
[
0
].
src_stride
);
mb
->
block
[
0
].
src_stride
,
mb
->
e_mbd
.
predictor
,
16
);
// Fdct and building the 2nd order block
for
(
beptr
=
mb
->
block
;
beptr
<
mb
->
block
+
16
;
beptr
+=
2
)
...
...
@@ -800,7 +800,8 @@ static int rd_inter16x16_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
{
vp8_build_inter16x16_predictors_mbuv
(
&
x
->
e_mbd
);
ENCODEMB_INVOKE
(
IF_RTCD
(
&
cpi
->
rtcd
.
encodemb
),
submbuv
)(
x
->
src_diff
,
x
->
src
.
u_buffer
,
x
->
src
.
v_buffer
,
x
->
e_mbd
.
predictor
,
x
->
src
.
uv_stride
);
x
->
src
.
u_buffer
,
x
->
src
.
v_buffer
,
x
->
src
.
uv_stride
,
&
x
->
e_mbd
.
predictor
[
256
],
&
x
->
e_mbd
.
predictor
[
320
],
8
);
vp8_transform_mbuv
(
x
);
vp8_quantize_mbuv
(
x
);
...
...
@@ -816,7 +817,8 @@ static int rd_inter4x4_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
{
vp8_build_inter4x4_predictors_mbuv
(
&
x
->
e_mbd
);
ENCODEMB_INVOKE
(
IF_RTCD
(
&
cpi
->
rtcd
.
encodemb
),
submbuv
)(
x
->
src_diff
,
x
->
src
.
u_buffer
,
x
->
src
.
v_buffer
,
x
->
e_mbd
.
predictor
,
x
->
src
.
uv_stride
);
x
->
src
.
u_buffer
,
x
->
src
.
v_buffer
,
x
->
src
.
uv_stride
,
&
x
->
e_mbd
.
predictor
[
256
],
&
x
->
e_mbd
.
predictor
[
320
],
8
);
vp8_transform_mbuv
(
x
);
vp8_quantize_mbuv
(
x
);
...
...
@@ -845,8 +847,8 @@ static void rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int
RECON_INVOKE
(
&
cpi
->
rtcd
.
common
->
recon
,
build_intra_predictors_mbuv
)
(
&
x
->
e_mbd
);
ENCODEMB_INVOKE
(
IF_RTCD
(
&
cpi
->
rtcd
.
encodemb
),
submbuv
)(
x
->
src_diff
,
x
->
src
.
u_buffer
,
x
->
src
.
v_buffer
,
x
->
e_mbd
.
predictor
,
x
->
src
.
uv_stride
);
x
->
src
.
u_buffer
,
x
->
src
.
v_buffer
,
x
->
src
.
uv_stride
,
&
x
->
e_mbd
.
predictor
[
256
],
&
x
->
e_mbd
.
predictor
[
320
],
8
);
vp8_transform_mbuv
(
x
);
vp8_quantize_mbuv
(
x
);
...
...
vp8/encoder/x86/subtract_mmx.asm
View file @
3c755577
...
...
@@ -73,28 +73,29 @@ sym(vp8_subtract_b_mmx_impl):
pop
rbp
ret
;void vp8_subtract_mby_mmx(short *diff, unsigned char *src, unsigned char *pred, int stride)
;void vp8_subtract_mby_mmx(short *diff, unsigned char *src, int src_stride,
;unsigned char *pred, int pred_stride)
global
sym
(
vp8_subtract_mby_mmx
)
sym
(
vp8_subtract_mby_mmx
):
push
rbp
mov
rbp
,
rsp
SHADOW_ARGS_TO_STACK
4
SHADOW_ARGS_TO_STACK
5
push
rsi
push
rdi
; end prolog
mov
rsi
,
arg
(
1
)
;src
mov
rdi
,
arg
(
0
)
;diff
mov
rsi
,
arg
(
1
)
;src
movsxd
rdx
,
dword
ptr
arg
(
2
)
;src_stride
mov
rax
,
arg
(
3
)
;pred
push
rbx
movsxd
rbx
,
dword
ptr
arg
(
4
)
;pred_stride
mov
rax
,
arg
(
2
)
;pred
movsxd
rdx
,
dword
ptr
arg
(
3
)
;stride
mov
rcx
,
16
pxor
mm0
,
mm0
mov
rcx
,
16
.submby_loop:
.submby_loop:
movq
mm1
,
[
rsi
]
movq
mm3
,
[
rax
]
...
...
@@ -113,7 +114,6 @@ sym(vp8_subtract_mby_mmx):
movq
[
rdi
],
mm1
movq
[
rdi
+
8
],
mm2
movq
mm1
,
[
rsi
+
8
]
movq
mm3
,
[
rax
+
8
]
...
...
@@ -131,16 +131,13 @@ sym(vp8_subtract_mby_mmx):
movq
[
rdi
+
16
],
mm1
movq
[
rdi
+
24
],
mm2
add
rdi
,
32
add
rax
,
16
lea
rax
,
[
rax
+
rbx
]
lea
rsi
,
[
rsi
+
rdx
]
sub
rcx
,
1
dec
rcx
jnz
.submby_loop
pop
rbx
pop
rdi
pop
rsi
; begin epilog
...
...
@@ -149,163 +146,31 @@ sym(vp8_subtract_mby_mmx):
ret
;void vp8_subtract_mbuv_mmx(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride)
;vp8_subtract_mbuv_mmx(short *diff, unsigned char *usrc, unsigned char *vsrc,
; int src_stride, unsigned char *upred,
; unsigned char *vpred, int pred_stride)
global
sym
(
vp8_subtract_mbuv_mmx
)
sym
(
vp8_subtract_mbuv_mmx
):
push
rbp
mov
rbp
,
rsp