Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
Xiph.Org
aom-rav1e
Commits
f46e17fd
Commit
f46e17fd
authored
Nov 28, 2011
by
Scott LaVarnway
Committed by
Gerrit Code Review
Nov 28, 2011
Browse files
Options
Browse Files
Download
Plain Diff
Merge "Modified the inverse walsh to output directly"
parents
e2bacd58
4a91541c
Changes
30
Hide whitespace changes
Inline
Side-by-side
Showing
30 changed files
with
275 additions
and
1200 deletions
+275
-1200
vp8/common/arm/arm_systemdependent.c
vp8/common/arm/arm_systemdependent.c
+0
-2
vp8/common/arm/armv6/iwalsh_v6.asm
vp8/common/arm/armv6/iwalsh_v6.asm
+54
-70
vp8/common/arm/idct_arm.h
vp8/common/arm/idct_arm.h
+0
-6
vp8/common/arm/neon/iwalsh_neon.asm
vp8/common/arm/neon/iwalsh_neon.asm
+22
-15
vp8/common/idct.h
vp8/common/idct.h
+4
-0
vp8/common/idctllm.c
vp8/common/idctllm.c
+10
-10
vp8/common/invtrans.c
vp8/common/invtrans.c
+1
-15
vp8/common/x86/idct_x86.h
vp8/common/x86/idct_x86.h
+0
-4
vp8/common/x86/iwalsh_mmx.asm
vp8/common/x86/iwalsh_mmx.asm
+44
-40
vp8/common/x86/iwalsh_sse2.asm
vp8/common/x86/iwalsh_sse2.asm
+44
-2
vp8/common/x86/x86_systemdependent.c
vp8/common/x86/x86_systemdependent.c
+0
-3
vp8/decoder/arm/arm_dsystemdependent.c
vp8/decoder/arm/arm_dsystemdependent.c
+0
-5
vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm
vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm
+0
-213
vp8/decoder/arm/armv6/idct_blk_v6.c
vp8/decoder/arm/armv6/idct_blk_v6.c
+0
-41
vp8/decoder/arm/dequantize_arm.h
vp8/decoder/arm/dequantize_arm.h
+0
-16
vp8/decoder/arm/neon/idct_blk_neon.c
vp8/decoder/arm/neon/idct_blk_neon.c
+0
-35
vp8/decoder/arm/neon/idct_dequant_dc_0_2x_neon.asm
vp8/decoder/arm/neon/idct_dequant_dc_0_2x_neon.asm
+0
-75
vp8/decoder/arm/neon/idct_dequant_dc_full_2x_neon.asm
vp8/decoder/arm/neon/idct_dequant_dc_full_2x_neon.asm
+0
-208
vp8/decoder/decodframe.c
vp8/decoder/decodframe.c
+38
-30
vp8/decoder/dequantize.c
vp8/decoder/dequantize.c
+0
-19
vp8/decoder/dequantize.h
vp8/decoder/dequantize.h
+0
-27
vp8/decoder/generic/dsystemdependent.c
vp8/decoder/generic/dsystemdependent.c
+0
-2
vp8/decoder/idct_blk.c
vp8/decoder/idct_blk.c
+0
-27
vp8/decoder/threading.c
vp8/decoder/threading.c
+58
-43
vp8/decoder/x86/dequantize_mmx.asm
vp8/decoder/x86/dequantize_mmx.asm
+0
-201
vp8/decoder/x86/dequantize_x86.h
vp8/decoder/x86/dequantize_x86.h
+0
-12
vp8/decoder/x86/idct_blk_mmx.c
vp8/decoder/x86/idct_blk_mmx.c
+0
-35
vp8/decoder/x86/idct_blk_sse2.c
vp8/decoder/x86/idct_blk_sse2.c
+0
-37
vp8/decoder/x86/x86_dsystemdependent.c
vp8/decoder/x86/x86_dsystemdependent.c
+0
-4
vp8/vp8dx_arm.mk
vp8/vp8dx_arm.mk
+0
-3
No files found.
vp8/common/arm/arm_systemdependent.c
View file @
f46e17fd
...
...
@@ -46,7 +46,6 @@ void vp8_arch_arm_common_init(VP8_COMMON *ctx)
rtcd
->
subpix
.
bilinear4x4
=
vp8_bilinear_predict4x4_armv6
;
rtcd
->
idct
.
idct16
=
vp8_short_idct4x4llm_v6_dual
;
rtcd
->
idct
.
iwalsh1
=
vp8_short_inv_walsh4x4_1_v6
;
rtcd
->
idct
.
iwalsh16
=
vp8_short_inv_walsh4x4_v6
;
rtcd
->
loopfilter
.
normal_mb_v
=
vp8_loop_filter_mbv_armv6
;
...
...
@@ -80,7 +79,6 @@ void vp8_arch_arm_common_init(VP8_COMMON *ctx)
rtcd
->
subpix
.
bilinear4x4
=
vp8_bilinear_predict4x4_neon
;
rtcd
->
idct
.
idct16
=
vp8_short_idct4x4llm_neon
;
rtcd
->
idct
.
iwalsh1
=
vp8_short_inv_walsh4x4_1_neon
;
rtcd
->
idct
.
iwalsh16
=
vp8_short_inv_walsh4x4_neon
;
rtcd
->
loopfilter
.
normal_mb_v
=
vp8_loop_filter_mbv_neon
;
...
...
vp8/common/arm/armv6/iwalsh_v6.asm
View file @
f46e17fd
...
...
@@ -9,7 +9,6 @@
;
EXPORT
|
vp8_short_inv_walsh4x4_v6
|
EXPORT
|
vp8_short_inv_walsh4x4_1_v6
|
ARM
REQUIRE8
...
...
@@ -17,19 +16,19 @@
AREA
|
.text
|
,
CODE
,
READONLY
; name this block of code
;short vp8_short_inv_walsh4x4_v6(short *input, short *
output
)
;short vp8_short_inv_walsh4x4_v6(short *input, short *
mb_dqcoeff
)
|
vp8_short_inv_walsh4x4_v6
|
PROC
stmdb
sp
!
,
{
r4
-
r1
1
,
lr
}
stmdb
sp
!
,
{
r4
-
r1
2
,
lr
}
ldr
r2
,
[
r0
]
,
#
4
; [1 | 0]
ldr
r3
,
[
r0
]
,
#
4
; [3 | 2]
ldr
r4
,
[
r0
]
,
#
4
; [5 | 4]
ldr
r5
,
[
r0
]
,
#
4
; [7 | 6]
ldr
r6
,
[
r0
]
,
#
4
; [9 | 8]
ldr
r7
,
[
r0
]
,
#
4
; [11 | 10]
ldr
r8
,
[
r0
]
,
#
4
; [13 | 12]
ldr
r9
,
[
r0
]
; [15 | 14]
ldr
r2
,
[
r0
,
#
0
]
; [1 | 0]
ldr
r3
,
[
r0
,
#
4
]
; [3 | 2]
ldr
r4
,
[
r0
,
#
8
]
; [5 | 4]
ldr
r5
,
[
r0
,
#
12
]
; [7 | 6]
ldr
r6
,
[
r0
,
#
16
]
; [9 | 8]
ldr
r7
,
[
r0
,
#
20
]
; [11 | 10]
ldr
r8
,
[
r0
,
#
24
]
; [13 | 12]
ldr
r9
,
[
r0
,
#
28
]
; [15 | 14]
qadd16
r10
,
r2
,
r8
; a1 [1+13 | 0+12]
qadd16
r11
,
r4
,
r6
; b1 [5+9 | 4+8]
...
...
@@ -69,24 +68,27 @@
qadd16
r4
,
r4
,
r10
; [b2+3|c2+3]
qadd16
r5
,
r5
,
r10
; [a2+3|d2+3]
asr
r12
,
r2
,
#
3
; [1 | x]
pkhtb
r12
,
r12
,
r3
,
asr
#
19
; [1 | 0]
lsl
lr
,
r3
,
#
16
; [~3 | x]
lsl
r2
,
r2
,
#
16
; [~2 | x]
asr
lr
,
lr
,
#
3
; [3 | x]
pkhtb
lr
,
lr
,
r2
,
asr
#
19
; [3 | 2]
asr
r2
,
r4
,
#
3
; [5 | x]
pkhtb
r2
,
r2
,
r5
,
asr
#
19
; [5 | 4]
lsl
r3
,
r5
,
#
16
; [~7 | x]
lsl
r4
,
r4
,
#
16
; [~6 | x]
asr
r3
,
r3
,
#
3
; [7 | x]
pkhtb
r3
,
r3
,
r4
,
asr
#
19
; [7 | 6]
str
r12
,
[
r1
],
#
4
str
lr
,
[
r1
],
#
4
str
r2
,
[
r1
],
#
4
str
r3
,
[
r1
],
#
4
asr
r12
,
r3
,
#
19
; [0]
strh
r12
,
[
r1
],
#
32
asr
lr
,
r2
,
#
19
; [1]
strh
lr
,
[
r1
],
#
32
sxth
r2
,
r2
sxth
r3
,
r3
asr
r2
,
r2
,
#
3
; [2]
strh
r2
,
[
r1
],
#
32
asr
r3
,
r3
,
#
3
; [3]
strh
r3
,
[
r1
],
#
32
asr
r12
,
r5
,
#
19
; [4]
strh
r12
,
[
r1
],
#
32
asr
lr
,
r4
,
#
19
; [5]
strh
lr
,
[
r1
],
#
32
sxth
r4
,
r4
sxth
r5
,
r5
asr
r4
,
r4
,
#
3
; [6]
strh
r4
,
[
r1
],
#
32
asr
r5
,
r5
,
#
3
; [7]
strh
r5
,
[
r1
],
#
32
qsubaddx
r2
,
r6
,
r7
; [c1|a1] [9-10 | 8+11]
qaddsubx
r3
,
r6
,
r7
; [b1|d1] [9+10 | 8-11]
...
...
@@ -103,50 +105,32 @@
qadd16
r8
,
r8
,
r10
; [b2+3|c2+3]
qadd16
r9
,
r9
,
r10
; [a2+3|d2+3]
asr
r2
,
r6
,
#
3
; [9 | x]
pkhtb
r2
,
r2
,
r7
,
asr
#
19
; [9 | 8]
lsl
r3
,
r7
,
#
16
; [~11| x]
lsl
r4
,
r6
,
#
16
; [~10| x]
asr
r3
,
r3
,
#
3
; [11 | x]
pkhtb
r3
,
r3
,
r4
,
asr
#
19
; [11 | 10]
asr
r4
,
r8
,
#
3
; [13 | x]
pkhtb
r4
,
r4
,
r9
,
asr
#
19
; [13 | 12]
lsl
r5
,
r9
,
#
16
; [~15| x]
lsl
r6
,
r8
,
#
16
; [~14| x]
asr
r5
,
r5
,
#
3
; [15 | x]
pkhtb
r5
,
r5
,
r6
,
asr
#
19
; [15 | 14]
str
r2
,
[
r1
],
#
4
str
r3
,
[
r1
],
#
4
str
r4
,
[
r1
],
#
4
str
r5
,
[
r1
]
ldmia
sp
!
,
{
r4
-
r11
,
pc
}
asr
r12
,
r7
,
#
19
; [8]
strh
r12
,
[
r1
],
#
32
asr
lr
,
r6
,
#
19
; [9]
strh
lr
,
[
r1
],
#
32
sxth
r6
,
r6
sxth
r7
,
r7
asr
r6
,
r6
,
#
3
; [10]
strh
r6
,
[
r1
],
#
32
asr
r7
,
r7
,
#
3
; [11]
strh
r7
,
[
r1
],
#
32
asr
r12
,
r9
,
#
19
; [12]
strh
r12
,
[
r1
],
#
32
asr
lr
,
r8
,
#
19
; [13]
strh
lr
,
[
r1
],
#
32
sxth
r8
,
r8
sxth
r9
,
r9
asr
r8
,
r8
,
#
3
; [14]
strh
r8
,
[
r1
],
#
32
asr
r9
,
r9
,
#
3
; [15]
strh
r9
,
[
r1
],
#
32
ldmia
sp
!
,
{
r4
-
r12
,
pc
}
ENDP
; |vp8_short_inv_walsh4x4_v6|
;void vp8_short_inv_walsh4x4_1_v6(short *input, short *output)
|
vp8_short_inv_walsh4x4_1_v6
|
PROC
ldrsh
r2
,
[
r0
]
; [0]
add
r2
,
r2
,
#
3
; [0] + 3
asr
r2
,
r2
,
#
3
; a1 ([0]+3) >> 3
lsl
r2
,
r2
,
#
16
; [a1 | x]
orr
r2
,
r2
,
r2
,
lsr
#
16
; [a1 | a1]
str
r2
,
[
r1
],
#
4
str
r2
,
[
r1
],
#
4
str
r2
,
[
r1
],
#
4
str
r2
,
[
r1
],
#
4
str
r2
,
[
r1
],
#
4
str
r2
,
[
r1
],
#
4
str
r2
,
[
r1
],
#
4
str
r2
,
[
r1
]
bx
lr
ENDP
; |vp8_short_inv_walsh4x4_1_v6|
; Constant Pool
c0x00030003
DCD
0x00030003
END
vp8/common/arm/idct_arm.h
View file @
f46e17fd
...
...
@@ -25,9 +25,6 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_v6);
#undef vp8_idct_idct1_scalar_add
#define vp8_idct_idct1_scalar_add vp8_dc_only_idct_add_v6
#undef vp8_idct_iwalsh1
#define vp8_idct_iwalsh1 vp8_short_inv_walsh4x4_1_v6
#undef vp8_idct_iwalsh16
#define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_v6
#endif
...
...
@@ -46,9 +43,6 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_neon);
#undef vp8_idct_idct1_scalar_add
#define vp8_idct_idct1_scalar_add vp8_dc_only_idct_add_neon
#undef vp8_idct_iwalsh1
#define vp8_idct_iwalsh1 vp8_short_inv_walsh4x4_1_neon
#undef vp8_idct_iwalsh16
#define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_neon
#endif
...
...
vp8/common/arm/neon/iwalsh_neon.asm
View file @
f46e17fd
...
...
@@ -8,7 +8,6 @@
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT
|
vp8_short_inv_walsh4x4_neon
|
EXPORT
|
vp8_short_inv_walsh4x4_1_neon
|
ARM
REQUIRE8
...
...
@@ -16,7 +15,7 @@
AREA
|
.text
|
,
CODE
,
READONLY
; name this block of code
;short vp8_short_inv_walsh4x4_neon(short *input, short *
output
)
;short vp8_short_inv_walsh4x4_neon(short *input, short *
mb_dqcoeff
)
|
vp8_short_inv_walsh4x4_neon
|
PROC
; read in all four lines of values: d0->d3
...
...
@@ -59,22 +58,30 @@
vshr.s16
q0
,
q0
,
#
3
;e/f >> 3
vshr.s16
q1
,
q1
,
#
3
;g/h >> 3
vst4.i16
{
d0
,
d1
,
d2
,
d3
}
,
[
r1@128
]
mov
r2
,
#
64
add
r3
,
r1
,
#
32
bx
lr
ENDP
; |vp8_short_inv_walsh4x4_neon|
vst1.i16
d0
[
0
],
[
r1
],
r2
vst1.i16
d1
[
0
],
[
r3
],
r2
vst1.i16
d2
[
0
],
[
r1
],
r2
vst1.i16
d3
[
0
],
[
r3
],
r2
vst1.i16
d0
[
1
],
[
r1
],
r2
vst1.i16
d1
[
1
],
[
r3
],
r2
vst1.i16
d2
[
1
],
[
r1
],
r2
vst1.i16
d3
[
1
],
[
r3
],
r2
vst1.i16
d0
[
2
],
[
r1
],
r2
vst1.i16
d1
[
2
],
[
r3
],
r2
vst1.i16
d2
[
2
],
[
r1
],
r2
vst1.i16
d3
[
2
],
[
r3
],
r2
vst1.i16
d0
[
3
],
[
r1
],
r2
vst1.i16
d1
[
3
],
[
r3
],
r2
vst1.i16
d2
[
3
],
[
r1
]
vst1.i16
d3
[
3
],
[
r3
]
;void vp8_short_inv_walsh4x4_1_neon(short *input, short *output)
|
vp8_short_inv_walsh4x4_1_neon
|
PROC
ldrsh
r2
,
[
r0
]
; load input[0]
add
r3
,
r2
,
#
3
; add 3
add
r2
,
r1
,
#
16
; base for last 8 output
asr
r0
,
r3
,
#
3
; right shift 3
vdup.16
q0
,
r0
; load and duplicate
vst1.16
{
q0
}
,
[
r1@128
]
; write back 8
vst1.16
{
q0
}
,
[
r2@128
]
; write back last 8
bx
lr
ENDP
; |vp8_short_inv_walsh4x4_
1_
neon|
ENDP
; |vp8_short_inv_walsh4x4_neon|
END
vp8/common/idct.h
View file @
f46e17fd
...
...
@@ -37,6 +37,10 @@
#define vp8_idct_idct16 vp8_short_idct4x4llm_c
#endif
extern
prototype_idct
(
vp8_idct_idct16
);
/* Add this prototype to prevent a compiler warning about an implicit
* declaration of the vp8_short_idct4x4llm_c function in dequantize.c
* when building, for example, the NEON-optimized version. */
extern
prototype_idct
(
vp8_short_idct4x4llm_c
);
#ifndef vp8_idct_idct1_scalar_add
#define vp8_idct_idct1_scalar_add vp8_dc_only_idct_add_c
...
...
vp8/common/idctllm.c
View file @
f46e17fd
...
...
@@ -137,8 +137,9 @@ void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr,
}
void
vp8_short_inv_walsh4x4_c
(
short
*
input
,
short
*
output
)
void
vp8_short_inv_walsh4x4_c
(
short
*
input
,
short
*
mb_dqcoeff
)
{
short
output
[
16
];
int
i
;
int
a1
,
b1
,
c1
,
d1
;
int
a2
,
b2
,
c2
,
d2
;
...
...
@@ -183,22 +184,21 @@ void vp8_short_inv_walsh4x4_c(short *input, short *output)
ip
+=
4
;
op
+=
4
;
}
for
(
i
=
0
;
i
<
16
;
i
++
)
{
mb_dqcoeff
[
i
*
16
]
=
output
[
i
];
}
}
void
vp8_short_inv_walsh4x4_1_c
(
short
*
input
,
short
*
output
)
void
vp8_short_inv_walsh4x4_1_c
(
short
*
input
,
short
*
mb_dqcoeff
)
{
int
i
;
int
a1
;
short
*
op
=
output
;
a1
=
((
input
[
0
]
+
3
)
>>
3
);
for
(
i
=
0
;
i
<
4
;
i
++
)
for
(
i
=
0
;
i
<
16
;
i
++
)
{
op
[
0
]
=
a1
;
op
[
1
]
=
a1
;
op
[
2
]
=
a1
;
op
[
3
]
=
a1
;
op
+=
4
;
mb_dqcoeff
[
i
*
16
]
=
a1
;
}
}
vp8/common/invtrans.c
View file @
f46e17fd
...
...
@@ -28,18 +28,6 @@ void vp8_inverse_transform_b(const vp8_idct_rtcd_vtable_t *rtcd, BLOCKD *b,
}
static
void
recon_dcblock
(
MACROBLOCKD
*
x
)
{
BLOCKD
*
b
=
&
x
->
block
[
24
];
int
i
;
for
(
i
=
0
;
i
<
16
;
i
++
)
{
x
->
block
[
i
].
dqcoeff
[
0
]
=
b
->
diff
[
i
];
}
}
void
vp8_inverse_transform_mby
(
const
vp8_idct_rtcd_vtable_t
*
rtcd
,
MACROBLOCKD
*
x
)
{
int
i
;
...
...
@@ -47,9 +35,7 @@ void vp8_inverse_transform_mby(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *
if
(
x
->
mode_info_context
->
mbmi
.
mode
!=
SPLITMV
)
{
/* do 2nd order transform on the dc block */
IDCT_INVOKE
(
rtcd
,
iwalsh16
)(
x
->
block
[
24
].
dqcoeff
,
x
->
block
[
24
].
diff
);
recon_dcblock
(
x
);
IDCT_INVOKE
(
rtcd
,
iwalsh16
)(
x
->
block
[
24
].
dqcoeff
,
x
->
dqcoeff
);
}
for
(
i
=
0
;
i
<
16
;
i
++
)
...
...
vp8/common/x86/idct_x86.h
View file @
f46e17fd
...
...
@@ -24,7 +24,6 @@ extern prototype_idct(vp8_short_idct4x4llm_mmx);
extern
prototype_idct_scalar_add
(
vp8_dc_only_idct_add_mmx
);
extern
prototype_second_order
(
vp8_short_inv_walsh4x4_mmx
);
extern
prototype_second_order
(
vp8_short_inv_walsh4x4_1_mmx
);
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_idct_idct16
...
...
@@ -36,9 +35,6 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_1_mmx);
#undef vp8_idct_iwalsh16
#define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_mmx
#undef vp8_idct_iwalsh1
#define vp8_idct_iwalsh1 vp8_short_inv_walsh4x4_1_mmx
#endif
#endif
...
...
vp8/common/x86/iwalsh_mmx.asm
View file @
f46e17fd
...
...
@@ -11,42 +11,6 @@
%include "vpx_ports/x86_abi_support.asm"
;void vp8_short_inv_walsh4x4_1_mmx(short *input, short *output)
global
sym
(
vp8_short_inv_walsh4x4_1_mmx
)
sym
(
vp8_short_inv_walsh4x4_1_mmx
):
push
rbp
mov
rbp
,
rsp
SHADOW_ARGS_TO_STACK
2
push
rsi
push
rdi
; end prolog
mov
rsi
,
arg
(
0
)
mov
rax
,
3
mov
rdi
,
arg
(
1
)
add
rax
,
[
rsi
]
;input[0] + 3
movd
mm0
,
eax
punpcklwd
mm0
,
mm0
;x x val val
punpckldq
mm0
,
mm0
;val val val val
psraw
mm0
,
3
;(input[0] + 3) >> 3
movq
[
rdi
+
0
],
mm0
movq
[
rdi
+
8
],
mm0
movq
[
rdi
+
16
],
mm0
movq
[
rdi
+
24
],
mm0
; begin epilog
pop
rdi
pop
rsi
UNSHADOW_ARGS
pop
rbp
ret
;void vp8_short_inv_walsh4x4_mmx(short *input, short *mb_dqcoeff)
global
sym
(
vp8_short_inv_walsh4x4_mmx
)
sym
(
vp8_short_inv_walsh4x4_mmx
):
...
...
@@ -159,10 +123,50 @@ sym(vp8_short_inv_walsh4x4_mmx):
psraw
mm2
,
3
psraw
mm3
,
3
movq
[
rdi
+
0
],
mm0
movq
[
rdi
+
8
],
mm1
movq
[
rdi
+
16
],
mm2
movq
[
rdi
+
24
],
mm3
; movq [rdi + 0], mm0
; movq [rdi + 8], mm1
; movq [rdi + 16], mm2
; movq [rdi + 24], mm3
movd
eax
,
mm0
psrlq
mm0
,
32
mov
word
ptr
[
rdi
+
32
*
0
],
ax
shr
eax
,
16
mov
word
ptr
[
rdi
+
32
*
1
],
ax
movd
eax
,
mm0
mov
word
ptr
[
rdi
+
32
*
2
],
ax
shr
eax
,
16
mov
word
ptr
[
rdi
+
32
*
3
],
ax
movd
ecx
,
mm1
psrlq
mm1
,
32
mov
word
ptr
[
rdi
+
32
*
4
],
cx
shr
ecx
,
16
mov
word
ptr
[
rdi
+
32
*
5
],
cx
movd
ecx
,
mm1
mov
word
ptr
[
rdi
+
32
*
6
],
cx
shr
ecx
,
16
mov
word
ptr
[
rdi
+
32
*
7
],
cx
movd
eax
,
mm2
psrlq
mm2
,
32
mov
word
ptr
[
rdi
+
32
*
8
],
ax
shr
eax
,
16
mov
word
ptr
[
rdi
+
32
*
9
],
ax
movd
eax
,
mm2
mov
word
ptr
[
rdi
+
32
*
10
],
ax
shr
eax
,
16
mov
word
ptr
[
rdi
+
32
*
11
],
ax
movd
ecx
,
mm3
psrlq
mm3
,
32
mov
word
ptr
[
rdi
+
32
*
12
],
cx
shr
ecx
,
16
mov
word
ptr
[
rdi
+
32
*
13
],
cx
movd
ecx
,
mm3
mov
word
ptr
[
rdi
+
32
*
14
],
cx
shr
ecx
,
16
mov
word
ptr
[
rdi
+
32
*
15
],
cx
; begin epilog
pop
rdi
...
...
vp8/common/x86/iwalsh_sse2.asm
View file @
f46e17fd
...
...
@@ -96,8 +96,50 @@ sym(vp8_short_inv_walsh4x4_sse2):
psraw
xmm5
,
3
psraw
xmm1
,
3
movdqa
[
rdi
+
0
],
xmm5
movdqa
[
rdi
+
16
],
xmm1
;; movdqa [rdi + 0], xmm5
;; movdqa [rdi + 16], xmm1
movd
eax
,
xmm5
psrldq
xmm5
,
4
mov
word
ptr
[
rdi
+
32
*
0
],
ax
shr
eax
,
16
mov
word
ptr
[
rdi
+
32
*
1
],
ax
movd
eax
,
xmm5
psrldq
xmm5
,
4
mov
word
ptr
[
rdi
+
32
*
2
],
ax
shr
eax
,
16
mov
word
ptr
[
rdi
+
32
*
3
],
ax
movd
eax
,
xmm5
psrldq
xmm5
,
4
mov
word
ptr
[
rdi
+
32
*
4
],
ax
shr
eax
,
16
mov
word
ptr
[
rdi
+
32
*
5
],
ax
movd
eax
,
xmm5
mov
word
ptr
[
rdi
+
32
*
6
],
ax
shr
eax
,
16
mov
word
ptr
[
rdi
+
32
*
7
],
ax
movd
eax
,
xmm1
psrldq
xmm1
,
4
mov
word
ptr
[
rdi
+
32
*
8
],
ax
shr
eax
,
16
mov
word
ptr
[
rdi
+
32
*
9
],
ax
movd
eax
,
xmm1
psrldq
xmm1
,
4
mov
word
ptr
[
rdi
+
32
*
10
],
ax
shr
eax
,
16
mov
word
ptr
[
rdi
+
32
*
11
],
ax
movd
eax
,
xmm1
psrldq
xmm1
,
4
mov
word
ptr
[
rdi
+
32
*
12
],
ax
shr
eax
,
16
mov
word
ptr
[
rdi
+
32
*
13
],
ax
movd
eax
,
xmm1
mov
word
ptr
[
rdi
+
32
*
14
],
ax
shr
eax
,
16
mov
word
ptr
[
rdi
+
32
*
15
],
ax
; begin epilog
pop
rdi
...
...
vp8/common/x86/x86_systemdependent.c
View file @
f46e17fd
...
...
@@ -40,9 +40,6 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
rtcd
->
idct
.
idct16
=
vp8_short_idct4x4llm_mmx
;
rtcd
->
idct
.
idct1_scalar_add
=
vp8_dc_only_idct_add_mmx
;
rtcd
->
idct
.
iwalsh16
=
vp8_short_inv_walsh4x4_mmx
;
rtcd
->
idct
.
iwalsh1
=
vp8_short_inv_walsh4x4_1_mmx
;
rtcd
->
recon
.
copy8x8
=
vp8_copy_mem8x8_mmx
;
rtcd
->
recon
.
copy8x4
=
vp8_copy_mem8x4_mmx
;
...
...
vp8/decoder/arm/arm_dsystemdependent.c
View file @
f46e17fd
...
...
@@ -32,8 +32,6 @@ void vp8_arch_arm_decode_init(VP8D_COMP *pbi)
{
pbi
->
dequant
.
block
=
vp8_dequantize_b_v6
;
pbi
->
dequant
.
idct_add
=
vp8_dequant_idct_add_v6
;
pbi
->
dequant
.
dc_idct_add
=
vp8_dequant_dc_idct_add_v6
;
pbi
->
dequant
.
dc_idct_add_y_block
=
vp8_dequant_dc_idct_add_y_block_v6
;
pbi
->
dequant
.
idct_add_y_block
=
vp8_dequant_idct_add_y_block_v6
;
pbi
->
dequant
.
idct_add_uv_block
=
vp8_dequant_idct_add_uv_block_v6
;
}
...
...
@@ -44,9 +42,6 @@ void vp8_arch_arm_decode_init(VP8D_COMP *pbi)
{
pbi
->
dequant
.
block
=
vp8_dequantize_b_neon
;
pbi
->
dequant
.
idct_add
=
vp8_dequant_idct_add_neon
;
/*This is not used: NEON always dequants two blocks at once.
pbi->dequant.dc_idct_add = vp8_dequant_dc_idct_add_neon;*/
pbi
->
dequant
.
dc_idct_add_y_block
=
vp8_dequant_dc_idct_add_y_block_neon
;
pbi
->
dequant
.
idct_add_y_block
=
vp8_dequant_idct_add_y_block_neon
;
pbi
->
dequant
.
idct_add_uv_block
=
vp8_dequant_idct_add_uv_block_neon
;
}
...
...
vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm
deleted
100644 → 0
View file @
e2bacd58
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license and patent
; grant that can be found in the LICENSE file in the root of the source
; tree. All contributing project authors may be found in the AUTHORS
; file in the root of the source tree.
;
EXPORT
|
vp8_dequant_dc_idct_add_v6
|
AREA
|
.text
|
,
CODE
,
READONLY
;void vp8_dequant_dc_idct_add_v6(short *input, short *dq,
; unsigned char *dest, int stride, int Dc)
; r0 = input
; r1 = dq
; r2 = dst
; r3 = stride
; sp + 36 = Dc
|
vp8_dequant_dc_idct_add_v6
|
PROC
stmdb
sp
!
,
{
r4
-
r11
,
lr
}
ldr
r6
,
[
sp
,
#
36
]
ldr
r4
,
[
r0
]
;input
ldr
r5
,
[
r1
],
#
4
;dq
sub
sp
,
sp
,
#
4
str
r3
,
[
sp
]
smultt
r7
,
r4
,
r5
ldr
r4
,
[
r0
,
#
4
]
;input
ldr
r5
,
[
r1
],
#
4
;dq
strh
r6
,
[
r0
],
#
2
strh
r7
,
[
r0
],
#
2
smulbb
r6
,
r4
,
r5
smultt
r7
,
r4
,
r5
ldr
r4
,
[
r0
,
#
4
]
;input
ldr
r5
,
[
r1
],
#
4
;dq
strh
r6
,
[
r0
],
#
2
strh
r7
,
[
r0
],
#
2
mov
r12
,
#
3
vp8_dequant_dc_add_loop
smulbb
r6
,
r4
,
r5
smultt
r7
,
r4
,
r5
ldr
r4
,
[
r0
,
#
4
]
;input
ldr
r5
,
[
r1
],
#
4
;dq