Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Yushin Cho
aom-rav1e
Commits
a9056729
Commit
a9056729
authored
Dec 02, 2012
by
Johann
Committed by
John Koleszar
Dec 05, 2012
Browse files
Remove ARM optimizations from VP9
Change-Id: I9f0ae635fb9a95c4aa1529c177ccb07e2b76970b
parent
4a9b9547
Changes
112
Expand all
Hide whitespace changes
Inline
Side-by-side
libs.mk
View file @
a9056729
...
...
@@ -109,7 +109,6 @@ ifeq ($(CONFIG_VP9_ENCODER),yes)
CODEC_SRCS-yes
+=
$(
addprefix
$(VP9_PREFIX)
,
$(
call
enabled,VP9_CX_SRCS
))
CODEC_EXPORTS-yes
+=
$(
addprefix
$(VP9_PREFIX)
,
$(VP9_CX_EXPORTS)
)
CODEC_SRCS-yes
+=
$(VP9_PREFIX)
vp9cx.mk vpx/vp8.h vpx/vp8cx.h
CODEC_SRCS-$(ARCH_ARM)
+=
$(VP9_PREFIX)
vp98cx_arm.mk
INSTALL-LIBS-yes
+=
include/vpx/vp8.h include/vpx/vp8cx.h
INSTALL_MAPS
+=
include/vpx/%
$(SRC_PATH_BARE)
/
$(VP9_PREFIX)
/%
CODEC_DOC_SRCS
+=
vpx/vp8.h vpx/vp8cx.h
...
...
vp8/vp8dx.mk
View file @
a9056729
...
...
@@ -18,30 +18,6 @@ VP8_DX_SRCS_REMOVE-no += $(VP8_COMMON_SRCS_REMOVE-no)
VP8_DX_SRCS-yes
+=
vp8_dx_iface.c
# common
#define ARM
#define DISABLE_THREAD
#INCLUDES += algo/vpx_common/vpx_mem/include
#INCLUDES += common
#INCLUDES += common
#INCLUDES += common
#INCLUDES += common
#INCLUDES += decoder
# decoder
#define ARM
#define DISABLE_THREAD
#INCLUDES += algo/vpx_common/vpx_mem/include
#INCLUDES += common
#INCLUDES += common
#INCLUDES += common
#INCLUDES += common
#INCLUDES += decoder
VP8_DX_SRCS-yes
+=
decoder/asm_dec_offsets.c
VP8_DX_SRCS-yes
+=
decoder/dboolhuff.c
VP8_DX_SRCS-yes
+=
decoder/decodemv.c
...
...
vp9/common/arm/armv6/vp9_bilinearfilter_v6.asm
deleted
100644 → 0
View file @
4a9b9547
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
; ARMv6 bilinear filter (armasm syntax): exported symbols and code area.
    EXPORT  |vp9_filter_block2d_bil_first_pass_armv6|
    EXPORT  |vp9_filter_block2d_bil_second_pass_armv6|

    AREA    |.text|, CODE, READONLY         ; code section for this file

;-----------------------------------------------------------------------
; vp9_filter_block2d_bil_first_pass_armv6
;
; Horizontal 2-tap filter pass. Reads 8-bit pixels, writes 16-bit results
; TRANSPOSED into dst_ptr so that the second pass can walk them linearly.
;
; In:    r0      unsigned char  *src_ptr
;        r1      unsigned short *dst_ptr
;        r2      unsigned int    src_pitch
;        r3      unsigned int    height
;        [sp]    unsigned int    width
;        [sp+4]  const short    *vp9_filter   (two 16-bit taps, read as one word)
;
; Register roles after setup:
;        r4  = width                 r5  = packed filter taps
;        r2  = src_pitch - width     r3  = dst step in bytes (2*height + 2)
;        r11 = dst start of the current source row
;        r12 = rows remaining        lr  = groups of 4 pixels remaining
;
; Clobb: r0-r3, r12, flags. r4-r11 are saved and restored.
; NOTE(review): the inner loop counts width/4 groups and tests for zero
;        only after decrementing, so width is presumably a non-zero
;        multiple of 4 and height non-zero -- confirm against callers.
;-----------------------------------------------------------------------
|vp9_filter_block2d_bil_first_pass_armv6| PROC
    stmfd   sp!, {r4-r11, lr}               ; 9 words pushed: stack args now at sp+36

    ldr     r11, [sp, #40]                  ; r11 = vp9_filter
    ldr     r4, [sp, #36]                   ; r4  = width

    mov     r12, r3                         ; r12 = row counter (height)

    add     r7, r2, r4                      ; src_pitch + width
    pld     [r0, r7]                        ; cache hint for the following row

    sub     r2, r2, r4                      ; r2 = src_pitch - width (row-to-row skip)

    ldr     r5, [r11]                       ; r5 = both 16-bit taps in one word

    mov     r3, r3, lsl #1                  ; r3 = height * 2 bytes
    add     r3, r3, #2                      ; +2 bytes; per the original note this keeps
                                            ; each output line 4-byte aligned because the
                                            ; caller passes (height+1) rows -- confirm

    mov     r11, r1                         ; remember where this row's output starts

    cmp     r5, #128                        ; packed tap word == 128: nothing to filter,
    beq     bil_null_1st_filter             ; take the plain copy path

|bil_height_loop_1st_v6|
    ldrb    r6, [r0]                        ; prime src[0..2] for the first group
    ldrb    r7, [r0, #1]
    ldrb    r8, [r0, #2]
    mov     lr, r4, lsr #2                  ; four pixels are produced per iteration

|bil_width_loop_1st_v6|
    ldrb    r9, [r0, #3]
    ldrb    r10, [r0, #4]

    pkhbt   r6, r6, r7, lsl #16             ; r6 = src[1]:src[0]
    pkhbt   r7, r7, r8, lsl #16             ; r7 = src[2]:src[1]

    smuad   r6, r6, r5                      ; low*low + high*high = 2-tap sum
    pkhbt   r8, r8, r9, lsl #16             ; r8 = src[3]:src[2]
    smuad   r7, r7, r5
    pkhbt   r9, r9, r10, lsl #16            ; r9 = src[4]:src[3]

    smuad   r8, r8, r5
    smuad   r9, r9, r5

    add     r0, r0, #4                      ; consumed four source pixels
    subs    lr, lr, #1                      ; flags stay live until the bne below

    add     r6, r6, #0x40                   ; rounding term for the >>7
    add     r7, r7, #0x40
    usat    r6, #16, r6, asr #7             ; shift and saturate to unsigned 16 bits
    usat    r7, #16, r7, asr #7

    strh    r6, [r1], r3                    ; transposed store: step dst by r3 bytes

    add     r8, r8, #0x40                   ; same rounding for outputs 2 and 3
    strh    r7, [r1], r3
    add     r9, r9, #0x40
    usat    r8, #16, r8, asr #7
    usat    r9, #16, r9, asr #7

    strh    r8, [r1], r3

    ldrneb  r6, [r0]                        ; more groups left: prime the next one
    strh    r9, [r1], r3

    ldrneb  r7, [r0, #1]
    ldrneb  r8, [r0, #2]

    bne     bil_width_loop_1st_v6

    add     r0, r0, r2                      ; r0 -> start of the next source row
    subs    r12, r12, #1                    ; flags stay live until the bne below

    add     r9, r2, r4, lsl #1              ; (src_pitch - width) + 2*width
    pld     [r0, r9]                        ; cache hint one pitch (plus width) ahead

    add     r11, r11, #2                    ; next source row -> next 16-bit slot
    mov     r1, r11

    bne     bil_height_loop_1st_v6

    ldmfd   sp!, {r4-r11, pc}               ; restore and return

; Copy path: widen each source byte to 16 bits, same transposed layout.
|bil_null_1st_filter|
|bil_height_loop_null_1st|
    mov     lr, r4, lsr #2                  ; groups of four pixels in this row

|bil_width_loop_null_1st|
    ldrb    r6, [r0]                        ; fetch four source bytes
    ldrb    r7, [r0, #1]
    ldrb    r8, [r0, #2]
    ldrb    r9, [r0, #3]

    strh    r6, [r1], r3                    ; store into the intermediate buffer
    add     r0, r0, #4
    strh    r7, [r1], r3
    subs    lr, lr, #1
    strh    r8, [r1], r3
    strh    r9, [r1], r3

    bne     bil_width_loop_null_1st

    subs    r12, r12, #1
    add     r0, r0, r2                      ; r0 -> start of the next source row
    add     r11, r11, #2                    ; next source row -> next 16-bit slot
    mov     r1, r11

    bne     bil_height_loop_null_1st

    ldmfd   sp!, {r4-r11, pc}               ; restore and return

    ENDP
; |vp9_filter_block2d_bil_first_pass_armv6|
;-----------------------------------------------------------------------
; vp9_filter_block2d_bil_second_pass_armv6
;
; Second 2-tap filter pass. Reads the 16-bit transposed values written by
; the first pass and writes 8-bit results, stepping dst by dst_pitch after
; every byte so the output is transposed back.
;
; In:    r0      unsigned short *src_ptr
;        r1      unsigned char  *dst_ptr
;        r2      int             dst_pitch
;        r3      unsigned int    height
;        [sp]    unsigned int    width
;        [sp+4]  const short    *vp9_filter   (two 16-bit taps, read as one word)
;
; Register roles after setup:
;        r5  = packed filter taps    r11 = dst start of the current line
;        r12 = lines remaining (width, because the input is transposed)
;        lr  = groups of 4 outputs remaining (height / 4)
;
; Clobb: r0, r1, r12, flags. r4-r11 are saved and restored.
; NOTE(review): the loops test for zero only after decrementing, so width
;        is presumably non-zero and height a non-zero multiple of 4 --
;        confirm against callers.
;-----------------------------------------------------------------------
|vp9_filter_block2d_bil_second_pass_armv6| PROC
    stmfd   sp!, {r4-r11, lr}               ; 9 words pushed: stack args now at sp+36

    ldr     r11, [sp, #40]                  ; r11 = vp9_filter
    ldr     r4, [sp, #36]                   ; r4  = width

    ldr     r5, [r11]                       ; r5 = both 16-bit taps in one word
    mov     r12, r4                         ; outer count is width (data is transposed)
    mov     r11, r1                         ; remember where this line's output starts

    cmp     r5, #128                        ; packed tap word == 128: nothing to filter,
    beq     bil_null_2nd_filter             ; take the plain copy path

|bil_height_loop_2nd|
    ldr     r6, [r0]                        ; r6  = src[1]:src[0]
    ldr     r8, [r0, #4]                    ; r8  = src[3]:src[2]
    ldrh    r10, [r0, #8]                   ; r10 = src[4]
    mov     lr, r3, lsr #2                  ; four outputs are produced per iteration

|bil_width_loop_2nd|
    pkhtb   r7, r6, r8                      ; r7 = src[1]:src[2]
    pkhtb   r9, r8, r10                     ; r9 = src[3]:src[4]

    smuad   r6, r6, r5                      ; output 0 from src[0], src[1]
    smuad   r8, r8, r5                      ; output 2 from src[2], src[3]

    subs    lr, lr, #1                      ; flags stay live until the bne below

    smuadx  r7, r7, r5                      ; halves are swapped in r7, so use the
    smuadx  r9, r9, r5                      ; exchanging multiply for outputs 1 and 3

    add     r0, r0, #8                      ; consumed four 16-bit inputs

    add     r6, r6, #0x40                   ; rounding term for the >>7
    add     r7, r7, #0x40
    usat    r6, #8, r6, asr #7              ; shift and saturate to unsigned 8 bits
    usat    r7, #8, r7, asr #7
    strb    r6, [r1], r2                    ; store, then step dst by dst_pitch

    add     r8, r8, #0x40                   ; same rounding for outputs 2 and 3
    strb    r7, [r1], r2
    add     r9, r9, #0x40
    usat    r8, #8, r8, asr #7
    usat    r9, #8, r9, asr #7
    strb    r8, [r1], r2

    ldrne   r6, [r0]                        ; more groups left: prime the next one
    strb    r9, [r1], r2
    ldrne   r8, [r0, #4]
    ldrneh  r10, [r0, #8]

    bne     bil_width_loop_2nd

    subs    r12, r12, #1
    add     r0, r0, #4                      ; skip 4 more bytes to reach the next input line
    add     r11, r11, #1                    ; next input line -> next output byte column
    mov     r1, r11

    bne     bil_height_loop_2nd
    ldmfd   sp!, {r4-r11, pc}               ; restore and return

; Copy path: narrow each 16-bit input to its low byte, same output layout.
|bil_null_2nd_filter|
|bil_height_loop_null_2nd|
    mov     lr, r3, lsr #2                  ; groups of four outputs in this line

|bil_width_loop_null_2nd|
    ldr     r6, [r0], #4                    ; two 16-bit inputs per word
    subs    lr, lr, #1
    ldr     r8, [r0], #4

    strb    r6, [r1], r2                    ; low byte of the low halfword
    mov     r7, r6, lsr #16                 ; bring the high halfword down
    strb    r7, [r1], r2
    mov     r9, r8, lsr #16
    strb    r8, [r1], r2
    strb    r9, [r1], r2

    bne     bil_width_loop_null_2nd

    subs    r12, r12, #1
    add     r0, r0, #4                      ; skip 4 more bytes to reach the next input line
    add     r11, r11, #1                    ; next input line -> next output byte column
    mov     r1, r11

    bne     bil_height_loop_null_2nd

    ldmfd   sp!, {r4-r11, pc}               ; restore and return
    ENDP
; |vp9_filter_block2d_bil_second_pass_armv6|
    END
vp9/common/arm/armv6/vp9_copymem16x16_v6.asm
deleted
100644 → 0
View file @
4a9b9547
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
; ARMv6 16x16 block copy (armasm syntax).
    EXPORT  |vp9_copy_mem16x16_v6|
    ; ARM
    ; REQUIRE8
    ; PRESERVE8

    AREA    Block, CODE, READONLY           ; name this block of code

;-----------------------------------------------------------------------
; void vp9_copy_mem16x16_v6(unsigned char *src, int src_stride,
;                           unsigned char *dst, int dst_stride)
;
; Copies 16 rows of 16 bytes from src to dst.
;
; In:    r0 = src, r1 = src_stride, r2 = dst, r3 = dst_stride
; Clobb: r0-r3, r12, flags. r4-r7 are saved and restored.
;
; The copy width is chosen from the alignment of src only:
;        16-byte aligned -> one 4-word ldm/stm per row
;         8-byte aligned -> two 2-word ldm/stm per row
;         4-byte aligned -> word ldr/str
;         otherwise      -> byte ldrb/strb
; NOTE(review): dst alignment is never tested, so the word/multi-word
;        paths presumably rely on dst (and both strides) being suitably
;        aligned -- confirm against callers.
;
; Every exit uses "bx lr" rather than "mov pc, lr" so the routine also
; returns correctly to a Thumb-state caller on ARMv6.
;-----------------------------------------------------------------------
|vp9_copy_mem16x16_v6| PROC
    stmdb   sp!, {r4-r7}                    ; push {r4-r7}

    pld     [r0, #31]                       ; cache hint for the bytes at src+31

    ands    r4, r0, #15                     ; r4 is scratch; only the Z flag matters
    beq     copy_mem16x16_fast

    ands    r4, r0, #7
    beq     copy_mem16x16_8

    ands    r4, r0, #3
    beq     copy_mem16x16_4

    ; ---- unaligned src: copy one byte at a time ----
    ldrb    r4, [r0]                        ; prime the first four bytes of row 0
    ldrb    r5, [r0, #1]
    ldrb    r6, [r0, #2]
    ldrb    r7, [r0, #3]

    mov     r12, #16                        ; r12 = rows remaining

copy_mem16x16_1_loop
    strb    r4, [r2]
    strb    r5, [r2, #1]
    strb    r6, [r2, #2]
    strb    r7, [r2, #3]

    ldrb    r4, [r0, #4]
    ldrb    r5, [r0, #5]
    ldrb    r6, [r0, #6]
    ldrb    r7, [r0, #7]

    subs    r12, r12, #1                    ; flags stay live until the bne below

    strb    r4, [r2, #4]
    strb    r5, [r2, #5]
    strb    r6, [r2, #6]
    strb    r7, [r2, #7]

    ldrb    r4, [r0, #8]
    ldrb    r5, [r0, #9]
    ldrb    r6, [r0, #10]
    ldrb    r7, [r0, #11]

    strb    r4, [r2, #8]
    strb    r5, [r2, #9]
    strb    r6, [r2, #10]
    strb    r7, [r2, #11]

    ldrb    r4, [r0, #12]
    ldrb    r5, [r0, #13]
    ldrb    r6, [r0, #14]
    ldrb    r7, [r0, #15]

    add     r0, r0, r1                      ; src -> next row

    strb    r4, [r2, #12]
    strb    r5, [r2, #13]
    strb    r6, [r2, #14]
    strb    r7, [r2, #15]

    add     r2, r2, r3                      ; dst -> next row

    ldrneb  r4, [r0]                        ; rows left: prime the next row's first bytes
    ldrneb  r5, [r0, #1]
    ldrneb  r6, [r0, #2]
    ldrneb  r7, [r0, #3]

    pld     [r0, #31]                       ; cache hint for the new row

    bne     copy_mem16x16_1_loop

    ldmia   sp!, {r4-r7}                    ; pop {r4-r7}
    bx      lr                              ; interworking-safe return

    ; ---- 4-byte aligned src: copy one word at a time ----
copy_mem16x16_4
    ldr     r4, [r0]                        ; prime row 0
    ldr     r5, [r0, #4]
    ldr     r6, [r0, #8]
    ldr     r7, [r0, #12]

    mov     r12, #16                        ; r12 = rows remaining

copy_mem16x16_4_loop
    subs    r12, r12, #1                    ; flags stay live until the bne below
    add     r0, r0, r1                      ; src -> next row

    str     r4, [r2]
    str     r5, [r2, #4]
    str     r6, [r2, #8]
    str     r7, [r2, #12]

    add     r2, r2, r3                      ; dst -> next row

    ldrne   r4, [r0]                        ; rows left: load the next row
    ldrne   r5, [r0, #4]
    ldrne   r6, [r0, #8]
    ldrne   r7, [r0, #12]

    pld     [r0, #31]                       ; cache hint for the new row

    bne     copy_mem16x16_4_loop

    ldmia   sp!, {r4-r7}                    ; pop {r4-r7}
    bx      lr                              ; interworking-safe return

    ; ---- 8-byte aligned src: copy two words at a time ----
copy_mem16x16_8
    sub     r1, r1, #16                     ; the writeback loads/stores below already
    sub     r3, r3, #16                     ; advance 16 bytes, so pre-subtract from strides

    mov     r12, #16                        ; r12 = rows remaining

copy_mem16x16_8_loop
    ldmia   r0!, {r4-r5}
    ldmia   r0!, {r6-r7}

    add     r0, r0, r1                      ; src -> next row

    stmia   r2!, {r4-r5}
    subs    r12, r12, #1                    ; flags stay live until the bne below
    stmia   r2!, {r6-r7}

    add     r2, r2, r3                      ; dst -> next row

    pld     [r0, #31]                       ; cache hint for the new row
    bne     copy_mem16x16_8_loop

    ldmia   sp!, {r4-r7}                    ; pop {r4-r7}
    bx      lr                              ; interworking-safe return

    ; ---- 16-byte aligned src: copy a whole row per ldm/stm ----
copy_mem16x16_fast
    mov     r12, #16                        ; r12 = rows remaining

copy_mem16x16_fast_loop
    ldmia   r0, {r4-r7}                     ; no writeback, so strides are used as-is
    add     r0, r0, r1                      ; src -> next row

    subs    r12, r12, #1                    ; flags stay live until the bne below
    stmia   r2, {r4-r7}
    add     r2, r2, r3                      ; dst -> next row

    pld     [r0, #31]                       ; cache hint for the new row
    bne     copy_mem16x16_fast_loop

    ldmia   sp!, {r4-r7}                    ; pop {r4-r7}
    bx      lr                              ; interworking-safe return

    ENDP
; |vp9_copy_mem16x16_v6|
    END
vp9/common/arm/armv6/vp9_copymem8x4_v6.asm
deleted
100644 → 0
View file @
4a9b9547
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
; ARMv6 8x4 block copy (armasm syntax).
    EXPORT  |vp9_copy_mem8x4_v6|
    ; ARM
    ; REQUIRE8
    ; PRESERVE8

    AREA    Block, CODE, READONLY           ; name this block of code

;-----------------------------------------------------------------------
; void vp9_copy_mem8x4_v6(unsigned char *src, int src_stride,
;                         unsigned char *dst, int dst_stride)
;
; Copies 4 rows of 8 bytes from src to dst.
;
; In:    r0 = src, r1 = src_stride, r2 = dst, r3 = dst_stride
; Clobb: r0, r2, r12, flags. r4-r5 are saved and restored.
;
; The copy width is chosen from the alignment of src only:
;         8-byte aligned -> one 2-word ldm/stm per row
;         4-byte aligned -> word ldr/str
;         otherwise      -> byte ldrb/strb
; NOTE(review): dst alignment is never tested, so the word/multi-word
;        paths presumably rely on dst (and both strides) being suitably
;        aligned -- confirm against callers.
;
; Every exit uses "bx lr" rather than "mov pc, lr" so the routine also
; returns correctly to a Thumb-state caller on ARMv6.
;-----------------------------------------------------------------------
|vp9_copy_mem8x4_v6| PROC
    stmdb   sp!, {r4-r5}                    ; push {r4-r5}

    pld     [r0]                            ; cache hints for rows 0, 1 and 2
    pld     [r0, r1]
    pld     [r0, r1, lsl #1]

    ands    r4, r0, #7                      ; r4 is scratch; only the Z flag matters
    beq     copy_mem8x4_fast

    ands    r4, r0, #3
    beq     copy_mem8x4_4

    ; ---- unaligned src: copy one byte at a time ----
    ldrb    r4, [r0]                        ; prime the first two bytes of row 0
    ldrb    r5, [r0, #1]

    mov     r12, #4                         ; r12 = rows remaining

copy_mem8x4_1_loop
    strb    r4, [r2]
    strb    r5, [r2, #1]

    ldrb    r4, [r0, #2]
    ldrb    r5, [r0, #3]

    subs    r12, r12, #1                    ; flags stay live until the bne below

    strb    r4, [r2, #2]
    strb    r5, [r2, #3]

    ldrb    r4, [r0, #4]
    ldrb    r5, [r0, #5]

    strb    r4, [r2, #4]
    strb    r5, [r2, #5]

    ldrb    r4, [r0, #6]
    ldrb    r5, [r0, #7]

    add     r0, r0, r1                      ; src -> next row

    strb    r4, [r2, #6]
    strb    r5, [r2, #7]

    add     r2, r2, r3                      ; dst -> next row

    ldrneb  r4, [r0]                        ; rows left: prime the next row's first bytes
    ldrneb  r5, [r0, #1]

    bne     copy_mem8x4_1_loop

    ldmia   sp!, {r4-r5}                    ; pop {r4-r5}
    bx      lr                              ; interworking-safe return

    ; ---- 4-byte aligned src: copy one word at a time ----
copy_mem8x4_4
    ldr     r4, [r0]                        ; prime row 0
    ldr     r5, [r0, #4]

    mov     r12, #4                         ; r12 = rows remaining

copy_mem8x4_4_loop
    subs    r12, r12, #1                    ; flags stay live until the bne below
    add     r0, r0, r1                      ; src -> next row

    str     r4, [r2]
    str     r5, [r2, #4]

    add     r2, r2, r3                      ; dst -> next row

    ldrne   r4, [r0]                        ; rows left: load the next row
    ldrne   r5, [r0, #4]

    bne     copy_mem8x4_4_loop

    ldmia   sp!, {r4-r5}                    ; pop {r4-r5}
    bx      lr                              ; interworking-safe return

    ; ---- 8-byte aligned src: copy a whole row per ldm/stm ----
copy_mem8x4_fast
    mov     r12, #4                         ; r12 = rows remaining

copy_mem8x4_fast_loop
    ldmia   r0, {r4-r5}                     ; no writeback, so strides are used as-is
    add     r0, r0, r1                      ; src -> next row

    subs    r12, r12, #1                    ; flags stay live until the bne below
    stmia   r2, {r4-r5}
    add     r2, r2, r3                      ; dst -> next row

    bne     copy_mem8x4_fast_loop

    ldmia   sp!, {r4-r5}                    ; pop {r4-r5}
    bx      lr                              ; interworking-safe return

    ENDP
; |vp9_copy_mem8x4_v6|
    END
vp9/common/arm/armv6/vp9_copymem8x8_v6.asm
deleted
100644 → 0
View file @
4a9b9547
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may