Commit 57e72208 authored by Johann's avatar Johann Committed by Gerrit Code Review
Browse files

Merge "Remove ARM optimizations from VP9" into experimental

parents 514e1c93 34591b54
...@@ -109,7 +109,6 @@ ifeq ($(CONFIG_VP9_ENCODER),yes) ...@@ -109,7 +109,6 @@ ifeq ($(CONFIG_VP9_ENCODER),yes)
CODEC_SRCS-yes += $(addprefix $(VP9_PREFIX),$(call enabled,VP9_CX_SRCS)) CODEC_SRCS-yes += $(addprefix $(VP9_PREFIX),$(call enabled,VP9_CX_SRCS))
CODEC_EXPORTS-yes += $(addprefix $(VP9_PREFIX),$(VP9_CX_EXPORTS)) CODEC_EXPORTS-yes += $(addprefix $(VP9_PREFIX),$(VP9_CX_EXPORTS))
CODEC_SRCS-yes += $(VP9_PREFIX)vp9cx.mk vpx/vp8.h vpx/vp8cx.h CODEC_SRCS-yes += $(VP9_PREFIX)vp9cx.mk vpx/vp8.h vpx/vp8cx.h
CODEC_SRCS-$(ARCH_ARM) += $(VP9_PREFIX)vp98cx_arm.mk
INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8cx.h INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8cx.h
INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP9_PREFIX)/% INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP9_PREFIX)/%
CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8cx.h CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8cx.h
......
...@@ -18,30 +18,6 @@ VP8_DX_SRCS_REMOVE-no += $(VP8_COMMON_SRCS_REMOVE-no) ...@@ -18,30 +18,6 @@ VP8_DX_SRCS_REMOVE-no += $(VP8_COMMON_SRCS_REMOVE-no)
VP8_DX_SRCS-yes += vp8_dx_iface.c VP8_DX_SRCS-yes += vp8_dx_iface.c
# common
#define ARM
#define DISABLE_THREAD
#INCLUDES += algo/vpx_common/vpx_mem/include
#INCLUDES += common
#INCLUDES += common
#INCLUDES += common
#INCLUDES += common
#INCLUDES += decoder
# decoder
#define ARM
#define DISABLE_THREAD
#INCLUDES += algo/vpx_common/vpx_mem/include
#INCLUDES += common
#INCLUDES += common
#INCLUDES += common
#INCLUDES += common
#INCLUDES += decoder
VP8_DX_SRCS-yes += decoder/asm_dec_offsets.c VP8_DX_SRCS-yes += decoder/asm_dec_offsets.c
VP8_DX_SRCS-yes += decoder/dboolhuff.c VP8_DX_SRCS-yes += decoder/dboolhuff.c
VP8_DX_SRCS-yes += decoder/decodemv.c VP8_DX_SRCS-yes += decoder/decodemv.c
......
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp9_filter_block2d_bil_first_pass_armv6|
EXPORT |vp9_filter_block2d_bil_second_pass_armv6|
AREA |.text|, CODE, READONLY ; name this block of code
;-------------------------------------
; r0 unsigned char *src_ptr,
; r1 unsigned short *dst_ptr,
; r2 unsigned int src_pitch,
; r3 unsigned int height,
; stack unsigned int width,
; stack const short *vp9_filter
;-------------------------------------
; The output is transposed stroed in output array to make it easy for second pass filtering.
|vp9_filter_block2d_bil_first_pass_armv6| PROC
stmdb sp!, {r4 - r11, lr}
ldr r11, [sp, #40] ; vp9_filter address
ldr r4, [sp, #36] ; width
mov r12, r3 ; outer-loop counter
add r7, r2, r4 ; preload next row
pld [r0, r7]
sub r2, r2, r4 ; src increment for height loop
ldr r5, [r11] ; load up filter coefficients
mov r3, r3, lsl #1 ; height*2
add r3, r3, #2 ; plus 2 to make output buffer 4-bit aligned since height is actually (height+1)
mov r11, r1 ; save dst_ptr for each row
cmp r5, #128 ; if filter coef = 128, then skip the filter
beq bil_null_1st_filter
|bil_height_loop_1st_v6|
ldrb r6, [r0] ; load source data
ldrb r7, [r0, #1]
ldrb r8, [r0, #2]
mov lr, r4, lsr #2 ; 4-in-parellel loop counter
|bil_width_loop_1st_v6|
ldrb r9, [r0, #3]
ldrb r10, [r0, #4]
pkhbt r6, r6, r7, lsl #16 ; src[1] | src[0]
pkhbt r7, r7, r8, lsl #16 ; src[2] | src[1]
smuad r6, r6, r5 ; apply the filter
pkhbt r8, r8, r9, lsl #16 ; src[3] | src[2]
smuad r7, r7, r5
pkhbt r9, r9, r10, lsl #16 ; src[4] | src[3]
smuad r8, r8, r5
smuad r9, r9, r5
add r0, r0, #4
subs lr, lr, #1
add r6, r6, #0x40 ; round_shift_and_clamp
add r7, r7, #0x40
usat r6, #16, r6, asr #7
usat r7, #16, r7, asr #7
strh r6, [r1], r3 ; result is transposed and stored
add r8, r8, #0x40 ; round_shift_and_clamp
strh r7, [r1], r3
add r9, r9, #0x40
usat r8, #16, r8, asr #7
usat r9, #16, r9, asr #7
strh r8, [r1], r3 ; result is transposed and stored
ldrneb r6, [r0] ; load source data
strh r9, [r1], r3
ldrneb r7, [r0, #1]
ldrneb r8, [r0, #2]
bne bil_width_loop_1st_v6
add r0, r0, r2 ; move to next input row
subs r12, r12, #1
add r9, r2, r4, lsl #1 ; adding back block width
pld [r0, r9] ; preload next row
add r11, r11, #2 ; move over to next column
mov r1, r11
bne bil_height_loop_1st_v6
ldmia sp!, {r4 - r11, pc}
|bil_null_1st_filter|
|bil_height_loop_null_1st|
mov lr, r4, lsr #2 ; loop counter
|bil_width_loop_null_1st|
ldrb r6, [r0] ; load data
ldrb r7, [r0, #1]
ldrb r8, [r0, #2]
ldrb r9, [r0, #3]
strh r6, [r1], r3 ; store it to immediate buffer
add r0, r0, #4
strh r7, [r1], r3
subs lr, lr, #1
strh r8, [r1], r3
strh r9, [r1], r3
bne bil_width_loop_null_1st
subs r12, r12, #1
add r0, r0, r2 ; move to next input line
add r11, r11, #2 ; move over to next column
mov r1, r11
bne bil_height_loop_null_1st
ldmia sp!, {r4 - r11, pc}
ENDP ; |vp9_filter_block2d_bil_first_pass_armv6|
;---------------------------------
; r0 unsigned short *src_ptr,
; r1 unsigned char *dst_ptr,
; r2 int dst_pitch,
; r3 unsigned int height,
; stack unsigned int width,
; stack const short *vp9_filter
;---------------------------------
|vp9_filter_block2d_bil_second_pass_armv6| PROC
stmdb sp!, {r4 - r11, lr}
ldr r11, [sp, #40] ; vp9_filter address
ldr r4, [sp, #36] ; width
ldr r5, [r11] ; load up filter coefficients
mov r12, r4 ; outer-loop counter = width, since we work on transposed data matrix
mov r11, r1
cmp r5, #128 ; if filter coef = 128, then skip the filter
beq bil_null_2nd_filter
|bil_height_loop_2nd|
ldr r6, [r0] ; load the data
ldr r8, [r0, #4]
ldrh r10, [r0, #8]
mov lr, r3, lsr #2 ; loop counter
|bil_width_loop_2nd|
pkhtb r7, r6, r8 ; src[1] | src[2]
pkhtb r9, r8, r10 ; src[3] | src[4]
smuad r6, r6, r5 ; apply filter
smuad r8, r8, r5 ; apply filter
subs lr, lr, #1
smuadx r7, r7, r5 ; apply filter
smuadx r9, r9, r5 ; apply filter
add r0, r0, #8
add r6, r6, #0x40 ; round_shift_and_clamp
add r7, r7, #0x40
usat r6, #8, r6, asr #7
usat r7, #8, r7, asr #7
strb r6, [r1], r2 ; the result is transposed back and stored
add r8, r8, #0x40 ; round_shift_and_clamp
strb r7, [r1], r2
add r9, r9, #0x40
usat r8, #8, r8, asr #7
usat r9, #8, r9, asr #7
strb r8, [r1], r2 ; the result is transposed back and stored
ldrne r6, [r0] ; load data
strb r9, [r1], r2
ldrne r8, [r0, #4]
ldrneh r10, [r0, #8]
bne bil_width_loop_2nd
subs r12, r12, #1
add r0, r0, #4 ; update src for next row
add r11, r11, #1
mov r1, r11
bne bil_height_loop_2nd
ldmia sp!, {r4 - r11, pc}
|bil_null_2nd_filter|
|bil_height_loop_null_2nd|
mov lr, r3, lsr #2
|bil_width_loop_null_2nd|
ldr r6, [r0], #4 ; load data
subs lr, lr, #1
ldr r8, [r0], #4
strb r6, [r1], r2 ; store data
mov r7, r6, lsr #16
strb r7, [r1], r2
mov r9, r8, lsr #16
strb r8, [r1], r2
strb r9, [r1], r2
bne bil_width_loop_null_2nd
subs r12, r12, #1
add r0, r0, #4
add r11, r11, #1
mov r1, r11
bne bil_height_loop_null_2nd
ldmia sp!, {r4 - r11, pc}
ENDP ; |vp9_filter_block2d_second_pass_armv6|
END
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp9_copy_mem16x16_v6|
; ARM
; REQUIRE8
; PRESERVE8
AREA Block, CODE, READONLY ; name this block of code
;void copy_mem16x16_v6( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|vp9_copy_mem16x16_v6| PROC
stmdb sp!, {r4 - r7}
;push {r4-r7}
;preload
pld [r0, #31] ; preload for next 16x16 block
ands r4, r0, #15
beq copy_mem16x16_fast
ands r4, r0, #7
beq copy_mem16x16_8
ands r4, r0, #3
beq copy_mem16x16_4
;copy one byte each time
ldrb r4, [r0]
ldrb r5, [r0, #1]
ldrb r6, [r0, #2]
ldrb r7, [r0, #3]
mov r12, #16
copy_mem16x16_1_loop
strb r4, [r2]
strb r5, [r2, #1]
strb r6, [r2, #2]
strb r7, [r2, #3]
ldrb r4, [r0, #4]
ldrb r5, [r0, #5]
ldrb r6, [r0, #6]
ldrb r7, [r0, #7]
subs r12, r12, #1
strb r4, [r2, #4]
strb r5, [r2, #5]
strb r6, [r2, #6]
strb r7, [r2, #7]
ldrb r4, [r0, #8]
ldrb r5, [r0, #9]
ldrb r6, [r0, #10]
ldrb r7, [r0, #11]
strb r4, [r2, #8]
strb r5, [r2, #9]
strb r6, [r2, #10]
strb r7, [r2, #11]
ldrb r4, [r0, #12]
ldrb r5, [r0, #13]
ldrb r6, [r0, #14]
ldrb r7, [r0, #15]
add r0, r0, r1
strb r4, [r2, #12]
strb r5, [r2, #13]
strb r6, [r2, #14]
strb r7, [r2, #15]
add r2, r2, r3
ldrneb r4, [r0]
ldrneb r5, [r0, #1]
ldrneb r6, [r0, #2]
ldrneb r7, [r0, #3]
pld [r0, #31] ; preload for next 16x16 block
bne copy_mem16x16_1_loop
ldmia sp!, {r4 - r7}
;pop {r4-r7}
mov pc, lr
;copy 4 bytes each time
copy_mem16x16_4
ldr r4, [r0]
ldr r5, [r0, #4]
ldr r6, [r0, #8]
ldr r7, [r0, #12]
mov r12, #16
copy_mem16x16_4_loop
subs r12, r12, #1
add r0, r0, r1
str r4, [r2]
str r5, [r2, #4]
str r6, [r2, #8]
str r7, [r2, #12]
add r2, r2, r3
ldrne r4, [r0]
ldrne r5, [r0, #4]
ldrne r6, [r0, #8]
ldrne r7, [r0, #12]
pld [r0, #31] ; preload for next 16x16 block
bne copy_mem16x16_4_loop
ldmia sp!, {r4 - r7}
;pop {r4-r7}
mov pc, lr
;copy 8 bytes each time
copy_mem16x16_8
sub r1, r1, #16
sub r3, r3, #16
mov r12, #16
copy_mem16x16_8_loop
ldmia r0!, {r4-r5}
;ldm r0, {r4-r5}
ldmia r0!, {r6-r7}
add r0, r0, r1
stmia r2!, {r4-r5}
subs r12, r12, #1
;stm r2, {r4-r5}
stmia r2!, {r6-r7}
add r2, r2, r3
pld [r0, #31] ; preload for next 16x16 block
bne copy_mem16x16_8_loop
ldmia sp!, {r4 - r7}
;pop {r4-r7}
mov pc, lr
;copy 16 bytes each time
copy_mem16x16_fast
;sub r1, r1, #16
;sub r3, r3, #16
mov r12, #16
copy_mem16x16_fast_loop
ldmia r0, {r4-r7}
;ldm r0, {r4-r7}
add r0, r0, r1
subs r12, r12, #1
stmia r2, {r4-r7}
;stm r2, {r4-r7}
add r2, r2, r3
pld [r0, #31] ; preload for next 16x16 block
bne copy_mem16x16_fast_loop
ldmia sp!, {r4 - r7}
;pop {r4-r7}
mov pc, lr
ENDP ; |vp9_copy_mem16x16_v6|
END
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp9_copy_mem8x4_v6|
; ARM
; REQUIRE8
; PRESERVE8
AREA Block, CODE, READONLY ; name this block of code
;void vp9_copy_mem8x4_v6( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|vp9_copy_mem8x4_v6| PROC
;push {r4-r5}
stmdb sp!, {r4-r5}
;preload
pld [r0]
pld [r0, r1]
pld [r0, r1, lsl #1]
ands r4, r0, #7
beq copy_mem8x4_fast
ands r4, r0, #3
beq copy_mem8x4_4
;copy 1 byte each time
ldrb r4, [r0]
ldrb r5, [r0, #1]
mov r12, #4
copy_mem8x4_1_loop
strb r4, [r2]
strb r5, [r2, #1]
ldrb r4, [r0, #2]
ldrb r5, [r0, #3]
subs r12, r12, #1
strb r4, [r2, #2]
strb r5, [r2, #3]
ldrb r4, [r0, #4]
ldrb r5, [r0, #5]
strb r4, [r2, #4]
strb r5, [r2, #5]
ldrb r4, [r0, #6]
ldrb r5, [r0, #7]
add r0, r0, r1
strb r4, [r2, #6]
strb r5, [r2, #7]
add r2, r2, r3
ldrneb r4, [r0]
ldrneb r5, [r0, #1]
bne copy_mem8x4_1_loop
ldmia sp!, {r4 - r5}
;pop {r4-r5}
mov pc, lr
;copy 4 bytes each time
copy_mem8x4_4
ldr r4, [r0]
ldr r5, [r0, #4]
mov r12, #4
copy_mem8x4_4_loop
subs r12, r12, #1
add r0, r0, r1
str r4, [r2]
str r5, [r2, #4]
add r2, r2, r3
ldrne r4, [r0]
ldrne r5, [r0, #4]
bne copy_mem8x4_4_loop
ldmia sp!, {r4-r5}
;pop {r4-r5}
mov pc, lr
;copy 8 bytes each time
copy_mem8x4_fast
;sub r1, r1, #8
;sub r3, r3, #8
mov r12, #4
copy_mem8x4_fast_loop
ldmia r0, {r4-r5}
;ldm r0, {r4-r5}
add r0, r0, r1
subs r12, r12, #1
stmia r2, {r4-r5}
;stm r2, {r4-r5}
add r2, r2, r3
bne copy_mem8x4_fast_loop
ldmia sp!, {r4-r5}
;pop {r4-r5}
mov pc, lr
ENDP ; |vp9_copy_mem8x4_v6|
END
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved. </