Commit b1b4ba1b authored by Christian Duvivier's avatar Christian Duvivier
Browse files

Properly save neon registers.

Replace current code which corrupts the stack by
duplicate of vp8 code to save and restore neon
registers.

Change-Id: Ibb0220b9aa985d10533befa0a455ebce57a2891a
parent db60c02c
......@@ -29,17 +29,19 @@ extern void vp9_short_idct16x16_10_add_neon_pass2(int16_t *src,
int16_t skip_adding,
uint8_t *dest,
int dest_stride);
extern void save_neon_registers();
extern void restore_neon_registers();
/* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */
extern void vp9_push_neon(int64_t *store);
extern void vp9_pop_neon(int64_t *store);
void vp9_short_idct16x16_add_neon(int16_t *input,
uint8_t *dest, int dest_stride) {
int64_t store_reg[8];
int16_t pass1_output[16*16] = {0};
int16_t row_idct_output[16*16] = {0};
// save d8-d15 register values.
save_neon_registers();
vp9_push_neon(store_reg);
/* Parallel idct on the upper 8 rows */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
......@@ -102,18 +104,19 @@ void vp9_short_idct16x16_add_neon(int16_t *input,
dest_stride);
// restore d8-d15 register values.
restore_neon_registers();
vp9_pop_neon(store_reg);
return;
}
void vp9_short_idct16x16_10_add_neon(int16_t *input,
uint8_t *dest, int dest_stride) {
int64_t store_reg[8];
int16_t pass1_output[16*16] = {0};
int16_t row_idct_output[16*16] = {0};
// save d8-d15 register values.
save_neon_registers();
vp9_push_neon(store_reg);
/* Parallel idct on the upper 8 rows */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
......@@ -163,7 +166,7 @@ void vp9_short_idct16x16_10_add_neon(int16_t *input,
dest_stride);
// restore d8-d15 register values.
restore_neon_registers();
vp9_pop_neon(store_reg);
return;
}
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp9_push_neon|
EXPORT |vp9_pop_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
|vp9_push_neon| PROC
vst1.i64 {d8, d9, d10, d11}, [r0]!
vst1.i64 {d12, d13, d14, d15}, [r0]!
bx lr
ENDP
|vp9_pop_neon| PROC
vld1.i64 {d8, d9, d10, d11}, [r0]!
vld1.i64 {d12, d13, d14, d15}, [r0]!
bx lr
ENDP
END
......@@ -12,8 +12,6 @@
EXPORT |vp9_short_idct16x16_add_neon_pass2|
EXPORT |vp9_short_idct16x16_10_add_neon_pass1|
EXPORT |vp9_short_idct16x16_10_add_neon_pass2|
EXPORT |save_neon_registers|
EXPORT |restore_neon_registers|
ARM
REQUIRE8
PRESERVE8
......@@ -1178,14 +1176,4 @@ end_idct10_16x16_pass2
pop {r3-r9}
bx lr
ENDP ; |vp9_short_idct16x16_10_add_neon_pass2|
;void |save_neon_registers|()
|save_neon_registers| PROC
vpush {d8-d15}
bx lr
ENDP ; |save_registers|
;void |restore_neon_registers|()
|restore_neon_registers| PROC
vpop {d8-d15}
bx lr
ENDP ; |restore_registers|
END
......@@ -108,5 +108,6 @@ VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_iht8x8_add_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_mb_lpf_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_copy_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_avg_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_save_reg_neon$(ASM)
$(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.sh))
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment