diff --git a/libs.mk b/libs.mk index 4beaa50cbd9ed592311f9f0d43e7fdb34b8ee1d8..9ded3945a71074dab21c1b52cdea9a6ea28b4869 100644 --- a/libs.mk +++ b/libs.mk @@ -93,6 +93,7 @@ CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86.h CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86_abi_support.asm CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86_cpuid.c endif +CODEC_SRCS-$(ARCH_ARM) += vpx_ports/arm_cpudetect.c CODEC_SRCS-$(ARCH_ARM) += $(BUILD_PFX)vpx_config.asm CODEC_EXPORTS-$(BUILD_LIBVPX) += vpx/exports_com CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc diff --git a/vp8/common/arm/arm_systemdependent.c b/vp8/common/arm/arm_systemdependent.c new file mode 100644 index 0000000000000000000000000000000000000000..fe62fae134d4cbd7ee3c8b58719df81a80ccae0f --- /dev/null +++ b/vp8/common/arm/arm_systemdependent.c @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + + +#include "vpx_ports/config.h" +#include "vpx_ports/arm.h" +#include "g_common.h" +#include "pragmas.h" +#include "subpixel.h" +#include "loopfilter.h" +#include "recon.h" +#include "idct.h" +#include "onyxc_int.h" + +extern void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x); +extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x); +extern void vp8_build_intra_predictors_mby_neon(MACROBLOCKD *x); + +extern void (*vp8_build_intra_predictors_mby_s_ptr)(MACROBLOCKD *x); +extern void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x); +extern void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x); + +void vp8_arch_arm_common_init(VP8_COMMON *ctx) +{ +#if CONFIG_RUNTIME_CPU_DETECT + VP8_COMMON_RTCD *rtcd = &ctx->rtcd; + int flags = arm_cpu_caps(); + int has_edsp = flags & HAS_EDSP; + int has_media = flags & HAS_MEDIA; + int has_neon = flags & HAS_NEON; + rtcd->flags = flags; + + /* Override default functions with fastest ones for this CPU. */ +#if HAVE_ARMV6 + if (has_media) + { + rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_armv6; + rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_armv6; + rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_armv6; + rtcd->subpix.sixtap4x4 = vp8_sixtap_predict_armv6; + rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_armv6; + rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_armv6; + rtcd->subpix.bilinear8x4 = vp8_bilinear_predict8x4_armv6; + rtcd->subpix.bilinear4x4 = vp8_bilinear_predict4x4_armv6; + + rtcd->idct.idct1 = vp8_short_idct4x4llm_1_v6; + rtcd->idct.idct16 = vp8_short_idct4x4llm_v6_dual; + rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_v6; + rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_v6; + + rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_armv6; + rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_armv6; + rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_armv6; + rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_armv6; + rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_armv6; + 
rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_armv6; + rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_armv6; + rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_armv6; + + rtcd->recon.copy16x16 = vp8_copy_mem16x16_v6; + rtcd->recon.copy8x8 = vp8_copy_mem8x8_v6; + rtcd->recon.copy8x4 = vp8_copy_mem8x4_v6; + rtcd->recon.recon = vp8_recon_b_armv6; + rtcd->recon.recon2 = vp8_recon2b_armv6; + rtcd->recon.recon4 = vp8_recon4b_armv6; + } +#endif + +#if HAVE_ARMV7 + if (has_neon) + { + rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_neon; + rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_neon; + rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_neon; + rtcd->subpix.sixtap4x4 = vp8_sixtap_predict_neon; + rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_neon; + rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_neon; + rtcd->subpix.bilinear8x4 = vp8_bilinear_predict8x4_neon; + rtcd->subpix.bilinear4x4 = vp8_bilinear_predict4x4_neon; + + rtcd->idct.idct1 = vp8_short_idct4x4llm_1_neon; + rtcd->idct.idct16 = vp8_short_idct4x4llm_neon; + rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_neon; + rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_neon; + + rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_neon; + rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_neon; + rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_neon; + rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_neon; + rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_neon; + rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_neon; + rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_neon; + rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_neon; + + rtcd->recon.copy16x16 = vp8_copy_mem16x16_neon; + rtcd->recon.copy8x8 = vp8_copy_mem8x8_neon; + rtcd->recon.copy8x4 = vp8_copy_mem8x4_neon; + rtcd->recon.recon = vp8_recon_b_neon; + rtcd->recon.recon2 = vp8_recon2b_neon; + rtcd->recon.recon4 = vp8_recon4b_neon; + } +#endif + +#endif + +#if HAVE_ARMV6 +#if CONFIG_RUNTIME_CPU_DETECT + if (has_media) +#endif + { + 
vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby; + vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s; + } +#endif + +#if HAVE_ARMV7 +#if CONFIG_RUNTIME_CPU_DETECT + if (has_neon) +#endif + { + vp8_build_intra_predictors_mby_ptr = + vp8_build_intra_predictors_mby_neon; + vp8_build_intra_predictors_mby_s_ptr = + vp8_build_intra_predictors_mby_s_neon; + } +#endif +} diff --git a/vp8/common/arm/idct_arm.h b/vp8/common/arm/idct_arm.h index f28d7f649ea0c541c2b62c8f4c524db6df9b317f..8b8d179172b841f1b2be3226c1756e5201ca1b25 100644 --- a/vp8/common/arm/idct_arm.h +++ b/vp8/common/arm/idct_arm.h @@ -19,6 +19,7 @@ extern prototype_idct_scalar_add(vp8_dc_only_idct_add_v6); extern prototype_second_order(vp8_short_inv_walsh4x4_1_v6); extern prototype_second_order(vp8_short_inv_walsh4x4_v6); +#if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_idct_idct1 #define vp8_idct_idct1 vp8_short_idct4x4llm_1_v6 @@ -34,6 +35,7 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_v6); #undef vp8_idct_iwalsh16 #define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_v6 #endif +#endif #if HAVE_ARMV7 extern prototype_idct(vp8_short_idct4x4llm_1_neon); @@ -42,6 +44,7 @@ extern prototype_idct_scalar_add(vp8_dc_only_idct_add_neon); extern prototype_second_order(vp8_short_inv_walsh4x4_1_neon); extern prototype_second_order(vp8_short_inv_walsh4x4_neon); +#if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_idct_idct1 #define vp8_idct_idct1 vp8_short_idct4x4llm_1_neon @@ -57,5 +60,6 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_neon); #undef vp8_idct_iwalsh16 #define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_neon #endif +#endif #endif diff --git a/vp8/common/arm/loopfilter_arm.h b/vp8/common/arm/loopfilter_arm.h index 6c3628ae939e7138e9d32ca0213d4f142ea65726..cd62207d7012961cba8b9aa7109d266cc4b9d609 100644 --- a/vp8/common/arm/loopfilter_arm.h +++ b/vp8/common/arm/loopfilter_arm.h @@ -22,6 +22,7 @@ extern prototype_loopfilter_block(vp8_loop_filter_bvs_armv6); extern 
prototype_loopfilter_block(vp8_loop_filter_mbhs_armv6); extern prototype_loopfilter_block(vp8_loop_filter_bhs_armv6); +#if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_lf_normal_mb_v #define vp8_lf_normal_mb_v vp8_loop_filter_mbv_armv6 @@ -46,6 +47,7 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_armv6); #undef vp8_lf_simple_b_h #define vp8_lf_simple_b_h vp8_loop_filter_bhs_armv6 #endif +#endif #if HAVE_ARMV7 extern prototype_loopfilter_block(vp8_loop_filter_mbv_neon); @@ -57,6 +59,7 @@ extern prototype_loopfilter_block(vp8_loop_filter_bvs_neon); extern prototype_loopfilter_block(vp8_loop_filter_mbhs_neon); extern prototype_loopfilter_block(vp8_loop_filter_bhs_neon); +#if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_lf_normal_mb_v #define vp8_lf_normal_mb_v vp8_loop_filter_mbv_neon @@ -81,5 +84,6 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_neon); #undef vp8_lf_simple_b_h #define vp8_lf_simple_b_h vp8_loop_filter_bhs_neon #endif +#endif #endif diff --git a/vp8/common/arm/recon_arm.h b/vp8/common/arm/recon_arm.h index 18855a3c0365fd83bed2190d5270ed8b57c1c30b..c30f6dc2dcf9a69612d8e0993ee8839477a0843d 100644 --- a/vp8/common/arm/recon_arm.h +++ b/vp8/common/arm/recon_arm.h @@ -21,6 +21,7 @@ extern prototype_copy_block(vp8_copy_mem8x8_v6); extern prototype_copy_block(vp8_copy_mem8x4_v6); extern prototype_copy_block(vp8_copy_mem16x16_v6); +#if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_recon_recon #define vp8_recon_recon vp8_recon_b_armv6 @@ -39,6 +40,7 @@ extern prototype_copy_block(vp8_copy_mem16x16_v6); #undef vp8_recon_copy16x16 #define vp8_recon_copy16x16 vp8_copy_mem16x16_v6 #endif +#endif #if HAVE_ARMV7 extern prototype_recon_block(vp8_recon_b_neon); @@ -49,6 +51,7 @@ extern prototype_copy_block(vp8_copy_mem8x8_neon); extern prototype_copy_block(vp8_copy_mem8x4_neon); extern prototype_copy_block(vp8_copy_mem16x16_neon); +#if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_recon_recon #define vp8_recon_recon vp8_recon_b_neon @@ -67,5 +70,6 @@ extern 
prototype_copy_block(vp8_copy_mem16x16_neon); #undef vp8_recon_copy16x16 #define vp8_recon_copy16x16 vp8_copy_mem16x16_neon #endif +#endif #endif diff --git a/vp8/common/arm/subpixel_arm.h b/vp8/common/arm/subpixel_arm.h index 53600e547968efee0d7fce071ee72d5606012ace..6288538d06fa19b208d54170009157e5a2682f64 100644 --- a/vp8/common/arm/subpixel_arm.h +++ b/vp8/common/arm/subpixel_arm.h @@ -22,6 +22,7 @@ extern prototype_subpixel_predict(vp8_bilinear_predict8x8_armv6); extern prototype_subpixel_predict(vp8_bilinear_predict8x4_armv6); extern prototype_subpixel_predict(vp8_bilinear_predict4x4_armv6); +#if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_subpix_sixtap16x16 #define vp8_subpix_sixtap16x16 vp8_sixtap_predict16x16_armv6 @@ -46,6 +47,7 @@ extern prototype_subpixel_predict(vp8_bilinear_predict4x4_armv6); #undef vp8_subpix_bilinear4x4 #define vp8_subpix_bilinear4x4 vp8_bilinear_predict4x4_armv6 #endif +#endif #if HAVE_ARMV7 extern prototype_subpixel_predict(vp8_sixtap_predict16x16_neon); @@ -57,6 +59,7 @@ extern prototype_subpixel_predict(vp8_bilinear_predict8x8_neon); extern prototype_subpixel_predict(vp8_bilinear_predict8x4_neon); extern prototype_subpixel_predict(vp8_bilinear_predict4x4_neon); +#if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_subpix_sixtap16x16 #define vp8_subpix_sixtap16x16 vp8_sixtap_predict16x16_neon @@ -81,5 +84,6 @@ extern prototype_subpixel_predict(vp8_bilinear_predict4x4_neon); #undef vp8_subpix_bilinear4x4 #define vp8_subpix_bilinear4x4 vp8_bilinear_predict4x4_neon #endif +#endif #endif diff --git a/vp8/common/arm/systemdependent.c b/vp8/common/arm/systemdependent.c deleted file mode 100644 index 1eed97e02f50ed991a82209514496b192d79cb9d..0000000000000000000000000000000000000000 --- a/vp8/common/arm/systemdependent.c +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vpx_ports/config.h" -#include "g_common.h" -#include "pragmas.h" -#include "subpixel.h" -#include "loopfilter.h" -#include "recon.h" -#include "idct.h" -#include "onyxc_int.h" - -void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x); -extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x); -extern void vp8_build_intra_predictors_mby_neon(MACROBLOCKD *x); - -void (*vp8_build_intra_predictors_mby_s_ptr)(MACROBLOCKD *x); -extern void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x); -extern void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x); - -void vp8_machine_specific_config(VP8_COMMON *ctx) -{ -#if CONFIG_RUNTIME_CPU_DETECT - VP8_COMMON_RTCD *rtcd = &ctx->rtcd; - -#if HAVE_ARMV7 - rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_neon; - rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_neon; - rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_neon; - rtcd->subpix.sixtap4x4 = vp8_sixtap_predict_neon; - rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_neon; - rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_neon; - rtcd->subpix.bilinear8x4 = vp8_bilinear_predict8x4_neon; - rtcd->subpix.bilinear4x4 = vp8_bilinear_predict4x4_neon; - - rtcd->idct.idct1 = vp8_short_idct4x4llm_1_neon; - rtcd->idct.idct16 = vp8_short_idct4x4llm_neon; - rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_neon; - rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_neon; - - rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_neon; - rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_neon; - rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_neon; - rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_neon; - rtcd->loopfilter.simple_mb_v = 
vp8_loop_filter_mbvs_neon; - rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_neon; - rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_neon; - rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_neon; - - rtcd->recon.copy16x16 = vp8_copy_mem16x16_neon; - rtcd->recon.copy8x8 = vp8_copy_mem8x8_neon; - rtcd->recon.copy8x4 = vp8_copy_mem8x4_neon; - rtcd->recon.recon = vp8_recon_b_neon; - rtcd->recon.recon2 = vp8_recon2b_neon; - rtcd->recon.recon4 = vp8_recon4b_neon; -#elif HAVE_ARMV6 - - rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_armv6; - rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_armv6; - rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_armv6; - rtcd->subpix.sixtap4x4 = vp8_sixtap_predict_armv6; - rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_armv6; - rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_armv6; - rtcd->subpix.bilinear8x4 = vp8_bilinear_predict8x4_armv6; - rtcd->subpix.bilinear4x4 = vp8_bilinear_predict4x4_armv6; - - rtcd->idct.idct1 = vp8_short_idct4x4llm_1_v6; - rtcd->idct.idct16 = vp8_short_idct4x4llm_v6_dual; - rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_armv6; - rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_armv6; - - rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_armv6; - rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_armv6; - rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_armv6; - rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_armv6; - rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_armv6; - rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_armv6; - rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_armv6; - rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_armv6; - - rtcd->recon.copy16x16 = vp8_copy_mem16x16_v6; - rtcd->recon.copy8x8 = vp8_copy_mem8x8_v6; - rtcd->recon.copy8x4 = vp8_copy_mem8x4_v6; - rtcd->recon.recon = vp8_recon_b_armv6; - rtcd->recon.recon2 = vp8_recon2b_armv6; - rtcd->recon.recon4 = vp8_recon4b_armv6; -#else -//pure c - rtcd->idct.idct1 = vp8_short_idct4x4llm_1_c; - rtcd->idct.idct16 = 
vp8_short_idct4x4llm_c; - rtcd->idct.idct1_scalar = vp8_dc_only_idct_c; - rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_c; - rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_c; - - rtcd->recon.copy16x16 = vp8_copy_mem16x16_c; - rtcd->recon.copy8x8 = vp8_copy_mem8x8_c; - rtcd->recon.copy8x4 = vp8_copy_mem8x4_c; - rtcd->recon.recon = vp8_recon_b_c; - rtcd->recon.recon2 = vp8_recon2b_c; - rtcd->recon.recon4 = vp8_recon4b_c; - - rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_c; - rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_c; - rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_c; - rtcd->subpix.sixtap4x4 = vp8_sixtap_predict_c; - rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_c; - rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_c; - rtcd->subpix.bilinear8x4 = vp8_bilinear_predict8x4_c; - rtcd->subpix.bilinear4x4 = vp8_bilinear_predict4x4_c; - - rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_c; - rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_c; - rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_c; - rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_c; - rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_c; - rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_c; - rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_c; - rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_c; -#endif - -#if CONFIG_POSTPROC || (CONFIG_VP8_ENCODER && CONFIG_PSNR) - rtcd->postproc.down = vp8_mbpost_proc_down_c; - rtcd->postproc.across = vp8_mbpost_proc_across_ip_c; - rtcd->postproc.downacross = vp8_post_proc_down_and_across_c; - rtcd->postproc.addnoise = vp8_plane_add_noise_c; -#endif -#endif - -#if HAVE_ARMV7 - vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby_neon; - vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s_neon; -#elif HAVE_ARMV6 - vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby; - vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s; -#else - vp8_build_intra_predictors_mby_ptr = 
vp8_build_intra_predictors_mby; - vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s; - -#endif - -} diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c index c04e31ffe3ca87a5b3f1f4cdaf693a044b5c975d..0ef375e334fa2b72a4f0a3638ab593d44bd2b3b7 100644 --- a/vp8/common/generic/systemdependent.c +++ b/vp8/common/generic/systemdependent.c @@ -18,6 +18,7 @@ #include "onyxc_int.h" extern void vp8_arch_x86_common_init(VP8_COMMON *ctx); +extern void vp8_arch_arm_common_init(VP8_COMMON *ctx); void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x); extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x); @@ -77,4 +78,8 @@ void vp8_machine_specific_config(VP8_COMMON *ctx) vp8_arch_x86_common_init(ctx); #endif +#if ARCH_ARM + vp8_arch_arm_common_init(ctx); +#endif + } diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h index 4966002f52134a8ab5805cb927f240bb4a0eebaa..d12143d4d911870171b524386ba7dd5c7937fd16 100644 --- a/vp8/common/onyxc_int.h +++ b/vp8/common/onyxc_int.h @@ -74,6 +74,7 @@ typedef struct VP8_COMMON_RTCD vp8_subpix_rtcd_vtable_t subpix; vp8_loopfilter_rtcd_vtable_t loopfilter; vp8_postproc_rtcd_vtable_t postproc; + int flags; #else int unused; #endif diff --git a/vp8/decoder/arm/arm_dsystemdependent.c b/vp8/decoder/arm/arm_dsystemdependent.c new file mode 100644 index 0000000000000000000000000000000000000000..77cff47db5b146b4517474ebb71c10efe6019671 --- /dev/null +++ b/vp8/decoder/arm/arm_dsystemdependent.c @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + + +#include "vpx_ports/config.h" +#include "vpx_ports/arm.h" +#include "blockd.h" +#include "pragmas.h" +#include "postproc.h" +#include "dboolhuff.h" +#include "dequantize.h" +#include "onyxd_int.h" + +void vp8_arch_arm_decode_init(VP8D_COMP *pbi) +{ +#if CONFIG_RUNTIME_CPU_DETECT + int flags = pbi->common.rtcd.flags; + int has_edsp = flags & HAS_EDSP; + int has_media = flags & HAS_MEDIA; + int has_neon = flags & HAS_NEON; + +#if HAVE_ARMV6 + if (has_media) + { + pbi->dequant.block = vp8_dequantize_b_v6; + pbi->dequant.idct_add = vp8_dequant_idct_add_v6; + pbi->dequant.dc_idct_add = vp8_dequant_dc_idct_add_v6; + pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_v6; + pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_v6; + pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_v6; +#if 0 //For use with RTCD, when implemented + pbi->dboolhuff.start = vp8dx_start_decode_c; + pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c; + pbi->dboolhuff.debool = vp8dx_decode_bool_c; + pbi->dboolhuff.devalue = vp8dx_decode_value_c; +#endif + } +#endif + +#if HAVE_ARMV7 + if (has_neon) + { + pbi->dequant.block = vp8_dequantize_b_neon; + pbi->dequant.idct_add = vp8_dequant_idct_add_neon; + /*This is not used: NEON always dequants two blocks at once. 
+ pbi->dequant.dc_idct_add = vp8_dequant_dc_idct_add_neon;*/ + pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_neon; + pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_neon; + pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_neon; +#if 0 //For use with RTCD, when implemented + pbi->dboolhuff.start = vp8dx_start_decode_c; + pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c; + pbi->dboolhuff.debool = vp8dx_decode_bool_c; + pbi->dboolhuff.devalue = vp8dx_decode_value_c; +#endif + } +#endif +#endif +} diff --git a/vp8/decoder/arm/dequantize_arm.h b/vp8/decoder/arm/dequantize_arm.h index 40151e01ab9a1676df4f51c5257baf965c561bf6..b7d800d2603c639b39071e1f9fa367a0cde49f0c 100644 --- a/vp8/decoder/arm/dequantize_arm.h +++ b/vp8/decoder/arm/dequantize_arm.h @@ -20,6 +20,7 @@ extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_v6) extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_v6); extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_v6); +#if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_dequant_block #define vp8_dequant_block vp8_dequantize_b_v6 @@ -38,6 +39,7 @@ extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_v6); #undef vp8_dequant_idct_add_uv_block #define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_v6 #endif +#endif #if HAVE_ARMV7 extern prototype_dequant_block(vp8_dequantize_b_neon); @@ -47,6 +49,7 @@ extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_neo extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_neon); extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_neon); +#if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_dequant_block #define vp8_dequant_block vp8_dequantize_b_neon @@ -65,5 +68,6 @@ extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_neon); #undef vp8_dequant_idct_add_uv_block #define vp8_dequant_idct_add_uv_block 
vp8_dequant_idct_add_uv_block_neon #endif +#endif #endif diff --git a/vp8/decoder/arm/dsystemdependent.c b/vp8/decoder/arm/dsystemdependent.c deleted file mode 100644 index 9dcf7b657f2e8678ed537a9685f119a147902731..0000000000000000000000000000000000000000 --- a/vp8/decoder/arm/dsystemdependent.c +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vpx_ports/config.h" -#include "blockd.h" -#include "pragmas.h" -#include "postproc.h" -#include "dboolhuff.h" -#include "dequantize.h" -#include "onyxd_int.h" - -void vp8_dmachine_specific_config(VP8D_COMP *pbi) -{ -#if CONFIG_RUNTIME_CPU_DETECT - pbi->mb.rtcd = &pbi->common.rtcd; -#if HAVE_ARMV7 - pbi->dequant.block = vp8_dequantize_b_neon; - pbi->dboolhuff.start = vp8dx_start_decode_c; - pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c; - pbi->dboolhuff.debool = vp8dx_decode_bool_c; - pbi->dboolhuff.devalue = vp8dx_decode_value_c; - -#elif HAVE_ARMV6 - pbi->dequant.block = vp8_dequantize_b_v6; - pbi->dboolhuff.start = vp8dx_start_decode_c; - pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c; - pbi->dboolhuff.debool = vp8dx_decode_bool_c; - pbi->dboolhuff.devalue = vp8dx_decode_value_c; -#endif -#endif -} diff --git a/vp8/decoder/generic/dsystemdependent.c b/vp8/decoder/generic/dsystemdependent.c index 60f2af5b823773608ea4da9466dbab60d5465f59..84de7af435ddf69a08299077d6696207e0704a8d 100644 --- a/vp8/decoder/generic/dsystemdependent.c +++ b/vp8/decoder/generic/dsystemdependent.c @@ -14,6 +14,7 @@ #include "onyxd_int.h" extern void vp8_arch_x86_decode_init(VP8D_COMP *pbi); +extern void vp8_arch_arm_decode_init(VP8D_COMP *pbi); 
void vp8_dmachine_specific_config(VP8D_COMP *pbi) { @@ -37,4 +38,8 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi) #if ARCH_X86 || ARCH_X86_64 vp8_arch_x86_decode_init(pbi); #endif + +#if ARCH_ARM + vp8_arch_arm_decode_init(pbi); +#endif } diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c index 884c38da0605b07d505d02e897adde2514c1c307..b5a6e3e858bb918fccf4dd49e9d598e21c35b94a 100644 --- a/vp8/decoder/onyxd_if.c +++ b/vp8/decoder/onyxd_if.c @@ -30,6 +30,9 @@ #include "systemdependent.h" #include "vpx_ports/vpx_timer.h" #include "detokenize.h" +#if ARCH_ARM +#include "vpx_ports/arm.h" +#endif extern void vp8_init_loop_filter(VP8_COMMON *cm); extern void vp8cx_init_de_quantizer(VP8D_COMP *pbi); @@ -224,7 +227,6 @@ int vp8dx_set_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_C #if HAVE_ARMV7 extern void vp8_push_neon(INT64 *store); extern void vp8_pop_neon(INT64 *store); -static INT64 dx_store_reg[8]; #endif static int get_free_fb (VP8_COMMON *cm) @@ -312,6 +314,9 @@ static int swap_frame_buffers (VP8_COMMON *cm) int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsigned char *source, INT64 time_stamp) { +#if HAVE_ARMV7 + INT64 dx_store_reg[8]; +#endif VP8D_COMP *pbi = (VP8D_COMP *) ptr; VP8_COMMON *cm = &pbi->common; int retcode = 0; @@ -327,10 +332,27 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign pbi->common.error.error_code = VPX_CODEC_OK; +#if HAVE_ARMV7 +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_push_neon(dx_store_reg); + } +#endif + cm->new_fb_idx = get_free_fb (cm); if (setjmp(pbi->common.error.jmp)) { +#if HAVE_ARMV7 +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_pop_neon(dx_store_reg); + } +#endif pbi->common.error.setjmp = 0; if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0) cm->fb_idx_ref_cnt[cm->new_fb_idx]--; @@ -339,10 +361,6 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long 
size, const unsign pbi->common.error.setjmp = 1; -#if HAVE_ARMV7 - vp8_push_neon(dx_store_reg); -#endif - vpx_usec_timer_start(&timer); //cm->current_video_frame++; @@ -354,7 +372,12 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign if (retcode < 0) { #if HAVE_ARMV7 - vp8_pop_neon(dx_store_reg); +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_pop_neon(dx_store_reg); + } #endif pbi->common.error.error_code = VPX_CODEC_ERROR; pbi->common.error.setjmp = 0; @@ -367,6 +390,14 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign { if (swap_frame_buffers (cm)) { +#if HAVE_ARMV7 +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_pop_neon(dx_store_reg); + } +#endif pbi->common.error.error_code = VPX_CODEC_ERROR; pbi->common.error.setjmp = 0; return -1; @@ -375,6 +406,14 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign { if (swap_frame_buffers (cm)) { +#if HAVE_ARMV7 +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_pop_neon(dx_store_reg); + } +#endif pbi->common.error.error_code = VPX_CODEC_ERROR; pbi->common.error.setjmp = 0; return -1; @@ -455,7 +494,12 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign #endif #if HAVE_ARMV7 - vp8_pop_neon(dx_store_reg); +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_pop_neon(dx_store_reg); + } #endif pbi->common.error.setjmp = 0; return retcode; diff --git a/vp8/encoder/arm/arm_csystemdependent.c b/vp8/encoder/arm/arm_csystemdependent.c new file mode 100644 index 0000000000000000000000000000000000000000..8736fcf1d603d5db6f70c2d236b8d5795c2749c3 --- /dev/null +++ b/vp8/encoder/arm/arm_csystemdependent.c @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "vpx_ports/config.h" +#include "vpx_ports/arm.h" +#include "variance.h" +#include "onyx_int.h" + +extern void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction); +extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction); +extern void vpxyv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction); + +void vp8_arch_arm_encoder_init(VP8_COMP *cpi) +{ +#if CONFIG_RUNTIME_CPU_DETECT + int flags = cpi->common.rtcd.flags; + int has_edsp = flags & HAS_EDSP; + int has_media = flags & HAS_MEDIA; + int has_neon = flags & HAS_NEON; + +#if HAVE_ARMV6 + if (has_media) + { + /*cpi->rtcd.variance.sad16x16 = vp8_sad16x16_c; + cpi->rtcd.variance.sad16x8 = vp8_sad16x8_c; + cpi->rtcd.variance.sad8x16 = vp8_sad8x16_c; + cpi->rtcd.variance.sad8x8 = vp8_sad8x8_c; + cpi->rtcd.variance.sad4x4 = vp8_sad4x4_c;*/ + + /*cpi->rtcd.variance.var4x4 = vp8_variance4x4_c; + cpi->rtcd.variance.var8x8 = vp8_variance8x8_c; + cpi->rtcd.variance.var8x16 = vp8_variance8x16_c; + cpi->rtcd.variance.var16x8 = vp8_variance16x8_c; + cpi->rtcd.variance.var16x16 = vp8_variance16x16_c;*/ + + /*cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c; + cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_c; + cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c; + cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c; + cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_c;*/ + + /*cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c; + cpi->rtcd.variance.getmbss = 
vp8_get_mb_ss_c;*/ + + /*cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_c; + cpi->rtcd.variance.get8x8var = vp8_get8x8var_c; + cpi->rtcd.variance.get16x16var = vp8_get16x16var_c;; + cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_c;*/ + + /*cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c; + cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_c; + cpi->rtcd.fdct.fast4x4 = vp8_fast_fdct4x4_c; + cpi->rtcd.fdct.fast8x4 = vp8_fast_fdct8x4_c;*/ + cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_armv6; + + /*cpi->rtcd.encodemb.berr = vp8_block_error_c; + cpi->rtcd.encodemb.mberr = vp8_mbblock_error_c; + cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_c; + cpi->rtcd.encodemb.subb = vp8_subtract_b_c; + cpi->rtcd.encodemb.submby = vp8_subtract_mby_c; + cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_c;*/ + + /*cpi->rtcd.quantize.quantb = vp8_regular_quantize_b; + cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c;*/ + } +#endif + +#if HAVE_ARMV7 + if (has_neon) + { + cpi->rtcd.variance.sad16x16 = vp8_sad16x16_neon; + cpi->rtcd.variance.sad16x8 = vp8_sad16x8_neon; + cpi->rtcd.variance.sad8x16 = vp8_sad8x16_neon; + cpi->rtcd.variance.sad8x8 = vp8_sad8x8_neon; + cpi->rtcd.variance.sad4x4 = vp8_sad4x4_neon; + + /*cpi->rtcd.variance.var4x4 = vp8_variance4x4_c;*/ + cpi->rtcd.variance.var8x8 = vp8_variance8x8_neon; + cpi->rtcd.variance.var8x16 = vp8_variance8x16_neon; + cpi->rtcd.variance.var16x8 = vp8_variance16x8_neon; + cpi->rtcd.variance.var16x16 = vp8_variance16x16_neon; + + /*cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c;*/ + cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_neon; + /*cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c; + cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c;*/ + cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_neon; + + cpi->rtcd.variance.mse16x16 = vp8_mse16x16_neon; + /*cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/ + + cpi->rtcd.variance.get16x16prederror = 
vp8_get16x16pred_error_neon; + /*cpi->rtcd.variance.get8x8var = vp8_get8x8var_c; + cpi->rtcd.variance.get16x16var = vp8_get16x16var_c;*/ + cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_neon; + + cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_neon; + cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_neon; + cpi->rtcd.fdct.fast4x4 = vp8_fast_fdct4x4_neon; + cpi->rtcd.fdct.fast8x4 = vp8_fast_fdct8x4_neon; + cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_neon; + + /*cpi->rtcd.encodemb.berr = vp8_block_error_c; + cpi->rtcd.encodemb.mberr = vp8_mbblock_error_c; + cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_c;*/ + cpi->rtcd.encodemb.subb = vp8_subtract_b_neon; + cpi->rtcd.encodemb.submby = vp8_subtract_mby_neon; + cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_neon; + + /*cpi->rtcd.quantize.quantb = vp8_regular_quantize_b; + cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c;*/ + /* The neon quantizer has not been updated to match the new exact + * quantizer introduced in commit e04e2935 + */ + /*cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_neon;*/ + } +#endif + +#if HAVE_ARMV7 +#if CONFIG_RUNTIME_CPU_DETECT + if (has_neon) +#endif + { + vp8_yv12_copy_partial_frame_ptr = vpxyv12_copy_partial_frame_neon; + } +#endif +#endif +} diff --git a/vp8/encoder/arm/neon/boolhuff_armv7.asm b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm similarity index 95% rename from vp8/encoder/arm/neon/boolhuff_armv7.asm rename to vp8/encoder/arm/armv5te/boolhuff_armv5te.asm index 9c4823c51fc2ce3869016f1bf87a076e3e877393..e78dc33229fe872ad9891f65de2dd8e0c9a0e1b0 100644 --- a/vp8/encoder/arm/neon/boolhuff_armv7.asm +++ b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm @@ -205,17 +205,10 @@ token_count_lt_zero_se ldr r5, [r0, #vp8_writer_range] ldr r3, [r0, #vp8_writer_count] - ; reverse the stream of bits to be packed. Normally - ; the most significant bit is peeled off and compared - ; in the form of (v >> --n) & 1. 
ARM architecture has - ; the ability to set a flag based on the value of the - ; bit shifted off the bottom of the register. To make - ; that happen the bitstream is reversed. - rbit r11, r1 rsb r4, r10, #32 ; 32-n ; v is kept in r1 during the token pack loop - lsr r1, r11, r4 ; v >>= 32 - n + lsl r1, r1, r4 ; r1 = v << 32 - n encode_value_loop sub r7, r5, #1 ; range-1 @@ -223,7 +216,7 @@ encode_value_loop ; Decisions are made based on the bit value shifted ; off of v, so set a flag here based on this. ; This value is refered to as "bb" - lsrs r1, r1, #1 ; bit = v >> n + lsls r1, r1, #1 ; bit = v >> n mov r4, r7, lsl #7 ; ((range-1) * 128) mov r7, #1 diff --git a/vp8/encoder/arm/neon/vp8_packtokens_armv7.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm similarity index 93% rename from vp8/encoder/arm/neon/vp8_packtokens_armv7.asm rename to vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm index c19ac8250649a5b9d4f2153f65294446daad6574..3233d2a96688d87018bab8e872c21058134940fb 100644 --- a/vp8/encoder/arm/neon/vp8_packtokens_armv7.asm +++ b/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm @@ -9,7 +9,7 @@ ; - EXPORT |vp8cx_pack_tokens_armv7| + EXPORT |vp8cx_pack_tokens_armv5| INCLUDE vpx_vp8_enc_asm_offsets.asm @@ -25,7 +25,7 @@ ; r3 vp8_coef_encodings ; s0 vp8_extra_bits ; s1 vp8_coef_tree -|vp8cx_pack_tokens_armv7| PROC +|vp8cx_pack_tokens_armv5| PROC push {r4-r11, lr} ; Add size of xcount * sizeof (TOKENEXTRA) to get stop @@ -57,18 +57,11 @@ while_p_lt_stop movne lr, #2 ; i = 2 subne r8, r8, #1 ; --n - ; reverse the stream of bits to be packed. Normally - ; the most significant bit is peeled off and compared - ; in the form of (v >> --n) & 1. ARM architecture has - ; the ability to set a flag based on the value of the - ; bit shifted off the bottom of the register. To make - ; that happen the bitstream is reversed. 
- rbit r12, r6 rsb r4, r8, #32 ; 32-n ldr r10, [sp, #52] ; vp8_coef_tree ; v is kept in r12 during the token pack loop - lsr r12, r12, r4 ; v >>= 32 - n + lsl r12, r6, r4 ; r12 = v << 32 - n ; loop start token_loop @@ -78,7 +71,7 @@ token_loop ; Decisions are made based on the bit value shifted ; off of v, so set a flag here based on this. ; This value is refered to as "bb" - lsrs r12, r12, #1 ; bb = v >> n + lsls r12, r12, #1 ; bb = v >> n mul r4, r4, r7 ; ((range-1) * pp[i>>1])) ; bb can only be 0 or 1. So only execute this statement @@ -172,16 +165,15 @@ token_count_lt_zero ldr r10, [r12, #vp8_extra_bit_struct_tree] str r10, [sp, #4] ; b->tree - rbit r12, r7 ; reverse v rsb r4, r8, #32 - lsr r12, r12, r4 + lsl r12, r7, r4 mov lr, #0 ; i = 0 extra_bits_loop ldrb r4, [r9, lr, asr #1] ; pp[i>>1] sub r7, r5, #1 ; range-1 - lsrs r12, r12, #1 ; v >> n + lsls r12, r12, #1 ; v >> n mul r4, r4, r7 ; (range-1) * pp[i>>1] addcs lr, lr, #1 ; i + bb diff --git a/vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm similarity index 94% rename from vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm rename to vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm index 0756455869dc595c2d529d87dd5fb5ce74c705f9..a9b552ae175bfb7f30528c116fdf95f537aa6abe 100644 --- a/vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm +++ b/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm @@ -9,7 +9,7 @@ ; - EXPORT |vp8cx_pack_mb_row_tokens_armv7| + EXPORT |vp8cx_pack_mb_row_tokens_armv5| INCLUDE vpx_vp8_enc_asm_offsets.asm @@ -25,7 +25,7 @@ ; r3 vp8_extra_bits ; s0 vp8_coef_tree -|vp8cx_pack_mb_row_tokens_armv7| PROC +|vp8cx_pack_mb_row_tokens_armv5| PROC push {r4-r11, lr} sub sp, sp, #24 @@ -78,18 +78,11 @@ while_p_lt_stop movne lr, #2 ; i = 2 subne r8, r8, #1 ; --n - ; reverse the stream of bits to be packed. Normally - ; the most significant bit is peeled off and compared - ; in the form of (v >> --n) & 1. 
ARM architecture has - ; the ability to set a flag based on the value of the - ; bit shifted off the bottom of the register. To make - ; that happen the bitstream is reversed. - rbit r12, r6 rsb r4, r8, #32 ; 32-n ldr r10, [sp, #60] ; vp8_coef_tree ; v is kept in r12 during the token pack loop - lsr r12, r12, r4 ; v >>= 32 - n + lsl r12, r6, r4 ; r12 = v << 32 - n ; loop start token_loop @@ -99,7 +92,7 @@ token_loop ; Decisions are made based on the bit value shifted ; off of v, so set a flag here based on this. ; This value is refered to as "bb" - lsrs r12, r12, #1 ; bb = v >> n + lsls r12, r12, #1 ; bb = v >> n mul r4, r4, r7 ; ((range-1) * pp[i>>1])) ; bb can only be 0 or 1. So only execute this statement @@ -193,16 +186,15 @@ token_count_lt_zero ldr r10, [r12, #vp8_extra_bit_struct_tree] str r10, [sp, #4] ; b->tree - rbit r12, r7 ; reverse v rsb r4, r8, #32 - lsr r12, r12, r4 + lsl r12, r7, r4 mov lr, #0 ; i = 0 extra_bits_loop ldrb r4, [r9, lr, asr #1] ; pp[i>>1] sub r7, r5, #1 ; range-1 - lsrs r12, r12, #1 ; v >> n + lsls r12, r12, #1 ; v >> n mul r4, r4, r7 ; (range-1) * pp[i>>1] addcs lr, lr, #1 ; i + bb diff --git a/vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm similarity index 95% rename from vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm rename to vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm index 10a3d985125572df07bd4fc750e388c2dcc29894..0835164e5e8755ed0b648eca1263a668e4f84e2f 100644 --- a/vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm +++ b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm @@ -9,7 +9,7 @@ ; - EXPORT |vp8cx_pack_tokens_into_partitions_armv7| + EXPORT |vp8cx_pack_tokens_into_partitions_armv5| INCLUDE vpx_vp8_enc_asm_offsets.asm @@ -27,7 +27,7 @@ ; s1 vp8_extra_bits, ; s2 const vp8_tree_index *, -|vp8cx_pack_tokens_into_partitions_armv7| PROC +|vp8cx_pack_tokens_into_partitions_armv5| PROC push {r4-r11, lr} sub sp, sp, 
#44 @@ -106,18 +106,11 @@ while_p_lt_stop movne lr, #2 ; i = 2 subne r8, r8, #1 ; --n - ; reverse the stream of bits to be packed. Normally - ; the most significant bit is peeled off and compared - ; in the form of (v >> --n) & 1. ARM architecture has - ; the ability to set a flag based on the value of the - ; bit shifted off the bottom of the register. To make - ; that happen the bitstream is reversed. - rbit r12, r6 rsb r4, r8, #32 ; 32-n ldr r10, [sp, #88] ; vp8_coef_tree ; v is kept in r12 during the token pack loop - lsr r12, r12, r4 ; v >>= 32 - n + lsl r12, r6, r4 ; r12 = v << 32 - n ; loop start token_loop @@ -127,7 +120,7 @@ token_loop ; Decisions are made based on the bit value shifted ; off of v, so set a flag here based on this. ; This value is refered to as "bb" - lsrs r12, r12, #1 ; bb = v >> n + lsls r12, r12, #1 ; bb = v >> n mul r4, r4, r7 ; ((range-1) * pp[i>>1])) ; bb can only be 0 or 1. So only execute this statement @@ -221,16 +214,15 @@ token_count_lt_zero ldr r10, [r12, #vp8_extra_bit_struct_tree] str r10, [sp, #4] ; b->tree - rbit r12, r7 ; reverse v rsb r4, r8, #32 - lsr r12, r12, r4 + lsl r12, r7, r4 mov lr, #0 ; i = 0 extra_bits_loop ldrb r4, [r9, lr, asr #1] ; pp[i>>1] sub r7, r5, #1 ; range-1 - lsrs r12, r12, #1 ; v >> n + lsls r12, r12, #1 ; v >> n mul r4, r4, r7 ; (range-1) * pp[i>>1] addcs lr, lr, #1 ; i + bb diff --git a/vp8/encoder/arm/csystemdependent.c b/vp8/encoder/arm/csystemdependent.c deleted file mode 100644 index 8d70d635aefd6f9fa24a268e1819ec3fffbdd670..0000000000000000000000000000000000000000 --- a/vp8/encoder/arm/csystemdependent.c +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. 
All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vpx_ports/config.h" -#include "variance.h" -#include "onyx_int.h" - -void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction); -extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction); -extern void vpxyv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction); - -void vp8_cmachine_specific_config(VP8_COMP *cpi) -{ -#if CONFIG_RUNTIME_CPU_DETECT - cpi->rtcd.common = &cpi->common.rtcd; - -#if HAVE_ARMV7 - cpi->rtcd.variance.sad16x16 = vp8_sad16x16_neon; - cpi->rtcd.variance.sad16x8 = vp8_sad16x8_neon; - cpi->rtcd.variance.sad8x16 = vp8_sad8x16_neon; - cpi->rtcd.variance.sad8x8 = vp8_sad8x8_neon; - cpi->rtcd.variance.sad4x4 = vp8_sad4x4_neon; - - cpi->rtcd.variance.var4x4 = vp8_variance4x4_c; - cpi->rtcd.variance.var8x8 = vp8_variance8x8_neon; - cpi->rtcd.variance.var8x16 = vp8_variance8x16_neon; - cpi->rtcd.variance.var16x8 = vp8_variance16x8_neon; - cpi->rtcd.variance.var16x16 = vp8_variance16x16_neon; - - cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c; - cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_neon; - cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c; - cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c; - cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_neon; - - cpi->rtcd.variance.mse16x16 = vp8_mse16x16_neon; - cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c; - - cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_neon; - cpi->rtcd.variance.get8x8var = vp8_get8x8var_c; - cpi->rtcd.variance.get16x16var = vp8_get16x16var_c;; - cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_neon; - - cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_neon; - cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_neon; - cpi->rtcd.fdct.fast4x4 
= vp8_fast_fdct4x4_neon; - cpi->rtcd.fdct.fast8x4 = vp8_fast_fdct8x4_neon; - cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_neon; - - cpi->rtcd.encodemb.berr = vp8_block_error_c; - cpi->rtcd.encodemb.mberr = vp8_mbblock_error_c; - cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_c; - cpi->rtcd.encodemb.subb = vp8_subtract_b_neon; - cpi->rtcd.encodemb.submby = vp8_subtract_mby_neon; - cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_neon; - - cpi->rtcd.quantize.quantb = vp8_regular_quantize_b; - cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c; - /* The neon quantizer has not been updated to match the new exact - * quantizer introduced in commit e04e2935 - */ - /*cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_neon;*/ -#elif HAVE_ARMV6 - cpi->rtcd.variance.sad16x16 = vp8_sad16x16_c; - cpi->rtcd.variance.sad16x8 = vp8_sad16x8_c; - cpi->rtcd.variance.sad8x16 = vp8_sad8x16_c; - cpi->rtcd.variance.sad8x8 = vp8_sad8x8_c; - cpi->rtcd.variance.sad4x4 = vp8_sad4x4_c; - - cpi->rtcd.variance.var4x4 = vp8_variance4x4_c; - cpi->rtcd.variance.var8x8 = vp8_variance8x8_c; - cpi->rtcd.variance.var8x16 = vp8_variance8x16_c; - cpi->rtcd.variance.var16x8 = vp8_variance16x8_c; - cpi->rtcd.variance.var16x16 = vp8_variance16x16_c; - - cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c; - cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_c; - cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c; - cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c; - cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_c; - - cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c; - cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c; - - cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_c; - cpi->rtcd.variance.get8x8var = vp8_get8x8var_c; - cpi->rtcd.variance.get16x16var = vp8_get16x16var_c;; - cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_c; - - cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c; - cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_c; - 
cpi->rtcd.fdct.fast4x4 = vp8_fast_fdct4x4_c; - cpi->rtcd.fdct.fast8x4 = vp8_fast_fdct8x4_c; - cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_armv6; - - cpi->rtcd.encodemb.berr = vp8_block_error_c; - cpi->rtcd.encodemb.mberr = vp8_mbblock_error_c; - cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_c; - cpi->rtcd.encodemb.subb = vp8_subtract_b_c; - cpi->rtcd.encodemb.submby = vp8_subtract_mby_c; - cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_c; - - cpi->rtcd.quantize.quantb = vp8_regular_quantize_b; - cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c; -#else - //pure c - cpi->rtcd.variance.sad16x16 = vp8_sad16x16_c; - cpi->rtcd.variance.sad16x8 = vp8_sad16x8_c; - cpi->rtcd.variance.sad8x16 = vp8_sad8x16_c; - cpi->rtcd.variance.sad8x8 = vp8_sad8x8_c; - cpi->rtcd.variance.sad4x4 = vp8_sad4x4_c; - - cpi->rtcd.variance.var4x4 = vp8_variance4x4_c; - cpi->rtcd.variance.var8x8 = vp8_variance8x8_c; - cpi->rtcd.variance.var8x16 = vp8_variance8x16_c; - cpi->rtcd.variance.var16x8 = vp8_variance16x8_c; - cpi->rtcd.variance.var16x16 = vp8_variance16x16_c; - - cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c; - cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_c; - cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c; - cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c; - cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_c; - - cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c; - cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c; - - cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_c; - cpi->rtcd.variance.get8x8var = vp8_get8x8var_c; - cpi->rtcd.variance.get16x16var = vp8_get16x16var_c;; - cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_c; - - cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c; - cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_c; - cpi->rtcd.fdct.fast4x4 = vp8_fast_fdct4x4_c; - cpi->rtcd.fdct.fast8x4 = vp8_fast_fdct8x4_c; - cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_c; - - 
cpi->rtcd.encodemb.berr = vp8_block_error_c; - cpi->rtcd.encodemb.mberr = vp8_mbblock_error_c; - cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_c; - cpi->rtcd.encodemb.subb = vp8_subtract_b_c; - cpi->rtcd.encodemb.submby = vp8_subtract_mby_c; - cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_c; - - cpi->rtcd.quantize.quantb = vp8_regular_quantize_b; - cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c; -#endif -#endif - -#if HAVE_ARMV7 - vp8_yv12_copy_partial_frame_ptr = vpxyv12_copy_partial_frame_neon; -#else - vp8_yv12_copy_partial_frame_ptr = vp8_yv12_copy_partial_frame; -#endif -} diff --git a/vp8/encoder/arm/dct_arm.h b/vp8/encoder/arm/dct_arm.h index 774599bf030c893f434817c8c008906f64ed1321..41fa5d1928df9bba55dc30c5858f5236baabcd64 100644 --- a/vp8/encoder/arm/dct_arm.h +++ b/vp8/encoder/arm/dct_arm.h @@ -15,9 +15,11 @@ #if HAVE_ARMV6 extern prototype_fdct(vp8_short_walsh4x4_armv6); +#if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_fdct_walsh_short4x4 #define vp8_fdct_walsh_short4x4 vp8_short_walsh4x4_armv6 #endif +#endif #if HAVE_ARMV7 extern prototype_fdct(vp8_short_fdct4x4_neon); @@ -26,6 +28,7 @@ extern prototype_fdct(vp8_fast_fdct4x4_neon); extern prototype_fdct(vp8_fast_fdct8x4_neon); extern prototype_fdct(vp8_short_walsh4x4_neon); +#if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_fdct_short4x4 #define vp8_fdct_short4x4 vp8_short_fdct4x4_neon @@ -40,6 +43,7 @@ extern prototype_fdct(vp8_short_walsh4x4_neon); #undef vp8_fdct_walsh_short4x4 #define vp8_fdct_walsh_short4x4 vp8_short_walsh4x4_neon +#endif #endif diff --git a/vp8/encoder/arm/encodemb_arm.h b/vp8/encoder/arm/encodemb_arm.h index eb699433f4c79d3967dc75efaf7a5a201cac90a2..8fe453735594d97effe18b2c4d77d0dca8df3082 100644 --- a/vp8/encoder/arm/encodemb_arm.h +++ b/vp8/encoder/arm/encodemb_arm.h @@ -30,6 +30,7 @@ extern prototype_submbuv(vp8_subtract_mbuv_neon); //#undef vp8_encodemb_mbuverr //#define vp8_encodemb_mbuverr vp8_mbuverror_c +#if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_encodemb_subb #define 
vp8_encodemb_subb vp8_subtract_b_neon @@ -38,6 +39,7 @@ extern prototype_submbuv(vp8_subtract_mbuv_neon); #undef vp8_encodemb_submbuv #define vp8_encodemb_submbuv vp8_subtract_mbuv_neon +#endif #endif diff --git a/vp8/encoder/arm/variance_arm.h b/vp8/encoder/arm/variance_arm.h index 859e43f51524214bf88b4555c8860de8199c492d..fb9dd5a5b0402a94778b7d0a1084c6bec9119e01 100644 --- a/vp8/encoder/arm/variance_arm.h +++ b/vp8/encoder/arm/variance_arm.h @@ -38,6 +38,7 @@ extern prototype_sad(vp8_get16x16pred_error_neon); //extern prototype_variance2(vp8_get16x16var_c); extern prototype_sad(vp8_get4x4sse_cs_neon); +#if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_variance_sad4x4 #define vp8_variance_sad4x4 vp8_sad4x4_neon @@ -100,6 +101,7 @@ extern prototype_sad(vp8_get4x4sse_cs_neon); #undef vp8_variance_get4x4sse_cs #define vp8_variance_get4x4sse_cs vp8_get4x4sse_cs_neon +#endif #endif diff --git a/vp8/encoder/bitstream.h b/vp8/encoder/bitstream.h index 559631338d7d33639c6b8cbffb58a83557a08f0f..f5d148ea477482d2833324ea191a0c14526b58bc 100644 --- a/vp8/encoder/bitstream.h +++ b/vp8/encoder/bitstream.h @@ -12,25 +12,25 @@ #ifndef __INC_BITSTREAM_H #define __INC_BITSTREAM_H -#if HAVE_ARMV7 -void vp8cx_pack_tokens_armv7(vp8_writer *w, const TOKENEXTRA *p, int xcount, +#if HAVE_ARMV5TE +void vp8cx_pack_tokens_armv5(vp8_writer *w, const TOKENEXTRA *p, int xcount, vp8_token *, vp8_extra_bit_struct *, const vp8_tree_index *); -void vp8cx_pack_tokens_into_partitions_armv7(VP8_COMP *, unsigned char *, int , int *, +void vp8cx_pack_tokens_into_partitions_armv5(VP8_COMP *, unsigned char *, int , int *, vp8_token *, vp8_extra_bit_struct *, const vp8_tree_index *); -void vp8cx_pack_mb_row_tokens_armv7(VP8_COMP *cpi, vp8_writer *w, +void vp8cx_pack_mb_row_tokens_armv5(VP8_COMP *cpi, vp8_writer *w, vp8_token *, vp8_extra_bit_struct *, const vp8_tree_index *); # define pack_tokens(a,b,c) \ - vp8cx_pack_tokens_armv7(a,b,c,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree) + 
vp8cx_pack_tokens_armv5(a,b,c,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree) # define pack_tokens_into_partitions(a,b,c,d) \ - vp8cx_pack_tokens_into_partitions_armv7(a,b,c,d,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree) + vp8cx_pack_tokens_into_partitions_armv5(a,b,c,d,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree) # define pack_mb_row_tokens(a,b) \ - vp8cx_pack_mb_row_tokens_armv7(a,b,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree) + vp8cx_pack_mb_row_tokens_armv5(a,b,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree) #else # define pack_tokens(a,b,c) pack_tokens_c(a,b,c) # define pack_tokens_into_partitions(a,b,c,d) pack_tokens_into_partitions_c(a,b,c,d) diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c index 1acb73d9cbd04ac085be89c98975ac14e4ae3c77..520b08f51cae258f9d49287dcdfeff3a0b1ecc0b 100644 --- a/vp8/encoder/generic/csystemdependent.c +++ b/vp8/encoder/generic/csystemdependent.c @@ -15,6 +15,7 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi); +void vp8_arch_arm_encoder_init(VP8_COMP *cpi); void (*vp8_fast_quantize_b)(BLOCK *b, BLOCKD *d); @@ -94,4 +95,8 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi) vp8_arch_x86_encoder_init(cpi); #endif +#if ARCH_ARM + vp8_arch_arm_encoder_init(cpi); +#endif + } diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 53d68be52c5b9b812dfc5692e3aa89999e3a143d..7e1583dd9a2b7389731e314c1e22fe97a84141e6 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -31,6 +31,9 @@ #include "vpx_ports/vpx_timer.h" #include "vpxerrors.h" #include "temporal_filter.h" +#if ARCH_ARM +#include "vpx_ports/arm.h" +#endif #include <math.h> #include <stdio.h> @@ -2106,8 +2109,8 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) CHECK_MEM_ERROR(cpi->rdtok, vpx_calloc(256 * 3 / 2, sizeof(TOKENEXTRA))); CHECK_MEM_ERROR(cpi->mb.ss, vpx_calloc(sizeof(search_site), (MAX_MVSEARCH_STEPS * 8) + 1)); - vp8_cmachine_specific_config(cpi); vp8_create_common(&cpi->common); + 
vp8_cmachine_specific_config(cpi); vp8_init_config((VP8_PTR)cpi, oxcf); @@ -2852,9 +2855,20 @@ static void scale_and_extend_source(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) { //vp8_yv12_copy_frame_ptr(sd, &cpi->scaled_source); #if HAVE_ARMV7 - vp8_yv12_copy_src_frame_func_neon(sd, &cpi->scaled_source); -#else - vp8_yv12_copy_frame_ptr(sd, &cpi->scaled_source); +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_yv12_copy_src_frame_func_neon(sd, &cpi->scaled_source); + } +#if CONFIG_RUNTIME_CPU_DETECT + else +#endif +#endif +#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT + { + vp8_yv12_copy_frame_ptr(sd, &cpi->scaled_source); + } #endif cpi->Source = &cpi->scaled_source; @@ -4624,10 +4638,10 @@ static void Pass2Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest, #if HAVE_ARMV7 extern void vp8_push_neon(INT64 *store); extern void vp8_pop_neon(INT64 *store); -static INT64 store_reg[8]; #endif int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, INT64 time_stamp, INT64 end_time) { + INT64 store_reg[8]; VP8_COMP *cpi = (VP8_COMP *) ptr; VP8_COMMON *cm = &cpi->common; struct vpx_usec_timer timer; @@ -4636,7 +4650,12 @@ int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CON return -1; #if HAVE_ARMV7 - vp8_push_neon(store_reg); +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_push_neon(store_reg); + } #endif vpx_usec_timer_start(&timer); @@ -4645,7 +4664,12 @@ int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CON if (cpi->source_buffer_count != 0 && cpi->source_buffer_count >= cpi->oxcf.lag_in_frames) { #if HAVE_ARMV7 - vp8_pop_neon(store_reg); +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_pop_neon(store_reg); + } #endif return -1; } @@ -4686,9 +4710,20 @@ int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CON s->source_time_stamp = time_stamp; 
s->source_frame_flags = frame_flags; #if HAVE_ARMV7 - vp8_yv12_copy_src_frame_func_neon(sd, &s->source_buffer); -#else - vp8_yv12_copy_frame_ptr(sd, &s->source_buffer); +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_yv12_copy_src_frame_func_neon(sd, &s->source_buffer); + } +#if CONFIG_RUNTIME_CPU_DETECT + else +#endif +#endif +#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT + { + vp8_yv12_copy_frame_ptr(sd, &s->source_buffer); + } #endif cpi->source_buffer_count = 1; } @@ -4697,14 +4732,19 @@ int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CON cpi->time_receive_data += vpx_usec_timer_elapsed(&timer); #if HAVE_ARMV7 - vp8_pop_neon(store_reg); +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_pop_neon(store_reg); + } #endif return 0; } int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, INT64 *time_stamp, INT64 *time_end, int flush) { - + INT64 store_reg[8]; VP8_COMP *cpi = (VP8_COMP *) ptr; VP8_COMMON *cm = &cpi->common; struct vpx_usec_timer tsctimer; @@ -4715,7 +4755,12 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon return -1; #if HAVE_ARMV7 - vp8_push_neon(store_reg); +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_push_neon(store_reg); + } #endif vpx_usec_timer_start(&cmptimer); @@ -4867,7 +4912,12 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon #endif #if HAVE_ARMV7 - vp8_pop_neon(store_reg); +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_pop_neon(store_reg); + } #endif return -1; } @@ -4910,7 +4960,12 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon if (!cpi) { #if HAVE_ARMV7 - vp8_pop_neon(store_reg); +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_pop_neon(store_reg); + } #endif return 0; } @@ -5099,7 
+5154,12 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon #endif #if HAVE_ARMV7 - vp8_pop_neon(store_reg); +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_pop_neon(store_reg); + } #endif return 0; diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c index 79e07dbc0e8e0a383cf1d0c5ec741bdc343d5006..09e8b5412b5895fd3050b2c0941f5047037578ec 100644 --- a/vp8/encoder/picklpf.c +++ b/vp8/encoder/picklpf.c @@ -16,6 +16,9 @@ #include "vpx_scale/yv12extend.h" #include "vpx_scale/vpxscale.h" #include "alloccommon.h" +#if ARCH_ARM +#include "vpx_ports/arm.h" +#endif extern void vp8_loop_filter_frame(VP8_COMMON *cm, MACROBLOCKD *mbd, int filt_val); extern void vp8_loop_filter_frame_yonly(VP8_COMMON *cm, MACROBLOCKD *mbd, int filt_val, int sharpness_lvl); @@ -306,9 +309,20 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) // Make a copy of the unfiltered / processed recon buffer #if HAVE_ARMV7 - vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(cm->frame_to_show, &cpi->last_frame_uf); -#else - vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cpi->last_frame_uf); +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(cm->frame_to_show, &cpi->last_frame_uf); + } +#if CONFIG_RUNTIME_CPU_DETECT + else +#endif +#endif +#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT + { + vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cpi->last_frame_uf); + } #endif if (cm->frame_type == KEY_FRAME) @@ -343,9 +357,20 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) // Re-instate the unfiltered frame #if HAVE_ARMV7 - vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show); -#else - vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show); +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + 
vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show); + } +#if CONFIG_RUNTIME_CPU_DETECT + else +#endif +#endif +#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT + { + vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show); + } #endif while (filter_step > 0) @@ -372,9 +397,20 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) // Re-instate the unfiltered frame #if HAVE_ARMV7 - vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show); -#else - vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show); +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show); + } +#if CONFIG_RUNTIME_CPU_DETECT + else +#endif +#endif +#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT + { + vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show); + } #endif // If value is close to the best so far then bias towards a lower loop filter value. @@ -401,9 +437,20 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) // Re-instate the unfiltered frame #if HAVE_ARMV7 - vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show); -#else - vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show); +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show); + } +#if CONFIG_RUNTIME_CPU_DETECT + else +#endif +#endif +#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT + { + vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show); + } #endif // Was it better than the previous best? 
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk index ecca18a0a461d379918b975a5d3b9516a74538ec..3b5aaa54889b415005832e29ed9b615bfd616a27 100644 --- a/vp8/vp8_common.mk +++ b/vp8/vp8_common.mk @@ -112,6 +112,8 @@ VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/postproc_mmx.asm VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/postproc_sse2.asm endif +VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/arm_systemdependent.c + # common (c) VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/bilinearfilter_arm.c VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/filter_arm.c @@ -119,15 +121,8 @@ VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/loopfilter_arm.c VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/recon_arm.c VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/reconintra4x4_arm.c VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/reconintra_arm.c -VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/systemdependent.c VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/vpx_asm_offsets.c -VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6) += common/filter_c.c -VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6) += common/idctllm.c -VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6) += common/recon.c -VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6) += common/reconintra4x4.c -VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6) += common/generic/systemdependent.c - # common (armv6) VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/bilinearfilter_v6$(ASM) VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/copymem8x4_v6$(ASM) diff --git a/vp8/vp8cx_arm.mk b/vp8/vp8cx_arm.mk index 1424bd15a2bbda115dbc4ef1973a3fc275d44234..d126faf32fcd98346ed1eaf7518dc164ee543005 100644 --- a/vp8/vp8cx_arm.mk +++ b/vp8/vp8cx_arm.mk @@ -13,17 +13,22 @@ #File list for arm # encoder -VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/csystemdependent.c +VP8_CX_SRCS-$(ARCH_ARM) += encoder/arm/arm_csystemdependent.c VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/encodemb_arm.c VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/quantize_arm.c VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/picklpf_arm.c -VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/boolhuff_arm.c 
+VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/boolhuff_arm.c VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/mcomp_arm.c -VP8_CX_SRCS_REMOVE-$(HAVE_ARMV6) += encoder/generic/csystemdependent.c -VP8_CX_SRCS_REMOVE-$(HAVE_ARMV7) += encoder/boolhuff.c -VP8_CX_SRCS_REMOVE-$(HAVE_ARMV7) += encoder/mcomp.c +VP8_CX_SRCS_REMOVE-$(HAVE_ARMV5TE) += encoder/boolhuff.c + +#File list for armv5te +# encoder +VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/armv5te/boolhuff_armv5te$(ASM) +VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/armv5te/vp8_packtokens_armv5$(ASM) +VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/armv5te/vp8_packtokens_mbrow_armv5$(ASM) +VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/armv5te/vp8_packtokens_partitions_armv5$(ASM) #File list for armv6 # encoder @@ -44,10 +49,6 @@ VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_subpixelvariance8x8_neon$(ASM VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_subpixelvariance16x16_neon$(ASM) VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_subpixelvariance16x16s_neon$(ASM) VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_memcpy_neon$(ASM) -VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_packtokens_armv7$(ASM) -VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_packtokens_mbrow_armv7$(ASM) -VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_packtokens_partitions_armv7$(ASM) -VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/boolhuff_armv7$(ASM) VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_shortwalsh4x4_neon$(ASM) VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/vpx_vp8_enc_asm_offsets.c diff --git a/vp8/vp8dx_arm.mk b/vp8/vp8dx_arm.mk index ae0610cdac2e82714ec25d746110c2f732379c96..0803a9cb0958169b3aac7f479a1ee0df4267b50a 100644 --- a/vp8/vp8dx_arm.mk +++ b/vp8/vp8dx_arm.mk @@ -11,11 +11,9 @@ #VP8_DX_SRCS list is modified according to different platforms. 
+VP8_DX_SRCS-$(ARCH_ARM) += decoder/arm/arm_dsystemdependent.c
+
 VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/dequantize_arm.c
-VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/dsystemdependent.c
-VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6) += decoder/generic/dsystemdependent.c
-VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6) += decoder/dequantize.c
-VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6) += decoder/idct_blk.c
 VP8_DX_SRCS-$(CONFIG_ARM_ASM_DETOK) += decoder/arm/detokenize$(ASM)
 
 #File list for armv6
diff --git a/vpx_ports/arm.h b/vpx_ports/arm.h
new file mode 100644
index 0000000000000000000000000000000000000000..81af1f11ffecd8f8832258fa2fc2751ccf736286
--- /dev/null
+++ b/vpx_ports/arm.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VPX_PORTS_ARM_H
+#define VPX_PORTS_ARM_H
+#include <stdlib.h>
+#include "config.h"
+
+/*ARMv5TE "Enhanced DSP" instructions.*/
+#define HAS_EDSP 0x01
+/*ARMv6 "Parallel" or "Media" instructions.*/
+#define HAS_MEDIA 0x02
+/*ARMv7 optional NEON instructions.*/
+#define HAS_NEON 0x04
+
+int arm_cpu_caps(void);
+
+#endif
+
diff --git a/vpx_ports/arm_cpudetect.c b/vpx_ports/arm_cpudetect.c
new file mode 100644
index 0000000000000000000000000000000000000000..4109924cf3ad2f8ee1ce97031d092184fe56f749
--- /dev/null
+++ b/vpx_ports/arm_cpudetect.c
@@ -0,0 +1,220 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include "arm.h"
+
+/* Reads a capability override from the VPX_SIMD_CAPS environment variable.
+ * When the variable is set and non-empty, stores its value (parsed by
+ * strtol() with base 0) in *flags and returns 0.
+ * Otherwise stores 0 in *flags and returns -1.
+ */
+static int arm_cpu_env_flags(int *flags)
+{
+    char *env;
+    env = getenv("VPX_SIMD_CAPS");
+    if (env && *env)
+    {
+        *flags = (int)strtol(env, NULL, 0);
+        return 0;
+    }
+    *flags = 0;
+    return -1;
+}
+
+/* Returns the value of the VPX_SIMD_CAPS_MASK environment variable (parsed
+ * by strtol() with base 0), or ~0 when it is unset or empty.
+ */
+static int arm_cpu_env_mask(void)
+{
+    char *env;
+    env = getenv("VPX_SIMD_CAPS_MASK");
+    return env && *env ? (int)strtol(env, NULL, 0) : ~0;
+}
+
+
+#if defined(_MSC_VER)
+/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
+#define WIN32_LEAN_AND_MEAN
+#define WIN32_EXTRA_LEAN
+#include <windows.h>
+
+/* Returns the HAS_* capability bits of the running CPU.
+ * A VPX_SIMD_CAPS override is returned as-is, without masking. Otherwise
+ * each compiled-in extension is probed by executing one of its instructions
+ * under an illegal-instruction handler, and the result is ANDed with the
+ * VPX_SIMD_CAPS_MASK value.
+ */
+int arm_cpu_caps(void)
+{
+    int flags;
+    int mask;
+    if (!arm_cpu_env_flags(&flags))
+    {
+        return flags;
+    }
+    /* No override: arm_cpu_env_flags() has set flags to 0. */
+    mask = arm_cpu_env_mask();
+    /* MSVC has no inline __asm support for ARM, but it does let you __emit
+     * instructions via their assembled hex code.
+     * All of these instructions should be essentially nops.
+     */
+#if HAVE_ARMV5TE
+    if (mask & HAS_EDSP)
+    {
+        __try
+        {
+            /*PLD [r13]*/
+            __emit(0xF5DDF000);
+            flags |= HAS_EDSP;
+        }
+        __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION)
+        {
+            /*Ignore exception.*/
+        }
+    }
+#endif
+#if HAVE_ARMV6
+    if (mask & HAS_MEDIA)
+    {
+        __try
+        {
+            /*SHADD8 r3,r3,r3*/
+            __emit(0xE6333F93);
+            flags |= HAS_MEDIA;
+        }
+        __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION)
+        {
+            /*Ignore exception.*/
+        }
+    }
+#endif
+#if HAVE_ARMV7
+    if (mask & HAS_NEON)
+    {
+        __try
+        {
+            /*VORR q0,q0,q0*/
+            __emit(0xF2200150);
+            flags |= HAS_NEON;
+        }
+        __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION)
+        {
+            /*Ignore exception.*/
+        }
+    }
+#endif
+    return flags & mask;
+}
+
+#elif defined(__linux__)
+#include <stdio.h>
+
+/* Returns the HAS_* capability bits of the running CPU.
+ * A VPX_SIMD_CAPS override is returned as-is, without masking. Otherwise
+ * the bits are derived from /proc/cpuinfo and ANDed with the
+ * VPX_SIMD_CAPS_MASK value; if the file cannot be opened no bits are set.
+ */
+int arm_cpu_caps(void)
+{
+    FILE *fin;
+    int flags;
+    int mask;
+    if (!arm_cpu_env_flags(&flags))
+    {
+        return flags;
+    }
+    /* No override: arm_cpu_env_flags() has set flags to 0. */
+    mask = arm_cpu_env_mask();
+    /* Reading /proc/self/auxv would be easier, but that doesn't work reliably
+     * on Android.
+     * This also means that detection will fail in Scratchbox.
+     */
+    fin = fopen("/proc/cpuinfo", "r");
+    if (fin != NULL)
+    {
+        /* 512 should be enough for anybody (it's even enough for all the flags
+         * that x86 has accumulated... so far).
+         */
+        char buf[512];
+        while (fgets(buf, sizeof(buf), fin) != NULL)
+        {
+#if HAVE_ARMV5TE || HAVE_ARMV7
+            /* strncmp() stops at the terminator of a short line. */
+            if (strncmp(buf, "Features", 8) == 0)
+            {
+                char *p;
+#if HAVE_ARMV5TE
+                p = strstr(buf, " edsp");
+                /* Require a whole word, not a prefix of a longer flag. */
+                if (p != NULL && (p[5] == ' ' || p[5] == '\n'))
+                {
+                    flags |= HAS_EDSP;
+                }
+#endif
+#if HAVE_ARMV7
+                p = strstr(buf, " neon");
+                if (p != NULL && (p[5] == ' ' || p[5] == '\n'))
+                {
+                    flags |= HAS_NEON;
+                }
+#endif
+            }
+#endif
+#if HAVE_ARMV6
+            if (strncmp(buf, "CPU architecture:", 17) == 0)
+            {
+                /* Architecture version 6 and up is taken to have MEDIA. */
+                long version;
+                version = strtol(buf + 17, NULL, 10);
+                if (version >= 6)
+                {
+                    flags |= HAS_MEDIA;
+                }
+            }
+#endif
+        }
+        fclose(fin);
+    }
+    return flags & mask;
+}
+
+#elif !CONFIG_RUNTIME_CPU_DETECT
+
+/* Returns the HAS_* bits of every extension enabled by the HAVE_ARMV* build
+ * macros. A VPX_SIMD_CAPS override is returned as-is, without masking;
+ * otherwise the result is ANDed with the VPX_SIMD_CAPS_MASK value.
+ */
+int arm_cpu_caps(void)
+{
+    int flags;
+    int mask;
+    if (!arm_cpu_env_flags(&flags))
+    {
+        return flags;
+    }
+    mask = arm_cpu_env_mask();
+#if HAVE_ARMV5TE
+    flags |= HAS_EDSP;
+#endif
+#if HAVE_ARMV6
+    flags |= HAS_MEDIA;
+#endif
+#if HAVE_ARMV7
+    flags |= HAS_NEON;
+#endif
+    return flags & mask;
+}
+
+#else
+#error "--enable-runtime-cpu-detect selected, but no CPU detection method " \
+ "available for your platform. Reconfigure without --enable-runtime-cpu-detect."
+#endif diff --git a/vpx_scale/arm/scalesystemdependant.c b/vpx_scale/arm/scalesystemdependant.c index 1e8bcb89d35b13cf278153e760729880fe0d7ae5..fee76fff78c0acb6539be0989762cbea0d33b535 100644 --- a/vpx_scale/arm/scalesystemdependant.c +++ b/vpx_scale/arm/scalesystemdependant.c @@ -10,6 +10,7 @@ #include "vpx_ports/config.h" +#include "vpx_ports/arm.h" #include "vpx_scale/vpxscale.h" @@ -47,6 +48,9 @@ extern void vp8_yv12_copy_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CO ****************************************************************************/ void vp8_scale_machine_specific_config() { +#if HAVE_ARMV7 && CONFIG_RUNTIME_CPU_DETECT + int flags; +#endif /* vp8_horizontal_line_1_2_scale = horizontal_line_1_2_scale_armv4; vp8_vertical_band_1_2_scale = vertical_band_1_2_scale_armv4; @@ -73,14 +77,20 @@ void vp8_scale_machine_specific_config() vp8_horizontal_line_5_4_scale = vp8cx_horizontal_line_5_4_scale_c; */ -#if HAVE_ARMV7 - vp8_yv12_extend_frame_borders_ptr = vp8_yv12_extend_frame_borders_neon; - vp8_yv12_copy_frame_yonly_ptr = vp8_yv12_copy_frame_yonly_neon; - vp8_yv12_copy_frame_ptr = vp8_yv12_copy_frame_neon; -#else +#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT vp8_yv12_extend_frame_borders_ptr = vp8_yv12_extend_frame_borders; vp8_yv12_copy_frame_yonly_ptr = vp8_yv12_copy_frame_yonly; vp8_yv12_copy_frame_ptr = vp8_yv12_copy_frame; #endif - +#if HAVE_ARMV7 +#if CONFIG_RUNTIME_CPU_DETECT + flags = arm_cpu_caps(); + if (flags & HAS_NEON) +#endif + { + vp8_yv12_extend_frame_borders_ptr = vp8_yv12_extend_frame_borders_neon; + vp8_yv12_copy_frame_yonly_ptr = vp8_yv12_copy_frame_yonly_neon; + vp8_yv12_copy_frame_ptr = vp8_yv12_copy_frame_neon; + } +#endif }