diff --git a/libs.mk b/libs.mk
index 4beaa50cbd9ed592311f9f0d43e7fdb34b8ee1d8..9ded3945a71074dab21c1b52cdea9a6ea28b4869 100644
--- a/libs.mk
+++ b/libs.mk
@@ -93,6 +93,7 @@ CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86.h
 CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86_abi_support.asm
 CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86_cpuid.c
 endif
+CODEC_SRCS-$(ARCH_ARM) += vpx_ports/arm_cpudetect.c
 CODEC_SRCS-$(ARCH_ARM) += $(BUILD_PFX)vpx_config.asm
 CODEC_EXPORTS-$(BUILD_LIBVPX) += vpx/exports_com
 CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc
diff --git a/vp8/common/arm/arm_systemdependent.c b/vp8/common/arm/arm_systemdependent.c
new file mode 100644
index 0000000000000000000000000000000000000000..fe62fae134d4cbd7ee3c8b58719df81a80ccae0f
--- /dev/null
+++ b/vp8/common/arm/arm_systemdependent.c
@@ -0,0 +1,146 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/config.h"
+#include "vpx_ports/arm.h"
+#include "g_common.h"
+#include "pragmas.h"
+#include "subpixel.h"
+#include "loopfilter.h"
+#include "recon.h"
+#include "idct.h"
+#include "onyxc_int.h"
+
+extern void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x);
+extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x);
+extern void vp8_build_intra_predictors_mby_neon(MACROBLOCKD *x);
+
+extern void (*vp8_build_intra_predictors_mby_s_ptr)(MACROBLOCKD *x);
+extern void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x);
+extern void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x);
+
+/*
+ * ARM-specific initialisation of the common (encoder/decoder shared) function
+ * tables.
+ *
+ * With CONFIG_RUNTIME_CPU_DETECT the CPU capabilities reported by
+ * arm_cpu_caps() are stored in ctx->rtcd.flags and the RTCD entries are
+ * overridden with ARMv6 and then NEON versions when the matching capability
+ * bit is set; NEON assignments come last and therefore win over ARMv6 ones.
+ *
+ * Without runtime detection only the intra-predictor pointers are assigned,
+ * unconditionally, according to the HAVE_ARMV6 / HAVE_ARMV7 build flags.
+ */
+void vp8_arch_arm_common_init(VP8_COMMON *ctx)
+{
+#if CONFIG_RUNTIME_CPU_DETECT
+    VP8_COMMON_RTCD *rtcd = &ctx->rtcd;
+    int flags = arm_cpu_caps();
+
+    /* Cache the capability mask; other init code reads it back from here. */
+    rtcd->flags = flags;
+
+    /* Override default functions with fastest ones for this CPU. */
+#if HAVE_ARMV6
+    if (flags & HAS_MEDIA)
+    {
+        rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_armv6;
+        rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_armv6;
+        rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_armv6;
+        rtcd->subpix.sixtap4x4     = vp8_sixtap_predict_armv6;
+        rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_armv6;
+        rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_armv6;
+        rtcd->subpix.bilinear8x4   = vp8_bilinear_predict8x4_armv6;
+        rtcd->subpix.bilinear4x4   = vp8_bilinear_predict4x4_armv6;
+
+        rtcd->idct.idct1        = vp8_short_idct4x4llm_1_v6;
+        rtcd->idct.idct16       = vp8_short_idct4x4llm_v6_dual;
+        rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_v6;
+        rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_v6;
+
+        rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_armv6;
+        rtcd->loopfilter.normal_b_v  = vp8_loop_filter_bv_armv6;
+        rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_armv6;
+        rtcd->loopfilter.normal_b_h  = vp8_loop_filter_bh_armv6;
+        rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_armv6;
+        rtcd->loopfilter.simple_b_v  = vp8_loop_filter_bvs_armv6;
+        rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_armv6;
+        rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_armv6;
+
+        rtcd->recon.copy16x16   = vp8_copy_mem16x16_v6;
+        rtcd->recon.copy8x8     = vp8_copy_mem8x8_v6;
+        rtcd->recon.copy8x4     = vp8_copy_mem8x4_v6;
+        rtcd->recon.recon       = vp8_recon_b_armv6;
+        rtcd->recon.recon2      = vp8_recon2b_armv6;
+        rtcd->recon.recon4      = vp8_recon4b_armv6;
+    }
+#endif
+
+#if HAVE_ARMV7
+    if (flags & HAS_NEON)
+    {
+        rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_neon;
+        rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_neon;
+        rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_neon;
+        rtcd->subpix.sixtap4x4     = vp8_sixtap_predict_neon;
+        rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_neon;
+        rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_neon;
+        rtcd->subpix.bilinear8x4   = vp8_bilinear_predict8x4_neon;
+        rtcd->subpix.bilinear4x4   = vp8_bilinear_predict4x4_neon;
+
+        rtcd->idct.idct1        = vp8_short_idct4x4llm_1_neon;
+        rtcd->idct.idct16       = vp8_short_idct4x4llm_neon;
+        rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_neon;
+        rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_neon;
+
+        rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_neon;
+        rtcd->loopfilter.normal_b_v  = vp8_loop_filter_bv_neon;
+        rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_neon;
+        rtcd->loopfilter.normal_b_h  = vp8_loop_filter_bh_neon;
+        rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_neon;
+        rtcd->loopfilter.simple_b_v  = vp8_loop_filter_bvs_neon;
+        rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_neon;
+        rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_neon;
+
+        rtcd->recon.copy16x16   = vp8_copy_mem16x16_neon;
+        rtcd->recon.copy8x8     = vp8_copy_mem8x8_neon;
+        rtcd->recon.copy8x4     = vp8_copy_mem8x4_neon;
+        rtcd->recon.recon       = vp8_recon_b_neon;
+        rtcd->recon.recon2      = vp8_recon2b_neon;
+        rtcd->recon.recon4      = vp8_recon4b_neon;
+    }
+#endif
+
+#endif
+
+    /* The intra predictors are global pointers, not RTCD table entries. */
+#if HAVE_ARMV6
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (flags & HAS_MEDIA)
+#endif
+    {
+        vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby;
+        vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s;
+    }
+#endif
+
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (flags & HAS_NEON)
+#endif
+    {
+        vp8_build_intra_predictors_mby_ptr =
+         vp8_build_intra_predictors_mby_neon;
+        vp8_build_intra_predictors_mby_s_ptr =
+         vp8_build_intra_predictors_mby_s_neon;
+    }
+#endif
+}
diff --git a/vp8/common/arm/idct_arm.h b/vp8/common/arm/idct_arm.h
index f28d7f649ea0c541c2b62c8f4c524db6df9b317f..8b8d179172b841f1b2be3226c1756e5201ca1b25 100644
--- a/vp8/common/arm/idct_arm.h
+++ b/vp8/common/arm/idct_arm.h
@@ -19,6 +19,7 @@ extern prototype_idct_scalar_add(vp8_dc_only_idct_add_v6);
 extern prototype_second_order(vp8_short_inv_walsh4x4_1_v6);
 extern prototype_second_order(vp8_short_inv_walsh4x4_v6);
 
+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_idct_idct1
 #define vp8_idct_idct1 vp8_short_idct4x4llm_1_v6
 
@@ -34,6 +35,7 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_v6);
 #undef  vp8_idct_iwalsh16
 #define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_v6
 #endif
+#endif
 
 #if HAVE_ARMV7
 extern prototype_idct(vp8_short_idct4x4llm_1_neon);
@@ -42,6 +44,7 @@ extern prototype_idct_scalar_add(vp8_dc_only_idct_add_neon);
 extern prototype_second_order(vp8_short_inv_walsh4x4_1_neon);
 extern prototype_second_order(vp8_short_inv_walsh4x4_neon);
 
+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_idct_idct1
 #define vp8_idct_idct1 vp8_short_idct4x4llm_1_neon
 
@@ -57,5 +60,6 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_neon);
 #undef  vp8_idct_iwalsh16
 #define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_neon
 #endif
+#endif
 
 #endif
diff --git a/vp8/common/arm/loopfilter_arm.h b/vp8/common/arm/loopfilter_arm.h
index 6c3628ae939e7138e9d32ca0213d4f142ea65726..cd62207d7012961cba8b9aa7109d266cc4b9d609 100644
--- a/vp8/common/arm/loopfilter_arm.h
+++ b/vp8/common/arm/loopfilter_arm.h
@@ -22,6 +22,7 @@ extern prototype_loopfilter_block(vp8_loop_filter_bvs_armv6);
 extern prototype_loopfilter_block(vp8_loop_filter_mbhs_armv6);
 extern prototype_loopfilter_block(vp8_loop_filter_bhs_armv6);
 
+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_lf_normal_mb_v
 #define vp8_lf_normal_mb_v vp8_loop_filter_mbv_armv6
 
@@ -46,6 +47,7 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_armv6);
 #undef  vp8_lf_simple_b_h
 #define vp8_lf_simple_b_h vp8_loop_filter_bhs_armv6
 #endif
+#endif
 
 #if HAVE_ARMV7
 extern prototype_loopfilter_block(vp8_loop_filter_mbv_neon);
@@ -57,6 +59,7 @@ extern prototype_loopfilter_block(vp8_loop_filter_bvs_neon);
 extern prototype_loopfilter_block(vp8_loop_filter_mbhs_neon);
 extern prototype_loopfilter_block(vp8_loop_filter_bhs_neon);
 
+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_lf_normal_mb_v
 #define vp8_lf_normal_mb_v vp8_loop_filter_mbv_neon
 
@@ -81,5 +84,6 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_neon);
 #undef  vp8_lf_simple_b_h
 #define vp8_lf_simple_b_h vp8_loop_filter_bhs_neon
 #endif
+#endif
 
 #endif
diff --git a/vp8/common/arm/recon_arm.h b/vp8/common/arm/recon_arm.h
index 18855a3c0365fd83bed2190d5270ed8b57c1c30b..c30f6dc2dcf9a69612d8e0993ee8839477a0843d 100644
--- a/vp8/common/arm/recon_arm.h
+++ b/vp8/common/arm/recon_arm.h
@@ -21,6 +21,7 @@ extern prototype_copy_block(vp8_copy_mem8x8_v6);
 extern prototype_copy_block(vp8_copy_mem8x4_v6);
 extern prototype_copy_block(vp8_copy_mem16x16_v6);
 
+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_recon_recon
 #define vp8_recon_recon vp8_recon_b_armv6
 
@@ -39,6 +40,7 @@ extern prototype_copy_block(vp8_copy_mem16x16_v6);
 #undef  vp8_recon_copy16x16
 #define vp8_recon_copy16x16 vp8_copy_mem16x16_v6
 #endif
+#endif
 
 #if HAVE_ARMV7
 extern prototype_recon_block(vp8_recon_b_neon);
@@ -49,6 +51,7 @@ extern prototype_copy_block(vp8_copy_mem8x8_neon);
 extern prototype_copy_block(vp8_copy_mem8x4_neon);
 extern prototype_copy_block(vp8_copy_mem16x16_neon);
 
+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_recon_recon
 #define vp8_recon_recon vp8_recon_b_neon
 
@@ -67,5 +70,6 @@ extern prototype_copy_block(vp8_copy_mem16x16_neon);
 #undef  vp8_recon_copy16x16
 #define vp8_recon_copy16x16 vp8_copy_mem16x16_neon
 #endif
+#endif
 
 #endif
diff --git a/vp8/common/arm/subpixel_arm.h b/vp8/common/arm/subpixel_arm.h
index 53600e547968efee0d7fce071ee72d5606012ace..6288538d06fa19b208d54170009157e5a2682f64 100644
--- a/vp8/common/arm/subpixel_arm.h
+++ b/vp8/common/arm/subpixel_arm.h
@@ -22,6 +22,7 @@ extern prototype_subpixel_predict(vp8_bilinear_predict8x8_armv6);
 extern prototype_subpixel_predict(vp8_bilinear_predict8x4_armv6);
 extern prototype_subpixel_predict(vp8_bilinear_predict4x4_armv6);
 
+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_subpix_sixtap16x16
 #define vp8_subpix_sixtap16x16 vp8_sixtap_predict16x16_armv6
 
@@ -46,6 +47,7 @@ extern prototype_subpixel_predict(vp8_bilinear_predict4x4_armv6);
 #undef  vp8_subpix_bilinear4x4
 #define vp8_subpix_bilinear4x4 vp8_bilinear_predict4x4_armv6
 #endif
+#endif
 
 #if HAVE_ARMV7
 extern prototype_subpixel_predict(vp8_sixtap_predict16x16_neon);
@@ -57,6 +59,7 @@ extern prototype_subpixel_predict(vp8_bilinear_predict8x8_neon);
 extern prototype_subpixel_predict(vp8_bilinear_predict8x4_neon);
 extern prototype_subpixel_predict(vp8_bilinear_predict4x4_neon);
 
+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_subpix_sixtap16x16
 #define vp8_subpix_sixtap16x16 vp8_sixtap_predict16x16_neon
 
@@ -81,5 +84,6 @@ extern prototype_subpixel_predict(vp8_bilinear_predict4x4_neon);
 #undef  vp8_subpix_bilinear4x4
 #define vp8_subpix_bilinear4x4 vp8_bilinear_predict4x4_neon
 #endif
+#endif
 
 #endif
diff --git a/vp8/common/arm/systemdependent.c b/vp8/common/arm/systemdependent.c
deleted file mode 100644
index 1eed97e02f50ed991a82209514496b192d79cb9d..0000000000000000000000000000000000000000
--- a/vp8/common/arm/systemdependent.c
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vpx_ports/config.h"
-#include "g_common.h"
-#include "pragmas.h"
-#include "subpixel.h"
-#include "loopfilter.h"
-#include "recon.h"
-#include "idct.h"
-#include "onyxc_int.h"
-
-void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x);
-extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x);
-extern void vp8_build_intra_predictors_mby_neon(MACROBLOCKD *x);
-
-void (*vp8_build_intra_predictors_mby_s_ptr)(MACROBLOCKD *x);
-extern void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x);
-extern void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x);
-
-void vp8_machine_specific_config(VP8_COMMON *ctx)
-{
-#if CONFIG_RUNTIME_CPU_DETECT
-    VP8_COMMON_RTCD *rtcd = &ctx->rtcd;
-
-#if HAVE_ARMV7
-    rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_neon;
-    rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_neon;
-    rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_neon;
-    rtcd->subpix.sixtap4x4     = vp8_sixtap_predict_neon;
-    rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_neon;
-    rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_neon;
-    rtcd->subpix.bilinear8x4   = vp8_bilinear_predict8x4_neon;
-    rtcd->subpix.bilinear4x4   = vp8_bilinear_predict4x4_neon;
-
-    rtcd->idct.idct1        = vp8_short_idct4x4llm_1_neon;
-    rtcd->idct.idct16       = vp8_short_idct4x4llm_neon;
-    rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_neon;
-    rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_neon;
-
-    rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_neon;
-    rtcd->loopfilter.normal_b_v  = vp8_loop_filter_bv_neon;
-    rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_neon;
-    rtcd->loopfilter.normal_b_h  = vp8_loop_filter_bh_neon;
-    rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_neon;
-    rtcd->loopfilter.simple_b_v  = vp8_loop_filter_bvs_neon;
-    rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_neon;
-    rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_neon;
-
-    rtcd->recon.copy16x16   = vp8_copy_mem16x16_neon;
-    rtcd->recon.copy8x8     = vp8_copy_mem8x8_neon;
-    rtcd->recon.copy8x4     = vp8_copy_mem8x4_neon;
-    rtcd->recon.recon       = vp8_recon_b_neon;
-    rtcd->recon.recon2      = vp8_recon2b_neon;
-    rtcd->recon.recon4      = vp8_recon4b_neon;
-#elif HAVE_ARMV6
-
-    rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_armv6;
-    rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_armv6;
-    rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_armv6;
-    rtcd->subpix.sixtap4x4     = vp8_sixtap_predict_armv6;
-    rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_armv6;
-    rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_armv6;
-    rtcd->subpix.bilinear8x4   = vp8_bilinear_predict8x4_armv6;
-    rtcd->subpix.bilinear4x4   = vp8_bilinear_predict4x4_armv6;
-
-    rtcd->idct.idct1        = vp8_short_idct4x4llm_1_v6;
-    rtcd->idct.idct16       = vp8_short_idct4x4llm_v6_dual;
-    rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_armv6;
-    rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_armv6;
-
-    rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_armv6;
-    rtcd->loopfilter.normal_b_v  = vp8_loop_filter_bv_armv6;
-    rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_armv6;
-    rtcd->loopfilter.normal_b_h  = vp8_loop_filter_bh_armv6;
-    rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_armv6;
-    rtcd->loopfilter.simple_b_v  = vp8_loop_filter_bvs_armv6;
-    rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_armv6;
-    rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_armv6;
-
-    rtcd->recon.copy16x16   = vp8_copy_mem16x16_v6;
-    rtcd->recon.copy8x8     = vp8_copy_mem8x8_v6;
-    rtcd->recon.copy8x4     = vp8_copy_mem8x4_v6;
-    rtcd->recon.recon       = vp8_recon_b_armv6;
-    rtcd->recon.recon2      = vp8_recon2b_armv6;
-    rtcd->recon.recon4      = vp8_recon4b_armv6;
-#else
-//pure c
-    rtcd->idct.idct1        = vp8_short_idct4x4llm_1_c;
-    rtcd->idct.idct16       = vp8_short_idct4x4llm_c;
-    rtcd->idct.idct1_scalar = vp8_dc_only_idct_c;
-    rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_c;
-    rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_c;
-
-    rtcd->recon.copy16x16   = vp8_copy_mem16x16_c;
-    rtcd->recon.copy8x8     = vp8_copy_mem8x8_c;
-    rtcd->recon.copy8x4     = vp8_copy_mem8x4_c;
-    rtcd->recon.recon      = vp8_recon_b_c;
-    rtcd->recon.recon2      = vp8_recon2b_c;
-    rtcd->recon.recon4     = vp8_recon4b_c;
-
-    rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_c;
-    rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_c;
-    rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_c;
-    rtcd->subpix.sixtap4x4     = vp8_sixtap_predict_c;
-    rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_c;
-    rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_c;
-    rtcd->subpix.bilinear8x4   = vp8_bilinear_predict8x4_c;
-    rtcd->subpix.bilinear4x4   = vp8_bilinear_predict4x4_c;
-
-    rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_c;
-    rtcd->loopfilter.normal_b_v  = vp8_loop_filter_bv_c;
-    rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_c;
-    rtcd->loopfilter.normal_b_h  = vp8_loop_filter_bh_c;
-    rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_c;
-    rtcd->loopfilter.simple_b_v  = vp8_loop_filter_bvs_c;
-    rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_c;
-    rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_c;
-#endif
-
-#if CONFIG_POSTPROC || (CONFIG_VP8_ENCODER && CONFIG_PSNR)
-    rtcd->postproc.down        = vp8_mbpost_proc_down_c;
-    rtcd->postproc.across      = vp8_mbpost_proc_across_ip_c;
-    rtcd->postproc.downacross  = vp8_post_proc_down_and_across_c;
-    rtcd->postproc.addnoise    = vp8_plane_add_noise_c;
-#endif
-#endif
-
-#if HAVE_ARMV7
-    vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby_neon;
-    vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s_neon;
-#elif HAVE_ARMV6
-    vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby;
-    vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s;
-#else
-    vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby;
-    vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s;
-
-#endif
-
-}
diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c
index c04e31ffe3ca87a5b3f1f4cdaf693a044b5c975d..0ef375e334fa2b72a4f0a3638ab593d44bd2b3b7 100644
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -18,6 +18,7 @@
 #include "onyxc_int.h"
 
 extern void vp8_arch_x86_common_init(VP8_COMMON *ctx);
+extern void vp8_arch_arm_common_init(VP8_COMMON *ctx);
 
 void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x);
 extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x);
@@ -77,4 +78,8 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
     vp8_arch_x86_common_init(ctx);
 #endif
 
+#if ARCH_ARM
+    vp8_arch_arm_common_init(ctx);
+#endif
+
 }
diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h
index 4966002f52134a8ab5805cb927f240bb4a0eebaa..d12143d4d911870171b524386ba7dd5c7937fd16 100644
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -74,6 +74,7 @@ typedef struct VP8_COMMON_RTCD
     vp8_subpix_rtcd_vtable_t      subpix;
     vp8_loopfilter_rtcd_vtable_t  loopfilter;
     vp8_postproc_rtcd_vtable_t    postproc;
+    int                           flags;
 #else
     int unused;
 #endif
diff --git a/vp8/decoder/arm/arm_dsystemdependent.c b/vp8/decoder/arm/arm_dsystemdependent.c
new file mode 100644
index 0000000000000000000000000000000000000000..77cff47db5b146b4517474ebb71c10efe6019671
--- /dev/null
+++ b/vp8/decoder/arm/arm_dsystemdependent.c
@@ -0,0 +1,71 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/config.h"
+#include "vpx_ports/arm.h"
+#include "blockd.h"
+#include "pragmas.h"
+#include "postproc.h"
+#include "dboolhuff.h"
+#include "dequantize.h"
+#include "onyxd_int.h"
+
+/*
+ * ARM-specific initialisation of the decoder's dequantizer function table.
+ *
+ * Only does work when CONFIG_RUNTIME_CPU_DETECT is enabled: the capability
+ * mask is read from pbi->common.rtcd.flags and the pbi->dequant entries are
+ * overridden with ARMv6 and then NEON versions when the matching bit is set.
+ * NEON assignments come last, so they take precedence over the ARMv6 ones.
+ */
+void vp8_arch_arm_decode_init(VP8D_COMP *pbi)
+{
+#if CONFIG_RUNTIME_CPU_DETECT
+    int flags = pbi->common.rtcd.flags;
+
+#if HAVE_ARMV6
+    if (flags & HAS_MEDIA)
+    {
+        pbi->dequant.block               = vp8_dequantize_b_v6;
+        pbi->dequant.idct_add            = vp8_dequant_idct_add_v6;
+        pbi->dequant.dc_idct_add         = vp8_dequant_dc_idct_add_v6;
+        pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_v6;
+        pbi->dequant.idct_add_y_block    = vp8_dequant_idct_add_y_block_v6;
+        pbi->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_v6;
+#if 0 //For use with RTCD, when implemented
+        pbi->dboolhuff.start             = vp8dx_start_decode_c;
+        pbi->dboolhuff.fill              = vp8dx_bool_decoder_fill_c;
+        pbi->dboolhuff.debool            = vp8dx_decode_bool_c;
+        pbi->dboolhuff.devalue           = vp8dx_decode_value_c;
+#endif
+    }
+#endif
+
+#if HAVE_ARMV7
+    if (flags & HAS_NEON)
+    {
+        pbi->dequant.block               = vp8_dequantize_b_neon;
+        pbi->dequant.idct_add            = vp8_dequant_idct_add_neon;
+        /*This is not used: NEON always dequants two blocks at once.
+        pbi->dequant.dc_idct_add         = vp8_dequant_dc_idct_add_neon;*/
+        pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_neon;
+        pbi->dequant.idct_add_y_block    = vp8_dequant_idct_add_y_block_neon;
+        pbi->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_neon;
+#if 0 //For use with RTCD, when implemented
+        pbi->dboolhuff.start             = vp8dx_start_decode_c;
+        pbi->dboolhuff.fill              = vp8dx_bool_decoder_fill_c;
+        pbi->dboolhuff.debool            = vp8dx_decode_bool_c;
+        pbi->dboolhuff.devalue           = vp8dx_decode_value_c;
+#endif
+    }
+#endif
+#endif
+}
diff --git a/vp8/decoder/arm/dequantize_arm.h b/vp8/decoder/arm/dequantize_arm.h
index 40151e01ab9a1676df4f51c5257baf965c561bf6..b7d800d2603c639b39071e1f9fa367a0cde49f0c 100644
--- a/vp8/decoder/arm/dequantize_arm.h
+++ b/vp8/decoder/arm/dequantize_arm.h
@@ -20,6 +20,7 @@ extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_v6)
 extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_v6);
 extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_v6);
 
+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_dequant_block
 #define vp8_dequant_block vp8_dequantize_b_v6
 
@@ -38,6 +39,7 @@ extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_v6);
 #undef vp8_dequant_idct_add_uv_block
 #define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_v6
 #endif
+#endif
 
 #if HAVE_ARMV7
 extern prototype_dequant_block(vp8_dequantize_b_neon);
@@ -47,6 +49,7 @@ extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_neo
 extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_neon);
 extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_neon);
 
+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_dequant_block
 #define vp8_dequant_block vp8_dequantize_b_neon
 
@@ -65,5 +68,6 @@ extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_neon);
 #undef vp8_dequant_idct_add_uv_block
 #define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_neon
 #endif
+#endif
 
 #endif
diff --git a/vp8/decoder/arm/dsystemdependent.c b/vp8/decoder/arm/dsystemdependent.c
deleted file mode 100644
index 9dcf7b657f2e8678ed537a9685f119a147902731..0000000000000000000000000000000000000000
--- a/vp8/decoder/arm/dsystemdependent.c
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vpx_ports/config.h"
-#include "blockd.h"
-#include "pragmas.h"
-#include "postproc.h"
-#include "dboolhuff.h"
-#include "dequantize.h"
-#include "onyxd_int.h"
-
-void vp8_dmachine_specific_config(VP8D_COMP *pbi)
-{
-#if CONFIG_RUNTIME_CPU_DETECT
-    pbi->mb.rtcd         = &pbi->common.rtcd;
-#if HAVE_ARMV7
-    pbi->dequant.block   = vp8_dequantize_b_neon;
-    pbi->dboolhuff.start = vp8dx_start_decode_c;
-    pbi->dboolhuff.fill  = vp8dx_bool_decoder_fill_c;
-    pbi->dboolhuff.debool = vp8dx_decode_bool_c;
-    pbi->dboolhuff.devalue = vp8dx_decode_value_c;
-
-#elif HAVE_ARMV6
-    pbi->dequant.block   = vp8_dequantize_b_v6;
-    pbi->dboolhuff.start = vp8dx_start_decode_c;
-    pbi->dboolhuff.fill  = vp8dx_bool_decoder_fill_c;
-    pbi->dboolhuff.debool = vp8dx_decode_bool_c;
-    pbi->dboolhuff.devalue = vp8dx_decode_value_c;
-#endif
-#endif
-}
diff --git a/vp8/decoder/generic/dsystemdependent.c b/vp8/decoder/generic/dsystemdependent.c
index 60f2af5b823773608ea4da9466dbab60d5465f59..84de7af435ddf69a08299077d6696207e0704a8d 100644
--- a/vp8/decoder/generic/dsystemdependent.c
+++ b/vp8/decoder/generic/dsystemdependent.c
@@ -14,6 +14,7 @@
 #include "onyxd_int.h"
 
 extern void vp8_arch_x86_decode_init(VP8D_COMP *pbi);
+extern void vp8_arch_arm_decode_init(VP8D_COMP *pbi);
 
 void vp8_dmachine_specific_config(VP8D_COMP *pbi)
 {
@@ -37,4 +38,8 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi)
 #if ARCH_X86 || ARCH_X86_64
     vp8_arch_x86_decode_init(pbi);
 #endif
+
+#if ARCH_ARM
+    vp8_arch_arm_decode_init(pbi);
+#endif
 }
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index 884c38da0605b07d505d02e897adde2514c1c307..b5a6e3e858bb918fccf4dd49e9d598e21c35b94a 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -30,6 +30,9 @@
 #include "systemdependent.h"
 #include "vpx_ports/vpx_timer.h"
 #include "detokenize.h"
+#if ARCH_ARM
+#include "vpx_ports/arm.h"
+#endif
 
 extern void vp8_init_loop_filter(VP8_COMMON *cm);
 extern void vp8cx_init_de_quantizer(VP8D_COMP *pbi);
@@ -224,7 +227,6 @@ int vp8dx_set_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_C
 #if HAVE_ARMV7
 extern void vp8_push_neon(INT64 *store);
 extern void vp8_pop_neon(INT64 *store);
-static INT64 dx_store_reg[8];
 #endif
 
 static int get_free_fb (VP8_COMMON *cm)
@@ -312,6 +314,9 @@ static int swap_frame_buffers (VP8_COMMON *cm)
 
 int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsigned char *source, INT64 time_stamp)
 {
+#if HAVE_ARMV7
+    INT64 dx_store_reg[8];
+#endif
     VP8D_COMP *pbi = (VP8D_COMP *) ptr;
     VP8_COMMON *cm = &pbi->common;
     int retcode = 0;
@@ -327,10 +332,27 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
 
     pbi->common.error.error_code = VPX_CODEC_OK;
 
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (cm->rtcd.flags & HAS_NEON)
+#endif
+    {
+        vp8_push_neon(dx_store_reg);
+    }
+#endif
+
     cm->new_fb_idx = get_free_fb (cm);
 
     if (setjmp(pbi->common.error.jmp))
     {
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+        if (cm->rtcd.flags & HAS_NEON)
+#endif
+        {
+            vp8_pop_neon(dx_store_reg);
+        }
+#endif
         pbi->common.error.setjmp = 0;
         if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0)
           cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
@@ -339,10 +361,6 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
 
     pbi->common.error.setjmp = 1;
 
-#if HAVE_ARMV7
-    vp8_push_neon(dx_store_reg);
-#endif
-
     vpx_usec_timer_start(&timer);
 
     //cm->current_video_frame++;
@@ -354,7 +372,12 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
     if (retcode < 0)
     {
 #if HAVE_ARMV7
-        vp8_pop_neon(dx_store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+        if (cm->rtcd.flags & HAS_NEON)
+#endif
+        {
+            vp8_pop_neon(dx_store_reg);
+        }
 #endif
         pbi->common.error.error_code = VPX_CODEC_ERROR;
         pbi->common.error.setjmp = 0;
@@ -367,6 +390,14 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
     {
         if (swap_frame_buffers (cm))
         {
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+            if (cm->rtcd.flags & HAS_NEON)
+#endif
+            {
+                vp8_pop_neon(dx_store_reg);
+            }
+#endif
             pbi->common.error.error_code = VPX_CODEC_ERROR;
             pbi->common.error.setjmp = 0;
             return -1;
@@ -375,6 +406,14 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
     {
         if (swap_frame_buffers (cm))
         {
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+            if (cm->rtcd.flags & HAS_NEON)
+#endif
+            {
+                vp8_pop_neon(dx_store_reg);
+            }
+#endif
             pbi->common.error.error_code = VPX_CODEC_ERROR;
             pbi->common.error.setjmp = 0;
             return -1;
@@ -455,7 +494,12 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
 #endif
 
 #if HAVE_ARMV7
-    vp8_pop_neon(dx_store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (cm->rtcd.flags & HAS_NEON)
+#endif
+    {
+        vp8_pop_neon(dx_store_reg);
+    }
 #endif
     pbi->common.error.setjmp = 0;
     return retcode;
diff --git a/vp8/encoder/arm/arm_csystemdependent.c b/vp8/encoder/arm/arm_csystemdependent.c
new file mode 100644
index 0000000000000000000000000000000000000000..8736fcf1d603d5db6f70c2d236b8d5795c2749c3
--- /dev/null
+++ b/vp8/encoder/arm/arm_csystemdependent.c
@@ -0,0 +1,136 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/config.h"
+#include "vpx_ports/arm.h"
+#include "variance.h"
+#include "onyx_int.h"
+
+extern void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
+extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
+extern void vpxyv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
+
+/*
+ * vp8_arch_arm_encoder_init
+ *
+ * Points the encoder's dispatch entries at the ARM-optimized routines.
+ *
+ * When CONFIG_RUNTIME_CPU_DETECT is enabled, the capability flags are read
+ * from cpi->common.rtcd.flags and an entry in cpi->rtcd is overridden only
+ * if the matching bit is set: HAS_MEDIA gates the ARMv6 routines and
+ * HAS_NEON gates the ARMv7 ones.  Entries that are not assigned here keep
+ * whatever the caller installed beforehand.  The ARMv7 block runs after
+ * the ARMv6 block, so a NEON routine wins where both assign the same entry.
+ *
+ * vp8_yv12_copy_partial_frame_ptr is a global function pointer, not part
+ * of cpi->rtcd, so it must also be set in builds without runtime detection;
+ * there HAVE_ARMV7 alone selects the NEON copy.
+ *
+ * cpi: encoder instance whose dispatch table is updated in place.
+ */
+void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
+{
+#if CONFIG_RUNTIME_CPU_DETECT
+    int flags = cpi->common.rtcd.flags;
+    int has_media = flags & HAS_MEDIA;
+    int has_neon = flags & HAS_NEON;
+
+    /* Each flag is consumed only when the matching HAVE_ARMVx section is
+     * compiled in; the casts keep the other configurations warning-free.
+     */
+    (void)has_media;
+    (void)has_neon;
+
+#if HAVE_ARMV6
+    if (has_media)
+    {
+        /* Entries not overridden for ARMv6 (left as set by the caller):
+         *   variance: sad16x16 sad16x8 sad8x16 sad8x8 sad4x4
+         *             var4x4 var8x8 var8x16 var16x8 var16x16
+         *             subpixvar4x4 subpixvar8x8 subpixvar8x16
+         *             subpixvar16x8 subpixvar16x16
+         *             mse16x16 getmbss get16x16prederror
+         *             get8x8var get16x16var get4x4sse_cs
+         *   fdct:     short4x4 short8x4 fast4x4 fast8x4
+         *   encodemb: berr mberr mbuverr subb submby submbuv
+         *   quantize: quantb fastquantb
+         */
+        cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_armv6;
+    }
+#endif
+
+#if HAVE_ARMV7
+    if (has_neon)
+    {
+        /* Commented-out assignments below are entries not overridden for
+         * NEON; they are left as set by the caller. */
+        cpi->rtcd.variance.sad16x16              = vp8_sad16x16_neon;
+        cpi->rtcd.variance.sad16x8               = vp8_sad16x8_neon;
+        cpi->rtcd.variance.sad8x16               = vp8_sad8x16_neon;
+        cpi->rtcd.variance.sad8x8                = vp8_sad8x8_neon;
+        cpi->rtcd.variance.sad4x4                = vp8_sad4x4_neon;
+
+        /*cpi->rtcd.variance.var4x4                = vp8_variance4x4_c;*/
+        cpi->rtcd.variance.var8x8                = vp8_variance8x8_neon;
+        cpi->rtcd.variance.var8x16               = vp8_variance8x16_neon;
+        cpi->rtcd.variance.var16x8               = vp8_variance16x8_neon;
+        cpi->rtcd.variance.var16x16              = vp8_variance16x16_neon;
+
+        /*cpi->rtcd.variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_c;*/
+        cpi->rtcd.variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_neon;
+        /*cpi->rtcd.variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_c;
+        cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_c;*/
+        cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_neon;
+
+        cpi->rtcd.variance.mse16x16              = vp8_mse16x16_neon;
+        /*cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_c;*/
+
+        cpi->rtcd.variance.get16x16prederror     = vp8_get16x16pred_error_neon;
+        /*cpi->rtcd.variance.get8x8var             = vp8_get8x8var_c;
+        cpi->rtcd.variance.get16x16var           = vp8_get16x16var_c;*/
+        cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_neon;
+
+        cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_neon;
+        cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_neon;
+        cpi->rtcd.fdct.fast4x4                   = vp8_fast_fdct4x4_neon;
+        cpi->rtcd.fdct.fast8x4                   = vp8_fast_fdct8x4_neon;
+        cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_neon;
+
+        /*cpi->rtcd.encodemb.berr                  = vp8_block_error_c;
+        cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_c;
+        cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_c;*/
+        cpi->rtcd.encodemb.subb                  = vp8_subtract_b_neon;
+        cpi->rtcd.encodemb.submby                = vp8_subtract_mby_neon;
+        cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_neon;
+
+        /*cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b;
+        cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_c;*/
+        /* The neon quantizer has not been updated to match the new exact
+         * quantizer introduced in commit e04e2935
+         */
+        /*cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_neon;*/
+    }
+#endif
+#endif /* CONFIG_RUNTIME_CPU_DETECT */
+
+    /* Kept outside the runtime-detection block above: without
+     * CONFIG_RUNTIME_CPU_DETECT there are no flags to test and a
+     * HAVE_ARMV7 build uses the NEON copy unconditionally.
+     */
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (has_neon)
+#endif
+    {
+        vp8_yv12_copy_partial_frame_ptr = vpxyv12_copy_partial_frame_neon;
+    }
+#endif
+}
diff --git a/vp8/encoder/arm/neon/boolhuff_armv7.asm b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
similarity index 95%
rename from vp8/encoder/arm/neon/boolhuff_armv7.asm
rename to vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
index 9c4823c51fc2ce3869016f1bf87a076e3e877393..e78dc33229fe872ad9891f65de2dd8e0c9a0e1b0 100644
--- a/vp8/encoder/arm/neon/boolhuff_armv7.asm
+++ b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
@@ -205,17 +205,10 @@ token_count_lt_zero_se
     ldr     r5, [r0, #vp8_writer_range]
     ldr     r3, [r0, #vp8_writer_count]
 
-    ; reverse the stream of bits to be packed.  Normally
-    ; the most significant bit is peeled off and compared
-    ; in the form of (v >> --n) & 1.  ARM architecture has
-    ; the ability to set a flag based on the value of the
-    ; bit shifted off the bottom of the register.  To make
-    ; that happen the bitstream is reversed.
-    rbit    r11, r1
     rsb     r4, r10, #32                 ; 32-n
 
     ; v is kept in r1 during the token pack loop
-    lsr     r1, r11, r4                 ; v >>= 32 - n
+    lsl     r1, r1, r4                  ; r1 = v << 32 - n
 
 encode_value_loop
     sub     r7, r5, #1                  ; range-1
@@ -223,7 +216,7 @@ encode_value_loop
     ; Decisions are made based on the bit value shifted
     ; off of v, so set a flag here based on this.
     ; This value is refered to as "bb"
-    lsrs    r1, r1, #1                  ; bit = v >> n
+    lsls    r1, r1, #1                  ; bit = v >> n
     mov     r4, r7, lsl #7              ; ((range-1) * 128)
 
     mov     r7, #1
diff --git a/vp8/encoder/arm/neon/vp8_packtokens_armv7.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm
similarity index 93%
rename from vp8/encoder/arm/neon/vp8_packtokens_armv7.asm
rename to vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm
index c19ac8250649a5b9d4f2153f65294446daad6574..3233d2a96688d87018bab8e872c21058134940fb 100644
--- a/vp8/encoder/arm/neon/vp8_packtokens_armv7.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm
@@ -9,7 +9,7 @@
 ;
 
 
-    EXPORT |vp8cx_pack_tokens_armv7|
+    EXPORT |vp8cx_pack_tokens_armv5|
 
     INCLUDE vpx_vp8_enc_asm_offsets.asm
 
@@ -25,7 +25,7 @@
 ; r3 vp8_coef_encodings
 ; s0 vp8_extra_bits
 ; s1 vp8_coef_tree
-|vp8cx_pack_tokens_armv7| PROC
+|vp8cx_pack_tokens_armv5| PROC
     push    {r4-r11, lr}
 
     ; Add size of xcount * sizeof (TOKENEXTRA) to get stop
@@ -57,18 +57,11 @@ while_p_lt_stop
     movne   lr, #2                      ; i = 2
     subne   r8, r8, #1                  ; --n
 
-    ; reverse the stream of bits to be packed.  Normally
-    ; the most significant bit is peeled off and compared
-    ; in the form of (v >> --n) & 1.  ARM architecture has
-    ; the ability to set a flag based on the value of the
-    ; bit shifted off the bottom of the register.  To make
-    ; that happen the bitstream is reversed.
-    rbit    r12, r6
     rsb     r4, r8, #32                 ; 32-n
     ldr     r10, [sp, #52]              ; vp8_coef_tree
 
     ; v is kept in r12 during the token pack loop
-    lsr     r12, r12, r4                ; v >>= 32 - n
+    lsl     r12, r6, r4                ; r12 = v << 32 - n
 
 ; loop start
 token_loop
@@ -78,7 +71,7 @@ token_loop
     ; Decisions are made based on the bit value shifted
     ; off of v, so set a flag here based on this.
     ; This value is refered to as "bb"
-    lsrs    r12, r12, #1                ; bb = v >> n
+    lsls    r12, r12, #1                ; bb = v >> n
     mul     r4, r4, r7                  ; ((range-1) * pp[i>>1]))
 
     ; bb can only be 0 or 1.  So only execute this statement
@@ -172,16 +165,15 @@ token_count_lt_zero
     ldr     r10, [r12, #vp8_extra_bit_struct_tree]
     str     r10, [sp, #4]               ; b->tree
 
-    rbit    r12, r7                     ; reverse v
     rsb     r4, r8, #32
-    lsr     r12, r12, r4
+    lsl     r12, r7, r4
 
     mov     lr, #0                      ; i = 0
 
 extra_bits_loop
     ldrb    r4, [r9, lr, asr #1]            ; pp[i>>1]
     sub     r7, r5, #1                  ; range-1
-    lsrs    r12, r12, #1                ; v >> n
+    lsls    r12, r12, #1                ; v >> n
     mul     r4, r4, r7                  ; (range-1) * pp[i>>1]
     addcs   lr, lr, #1                  ; i + bb
 
diff --git a/vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm
similarity index 94%
rename from vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm
rename to vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm
index 0756455869dc595c2d529d87dd5fb5ce74c705f9..a9b552ae175bfb7f30528c116fdf95f537aa6abe 100644
--- a/vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm
@@ -9,7 +9,7 @@
 ;
 
 
-    EXPORT |vp8cx_pack_mb_row_tokens_armv7|
+    EXPORT |vp8cx_pack_mb_row_tokens_armv5|
 
     INCLUDE vpx_vp8_enc_asm_offsets.asm
 
@@ -25,7 +25,7 @@
 ; r3 vp8_extra_bits
 ; s0 vp8_coef_tree
 
-|vp8cx_pack_mb_row_tokens_armv7| PROC
+|vp8cx_pack_mb_row_tokens_armv5| PROC
     push    {r4-r11, lr}
     sub     sp, sp, #24
 
@@ -78,18 +78,11 @@ while_p_lt_stop
     movne   lr, #2                      ; i = 2
     subne   r8, r8, #1                  ; --n
 
-    ; reverse the stream of bits to be packed.  Normally
-    ; the most significant bit is peeled off and compared
-    ; in the form of (v >> --n) & 1.  ARM architecture has
-    ; the ability to set a flag based on the value of the
-    ; bit shifted off the bottom of the register.  To make
-    ; that happen the bitstream is reversed.
-    rbit    r12, r6
     rsb     r4, r8, #32                 ; 32-n
     ldr     r10, [sp, #60]              ; vp8_coef_tree
 
     ; v is kept in r12 during the token pack loop
-    lsr     r12, r12, r4                ; v >>= 32 - n
+    lsl     r12, r6, r4                 ; r12 = v << 32 - n
 
 ; loop start
 token_loop
@@ -99,7 +92,7 @@ token_loop
     ; Decisions are made based on the bit value shifted
     ; off of v, so set a flag here based on this.
     ; This value is refered to as "bb"
-    lsrs    r12, r12, #1                ; bb = v >> n
+    lsls    r12, r12, #1                ; bb = v >> n
     mul     r4, r4, r7                  ; ((range-1) * pp[i>>1]))
 
     ; bb can only be 0 or 1.  So only execute this statement
@@ -193,16 +186,15 @@ token_count_lt_zero
     ldr     r10, [r12, #vp8_extra_bit_struct_tree]
     str     r10, [sp, #4]               ; b->tree
 
-    rbit    r12, r7                     ; reverse v
     rsb     r4, r8, #32
-    lsr     r12, r12, r4
+    lsl     r12, r7, r4
 
     mov     lr, #0                      ; i = 0
 
 extra_bits_loop
     ldrb    r4, [r9, lr, asr #1]            ; pp[i>>1]
     sub     r7, r5, #1                  ; range-1
-    lsrs    r12, r12, #1                ; v >> n
+    lsls    r12, r12, #1                ; v >> n
     mul     r4, r4, r7                  ; (range-1) * pp[i>>1]
     addcs   lr, lr, #1                  ; i + bb
 
diff --git a/vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
similarity index 95%
rename from vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm
rename to vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
index 10a3d985125572df07bd4fc750e388c2dcc29894..0835164e5e8755ed0b648eca1263a668e4f84e2f 100644
--- a/vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
@@ -9,7 +9,7 @@
 ;
 
 
-    EXPORT |vp8cx_pack_tokens_into_partitions_armv7|
+    EXPORT |vp8cx_pack_tokens_into_partitions_armv5|
 
     INCLUDE vpx_vp8_enc_asm_offsets.asm
 
@@ -27,7 +27,7 @@
 ; s1 vp8_extra_bits,
 ; s2 const vp8_tree_index *,
 
-|vp8cx_pack_tokens_into_partitions_armv7| PROC
+|vp8cx_pack_tokens_into_partitions_armv5| PROC
     push    {r4-r11, lr}
     sub     sp, sp, #44
 
@@ -106,18 +106,11 @@ while_p_lt_stop
     movne   lr, #2                      ; i = 2
     subne   r8, r8, #1                  ; --n
 
-    ; reverse the stream of bits to be packed.  Normally
-    ; the most significant bit is peeled off and compared
-    ; in the form of (v >> --n) & 1.  ARM architecture has
-    ; the ability to set a flag based on the value of the
-    ; bit shifted off the bottom of the register.  To make
-    ; that happen the bitstream is reversed.
-    rbit    r12, r6
     rsb     r4, r8, #32                 ; 32-n
     ldr     r10, [sp, #88]              ; vp8_coef_tree
 
     ; v is kept in r12 during the token pack loop
-    lsr     r12, r12, r4                ; v >>= 32 - n
+    lsl     r12, r6, r4                ; r12 = v << 32 - n
 
 ; loop start
 token_loop
@@ -127,7 +120,7 @@ token_loop
     ; Decisions are made based on the bit value shifted
     ; off of v, so set a flag here based on this.
     ; This value is refered to as "bb"
-    lsrs    r12, r12, #1                ; bb = v >> n
+    lsls    r12, r12, #1                ; bb = v >> n
     mul     r4, r4, r7                  ; ((range-1) * pp[i>>1]))
 
     ; bb can only be 0 or 1.  So only execute this statement
@@ -221,16 +214,15 @@ token_count_lt_zero
     ldr     r10, [r12, #vp8_extra_bit_struct_tree]
     str     r10, [sp, #4]               ; b->tree
 
-    rbit    r12, r7                     ; reverse v
     rsb     r4, r8, #32
-    lsr     r12, r12, r4
+    lsl     r12, r7, r4
 
     mov     lr, #0                      ; i = 0
 
 extra_bits_loop
     ldrb    r4, [r9, lr, asr #1]        ; pp[i>>1]
     sub     r7, r5, #1                  ; range-1
-    lsrs    r12, r12, #1                ; v >> n
+    lsls    r12, r12, #1                ; v >> n
     mul     r4, r4, r7                  ; (range-1) * pp[i>>1]
     addcs   lr, lr, #1                  ; i + bb
 
diff --git a/vp8/encoder/arm/csystemdependent.c b/vp8/encoder/arm/csystemdependent.c
deleted file mode 100644
index 8d70d635aefd6f9fa24a268e1819ec3fffbdd670..0000000000000000000000000000000000000000
--- a/vp8/encoder/arm/csystemdependent.c
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vpx_ports/config.h"
-#include "variance.h"
-#include "onyx_int.h"
-
-void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
-extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
-extern void vpxyv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
-
-void vp8_cmachine_specific_config(VP8_COMP *cpi)
-{
-#if CONFIG_RUNTIME_CPU_DETECT
-    cpi->rtcd.common                         = &cpi->common.rtcd;
-
-#if HAVE_ARMV7
-    cpi->rtcd.variance.sad16x16              = vp8_sad16x16_neon;
-    cpi->rtcd.variance.sad16x8               = vp8_sad16x8_neon;
-    cpi->rtcd.variance.sad8x16               = vp8_sad8x16_neon;
-    cpi->rtcd.variance.sad8x8                = vp8_sad8x8_neon;
-    cpi->rtcd.variance.sad4x4                = vp8_sad4x4_neon;
-
-    cpi->rtcd.variance.var4x4                = vp8_variance4x4_c;
-    cpi->rtcd.variance.var8x8                = vp8_variance8x8_neon;
-    cpi->rtcd.variance.var8x16               = vp8_variance8x16_neon;
-    cpi->rtcd.variance.var16x8               = vp8_variance16x8_neon;
-    cpi->rtcd.variance.var16x16              = vp8_variance16x16_neon;
-
-    cpi->rtcd.variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_c;
-    cpi->rtcd.variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_neon;
-    cpi->rtcd.variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_c;
-    cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_c;
-    cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_neon;
-
-    cpi->rtcd.variance.mse16x16              = vp8_mse16x16_neon;
-    cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_c;
-
-    cpi->rtcd.variance.get16x16prederror     = vp8_get16x16pred_error_neon;
-    cpi->rtcd.variance.get8x8var             = vp8_get8x8var_c;
-    cpi->rtcd.variance.get16x16var           = vp8_get16x16var_c;;
-    cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_neon;
-
-    cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_neon;
-    cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_neon;
-    cpi->rtcd.fdct.fast4x4                   = vp8_fast_fdct4x4_neon;
-    cpi->rtcd.fdct.fast8x4                   = vp8_fast_fdct8x4_neon;
-    cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_neon;
-
-    cpi->rtcd.encodemb.berr                  = vp8_block_error_c;
-    cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_c;
-    cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_c;
-    cpi->rtcd.encodemb.subb                  = vp8_subtract_b_neon;
-    cpi->rtcd.encodemb.submby                = vp8_subtract_mby_neon;
-    cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_neon;
-
-    cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b;
-    cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_c;
-    /* The neon quantizer has not been updated to match the new exact
-     * quantizer introduced in commit e04e2935
-     */
-    /*cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_neon;*/
-#elif HAVE_ARMV6
-    cpi->rtcd.variance.sad16x16              = vp8_sad16x16_c;
-    cpi->rtcd.variance.sad16x8               = vp8_sad16x8_c;
-    cpi->rtcd.variance.sad8x16               = vp8_sad8x16_c;
-    cpi->rtcd.variance.sad8x8                = vp8_sad8x8_c;
-    cpi->rtcd.variance.sad4x4                = vp8_sad4x4_c;
-
-    cpi->rtcd.variance.var4x4                = vp8_variance4x4_c;
-    cpi->rtcd.variance.var8x8                = vp8_variance8x8_c;
-    cpi->rtcd.variance.var8x16               = vp8_variance8x16_c;
-    cpi->rtcd.variance.var16x8               = vp8_variance16x8_c;
-    cpi->rtcd.variance.var16x16              = vp8_variance16x16_c;
-
-    cpi->rtcd.variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_c;
-    cpi->rtcd.variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_c;
-    cpi->rtcd.variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_c;
-    cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_c;
-    cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_c;
-
-    cpi->rtcd.variance.mse16x16              = vp8_mse16x16_c;
-    cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_c;
-
-    cpi->rtcd.variance.get16x16prederror     = vp8_get16x16pred_error_c;
-    cpi->rtcd.variance.get8x8var             = vp8_get8x8var_c;
-    cpi->rtcd.variance.get16x16var           = vp8_get16x16var_c;;
-    cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_c;
-
-    cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_c;
-    cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_c;
-    cpi->rtcd.fdct.fast4x4                   = vp8_fast_fdct4x4_c;
-    cpi->rtcd.fdct.fast8x4                   = vp8_fast_fdct8x4_c;
-    cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_armv6;
-
-    cpi->rtcd.encodemb.berr                  = vp8_block_error_c;
-    cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_c;
-    cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_c;
-    cpi->rtcd.encodemb.subb                  = vp8_subtract_b_c;
-    cpi->rtcd.encodemb.submby                = vp8_subtract_mby_c;
-    cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_c;
-
-    cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b;
-    cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_c;
-#else
-    //pure c
-    cpi->rtcd.variance.sad16x16              = vp8_sad16x16_c;
-    cpi->rtcd.variance.sad16x8               = vp8_sad16x8_c;
-    cpi->rtcd.variance.sad8x16               = vp8_sad8x16_c;
-    cpi->rtcd.variance.sad8x8                = vp8_sad8x8_c;
-    cpi->rtcd.variance.sad4x4                = vp8_sad4x4_c;
-
-    cpi->rtcd.variance.var4x4                = vp8_variance4x4_c;
-    cpi->rtcd.variance.var8x8                = vp8_variance8x8_c;
-    cpi->rtcd.variance.var8x16               = vp8_variance8x16_c;
-    cpi->rtcd.variance.var16x8               = vp8_variance16x8_c;
-    cpi->rtcd.variance.var16x16              = vp8_variance16x16_c;
-
-    cpi->rtcd.variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_c;
-    cpi->rtcd.variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_c;
-    cpi->rtcd.variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_c;
-    cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_c;
-    cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_c;
-
-    cpi->rtcd.variance.mse16x16              = vp8_mse16x16_c;
-    cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_c;
-
-    cpi->rtcd.variance.get16x16prederror     = vp8_get16x16pred_error_c;
-    cpi->rtcd.variance.get8x8var             = vp8_get8x8var_c;
-    cpi->rtcd.variance.get16x16var           = vp8_get16x16var_c;;
-    cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_c;
-
-    cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_c;
-    cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_c;
-    cpi->rtcd.fdct.fast4x4                   = vp8_fast_fdct4x4_c;
-    cpi->rtcd.fdct.fast8x4                   = vp8_fast_fdct8x4_c;
-    cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_c;
-
-    cpi->rtcd.encodemb.berr                  = vp8_block_error_c;
-    cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_c;
-    cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_c;
-    cpi->rtcd.encodemb.subb                  = vp8_subtract_b_c;
-    cpi->rtcd.encodemb.submby                = vp8_subtract_mby_c;
-    cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_c;
-
-    cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b;
-    cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_c;
-#endif
-#endif
-
-#if HAVE_ARMV7
-    vp8_yv12_copy_partial_frame_ptr = vpxyv12_copy_partial_frame_neon;
-#else
-    vp8_yv12_copy_partial_frame_ptr = vp8_yv12_copy_partial_frame;
-#endif
-}
diff --git a/vp8/encoder/arm/dct_arm.h b/vp8/encoder/arm/dct_arm.h
index 774599bf030c893f434817c8c008906f64ed1321..41fa5d1928df9bba55dc30c5858f5236baabcd64 100644
--- a/vp8/encoder/arm/dct_arm.h
+++ b/vp8/encoder/arm/dct_arm.h
@@ -15,9 +15,11 @@
 #if HAVE_ARMV6
 extern prototype_fdct(vp8_short_walsh4x4_armv6);
 
+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_fdct_walsh_short4x4
 #define vp8_fdct_walsh_short4x4 vp8_short_walsh4x4_armv6
 #endif
+#endif
 
 #if HAVE_ARMV7
 extern prototype_fdct(vp8_short_fdct4x4_neon);
@@ -26,6 +28,7 @@ extern prototype_fdct(vp8_fast_fdct4x4_neon);
 extern prototype_fdct(vp8_fast_fdct8x4_neon);
 extern prototype_fdct(vp8_short_walsh4x4_neon);
 
+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_fdct_short4x4
 #define vp8_fdct_short4x4 vp8_short_fdct4x4_neon
 
@@ -40,6 +43,7 @@ extern prototype_fdct(vp8_short_walsh4x4_neon);
 
 #undef  vp8_fdct_walsh_short4x4
 #define vp8_fdct_walsh_short4x4 vp8_short_walsh4x4_neon
+#endif
 
 #endif
 
diff --git a/vp8/encoder/arm/encodemb_arm.h b/vp8/encoder/arm/encodemb_arm.h
index eb699433f4c79d3967dc75efaf7a5a201cac90a2..8fe453735594d97effe18b2c4d77d0dca8df3082 100644
--- a/vp8/encoder/arm/encodemb_arm.h
+++ b/vp8/encoder/arm/encodemb_arm.h
@@ -30,6 +30,7 @@ extern prototype_submbuv(vp8_subtract_mbuv_neon);
 //#undef  vp8_encodemb_mbuverr
 //#define vp8_encodemb_mbuverr vp8_mbuverror_c
 
+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_encodemb_subb
 #define vp8_encodemb_subb vp8_subtract_b_neon
 
@@ -38,6 +39,7 @@ extern prototype_submbuv(vp8_subtract_mbuv_neon);
 
 #undef  vp8_encodemb_submbuv
 #define vp8_encodemb_submbuv vp8_subtract_mbuv_neon
+#endif
 
 #endif
 
diff --git a/vp8/encoder/arm/variance_arm.h b/vp8/encoder/arm/variance_arm.h
index 859e43f51524214bf88b4555c8860de8199c492d..fb9dd5a5b0402a94778b7d0a1084c6bec9119e01 100644
--- a/vp8/encoder/arm/variance_arm.h
+++ b/vp8/encoder/arm/variance_arm.h
@@ -38,6 +38,7 @@ extern prototype_sad(vp8_get16x16pred_error_neon);
 //extern prototype_variance2(vp8_get16x16var_c);
 extern prototype_sad(vp8_get4x4sse_cs_neon);
 
+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_variance_sad4x4
 #define vp8_variance_sad4x4 vp8_sad4x4_neon
 
@@ -100,6 +101,7 @@ extern prototype_sad(vp8_get4x4sse_cs_neon);
 
 #undef  vp8_variance_get4x4sse_cs
 #define vp8_variance_get4x4sse_cs vp8_get4x4sse_cs_neon
+#endif
 
 #endif
 
diff --git a/vp8/encoder/bitstream.h b/vp8/encoder/bitstream.h
index 559631338d7d33639c6b8cbffb58a83557a08f0f..f5d148ea477482d2833324ea191a0c14526b58bc 100644
--- a/vp8/encoder/bitstream.h
+++ b/vp8/encoder/bitstream.h
@@ -12,25 +12,25 @@
 #ifndef __INC_BITSTREAM_H
 #define __INC_BITSTREAM_H
 
-#if HAVE_ARMV7
-void vp8cx_pack_tokens_armv7(vp8_writer *w, const TOKENEXTRA *p, int xcount,
+#if HAVE_ARMV5TE
+void vp8cx_pack_tokens_armv5(vp8_writer *w, const TOKENEXTRA *p, int xcount,
                              vp8_token *,
                              vp8_extra_bit_struct *,
                              const vp8_tree_index *);
-void vp8cx_pack_tokens_into_partitions_armv7(VP8_COMP *, unsigned char *, int , int *,
+void vp8cx_pack_tokens_into_partitions_armv5(VP8_COMP *, unsigned char *, int , int *,
         vp8_token *,
         vp8_extra_bit_struct *,
         const vp8_tree_index *);
-void vp8cx_pack_mb_row_tokens_armv7(VP8_COMP *cpi, vp8_writer *w,
+void vp8cx_pack_mb_row_tokens_armv5(VP8_COMP *cpi, vp8_writer *w,
                                     vp8_token *,
                                     vp8_extra_bit_struct *,
                                     const vp8_tree_index *);
 # define pack_tokens(a,b,c)                  \
-    vp8cx_pack_tokens_armv7(a,b,c,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
+    vp8cx_pack_tokens_armv5(a,b,c,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
 # define pack_tokens_into_partitions(a,b,c,d)  \
-    vp8cx_pack_tokens_into_partitions_armv7(a,b,c,d,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
+    vp8cx_pack_tokens_into_partitions_armv5(a,b,c,d,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
 # define pack_mb_row_tokens(a,b)               \
-    vp8cx_pack_mb_row_tokens_armv7(a,b,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
+    vp8cx_pack_mb_row_tokens_armv5(a,b,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
 #else
 # define pack_tokens(a,b,c)                  pack_tokens_c(a,b,c)
 # define pack_tokens_into_partitions(a,b,c,d)  pack_tokens_into_partitions_c(a,b,c,d)
diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c
index 1acb73d9cbd04ac085be89c98975ac14e4ae3c77..520b08f51cae258f9d49287dcdfeff3a0b1ecc0b 100644
--- a/vp8/encoder/generic/csystemdependent.c
+++ b/vp8/encoder/generic/csystemdependent.c
@@ -15,6 +15,7 @@
 
 
 void vp8_arch_x86_encoder_init(VP8_COMP *cpi);
+void vp8_arch_arm_encoder_init(VP8_COMP *cpi);
 
 
 void (*vp8_fast_quantize_b)(BLOCK *b, BLOCKD *d);
@@ -94,4 +95,8 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
     vp8_arch_x86_encoder_init(cpi);
 #endif
 
+#if ARCH_ARM
+    vp8_arch_arm_encoder_init(cpi);
+#endif
+
 }
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 53d68be52c5b9b812dfc5692e3aa89999e3a143d..7e1583dd9a2b7389731e314c1e22fe97a84141e6 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -31,6 +31,9 @@
 #include "vpx_ports/vpx_timer.h"
 #include "vpxerrors.h"
 #include "temporal_filter.h"
+#if ARCH_ARM
+#include "vpx_ports/arm.h"
+#endif
 
 #include <math.h>
 #include <stdio.h>
@@ -2106,8 +2109,8 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
     CHECK_MEM_ERROR(cpi->rdtok, vpx_calloc(256 * 3 / 2, sizeof(TOKENEXTRA)));
     CHECK_MEM_ERROR(cpi->mb.ss, vpx_calloc(sizeof(search_site), (MAX_MVSEARCH_STEPS * 8) + 1));
 
-    vp8_cmachine_specific_config(cpi);
     vp8_create_common(&cpi->common);
+    vp8_cmachine_specific_config(cpi);
 
     vp8_init_config((VP8_PTR)cpi, oxcf);
 
@@ -2852,9 +2855,20 @@ static void scale_and_extend_source(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
     {
         //vp8_yv12_copy_frame_ptr(sd, &cpi->scaled_source);
 #if HAVE_ARMV7
-        vp8_yv12_copy_src_frame_func_neon(sd, &cpi->scaled_source);
-#else
-        vp8_yv12_copy_frame_ptr(sd, &cpi->scaled_source);
+#if CONFIG_RUNTIME_CPU_DETECT
+        if (cm->rtcd.flags & HAS_NEON)
+#endif
+        {
+            vp8_yv12_copy_src_frame_func_neon(sd, &cpi->scaled_source);
+        }
+#if CONFIG_RUNTIME_CPU_DETECT
+        else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+        {
+            vp8_yv12_copy_frame_ptr(sd, &cpi->scaled_source);
+        }
 #endif
 
         cpi->Source = &cpi->scaled_source;
@@ -4624,10 +4638,10 @@ static void Pass2Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest,
 #if HAVE_ARMV7
 extern void vp8_push_neon(INT64 *store);
 extern void vp8_pop_neon(INT64 *store);
-static INT64 store_reg[8];
 #endif
 int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, INT64 time_stamp, INT64 end_time)
 {
+    INT64 store_reg[8];
     VP8_COMP *cpi = (VP8_COMP *) ptr;
     VP8_COMMON *cm = &cpi->common;
     struct vpx_usec_timer  timer;
@@ -4636,7 +4650,12 @@ int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CON
         return -1;
 
 #if HAVE_ARMV7
-    vp8_push_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (cm->rtcd.flags & HAS_NEON)
+#endif
+    {
+        vp8_push_neon(store_reg);
+    }
 #endif
 
     vpx_usec_timer_start(&timer);
@@ -4645,7 +4664,12 @@ int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CON
     if (cpi->source_buffer_count != 0 && cpi->source_buffer_count >= cpi->oxcf.lag_in_frames)
     {
 #if HAVE_ARMV7
-        vp8_pop_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+        if (cm->rtcd.flags & HAS_NEON)
+#endif
+        {
+            vp8_pop_neon(store_reg);
+        }
 #endif
         return -1;
     }
@@ -4686,9 +4710,20 @@ int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CON
         s->source_time_stamp = time_stamp;
         s->source_frame_flags = frame_flags;
 #if HAVE_ARMV7
-        vp8_yv12_copy_src_frame_func_neon(sd, &s->source_buffer);
-#else
-        vp8_yv12_copy_frame_ptr(sd, &s->source_buffer);
+#if CONFIG_RUNTIME_CPU_DETECT
+        if (cm->rtcd.flags & HAS_NEON)
+#endif
+        {
+            vp8_yv12_copy_src_frame_func_neon(sd, &s->source_buffer);
+        }
+#if CONFIG_RUNTIME_CPU_DETECT
+        else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+        {
+            vp8_yv12_copy_frame_ptr(sd, &s->source_buffer);
+        }
 #endif
         cpi->source_buffer_count = 1;
     }
@@ -4697,14 +4732,19 @@ int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CON
     cpi->time_receive_data += vpx_usec_timer_elapsed(&timer);
 
 #if HAVE_ARMV7
-    vp8_pop_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (cm->rtcd.flags & HAS_NEON)
+#endif
+    {
+        vp8_pop_neon(store_reg);
+    }
 #endif
 
     return 0;
 }
 int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, INT64 *time_stamp, INT64 *time_end, int flush)
 {
-
+    INT64 store_reg[8];
     VP8_COMP *cpi = (VP8_COMP *) ptr;
     VP8_COMMON *cm = &cpi->common;
     struct vpx_usec_timer  tsctimer;
@@ -4715,7 +4755,12 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
         return -1;
 
 #if HAVE_ARMV7
-    vp8_push_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (cm->rtcd.flags & HAS_NEON)
+#endif
+    {
+        vp8_push_neon(store_reg);
+    }
 #endif
 
     vpx_usec_timer_start(&cmptimer);
@@ -4867,7 +4912,12 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
 #endif
 
 #if HAVE_ARMV7
-        vp8_pop_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+        if (cm->rtcd.flags & HAS_NEON)
+#endif
+        {
+            vp8_pop_neon(store_reg);
+        }
 #endif
         return -1;
     }
@@ -4910,7 +4960,12 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
     if (!cpi)
     {
 #if HAVE_ARMV7
-        vp8_pop_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+        if (cm->rtcd.flags & HAS_NEON)
+#endif
+        {
+            vp8_pop_neon(store_reg);
+        }
 #endif
         return 0;
     }
@@ -5099,7 +5154,12 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
 #endif
 
 #if HAVE_ARMV7
-    vp8_pop_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (cm->rtcd.flags & HAS_NEON)
+#endif
+    {
+        vp8_pop_neon(store_reg);
+    }
 #endif
 
     return 0;
diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c
index 79e07dbc0e8e0a383cf1d0c5ec741bdc343d5006..09e8b5412b5895fd3050b2c0941f5047037578ec 100644
--- a/vp8/encoder/picklpf.c
+++ b/vp8/encoder/picklpf.c
@@ -16,6 +16,9 @@
 #include "vpx_scale/yv12extend.h"
 #include "vpx_scale/vpxscale.h"
 #include "alloccommon.h"
+#if ARCH_ARM
+#include "vpx_ports/arm.h"
+#endif
 
 extern void vp8_loop_filter_frame(VP8_COMMON *cm,    MACROBLOCKD *mbd,  int filt_val);
 extern void vp8_loop_filter_frame_yonly(VP8_COMMON *cm,    MACROBLOCKD *mbd,  int filt_val, int sharpness_lvl);
@@ -306,9 +309,20 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
 
     //  Make a copy of the unfiltered / processed recon buffer
 #if HAVE_ARMV7
-    vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(cm->frame_to_show, &cpi->last_frame_uf);
-#else
-    vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cpi->last_frame_uf);
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (cm->rtcd.flags & HAS_NEON)
+#endif
+    {
+        vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(cm->frame_to_show, &cpi->last_frame_uf);
+    }
+#if CONFIG_RUNTIME_CPU_DETECT
+    else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+    {
+        vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cpi->last_frame_uf);
+    }
 #endif
 
     if (cm->frame_type == KEY_FRAME)
@@ -343,9 +357,20 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
 
     //  Re-instate the unfiltered frame
 #if HAVE_ARMV7
-    vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
-#else
-    vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (cm->rtcd.flags & HAS_NEON)
+#endif
+    {
+        vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
+    }
+#if CONFIG_RUNTIME_CPU_DETECT
+    else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+    {
+        vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+    }
 #endif
 
     while (filter_step > 0)
@@ -372,9 +397,20 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
 
             //  Re-instate the unfiltered frame
 #if HAVE_ARMV7
-            vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
-#else
-            vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+#if CONFIG_RUNTIME_CPU_DETECT
+            if (cm->rtcd.flags & HAS_NEON)
+#endif
+            {
+                vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
+            }
+#if CONFIG_RUNTIME_CPU_DETECT
+            else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+            {
+                vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+            }
 #endif
 
             // If value is close to the best so far then bias towards a lower loop filter value.
@@ -401,9 +437,20 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
 
             //  Re-instate the unfiltered frame
 #if HAVE_ARMV7
-            vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
-#else
-            vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+#if CONFIG_RUNTIME_CPU_DETECT
+            if (cm->rtcd.flags & HAS_NEON)
+#endif
+            {
+                vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
+            }
+#if CONFIG_RUNTIME_CPU_DETECT
+            else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+            {
+                vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+            }
 #endif
 
             // Was it better than the previous best?
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index ecca18a0a461d379918b975a5d3b9516a74538ec..3b5aaa54889b415005832e29ed9b615bfd616a27 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -112,6 +112,8 @@ VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/postproc_mmx.asm
 VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/postproc_sse2.asm
 endif
 
+VP8_COMMON_SRCS-$(ARCH_ARM)  += common/arm/arm_systemdependent.c
+
 # common (c)
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/bilinearfilter_arm.c
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/filter_arm.c
@@ -119,15 +121,8 @@ VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/loopfilter_arm.c
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/recon_arm.c
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/reconintra4x4_arm.c
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/reconintra_arm.c
-VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/systemdependent.c
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/vpx_asm_offsets.c
 
-VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6)  += common/filter_c.c
-VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6)  += common/idctllm.c
-VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6)  += common/recon.c
-VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6)  += common/reconintra4x4.c
-VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6)  += common/generic/systemdependent.c
-
 # common (armv6)
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/bilinearfilter_v6$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/copymem8x4_v6$(ASM)
diff --git a/vp8/vp8cx_arm.mk b/vp8/vp8cx_arm.mk
index 1424bd15a2bbda115dbc4ef1973a3fc275d44234..d126faf32fcd98346ed1eaf7518dc164ee543005 100644
--- a/vp8/vp8cx_arm.mk
+++ b/vp8/vp8cx_arm.mk
@@ -13,17 +13,22 @@
 
 #File list for arm
 # encoder
-VP8_CX_SRCS-$(HAVE_ARMV6)  += encoder/arm/csystemdependent.c
+VP8_CX_SRCS-$(ARCH_ARM)  += encoder/arm/arm_csystemdependent.c
 
 VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/encodemb_arm.c
 VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/quantize_arm.c
 VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/picklpf_arm.c
-VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/boolhuff_arm.c
+VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/boolhuff_arm.c
 VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/mcomp_arm.c
 
-VP8_CX_SRCS_REMOVE-$(HAVE_ARMV6)  += encoder/generic/csystemdependent.c
-VP8_CX_SRCS_REMOVE-$(HAVE_ARMV7)  += encoder/boolhuff.c
-VP8_CX_SRCS_REMOVE-$(HAVE_ARMV7)  += encoder/mcomp.c
+VP8_CX_SRCS_REMOVE-$(HAVE_ARMV5TE)  += encoder/boolhuff.c
+
+#File list for armv5te
+# encoder
+VP8_CX_SRCS-$(HAVE_ARMV5TE)  += encoder/arm/armv5te/boolhuff_armv5te$(ASM)
+VP8_CX_SRCS-$(HAVE_ARMV5TE)  += encoder/arm/armv5te/vp8_packtokens_armv5$(ASM)
+VP8_CX_SRCS-$(HAVE_ARMV5TE)  += encoder/arm/armv5te/vp8_packtokens_mbrow_armv5$(ASM)
+VP8_CX_SRCS-$(HAVE_ARMV5TE)  += encoder/arm/armv5te/vp8_packtokens_partitions_armv5$(ASM)
 
 #File list for armv6
 # encoder
@@ -44,10 +49,6 @@ VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/vp8_subpixelvariance8x8_neon$(ASM
 VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/vp8_subpixelvariance16x16_neon$(ASM)
 VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/vp8_subpixelvariance16x16s_neon$(ASM)
 VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/vp8_memcpy_neon$(ASM)
-VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/vp8_packtokens_armv7$(ASM)
-VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/vp8_packtokens_mbrow_armv7$(ASM)
-VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/vp8_packtokens_partitions_armv7$(ASM)
-VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/boolhuff_armv7$(ASM)
 VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/vp8_shortwalsh4x4_neon$(ASM)
 
 VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/vpx_vp8_enc_asm_offsets.c
diff --git a/vp8/vp8dx_arm.mk b/vp8/vp8dx_arm.mk
index ae0610cdac2e82714ec25d746110c2f732379c96..0803a9cb0958169b3aac7f479a1ee0df4267b50a 100644
--- a/vp8/vp8dx_arm.mk
+++ b/vp8/vp8dx_arm.mk
@@ -11,11 +11,9 @@
 
 #VP8_DX_SRCS list is modified according to different platforms.
 
+VP8_DX_SRCS-$(ARCH_ARM)  += decoder/arm/arm_dsystemdependent.c
+
 VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/dequantize_arm.c
-VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/dsystemdependent.c
-VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6)  += decoder/generic/dsystemdependent.c
-VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6)  += decoder/dequantize.c
-VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6)  += decoder/idct_blk.c
 VP8_DX_SRCS-$(CONFIG_ARM_ASM_DETOK)  += decoder/arm/detokenize$(ASM)
 
 #File list for armv6
diff --git a/vpx_ports/arm.h b/vpx_ports/arm.h
new file mode 100644
index 0000000000000000000000000000000000000000..81af1f11ffecd8f8832258fa2fc2751ccf736286
--- /dev/null
+++ b/vpx_ports/arm.h
@@ -0,0 +1,27 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VPX_PORTS_ARM_H
+#define VPX_PORTS_ARM_H
+#include <stdlib.h>
+#include "config.h"
+
+/*ARMv5TE "Enhanced DSP" instructions.*/
+#define HAS_EDSP  0x01
+/*ARMv6 "Parallel" or "Media" instructions.*/
+#define HAS_MEDIA 0x02
+/*ARMv7 optional NEON instructions.*/
+#define HAS_NEON  0x04
+
+int arm_cpu_caps(void); /* Capability bitmask: HAS_* bits, or the VPX_SIMD_CAPS override. */
+
+#endif /* VPX_PORTS_ARM_H */
+
diff --git a/vpx_ports/arm_cpudetect.c b/vpx_ports/arm_cpudetect.c
new file mode 100644
index 0000000000000000000000000000000000000000..4109924cf3ad2f8ee1ce97031d092184fe56f749
--- /dev/null
+++ b/vpx_ports/arm_cpudetect.c
@@ -0,0 +1,190 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include "arm.h"
+
+/* Fetch the VPX_SIMD_CAPS override: 0 if one was given, -1 (flags 0) if not. */
+static int arm_cpu_env_flags(int *flags)
+{
+    const char *override = getenv("VPX_SIMD_CAPS");
+    if (override == NULL || *override == '\0')
+    {
+        *flags = 0; /* Unset or empty: hand back an empty capability set. */
+        return -1;
+    }
+    *flags = (int)strtol(override, NULL, 0); /* Base 0: decimal, octal or hex. */
+    return 0;
+}
+
+/* Mask taken from VPX_SIMD_CAPS_MASK; every bit is set when it is unset or empty. */
+static int arm_cpu_env_mask(void)
+{
+    const char *limit = getenv("VPX_SIMD_CAPS_MASK");
+    return (limit == NULL || *limit == '\0') ? ~0 : (int)strtol(limit, NULL, 0);
+}
+
+
+#if defined(_MSC_VER)
+/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
+#define WIN32_LEAN_AND_MEAN
+#define WIN32_EXTRA_LEAN
+#include <windows.h>
+
+/* MSVC: probe each extension by executing one opcode and trapping the fault. */
+int arm_cpu_caps(void)
+{
+    int flags, mask;
+    if (!arm_cpu_env_flags(&flags))
+    {
+        return flags; /* VPX_SIMD_CAPS override wins and is not masked. */
+    }
+    mask = arm_cpu_env_mask();
+    /* MSVC has no inline __asm support for ARM, but it does let you __emit
+     *  instructions via their assembled hex code.
+     * All of these instructions should be essentially nops. */
+#if HAVE_ARMV5TE
+    if (mask & HAS_EDSP)
+    {
+        __try
+        {
+            /*PLD [r13]*/
+            __emit(0xF5DDF000);
+            flags |= HAS_EDSP;
+        }
+        __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION)
+        {
+            /*Ignore exception.*/
+        }
+    }
+#endif
+#if HAVE_ARMV6
+    if (mask & HAS_MEDIA)
+    {
+        __try
+        {
+            /*SHADD8 r3,r3,r3*/
+            __emit(0xE6333F93);
+            flags |= HAS_MEDIA;
+        }
+        __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION)
+        {
+            /*Ignore exception.*/
+        }
+    }
+#endif
+#if HAVE_ARMV7
+    if (mask & HAS_NEON)
+    {
+        __try
+        {
+            /*VORR q0,q0,q0*/
+            __emit(0xF2200150);
+            flags |= HAS_NEON;
+        }
+        __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION)
+        {
+            /*Ignore exception.*/
+        }
+    }
+#endif
+    return flags & mask;
+}
+
+#elif defined(__linux__)
+#include <stdio.h>
+
+/* Linux: build the capability bits from the text of /proc/cpuinfo. */
+int arm_cpu_caps(void)
+{
+    FILE *fin;
+    int flags, mask;
+    if (!arm_cpu_env_flags(&flags))
+    {
+        return flags; /* VPX_SIMD_CAPS override wins and is not masked. */
+    }
+    mask = arm_cpu_env_mask();
+    /* Reading /proc/self/auxv would be easier, but that doesn't work reliably
+     *  on Android.
+     * This also means that detection will fail in Scratchbox.
+     */
+    fin = fopen("/proc/cpuinfo","r");
+    if(fin != NULL)
+    {
+        /* 512 should be enough for anybody (it's even enough for all the flags
+         * that x86 has accumulated... so far). */
+        char buf[512];
+        while (fgets(buf, sizeof(buf), fin) != NULL)
+        {
+#if HAVE_ARMV5TE || HAVE_ARMV7
+            /* strncmp, unlike memcmp, never reads past a short line's NUL. */
+            if (strncmp(buf, "Features", 8) == 0)
+            {
+                char *p;
+#if HAVE_ARMV5TE
+                p = strstr(buf, " edsp");
+                if (p != NULL && (p[5] == ' ' || p[5] == '\n'))
+                {
+                    flags |= HAS_EDSP;
+                }
+#endif
+#if HAVE_ARMV7
+                p = strstr(buf, " neon");
+                if (p != NULL && (p[5] == ' ' || p[5] == '\n'))
+                {
+                    flags |= HAS_NEON;
+                }
+#endif
+            }
+#endif
+#if HAVE_ARMV6
+            if (strncmp(buf, "CPU architecture:", 17) == 0)
+            {
+                int version = atoi(buf + 17);
+                if (version >= 6) /* HAS_MEDIA is the ARMv6 feature bit. */
+                {
+                    flags |= HAS_MEDIA;
+                }
+            }
+#endif
+        }
+        fclose(fin);
+    }
+    return flags & mask;
+}
+
+#elif !CONFIG_RUNTIME_CPU_DETECT
+
+/* No runtime probing: report the instruction sets this build was configured for. */
+int arm_cpu_caps(void)
+{
+    int flags, mask;
+    if (!arm_cpu_env_flags(&flags))
+    {
+        return flags; /* VPX_SIMD_CAPS override wins and is not masked. */
+    }
+    mask = arm_cpu_env_mask();
+#if HAVE_ARMV5TE
+    flags |= HAS_EDSP;
+#endif
+#if HAVE_ARMV6
+    flags |= HAS_MEDIA;
+#endif
+#if HAVE_ARMV7
+    flags |= HAS_NEON;
+#endif
+    return flags & mask;
+}
+
+#else
+#error "--enable-runtime-cpu-detect selected, but no CPU detection method " \
+ "available for your platform. Reconfigure without --enable-runtime-cpu-detect."
+#endif
diff --git a/vpx_scale/arm/scalesystemdependant.c b/vpx_scale/arm/scalesystemdependant.c
index 1e8bcb89d35b13cf278153e760729880fe0d7ae5..fee76fff78c0acb6539be0989762cbea0d33b535 100644
--- a/vpx_scale/arm/scalesystemdependant.c
+++ b/vpx_scale/arm/scalesystemdependant.c
@@ -10,6 +10,7 @@
 
 
 #include "vpx_ports/config.h"
+#include "vpx_ports/arm.h"
 #include "vpx_scale/vpxscale.h"
 
 
@@ -47,6 +48,9 @@ extern void vp8_yv12_copy_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CO
  ****************************************************************************/
 void vp8_scale_machine_specific_config()
 {
+#if HAVE_ARMV7 && CONFIG_RUNTIME_CPU_DETECT
+    int flags;
+#endif
     /*
     vp8_horizontal_line_1_2_scale        = horizontal_line_1_2_scale_armv4;
     vp8_vertical_band_1_2_scale          = vertical_band_1_2_scale_armv4;
@@ -73,14 +77,20 @@ void vp8_scale_machine_specific_config()
     vp8_horizontal_line_5_4_scale         = vp8cx_horizontal_line_5_4_scale_c;
     */
 
-#if HAVE_ARMV7
-    vp8_yv12_extend_frame_borders_ptr      = vp8_yv12_extend_frame_borders_neon;
-    vp8_yv12_copy_frame_yonly_ptr          = vp8_yv12_copy_frame_yonly_neon;
-    vp8_yv12_copy_frame_ptr               = vp8_yv12_copy_frame_neon;
-#else
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
     vp8_yv12_extend_frame_borders_ptr      = vp8_yv12_extend_frame_borders;
     vp8_yv12_copy_frame_yonly_ptr          = vp8_yv12_copy_frame_yonly;
     vp8_yv12_copy_frame_ptr           = vp8_yv12_copy_frame;
 #endif
-
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+    flags = arm_cpu_caps();
+    if (flags & HAS_NEON)
+#endif
+    {
+        vp8_yv12_extend_frame_borders_ptr = vp8_yv12_extend_frame_borders_neon;
+        vp8_yv12_copy_frame_yonly_ptr     = vp8_yv12_copy_frame_yonly_neon;
+        vp8_yv12_copy_frame_ptr           = vp8_yv12_copy_frame_neon;
+    }
+#endif
 }