diff --git a/celt/_kiss_fft_guts.h b/celt/_kiss_fft_guts.h
index 76941302cb01c00a87345f3aaa837659390af91c..cbe7557987c561f625b0ed9546b06e93f76d55cc 100644
--- a/celt/_kiss_fft_guts.h
+++ b/celt/_kiss_fft_guts.h
@@ -110,7 +110,7 @@
             "smull %[tt], %[mi], r1, %[br]\n\t" \
             "smlal %[tt], %[mi], r0, %[bi]\n\t" \
             "rsb %[bi], %[bi], #0\n\t" \
-            "smull r0, %[mr], r0, %[br]\n\t" \
+            "smull r0, %[mr], %[br], r0\n\t" \
             "mov %[tt], %[tt], lsr #15\n\t" \
             "smlal r0, %[mr], r1, %[bi]\n\t" \
             "orr %[mi], %[tt], %[mi], lsl #17\n\t" \
@@ -138,7 +138,7 @@
             "smull %[tt], %[mi], r1, %[br]\n\t" \
             "smlal %[tt], %[mi], r0, %[bi]\n\t" \
             "rsb %[bi], %[bi], #0\n\t" \
-            "smull r0, %[mr], r0, %[br]\n\t" \
+            "smull r0, %[mr], %[br], r0\n\t" \
             "mov %[tt], %[tt], lsr #17\n\t" \
             "smlal r0, %[mr], r1, %[bi]\n\t" \
             "orr %[mi], %[tt], %[mi], lsl #15\n\t" \
@@ -166,7 +166,7 @@
             "smull %[tt], %[mr], r0, %[br]\n\t" \
             "smlal %[tt], %[mr], r1, %[bi]\n\t" \
             "rsb %[bi], %[bi], #0\n\t" \
-            "smull r1, %[mi], r1, %[br]\n\t" \
+            "smull r1, %[mi], %[br], r1\n\t" \
             "mov %[tt], %[tt], lsr #15\n\t" \
             "smlal r1, %[mi], r0, %[bi]\n\t" \
             "orr %[mr], %[tt], %[mr], lsl #17\n\t" \
diff --git a/celt/fixed_armv4.h b/celt/fixed_armv4.h
index d38880f1c31e0cf1b01be43cf11dc1843818724c..73e4f434cfc65cab84b91caa11f77addfa78c90d 100644
--- a/celt/fixed_armv4.h
+++ b/celt/fixed_armv4.h
@@ -36,8 +36,8 @@ static inline opus_val32 MULT16_32_Q16_armv4(opus_val16 a, opus_val32 b)
   __asm__(
       "#MULT16_32_Q16\n\t"
       "smull %0, %1, %2, %3\n\t"
-      : "=r"(rd_lo), "=r"(rd_hi)
-      : "r"(b),"r"(a<<16)
+      : "=&r"(rd_lo), "=&r"(rd_hi)
+      : "%r"(b),"r"(a<<16)
   );
   return rd_hi;
 }
@@ -53,7 +53,7 @@ static inline opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b)
   __asm__(
       "#MULT16_32_Q15\n\t"
       "smull %0, %1, %2, %3\n\t"
-      : "=r"(rd_lo), "=r"(rd_hi)
+      : "=&r"(rd_lo), "=&r"(rd_hi)
       : "%r"(b), "r"(a<<16)
   );
   /*We intentionally don't OR in the high bit of rd_lo for speed.*/
diff --git a/celt/fixed_armv5e.h b/celt/fixed_armv5e.h
index 6b96150abab32d1b2e680ec36ff061d525658e3d..9d70d35653d03cc220342b4d53969f0daf40f737 100644
--- a/celt/fixed_armv5e.h
+++ b/celt/fixed_armv5e.h
@@ -52,26 +52,14 @@ static inline opus_val32 MULT16_32_Q16_armv5e(opus_val16 a, opus_val32 b)
 #undef MULT16_32_Q15
 static inline opus_val32 MULT16_32_Q15_armv5e(opus_val16 a, opus_val32 b)
 {
-#if 0
-  unsigned rd_lo;
-  int rd_hi;
-  __asm__(
-      "#MULT16_32_Q15\n\t"
-      "smull %0, %1, %2, %3\n\t"
-      : "=r"(rd_lo), "=r"(rd_hi)
-      : "%r"(b), "r"(a<<16)
-  );
-  return (rd_lo>>31)|(rd_hi<<1);
-#else
   int res;
   __asm__(
       "#MULT16_32_Q15\n\t"
       "smulwb %0, %1, %2\n\t"
       : "=r"(res)
-      : "%r"(b), "r"(a)
+      : "r"(b), "r"(a)
   );
   return res<<1;
-#endif
 }
 #define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv5e(a, b))
 
diff --git a/silk/SigProc_FIX_armv4.h b/silk/SigProc_FIX_armv4.h
index ea3720207b191b1fc7fe9899d4ba896fdeda0b4c..d69573e3d4d297939f7e2749aa1c496c30304d0b 100644
--- a/silk/SigProc_FIX_armv4.h
+++ b/silk/SigProc_FIX_armv4.h
@@ -37,7 +37,7 @@ static inline opus_int32 silk_MLA_armv4(opus_int32 a, opus_int32 b,
   __asm__(
       "#silk_MLA\n\t"
       "mla %0, %1, %2, %3\n\t"
-      : "=r"(res)
+      : "=&r"(res)
       : "r"(b), "r"(c), "r"(a)
   );
   return res;
diff --git a/silk/SigProc_FIX_armv5e.h b/silk/SigProc_FIX_armv5e.h
index 804e2bc55287d40719745d0c9d41c683c85b80cc..81a6324f652022312aa5e21cedbf36669116b09d 100644
--- a/silk/SigProc_FIX_armv5e.h
+++ b/silk/SigProc_FIX_armv5e.h
@@ -37,7 +37,7 @@ static inline opus_int32 silk_SMULTT_armv5e(opus_int32 a, opus_int32 b)
       "#silk_SMULTT\n\t"
       "smultt %0, %1, %2\n\t"
       : "=r"(res)
-      : "r"(a), "r"(b)
+      : "%r"(a), "r"(b)
   );
   return res;
 }
@@ -52,7 +52,7 @@ static inline opus_int32 silk_SMLATT_armv5e(opus_int32 a, opus_int32 b,
       "#silk_SMLATT\n\t"
       "smlatt %0, %1, %2, %3\n\t"
       : "=r"(res)
-      : "r"(b), "r"(c), "r"(a)
+      : "%r"(b), "r"(c), "r"(a)
   );
   return res;
 }
diff --git a/silk/macros_armv4.h b/silk/macros_armv4.h
index e5dfe69c7fb45ce9e8d8123295003a57df8db6ea..58df6c2e2b319f91df03e9fc4665bfdf57f44d34 100644
--- a/silk/macros_armv4.h
+++ b/silk/macros_armv4.h
@@ -37,7 +37,7 @@ static inline opus_int32 silk_SMULWB_armv4(opus_int32 a, opus_int16 b)
   __asm__(
       "#silk_SMULWB\n\t"
       "smull %0, %1, %2, %3\n\t"
-      : "=r"(rd_lo), "=r"(rd_hi)
+      : "=&r"(rd_lo), "=&r"(rd_hi)
       : "%r"(a), "r"(b<<16)
   );
   return rd_hi;
@@ -57,7 +57,7 @@ static inline opus_int32 silk_SMULWT_armv4(opus_int32 a, opus_int32 b)
   __asm__(
       "#silk_SMULWT\n\t"
       "smull %0, %1, %2, %3\n\t"
-      : "=r"(rd_lo), "=r"(rd_hi)
+      : "=&r"(rd_lo), "=&r"(rd_hi)
       : "%r"(a), "r"(b&~0xFFFF)
   );
   return rd_hi;
@@ -77,10 +77,10 @@ static inline opus_int32 silk_SMULWW_armv4(opus_int32 a, opus_int32 b)
   __asm__(
     "#silk_SMULWW\n\t"
     "smull %0, %1, %2, %3\n\t"
-    : "=r"(rd_lo), "=r"(rd_hi)
+    : "=&r"(rd_lo), "=&r"(rd_hi)
     : "%r"(a), "r"(b)
   );
-  return (rd_lo>>16)|(rd_hi<<16);
+  return (rd_hi<<16)+(rd_lo>>16);
 }
 #define silk_SMULWW(a, b) (silk_SMULWW_armv4(a, b))
 
@@ -91,12 +91,12 @@ static inline opus_int32 silk_SMLAWW_armv4(opus_int32 a, opus_int32 b,
   unsigned rd_lo;
   int rd_hi;
   __asm__(
-    "#silk_SMULWW\n\t"
+    "#silk_SMLAWW\n\t"
     "smull %0, %1, %2, %3\n\t"
-    : "=r"(rd_lo), "=r"(rd_hi)
+    : "=&r"(rd_lo), "=&r"(rd_hi)
     : "%r"(b), "r"(c)
   );
-  return a+((rd_lo>>16)|(rd_hi<<16));
+  return a+(rd_hi<<16)+(rd_lo>>16);
 }
 #define silk_SMLAWW(a, b, c) (silk_SMLAWW_armv4(a, b, c))
 
diff --git a/silk/macros_armv5e.h b/silk/macros_armv5e.h
index a86586b33cfa2c8b135a4ed38f8d4233296fd95f..63b1e30f48bcf3498fce08b4307576aad9a7643f 100644
--- a/silk/macros_armv5e.h
+++ b/silk/macros_armv5e.h
@@ -203,7 +203,7 @@ static inline opus_int32 silk_CLZ32_armv5(opus_int32 in32)
   __asm__(
       "#silk_CLZ32\n\t"
       "clz %0, %1\n\t"
-      : "=&r"(res)
+      : "=r"(res)
       : "r"(in32)
   );
   return res;