/*
 * Summary of the patch text that follows (a git-style diff touching seven
 * headers containing GCC inline assembly for ARM):
 *
 *  - celt/_kiss_fft_guts.h: in three hunks the source operands of one
 *    "smull" are swapped (e.g. "r0, %[br]" becomes "%[br], r0"), so the
 *    low destination register (r0 or r1) is no longer also the first
 *    source operand.
 *  - celt/fixed_armv4.h: the two "smull" outputs in MULT16_32_Q16_armv4 and
 *    MULT16_32_Q15_armv4 change from "=r" to "=&r"; the first input of
 *    MULT16_32_Q16_armv4 gains a "%" modifier.
 *  - celt/fixed_armv5e.h: the disabled "#if 0" smull variant is removed
 *    from MULT16_32_Q15_armv5e, and the "%" modifier is dropped from the
 *    first input of the remaining "smulwb".
 *  - silk/SigProc_FIX_armv4.h: the "mla" output in silk_MLA_armv4 changes
 *    from "=r" to "=&r".
 *  - silk/SigProc_FIX_armv5e.h: the first input of "smultt" and of "smlatt"
 *    gains a "%" modifier.
 *  - silk/macros_armv4.h: the "smull" outputs in silk_SMULWB, silk_SMULWT,
 *    silk_SMULWW and silk_SMLAWW change from "=r" to "=&r"; the two halves
 *    of the product are now combined with "+" instead of "|"; the asm
 *    label inside silk_SMLAWW is corrected from "#silk_SMULWW" to
 *    "#silk_SMLAWW".
 *  - silk/macros_armv5e.h: the "clz" output in silk_CLZ32_armv5 changes
 *    from "=&r" to "=r".
 *
 * Per GCC's constraint-modifier documentation, "&" marks an early-clobber
 * output (it may not share a register with an input) and "%" declares the
 * operand commutative with the one that follows it.
 *
 * NOTE(review): the "=&r" additions and the smull operand swaps presumably
 * address the pre-ARMv6 rule that the destination registers of MUL/MLA/
 * SMULL must differ from the first source register -- confirm against the
 * ARM Architecture Reference Manual.
 * NOTE(review): line breaks in this patch text appear to have been
 * collapsed into spaces (hunk headers and +/- markers sit inline), so it
 * likely cannot be applied with patch or git apply as-is -- TODO confirm
 * against the upstream commit.
 */
diff --git a/celt/_kiss_fft_guts.h b/celt/_kiss_fft_guts.h index 76941302cb01c00a87345f3aaa837659390af91c..cbe7557987c561f625b0ed9546b06e93f76d55cc 100644 --- a/celt/_kiss_fft_guts.h +++ b/celt/_kiss_fft_guts.h @@ -110,7 +110,7 @@ "smull %[tt], %[mi], r1, %[br]\n\t" \ "smlal %[tt], %[mi], r0, %[bi]\n\t" \ "rsb %[bi], %[bi], #0\n\t" \ - "smull r0, %[mr], r0, %[br]\n\t" \ + "smull r0, %[mr], %[br], r0\n\t" \ "mov %[tt], %[tt], lsr #15\n\t" \ "smlal r0, %[mr], r1, %[bi]\n\t" \ "orr %[mi], %[tt], %[mi], lsl #17\n\t" \ @@ -138,7 +138,7 @@ "smull %[tt], %[mi], r1, %[br]\n\t" \ "smlal %[tt], %[mi], r0, %[bi]\n\t" \ "rsb %[bi], %[bi], #0\n\t" \ - "smull r0, %[mr], r0, %[br]\n\t" \ + "smull r0, %[mr], %[br], r0\n\t" \ "mov %[tt], %[tt], lsr #17\n\t" \ "smlal r0, %[mr], r1, %[bi]\n\t" \ "orr %[mi], %[tt], %[mi], lsl #15\n\t" \ @@ -166,7 +166,7 @@ "smull %[tt], %[mr], r0, %[br]\n\t" \ "smlal %[tt], %[mr], r1, %[bi]\n\t" \ "rsb %[bi], %[bi], #0\n\t" \ - "smull r1, %[mi], r1, %[br]\n\t" \ + "smull r1, %[mi], %[br], r1\n\t" \ "mov %[tt], %[tt], lsr #15\n\t" \ "smlal r1, %[mi], r0, %[bi]\n\t" \ "orr %[mr], %[tt], %[mr], lsl #17\n\t" \ diff --git a/celt/fixed_armv4.h b/celt/fixed_armv4.h index d38880f1c31e0cf1b01be43cf11dc1843818724c..73e4f434cfc65cab84b91caa11f77addfa78c90d 100644 --- a/celt/fixed_armv4.h +++ b/celt/fixed_armv4.h @@ -36,8 +36,8 @@ static inline opus_val32 MULT16_32_Q16_armv4(opus_val16 a, opus_val32 b) __asm__( "#MULT16_32_Q16\n\t" "smull %0, %1, %2, %3\n\t" - : "=r"(rd_lo), "=r"(rd_hi) - : "r"(b),"r"(a<<16) + : "=&r"(rd_lo), "=&r"(rd_hi) + : "%r"(b),"r"(a<<16) ); return rd_hi; } @@ -53,7 +53,7 @@ static inline opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b) __asm__( "#MULT16_32_Q15\n\t" "smull %0, %1, %2, %3\n\t" - : "=r"(rd_lo), "=r"(rd_hi) + : "=&r"(rd_lo), "=&r"(rd_hi) : "%r"(b), "r"(a<<16) ); /*We intentionally don't OR in the high bit of rd_lo for speed.*/ diff --git a/celt/fixed_armv5e.h b/celt/fixed_armv5e.h index 
6b96150abab32d1b2e680ec36ff061d525658e3d..9d70d35653d03cc220342b4d53969f0daf40f737 100644 --- a/celt/fixed_armv5e.h +++ b/celt/fixed_armv5e.h @@ -52,26 +52,14 @@ static inline opus_val32 MULT16_32_Q16_armv5e(opus_val16 a, opus_val32 b) #undef MULT16_32_Q15 static inline opus_val32 MULT16_32_Q15_armv5e(opus_val16 a, opus_val32 b) { -#if 0 - unsigned rd_lo; - int rd_hi; - __asm__( - "#MULT16_32_Q15\n\t" - "smull %0, %1, %2, %3\n\t" - : "=r"(rd_lo), "=r"(rd_hi) - : "%r"(b), "r"(a<<16) - ); - return (rd_lo>>31)|(rd_hi<<1); -#else int res; __asm__( "#MULT16_32_Q15\n\t" "smulwb %0, %1, %2\n\t" : "=r"(res) - : "%r"(b), "r"(a) + : "r"(b), "r"(a) ); return res<<1; -#endif } #define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv5e(a, b)) diff --git a/silk/SigProc_FIX_armv4.h b/silk/SigProc_FIX_armv4.h index ea3720207b191b1fc7fe9899d4ba896fdeda0b4c..d69573e3d4d297939f7e2749aa1c496c30304d0b 100644 --- a/silk/SigProc_FIX_armv4.h +++ b/silk/SigProc_FIX_armv4.h @@ -37,7 +37,7 @@ static inline opus_int32 silk_MLA_armv4(opus_int32 a, opus_int32 b, __asm__( "#silk_MLA\n\t" "mla %0, %1, %2, %3\n\t" - : "=r"(res) + : "=&r"(res) : "r"(b), "r"(c), "r"(a) ); return res; diff --git a/silk/SigProc_FIX_armv5e.h b/silk/SigProc_FIX_armv5e.h index 804e2bc55287d40719745d0c9d41c683c85b80cc..81a6324f652022312aa5e21cedbf36669116b09d 100644 --- a/silk/SigProc_FIX_armv5e.h +++ b/silk/SigProc_FIX_armv5e.h @@ -37,7 +37,7 @@ static inline opus_int32 silk_SMULTT_armv5e(opus_int32 a, opus_int32 b) "#silk_SMULTT\n\t" "smultt %0, %1, %2\n\t" : "=r"(res) - : "r"(a), "r"(b) + : "%r"(a), "r"(b) ); return res; } @@ -52,7 +52,7 @@ static inline opus_int32 silk_SMLATT_armv5e(opus_int32 a, opus_int32 b, "#silk_SMLATT\n\t" "smlatt %0, %1, %2, %3\n\t" : "=r"(res) - : "r"(b), "r"(c), "r"(a) + : "%r"(b), "r"(c), "r"(a) ); return res; } diff --git a/silk/macros_armv4.h b/silk/macros_armv4.h index e5dfe69c7fb45ce9e8d8123295003a57df8db6ea..58df6c2e2b319f91df03e9fc4665bfdf57f44d34 100644 --- a/silk/macros_armv4.h +++ 
b/silk/macros_armv4.h @@ -37,7 +37,7 @@ static inline opus_int32 silk_SMULWB_armv4(opus_int32 a, opus_int16 b) __asm__( "#silk_SMULWB\n\t" "smull %0, %1, %2, %3\n\t" - : "=r"(rd_lo), "=r"(rd_hi) + : "=&r"(rd_lo), "=&r"(rd_hi) : "%r"(a), "r"(b<<16) ); return rd_hi; @@ -57,7 +57,7 @@ static inline opus_int32 silk_SMULWT_armv4(opus_int32 a, opus_int32 b) __asm__( "#silk_SMULWT\n\t" "smull %0, %1, %2, %3\n\t" - : "=r"(rd_lo), "=r"(rd_hi) + : "=&r"(rd_lo), "=&r"(rd_hi) : "%r"(a), "r"(b&~0xFFFF) ); return rd_hi; @@ -77,10 +77,10 @@ static inline opus_int32 silk_SMULWW_armv4(opus_int32 a, opus_int32 b) __asm__( "#silk_SMULWW\n\t" "smull %0, %1, %2, %3\n\t" - : "=r"(rd_lo), "=r"(rd_hi) + : "=&r"(rd_lo), "=&r"(rd_hi) : "%r"(a), "r"(b) ); - return (rd_lo>>16)|(rd_hi<<16); + return (rd_hi<<16)+(rd_lo>>16); } #define silk_SMULWW(a, b) (silk_SMULWW_armv4(a, b)) @@ -91,12 +91,12 @@ static inline opus_int32 silk_SMLAWW_armv4(opus_int32 a, opus_int32 b, unsigned rd_lo; int rd_hi; __asm__( - "#silk_SMULWW\n\t" + "#silk_SMLAWW\n\t" "smull %0, %1, %2, %3\n\t" - : "=r"(rd_lo), "=r"(rd_hi) + : "=&r"(rd_lo), "=&r"(rd_hi) : "%r"(b), "r"(c) ); - return a+((rd_lo>>16)|(rd_hi<<16)); + return a+(rd_hi<<16)+(rd_lo>>16); } #define silk_SMLAWW(a, b, c) (silk_SMLAWW_armv4(a, b, c)) diff --git a/silk/macros_armv5e.h b/silk/macros_armv5e.h index a86586b33cfa2c8b135a4ed38f8d4233296fd95f..63b1e30f48bcf3498fce08b4307576aad9a7643f 100644 --- a/silk/macros_armv5e.h +++ b/silk/macros_armv5e.h @@ -203,7 +203,7 @@ static inline opus_int32 silk_CLZ32_armv5(opus_int32 in32) __asm__( "#silk_CLZ32\n\t" "clz %0, %1\n\t" - : "=&r"(res) + : "=r"(res) : "r"(in32) ); return res;