Commit b518b56f authored by Timothy B. Terriberry

Clean up register constraints.

http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0068b/CIHBJEHG.html
 says that "Rd cannot be the same as Rm."
http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0068b/CIHBJEHG.html
 says that "RdLo, RdHi, and Rm must all be different registers."
This means that some of the early clobbers I removed really should
 have been there (to prevent aliasing Rd, RdLo, or RdHi with Rm).
It also means that we should reverse some of the operands in the
 FFT's complex multiplies.
This should only affect the ARMv4 optimizations.
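
To illustrate (a minimal, hypothetical sketch, not code from this patch): GCC assumes an input register is consumed before any output is written, so without an early clobber it may allocate an output operand to the same register as an input. For SMULL that can leave RdLo or RdHi aliased with Rm. Marking the outputs "=&r", or swapping the multiply operands so the aliased value sits in the Rs slot instead of Rm, keeps the allocation legal.

/* Hypothetical example; names and types are illustrative, not from the patch. */
static inline int mult16_32_q16_sketch(short a, int b)
{
    int rd_lo, rd_hi;
    __asm__(
        "smull %0, %1, %2, %3\n\t"    /* operands map to RdLo, RdHi, Rm, Rs    */
        : "=&r"(rd_lo), "=&r"(rd_hi)  /* "&": an output may not share a        */
                                      /* register with any input, so neither   */
                                      /* result register can alias Rm          */
        : "%r"(b), "r"(a<<16)         /* "%": GCC may swap this operand with   */
                                      /* the next one (the multiply commutes)  */
    );
    return rd_hi;
}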

Thanks to Nils Wallménius for the report.

While we're here, audit the commutative pair flags again, since I
 screwed up at least one of them, and eliminate some dead code.
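
The commutative-pair flag is GCC's "%" constraint modifier: it tells the compiler it may swap that input with the one following it, which is only safe when the instruction treats both operands the same way. A hedged sketch of the two cases touched here (illustrative function names, not from the patch):

/* SMULTT multiplies the top halfwords of both inputs, so the pair commutes
   and may be marked "%". */
static inline int smultt_sketch(int a, int b)
{
    int res;
    __asm__("smultt %0, %1, %2\n\t" : "=r"(res) : "%r"(a), "r"(b));
    return res;
}

/* SMULWB multiplies a 32-bit word by the bottom halfword of the second
   operand; the inputs play different roles, so the pair must not be marked
   commutative. */
static inline int smulwb_sketch(int w, int h)
{
    int res;
    __asm__("smulwb %0, %1, %2\n\t" : "=r"(res) : "r"(w), "r"(h));
    return res;
}
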
parent 9880c4cd
@@ -110,7 +110,7 @@
"smull %[tt], %[mi], r1, %[br]\n\t" \
"smlal %[tt], %[mi], r0, %[bi]\n\t" \
"rsb %[bi], %[bi], #0\n\t" \
"smull r0, %[mr], r0, %[br]\n\t" \
"smull r0, %[mr], %[br], r0\n\t" \
"mov %[tt], %[tt], lsr #15\n\t" \
"smlal r0, %[mr], r1, %[bi]\n\t" \
"orr %[mi], %[tt], %[mi], lsl #17\n\t" \
@@ -138,7 +138,7 @@
"smull %[tt], %[mi], r1, %[br]\n\t" \
"smlal %[tt], %[mi], r0, %[bi]\n\t" \
"rsb %[bi], %[bi], #0\n\t" \
"smull r0, %[mr], r0, %[br]\n\t" \
"smull r0, %[mr], %[br], r0\n\t" \
"mov %[tt], %[tt], lsr #17\n\t" \
"smlal r0, %[mr], r1, %[bi]\n\t" \
"orr %[mi], %[tt], %[mi], lsl #15\n\t" \
@@ -166,7 +166,7 @@
"smull %[tt], %[mr], r0, %[br]\n\t" \
"smlal %[tt], %[mr], r1, %[bi]\n\t" \
"rsb %[bi], %[bi], #0\n\t" \
"smull r1, %[mi], r1, %[br]\n\t" \
"smull r1, %[mi], %[br], r1\n\t" \
"mov %[tt], %[tt], lsr #15\n\t" \
"smlal r1, %[mi], r0, %[bi]\n\t" \
"orr %[mr], %[tt], %[mr], lsl #17\n\t" \
@@ -36,8 +36,8 @@ static inline opus_val32 MULT16_32_Q16_armv4(opus_val16 a, opus_val32 b)
__asm__(
"#MULT16_32_Q16\n\t"
"smull %0, %1, %2, %3\n\t"
: "=r"(rd_lo), "=r"(rd_hi)
: "r"(b),"r"(a<<16)
: "=&r"(rd_lo), "=&r"(rd_hi)
: "%r"(b),"r"(a<<16)
);
return rd_hi;
}
@@ -53,7 +53,7 @@ static inline opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b)
__asm__(
"#MULT16_32_Q15\n\t"
"smull %0, %1, %2, %3\n\t"
: "=r"(rd_lo), "=r"(rd_hi)
: "=&r"(rd_lo), "=&r"(rd_hi)
: "%r"(b), "r"(a<<16)
);
/*We intentionally don't OR in the high bit of rd_lo for speed.*/
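/* Annotation (not part of the patch): writing the 64-bit product of (a<<16)
   and b as rd_hi:rd_lo, the exact Q15 result is
   (a*b)>>15 == ((a<<16)*b)>>31 == (rd_hi<<1)|(rd_lo>>31), as the dead armv5e
   code removed below spells out; skipping the (rd_lo>>31) term costs only the
   least significant bit of the result. */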
@@ -52,26 +52,14 @@ static inline opus_val32 MULT16_32_Q16_armv5e(opus_val16 a, opus_val32 b)
#undef MULT16_32_Q15
static inline opus_val32 MULT16_32_Q15_armv5e(opus_val16 a, opus_val32 b)
{
-#if 0
-unsigned rd_lo;
-int rd_hi;
-__asm__(
-"#MULT16_32_Q15\n\t"
-"smull %0, %1, %2, %3\n\t"
-: "=r"(rd_lo), "=r"(rd_hi)
-: "%r"(b), "r"(a<<16)
-);
-return (rd_lo>>31)|(rd_hi<<1);
-#else
int res;
__asm__(
"#MULT16_32_Q15\n\t"
"smulwb %0, %1, %2\n\t"
: "=r"(res)
: "%r"(b), "r"(a)
: "r"(b), "r"(a)
);
return res<<1;
-#endif
}
#define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv5e(a, b))
@@ -37,7 +37,7 @@ static inline opus_int32 silk_MLA_armv4(opus_int32 a, opus_int32 b,
__asm__(
"#silk_MLA\n\t"
"mla %0, %1, %2, %3\n\t"
: "=r"(res)
: "=&r"(res)
: "r"(b), "r"(c), "r"(a)
);
return res;
@@ -37,7 +37,7 @@ static inline opus_int32 silk_SMULTT_armv5e(opus_int32 a, opus_int32 b)
"#silk_SMULTT\n\t"
"smultt %0, %1, %2\n\t"
: "=r"(res)
: "r"(a), "r"(b)
: "%r"(a), "r"(b)
);
return res;
}
@@ -52,7 +52,7 @@ static inline opus_int32 silk_SMLATT_armv5e(opus_int32 a, opus_int32 b,
"#silk_SMLATT\n\t"
"smlatt %0, %1, %2, %3\n\t"
: "=r"(res)
: "r"(b), "r"(c), "r"(a)
: "%r"(b), "r"(c), "r"(a)
);
return res;
}
@@ -37,7 +37,7 @@ static inline opus_int32 silk_SMULWB_armv4(opus_int32 a, opus_int16 b)
__asm__(
"#silk_SMULWB\n\t"
"smull %0, %1, %2, %3\n\t"
: "=r"(rd_lo), "=r"(rd_hi)
: "=&r"(rd_lo), "=&r"(rd_hi)
: "%r"(a), "r"(b<<16)
);
return rd_hi;
@@ -57,7 +57,7 @@ static inline opus_int32 silk_SMULWT_armv4(opus_int32 a, opus_int32 b)
__asm__(
"#silk_SMULWT\n\t"
"smull %0, %1, %2, %3\n\t"
: "=r"(rd_lo), "=r"(rd_hi)
: "=&r"(rd_lo), "=&r"(rd_hi)
: "%r"(a), "r"(b&~0xFFFF)
);
return rd_hi;
@@ -77,10 +77,10 @@ static inline opus_int32 silk_SMULWW_armv4(opus_int32 a, opus_int32 b)
__asm__(
"#silk_SMULWW\n\t"
"smull %0, %1, %2, %3\n\t"
: "=r"(rd_lo), "=r"(rd_hi)
: "=&r"(rd_lo), "=&r"(rd_hi)
: "%r"(a), "r"(b)
);
-return (rd_lo>>16)|(rd_hi<<16);
+return (rd_hi<<16)+(rd_lo>>16);
}
#define silk_SMULWW(a, b) (silk_SMULWW_armv4(a, b))
@@ -91,12 +91,12 @@ static inline opus_int32 silk_SMLAWW_armv4(opus_int32 a, opus_int32 b,
unsigned rd_lo;
int rd_hi;
__asm__(
"#silk_SMULWW\n\t"
"#silk_SMLAWW\n\t"
"smull %0, %1, %2, %3\n\t"
: "=r"(rd_lo), "=r"(rd_hi)
: "=&r"(rd_lo), "=&r"(rd_hi)
: "%r"(b), "r"(c)
);
-return a+((rd_lo>>16)|(rd_hi<<16));
+return a+(rd_hi<<16)+(rd_lo>>16);
}
#define silk_SMLAWW(a, b, c) (silk_SMLAWW_armv4(a, b, c))
@@ -203,7 +203,7 @@ static inline opus_int32 silk_CLZ32_armv5(opus_int32 in32)
__asm__(
"#silk_CLZ32\n\t"
"clz %0, %1\n\t"
: "=&r"(res)
: "=r"(res)
: "r"(in32)
);
return res;