Skip to content
Snippets Groups Projects
Commit 7c645606 authored by Jean-Marc Valin
Browse files

Implement sum_sqr_shift() using two passes with no branch inside the loops

Slightly slower on x86, about the same speed on ARMv7, should be faster on
DSPs.
parent fe4d91c2
No related branches found
No related tags found
No related merge requests found
......@@ -41,43 +41,40 @@ void silk_sum_sqr_shift(
)
{
opus_int i, shft;
opus_int32 nrg_tmp, nrg;
opus_uint32 nrg_tmp;
opus_int32 nrg;
nrg = 0;
shft = 0;
len--;
for( i = 0; i < len; i += 2 ) {
nrg = silk_SMLABB_ovflw( nrg, x[ i ], x[ i ] );
nrg = silk_SMLABB_ovflw( nrg, x[ i + 1 ], x[ i + 1 ] );
if( nrg < 0 ) {
/* Scale down */
nrg = (opus_int32)silk_RSHIFT_uint( (opus_uint32)nrg, 2 );
shft = 2;
i+=2;
break;
}
/* Do a first run with the maximum shift we could have. */
shft = 31-silk_CLZ32(len);
/* Let's be conservative with rounding and start with nrg=len. */
nrg = len;
for( i = 0; i < len - 1; i += 2 ) {
nrg_tmp = silk_SMULBB( x[ i ], x[ i ] );
nrg_tmp = silk_SMLABB_ovflw( nrg_tmp, x[ i + 1 ], x[ i + 1 ] );
nrg = (opus_int32)silk_ADD_RSHIFT_uint( nrg, nrg_tmp, shft );
}
for( ; i < len; i += 2 ) {
if( i < len ) {
/* One sample left to process */
nrg_tmp = silk_SMULBB( x[ i ], x[ i ] );
nrg = (opus_int32)silk_ADD_RSHIFT_uint( nrg, nrg_tmp, shft );
}
silk_assert( nrg >= 0 );
/* Make sure the result will fit in a 32-bit signed integer with two bits
of headroom. */
shft = silk_max_32(0, shft+3 - silk_CLZ32(nrg));
nrg = 0;
for( i = 0 ; i < len - 1; i += 2 ) {
nrg_tmp = silk_SMULBB( x[ i ], x[ i ] );
nrg_tmp = silk_SMLABB_ovflw( nrg_tmp, x[ i + 1 ], x[ i + 1 ] );
nrg = (opus_int32)silk_ADD_RSHIFT_uint( nrg, (opus_uint32)nrg_tmp, shft );
if( nrg < 0 ) {
/* Scale down */
nrg = (opus_int32)silk_RSHIFT_uint( (opus_uint32)nrg, 2 );
shft += 2;
}
nrg = (opus_int32)silk_ADD_RSHIFT_uint( nrg, nrg_tmp, shft );
}
if( i == len ) {
if( i < len ) {
/* One sample left to process */
nrg_tmp = silk_SMULBB( x[ i ], x[ i ] );
nrg = (opus_int32)silk_ADD_RSHIFT_uint( nrg, nrg_tmp, shft );
}
/* Make sure to have at least one extra leading zero (two leading zeros in total) */
if( nrg & 0xC0000000 ) {
nrg = silk_RSHIFT_uint( (opus_uint32)nrg, 2 );
shft += 2;
}
silk_assert( nrg >= 0 );
/* Output arguments */
*shift = shft;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment