From 68242ac58cca3d09b55d57f33d22144e701c923b Mon Sep 17 00:00:00 2001 From: "Timothy B. Terriberry" <tterribe@xiph.org> Date: Tue, 27 Jul 2010 15:09:51 -0700 Subject: [PATCH] Eliminate the loop when decoding the split angle. Use a closed-form formula for the search instead. This requires an integer sqrt, so it is not actually closed-form, but the number of iterations is O(qb) instead of O(2**qb). --- libcelt/bands.c | 26 ++++++++++++++------------ libcelt/cwrs.c | 27 --------------------------- libcelt/mathops.c | 27 +++++++++++++++++++++++++++ libcelt/mathops.h | 1 + 4 files changed, 42 insertions(+), 39 deletions(-) diff --git a/libcelt/bands.c b/libcelt/bands.c index 11e7190ee..51652cc12 100644 --- a/libcelt/bands.c +++ b/libcelt/bands.c @@ -664,21 +664,23 @@ static void quant_band(int encode, const CELTMode *m, int i, celt_norm *X, celt_ ec_encode((ec_enc*)ec, fl, fl+fs, ft); } else { int fl=0; - int j, fm; + int fm; fm = ec_decode((ec_dec*)ec, ft); - j=0; - while (1) + + if (fm < ((1<<qb>>1)*((1<<qb>>1) + 1)>>1)) + { + itheta = (isqrt32(8*(celt_uint32)fm + 1) - 1)>>1; + fs = itheta + 1; + fl = itheta*(itheta + 1)>>1; + } + else { - if (fm < fl+fs) - break; - fl+=fs; - if (j<(1<<qb>>1)) - fs++; - else - fs--; - j++; + itheta = (2*((1<<qb) + 1) + - isqrt32(8*(celt_uint32)(ft - fm - 1) + 1))>>1; + fs = (1<<qb) + 1 - itheta; + fl = ft - (((1<<qb) + 1 - itheta)*((1<<qb) + 2 - itheta)>>1); } - itheta = j; + ec_dec_update((ec_dec*)ec, fl, fl+fs, ft); } qalloc = log2_frac(ft,BITRES) - log2_frac(fs,BITRES) + 1; diff --git a/libcelt/cwrs.c b/libcelt/cwrs.c index 0c077096d..2d9975c89 100644 --- a/libcelt/cwrs.c +++ b/libcelt/cwrs.c @@ -148,33 +148,6 @@ static inline celt_uint32 imusdiv32even(celt_uint32 _a,celt_uint32 _b, (_a*(_b&mask)+one-(_c&mask)>>shift)-1)*inv&MASK32; } -/*Compute floor(sqrt(_val)) with exact arithmetic. - This has been tested on all possible 32-bit inputs.*/ -static unsigned isqrt32(celt_uint32 _val){ - unsigned b; - unsigned g; - int bshift; - /*Uses the second method from - http://www.azillionmonkeys.com/qed/sqroot.html - The main idea is to search for the largest binary digit b such that - (g+b)*(g+b) <= _val, and add it to the solution g.*/ - g=0; - bshift=EC_ILOG(_val)-1>>1; - b=1U<<bshift; - do{ - celt_uint32 t; - t=((celt_uint32)g<<1)+b<<bshift; - if(t<=_val){ - g+=b; - _val-=t; - } - b>>=1; - bshift--; - } - while(bshift>=0); - return g; -} - #endif /* SMALL_FOOTPRINT */ /*Although derived separately, the pulse vector coding scheme is equivalent to diff --git a/libcelt/mathops.c b/libcelt/mathops.c index 2a9ab5e78..3857a9b44 100644 --- a/libcelt/mathops.c +++ b/libcelt/mathops.c @@ -41,6 +41,33 @@ #include "mathops.h" +/*Compute floor(sqrt(_val)) with exact arithmetic. + This has been tested on all possible 32-bit inputs.*/ +unsigned isqrt32(celt_uint32 _val){ + unsigned b; + unsigned g; + int bshift; + /*Uses the second method from + http://www.azillionmonkeys.com/qed/sqroot.html + The main idea is to search for the largest binary digit b such that + (g+b)*(g+b) <= _val, and add it to the solution g.*/ + g=0; + bshift=EC_ILOG(_val)-1>>1; + b=1U<<bshift; + do{ + celt_uint32 t; + t=((celt_uint32)g<<1)+b<<bshift; + if(t<=_val){ + g+=b; + _val-=t; + } + b>>=1; + bshift--; + } + while(bshift>=0); + return g; +} + #ifdef FIXED_POINT celt_word32 frac_div32(celt_word32 a, celt_word32 b) diff --git a/libcelt/mathops.h b/libcelt/mathops.h index 224857df8..8717ca91f 100644 --- a/libcelt/mathops.h +++ b/libcelt/mathops.h @@ -45,6 +45,7 @@ /* Multiplies two 16-bit fractional values. Bit-exactness of this macro is important */ #define FRAC_MUL16(a,b) ((16384+((celt_int32)(celt_int16)(a)*(celt_int16)(b)))>>15) +unsigned isqrt32(celt_uint32 _val); #ifndef FIXED_POINT -- GitLab