From 68242ac58cca3d09b55d57f33d22144e701c923b Mon Sep 17 00:00:00 2001
From: "Timothy B. Terriberry" <tterribe@xiph.org>
Date: Tue, 27 Jul 2010 15:09:51 -0700
Subject: [PATCH] Eliminate the loop when decoding the split angle.

Use a closed-form formula for the search instead.
This requires an integer sqrt, so it is not actually closed-form,
 but the number of iterations is O(qb) instead of O(2**qb).
---
 libcelt/bands.c   | 26 ++++++++++++++------------
 libcelt/cwrs.c    | 27 ---------------------------
 libcelt/mathops.c | 27 +++++++++++++++++++++++++++
 libcelt/mathops.h |  1 +
 4 files changed, 42 insertions(+), 39 deletions(-)

diff --git a/libcelt/bands.c b/libcelt/bands.c
index 11e7190ee..51652cc12 100644
--- a/libcelt/bands.c
+++ b/libcelt/bands.c
@@ -664,21 +664,23 @@ static void quant_band(int encode, const CELTMode *m, int i, celt_norm *X, celt_
                ec_encode((ec_enc*)ec, fl, fl+fs, ft);
             } else {
                int fl=0;
-               int j, fm;
+               int fm;
                fm = ec_decode((ec_dec*)ec, ft);
-               j=0;
-               while (1)
+
+               if (fm < ((1<<qb>>1)*((1<<qb>>1) + 1)>>1))
+               {
+                  itheta = (isqrt32(8*(celt_uint32)fm + 1) - 1)>>1;
+                  fs = itheta + 1;
+                  fl = itheta*(itheta + 1)>>1;
+               }
+               else
                {
-                  if (fm < fl+fs)
-                     break;
-                  fl+=fs;
-                  if (j<(1<<qb>>1))
-                     fs++;
-                  else
-                     fs--;
-                  j++;
+                  itheta = (2*((1<<qb) + 1)
+                   - isqrt32(8*(celt_uint32)(ft - fm - 1) + 1))>>1;
+                  fs = (1<<qb) + 1 - itheta;
+                  fl = ft - (((1<<qb) + 1 - itheta)*((1<<qb) + 2 - itheta)>>1);
                }
-               itheta = j;
+
                ec_dec_update((ec_dec*)ec, fl, fl+fs, ft);
             }
             qalloc = log2_frac(ft,BITRES) - log2_frac(fs,BITRES) + 1;
diff --git a/libcelt/cwrs.c b/libcelt/cwrs.c
index 0c077096d..2d9975c89 100644
--- a/libcelt/cwrs.c
+++ b/libcelt/cwrs.c
@@ -148,33 +148,6 @@ static inline celt_uint32 imusdiv32even(celt_uint32 _a,celt_uint32 _b,
    (_a*(_b&mask)+one-(_c&mask)>>shift)-1)*inv&MASK32;
 }
 
-/*Compute floor(sqrt(_val)) with exact arithmetic.
-  This has been tested on all possible 32-bit inputs.*/
-static unsigned isqrt32(celt_uint32 _val){
-  unsigned b;
-  unsigned g;
-  int      bshift;
-  /*Uses the second method from
-     http://www.azillionmonkeys.com/qed/sqroot.html
-    The main idea is to search for the largest binary digit b such that
-     (g+b)*(g+b) <= _val, and add it to the solution g.*/
-  g=0;
-  bshift=EC_ILOG(_val)-1>>1;
-  b=1U<<bshift;
-  do{
-    celt_uint32 t;
-    t=((celt_uint32)g<<1)+b<<bshift;
-    if(t<=_val){
-      g+=b;
-      _val-=t;
-    }
-    b>>=1;
-    bshift--;
-  }
-  while(bshift>=0);
-  return g;
-}
-
 #endif /* SMALL_FOOTPRINT */
 
 /*Although derived separately, the pulse vector coding scheme is equivalent to
diff --git a/libcelt/mathops.c b/libcelt/mathops.c
index 2a9ab5e78..3857a9b44 100644
--- a/libcelt/mathops.c
+++ b/libcelt/mathops.c
@@ -41,6 +41,33 @@
 
 #include "mathops.h"
 
+/*Compute floor(sqrt(_val)) with exact arithmetic.
+  This has been tested on all possible 32-bit inputs.*/
+unsigned isqrt32(celt_uint32 _val){
+  unsigned b;
+  unsigned g;
+  int      bshift;
+  /*Uses the second method from
+     http://www.azillionmonkeys.com/qed/sqroot.html
+    The main idea is to search for the largest binary digit b such that
+     (g+b)*(g+b) <= _val, and add it to the solution g.*/
+  g=0;
+  bshift=EC_ILOG(_val)-1>>1;
+  b=1U<<bshift;
+  do{
+    celt_uint32 t;
+    t=((celt_uint32)g<<1)+b<<bshift;
+    if(t<=_val){
+      g+=b;
+      _val-=t;
+    }
+    b>>=1;
+    bshift--;
+  }
+  while(bshift>=0);
+  return g;
+}
+
 #ifdef FIXED_POINT
 
 celt_word32 frac_div32(celt_word32 a, celt_word32 b)
diff --git a/libcelt/mathops.h b/libcelt/mathops.h
index 224857df8..8717ca91f 100644
--- a/libcelt/mathops.h
+++ b/libcelt/mathops.h
@@ -45,6 +45,7 @@
 /* Multiplies two 16-bit fractional values. Bit-exactness of this macro is important */
 #define FRAC_MUL16(a,b) ((16384+((celt_int32)(celt_int16)(a)*(celt_int16)(b)))>>15)
 
+unsigned isqrt32(celt_uint32 _val);
 
 #ifndef FIXED_POINT
 
-- 
GitLab