From f13fea7b530a178ff69dfe4682f3d50ac79537e0 Mon Sep 17 00:00:00 2001
From: "Timothy B. Terriberry" <tterribe@xiph.org>
Date: Tue, 11 Dec 2007 13:25:57 +1100
Subject: [PATCH] Added ec_{enc|dec}_bits64 and ec_{enc|dec}_uint64. Updated
 broken paper URLs. Cleaned up non-multiply-free range coder documentation
 (e.g., stop claiming it's multiply-free).

---
 libentcode/bitree.h   |  4 ++--
 libentcode/ecintrin.h | 20 ++++++++++++++++++++
 libentcode/ectest.c   | 13 +++++++++++++
 libentcode/entcode.c  | 28 +++++++++++++++++++++++++++
 libentcode/entcode.h  |  2 ++
 libentcode/entdec.c   | 44 +++++++++++++++++++++++++++++++++++++++----
 libentcode/entdec.h   | 14 ++++++++++++++
 libentcode/entenc.c   | 38 +++++++++++++++++++++++++++++++++++--
 libentcode/entenc.h   | 10 ++++++++++
 libentcode/mfrngdec.c |  2 +-
 libentcode/mfrngenc.c |  2 +-
 libentcode/rangedec.c | 25 ++++--------------------
 libentcode/rangeenc.c | 17 ++++-------------
 13 files changed, 175 insertions(+), 44 deletions(-)

diff --git a/libentcode/bitree.h b/libentcode/bitree.h
index 01efc165..1eda4044 100644
--- a/libentcode/bitree.h
+++ b/libentcode/bitree.h
@@ -33,7 +33,7 @@
    year       =1993,
    number     =88,
    month      =May,
-   URL        ="http://www.cs.auckland.ac.nz/~peter-f/ftplink/TechRep88.ps"
+   URL        ="http://www.cs.auckland.ac.nz/~peter-f/FTPfiles/TechRep88.ps"
   }
   @TECHREPORT{Fen95,
    author     ="Peter Fenwick",
@@ -43,7 +43,7 @@
    year       =1995,
    number     =110,
    month      =Feb,
-   URL        ="http://www.cs.auckland.ac.nz/~peter-f/ftplink/TechRep110.ps"
+   URL        ="http://www.cs.auckland.ac.nz/~peter-f/FTPfiles/TechRep110.ps"
   }
   @ARTICLE{Mof99,
     author    ="Alistair Moffat",
diff --git a/libentcode/ecintrin.h b/libentcode/ecintrin.h
index d13dabd9..4c00596a 100644
--- a/libentcode/ecintrin.h
+++ b/libentcode/ecintrin.h
@@ -67,5 +67,25 @@
 #else
 # define EC_ILOG(_x) (ec_ilog(_x))
 #endif
+#if __GNUC_PREREQ(3,4)
+# if INT_MAX>=9223372036854775807
+#  define EC_CLZ64_0 ((int)sizeof(unsigned)*CHAR_BIT)
+#  define EC_CLZ64(_x) (__builtin_clz(_x))
+# elif LONG_MAX>=9223372036854775807L
+#  define EC_CLZ64_0 ((int)sizeof(unsigned long)*CHAR_BIT)
+#  define EC_CLZ64(_x) (__builtin_clzl(_x))
+# elif LLONG_MAX>=9223372036854775807LL
+#  define EC_CLZ64_0 ((int)sizeof(unsigned long long)*CHAR_BIT)
+#  define EC_CLZ64(_x) (__builtin_clzll(_x))
+# endif
+#endif
+#if defined(EC_CLZ64)
+/*Note that __builtin_clz is not defined when _x==0, according to the gcc
+   documentation (and that of the BSR instruction that implements it on x86),
+   so we have to special-case it.*/
+# define EC_ILOG64(_x) (EC_CLZ64_0-EC_CLZ64(_x)&-!!(_x))
+#else
+# define EC_ILOG64(_x) (ec_ilog64(_x))
+#endif
 
 #endif
diff --git a/libentcode/ectest.c b/libentcode/ectest.c
index a08222a5..c254722d 100644
--- a/libentcode/ectest.c
+++ b/libentcode/ectest.c
@@ -7,6 +7,7 @@ int main(int _argc,char **_argv){
   ec_enc         enc;
   ec_dec         dec;
   ec_probmod     mod;
+  ec_uint64      sym64;
   int            ft;
   int            ftb;
   int            sym;
@@ -19,12 +20,14 @@ int main(int _argc,char **_argv){
   for(ft=0;ft<1024;ft++){
     for(i=0;i<ft;i++){
       ec_enc_uint(&enc,i,ft);
+      ec_enc_uint64(&enc,(ec_uint64)i<<30|i,(ec_uint64)ft<<30);
     }
   }
   /*Testing encoding of raw bit values.*/
   for(ftb=0;ftb<16;ftb++){
     for(i=0;i<(1<<ftb);i++){
       ec_enc_bits(&enc,i,ftb);
+      ec_enc_bits64(&enc,(ec_uint64)i<<30|i,ftb+30);
     }
   }
   for(sz=1;sz<256;sz++){
@@ -54,6 +57,11 @@ int main(int _argc,char **_argv){
         fprintf(stderr,"Decoded %i instead of %i with ft of %i.\n",sym,i,ft);
         return -1;
       }
+      sym64=ec_dec_uint64(&dec,(ec_uint64)ft<<30);
+      if(sym64!=((ec_uint64)i<<30|i)){
+        fprintf(stderr,"Decoded %lli instead of %lli with ft of %lli.\n",sym64,
+         (ec_uint64)i<<30|i,(ec_uint64)ft<<30);
+      }
     }
   }
   for(ftb=0;ftb<16;ftb++){
@@ -63,6 +71,11 @@ int main(int _argc,char **_argv){
         fprintf(stderr,"Decoded %i instead of %i with ftb of %i.\n",sym,i,ftb);
         return -1;
       }
+      sym64=ec_dec_bits64(&dec,ftb+30);
+      if(sym64!=((ec_uint64)i<<30|i)){
+        fprintf(stderr,"Decoded %lli instead of %lli with ftb of %i.\n",
+         sym64,(ec_uint64)i<<30|i,ftb+30);
+      }
     }
   }
   for(sz=1;sz<256;sz++){
diff --git a/libentcode/entcode.c b/libentcode/entcode.c
index a4e5100e..3ace831f 100644
--- a/libentcode/entcode.c
+++ b/libentcode/entcode.c
@@ -42,3 +42,31 @@ int ec_ilog(ec_uint32 _v){
   return ret;
 #endif
 }
+
+/*Returns the number of bits needed to represent _v: the 1-based position of
+   its highest set bit, or 0 if _v is 0.*/
+int ec_ilog64(ec_uint64 _v){
+#if defined(EC_CLZ64)
+  return EC_CLZ64_0-EC_CLZ64(_v)&-!!_v;
+#else
+  /*Portable fallback for when no count-leading-zeros macro is defined.*/
+  int nbits;
+  int step;
+  int shift;
+  /*Any non-zero value needs at least one bit.*/
+  nbits=_v!=0;
+  /*Binary search for the highest set bit: whenever something is set at or
+     above bit position step, drop the low step bits and record the shift.
+    The shifts are distinct powers of two, each at least 2, so OR-ing them
+     into nbits is the same as adding them.*/
+  for(step=32;step>1;step>>=1){
+    shift=_v>>step?step:0;
+    _v>>=shift;
+    nbits|=shift;
+  }
+  /*Only the low two bits of _v can still be set; the upper of the two
+     contributes the final increment.*/
+  nbits+=(_v&2)!=0;
+  return nbits;
+#endif
+}
diff --git a/libentcode/entcode.h b/libentcode/entcode.h
index 404a706d..b42797f7 100644
--- a/libentcode/entcode.h
+++ b/libentcode/entcode.h
@@ -6,6 +6,7 @@
 
 
 typedef unsigned ec_uint32;
+typedef unsigned long long ec_uint64;
 typedef struct ec_byte_buffer ec_byte_buffer;
 
 
@@ -45,5 +46,6 @@ long ec_byte_bytes(ec_byte_buffer *_b);
 unsigned char *ec_byte_get_buffer(ec_byte_buffer *_b);
 
 int ec_ilog(ec_uint32 _v);
+int ec_ilog64(ec_uint64 _v);
 
 #endif
diff --git a/libentcode/entdec.c b/libentcode/entdec.c
index 9712155e..3ec7cf53 100644
--- a/libentcode/entdec.c
+++ b/libentcode/entdec.c
@@ -96,10 +96,20 @@ ec_uint32 ec_dec_bits(ec_dec *_this,int _ftb){
   return t;
 }
 
+ec_uint64 ec_dec_bits64(ec_dec *_this,int _ftb){
+  ec_uint64 hi;
+  /*At most 32 bits: a single call to the 32-bit decoder suffices.*/
+  if(_ftb<=32)return ec_dec_bits(_this,_ftb);
+  /*Otherwise read the upper 32 bits first, then the remaining low bits.*/
+  hi=ec_dec_bits(_this,32);
+  _ftb-=32;
+  return hi<<_ftb|ec_dec_bits(_this,_ftb);
+}
+
 ec_uint32 ec_dec_uint(ec_dec *_this,ec_uint32 _ft){
   ec_uint32 mask;
-  ec_uint32 ft;
   ec_uint32 t;
+  unsigned  ft;
   unsigned  s;
   int       ftb;
   t=0;
@@ -107,7 +117,7 @@ ec_uint32 ec_dec_uint(ec_dec *_this,ec_uint32 _ft){
   ftb=EC_ILOG(_ft);
   while(ftb>EC_UNIT_BITS){
     ftb-=EC_UNIT_BITS;
-    ft=(_ft>>ftb)+1;
+    ft=(unsigned)(_ft>>ftb)+1;
     s=ec_decode(_this,ft);
     ec_dec_update(_this,s,s+1,ft);
     t=t<<EC_UNIT_BITS|s;
@@ -116,8 +126,34 @@ ec_uint32 ec_dec_uint(ec_dec *_this,ec_uint32 _ft){
     _ft=_ft&mask;
   }
   _ft++;
-  s=ec_decode(_this,_ft);
-  ec_dec_update(_this,s,s+1,_ft);
+  s=ec_decode(_this,(unsigned)_ft);
+  ec_dec_update(_this,s,s+1,(unsigned)_ft);
+  t=t<<ftb|s;
+  return t;
+}
+
+ec_uint64 ec_dec_uint64(ec_dec *_this,ec_uint64 _ft){
+  ec_uint64 val;
+  unsigned  range;
+  unsigned  sym;
+  int       nbits;
+  /*Work with the largest decodable value instead of the count of values.*/
+  _ft--;
+  val=0;
+  /*Decode the value in pieces from the top while more than one unit remains.*/
+  for(nbits=EC_ILOG64(_ft);nbits>EC_UNIT_BITS;){
+    nbits-=EC_UNIT_BITS;
+    range=(unsigned)(_ft>>nbits)+1;
+    sym=ec_decode(_this,range);
+    ec_dec_update(_this,sym,sym+1,range);
+    val=val<<EC_UNIT_BITS|sym;
+    /*Not the topmost symbol: the remaining low bits were coded raw.*/
+    if(sym<range-1)return val<<nbits|ec_dec_bits64(_this,nbits);
+    _ft&=((ec_uint64)1<<nbits)-1;
+  }
+  /*The last piece is at most EC_UNIT_BITS wide.*/
+  range=(unsigned)_ft+1;
+  sym=ec_decode(_this,range);
+  ec_dec_update(_this,sym,sym+1,range);
+  return val<<nbits|sym;
+}
diff --git a/libentcode/entdec.h b/libentcode/entdec.h
index ddcbb6f7..bedb55ea 100644
--- a/libentcode/entdec.h
+++ b/libentcode/entdec.h
@@ -64,6 +64,13 @@ void ec_dec_update(ec_dec *_this,unsigned _fl,unsigned _fh,
         This must be at least one, and no more than 32.
   Return: The decoded bits.*/
 ec_uint32 ec_dec_bits(ec_dec *_this,int _ftb);
+/*Extracts a sequence of raw bits from the stream.
+  The bits must have been encoded with ec_enc_bits64().
+  No call to ec_dec_update() is necessary after this call.
+  _ftb: The number of bits to extract.
+        This must be at least one, and no more than 64.
+  Return: The decoded bits.*/
+ec_uint64 ec_dec_bits64(ec_dec *_this,int _ftb);
 /*Extracts a raw unsigned integer with a non-power-of-2 range from the stream.
   The bits must have been encoded with ec_enc_uint().
   No call to ec_dec_update() is necessary after this call.
@@ -71,5 +78,12 @@ ec_uint32 ec_dec_bits(ec_dec *_this,int _ftb);
        This must be at least one, and no more than 2**32-1.
   Return: The decoded bits.*/
 ec_uint32 ec_dec_uint(ec_dec *_this,ec_uint32 _ft);
+/*Extracts a raw unsigned integer with a non-power-of-2 range from the stream.
+  The bits must have been encoded with ec_enc_uint64().
+  No call to ec_dec_update() is necessary after this call.
+  _ft: The number of integers that can be decoded (one more than the max).
+       This must be at least one, and no more than 2**64-1.
+  Return: The decoded bits.*/
+ec_uint64 ec_dec_uint64(ec_dec *_this,ec_uint64 _ft);
 
 #endif
diff --git a/libentcode/entenc.c b/libentcode/entenc.c
index d6a39254..188aa424 100644
--- a/libentcode/entenc.c
+++ b/libentcode/entenc.c
@@ -70,13 +70,24 @@ void ec_enc_bits(ec_enc *_this,ec_uint32 _fl,int _ftb){
     ec_encode(_this,fl,fl+1,EC_UNIT_MASK+1);
   }
   ft=1<<_ftb;
-  fl=_fl&ft-1;
+  fl=(unsigned)_fl&ft-1;
   ec_encode(_this,fl,fl+1,ft);
 }
 
+void ec_enc_bits64(ec_enc *_this,ec_uint64 _fl,int _ftb){
+  ec_uint32 fl;
+  /*Wider than 32 bits: emit the topmost 32 of the _ftb bits first.*/
+  if(_ftb>32){
+    _ftb-=32;
+    fl=(ec_uint32)(_fl>>_ftb)&0xFFFFFFFF;
+    ec_enc_bits(_this,fl,32);
+  }
+  ec_enc_bits(_this,(ec_uint32)_fl,_ftb);
+}
+
 void ec_enc_uint(ec_enc *_this,ec_uint32 _fl,ec_uint32 _ft){
   ec_uint32 mask;
-  ec_uint32 ft;
+  unsigned  ft;
   unsigned  fl;
   int       ftb;
   _ft--;
@@ -96,3 +107,26 @@ void ec_enc_uint(ec_enc *_this,ec_uint32 _fl,ec_uint32 _ft){
   }
   ec_encode(_this,_fl,_fl+1,_ft+1);
 }
+
+void ec_enc_uint64(ec_enc *_this,ec_uint64 _fl,ec_uint64 _ft){
+  ec_uint64 low;
+  unsigned  range;
+  unsigned  sym;
+  int       nbits;
+  _ft--;
+  for(nbits=EC_ILOG64(_ft);nbits>EC_UNIT_BITS;){
+    nbits-=EC_UNIT_BITS;
+    low=((ec_uint64)1<<nbits)-1;
+    sym=(unsigned)(_fl>>nbits);
+    range=(unsigned)(_ft>>nbits)+1;
+    ec_encode(_this,sym,sym+1,range);
+    /*Not the topmost symbol: the remaining low bits are coded raw.*/
+    if(sym<range-1){
+      ec_enc_bits64(_this,_fl,nbits);
+      return;
+    }
+    _fl&=low;
+    _ft&=low;
+  }
+  ec_encode(_this,(unsigned)_fl,(unsigned)_fl+1,(unsigned)_ft+1);
+}
diff --git a/libentcode/entenc.h b/libentcode/entenc.h
index 04ed0c14..1ba891cf 100644
--- a/libentcode/entenc.h
+++ b/libentcode/entenc.h
@@ -47,11 +47,21 @@ void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _ft);
   _ftb: The number of bits to encode.
         This must be at least one, and no more than 32.*/
 void ec_enc_bits(ec_enc *_this,ec_uint32 _fl,int _ftb);
+/*Encodes a sequence of raw bits in the stream.
+  _fl:  The bits to encode.
+  _ftb: The number of bits to encode.
+        This must be at least one, and no more than 64.*/
+void ec_enc_bits64(ec_enc *_this,ec_uint64 _fl,int _ftb);
 /*Encodes a raw unsigned integer in the stream.
   _fl: The integer to encode.
   _ft: The number of integers that can be encoded (one more than the max).
        This must be at least one, and no more than 2**32-1.*/
 void ec_enc_uint(ec_enc *_this,ec_uint32 _fl,ec_uint32 _ft);
+/*Encodes a raw unsigned integer in the stream.
+  _fl: The integer to encode.
+  _ft: The number of integers that can be encoded (one more than the max).
+       This must be at least one, and no more than 2**64-1.*/
+void ec_enc_uint64(ec_enc *_this,ec_uint64 _fl,ec_uint64 _ft);
 
 /*Indicates that there are no more symbols to encode.
   All reamining output bytes are flushed to the output buffer.
diff --git a/libentcode/mfrngdec.c b/libentcode/mfrngdec.c
index 6a5eb604..caf1410f 100644
--- a/libentcode/mfrngdec.c
+++ b/libentcode/mfrngdec.c
@@ -114,7 +114,7 @@
    number=3,
    pages="256--294",
    month=Jul,
-   URL="http://dev.acm.org/pubs/citations/journals/tois/1998-16-3/p256-moffat/"
+   URL="http://www.stanford.edu/class/ee398/handouts/papers/Moffat98ArithmCoding.pdf"
   }
   @INPROCEEDINGS{SM98,
    author="Lang Stuiver and Alistair Moffat",
diff --git a/libentcode/mfrngenc.c b/libentcode/mfrngenc.c
index 31b4d607..ec861414 100644
--- a/libentcode/mfrngenc.c
+++ b/libentcode/mfrngenc.c
@@ -26,7 +26,7 @@
    number=3,
    pages="256--294",
    month=Jul,
-   URL="http://dev.acm.org/pubs/citations/journals/tois/1998-16-3/p256-moffat/"
+   URL="http://www.stanford.edu/class/ee398/handouts/papers/Moffat98ArithmCoding.pdf"
   }
   @INPROCEEDINGS{SM98,
    author="Lang Stuiver and Alistair Moffat",
diff --git a/libentcode/rangedec.c b/libentcode/rangedec.c
index 5b2d6e12..76817472 100644
--- a/libentcode/rangedec.c
+++ b/libentcode/rangedec.c
@@ -4,7 +4,7 @@
 
 
 
-/*A multiply-free range decoder.
+/*A range decoder.
   This is an entropy decoder based upon \cite{Mar79}, which is itself a
    rediscovery of the FIFO arithmetic code introduced by \cite{Pas76}.
   It is very similar to arithmetic encoding, except that encoding is done with
@@ -26,16 +26,8 @@
    encoding for efficiency actually re-discovers many of the principles
    behind range encoding, and presents a good theoretical analysis of them.
 
-  The coder is made multiply-free by replacing the standard multiply/divide
-   used to partition the current interval according to the total frequency
-   count.
-  The new partition function scales the count so that it differs from the size
-   of the interval by no more than a factor of two and then assigns each symbol
-   one or two code words in the interval.
-  For details see \cite{SM98}.
-
-  This coder also handles the end of the stream in a slightly more graceful
-   fashion than most arithmetic or range coders.
+  This coder handles the end of the stream in a slightly more graceful fashion
+   than most arithmetic or range coders.
   Once the final symbol has been encoded, the coder selects the code word with
    the shortest number of bits that still falls within the final interval.
   This method is not novel.
@@ -114,16 +106,7 @@
    number=3,
    pages="256--294",
    month=Jul,
-   URL="http://dev.acm.org/pubs/citations/journals/tois/1998-16-3/p256-moffat/"
-  }
-  @INPROCEEDINGS{SM98,
-   author="Lang Stuiver and Alistair Moffat",
-   title="Piecewise Integer Mapping for Arithmetic Coding",
-   booktitle="Proceedings of the {IEEE} Data Compression Conference",
-   pages="1--10",
-   address="Snowbird, UT",
-   month="Mar./Apr.",
-   year=1998
+   URL="http://www.stanford.edu/class/ee398/handouts/papers/Moffat98ArithmCoding.pdf"
   }*/
 
 
diff --git a/libentcode/rangeenc.c b/libentcode/rangeenc.c
index bb5384b4..dcd9db5b 100644
--- a/libentcode/rangeenc.c
+++ b/libentcode/rangeenc.c
@@ -4,9 +4,9 @@
 
 
 
-/*A multiply-free range encoder.
-  See mfrngdec.c and the references for implementation details
-   \cite{Mar79,MNW98,SM98}.
+/*A range encoder.
+  See rangedec.c and the references for implementation details
+   \cite{Mar79,MNW98}.
 
   @INPROCEEDINGS{Mar79,
    author="Martin, G.N.N.",
@@ -26,16 +26,7 @@
    number=3,
    pages="256--294",
    month=Jul,
-   URL="http://dev.acm.org/pubs/citations/journals/tois/1998-16-3/p256-moffat/"
-  }
-  @INPROCEEDINGS{SM98,
-   author="Lang Stuiver and Alistair Moffat",
-   title="Piecewise Integer Mapping for Arithmetic Coding",
-   booktitle="Proceedings of the {IEEE} Data Compression Conference",
-   pages="1--10",
-   address="Snowbird, UT",
-   month="Mar./Apr.",
-   year=1998
+   URL="http://www.stanford.edu/class/ee398/handouts/papers/Moffat98ArithmCoding.pdf"
   }*/
 
 
-- 
GitLab