From f13fea7b530a178ff69dfe4682f3d50ac79537e0 Mon Sep 17 00:00:00 2001
From: "Timothy B. Terriberry" <tterribe@xiph.org>
Date: Tue, 11 Dec 2007 13:25:57 +1100
Subject: [PATCH] Added ec_{enc|dec}_bits64 and ec_{enc|dec}_uint64. Updated
 broken paper URLs. Cleaned up non-multiply-free range coder documentation
 (e.g., stop claiming it's multiply-free).

---
 libentcode/bitree.h   |  4 ++--
 libentcode/ecintrin.h | 20 ++++++++++++++++++++
 libentcode/ectest.c   | 13 +++++++++++++
 libentcode/entcode.c  | 28 +++++++++++++++++++++++++++
 libentcode/entcode.h  |  2 ++
 libentcode/entdec.c   | 44 +++++++++++++++++++++++++++++++++++++++----
 libentcode/entdec.h   | 14 ++++++++++++++
 libentcode/entenc.c   | 38 +++++++++++++++++++++++++++++++++++--
 libentcode/entenc.h   | 10 ++++++++++
 libentcode/mfrngdec.c |  2 +-
 libentcode/mfrngenc.c |  2 +-
 libentcode/rangedec.c | 25 ++++--------------------
 libentcode/rangeenc.c | 17 ++++-------------
 13 files changed, 175 insertions(+), 44 deletions(-)

diff --git a/libentcode/bitree.h b/libentcode/bitree.h
index 01efc1654..1eda40447 100644
--- a/libentcode/bitree.h
+++ b/libentcode/bitree.h
@@ -33,7 +33,7 @@
    year       =1993,
    number     =88,
    month      =May,
-   URL        ="http://www.cs.auckland.ac.nz/~peter-f/ftplink/TechRep88.ps"
+   URL        ="http://www.cs.auckland.ac.nz/~peter-f/FTPfiles/TechRep88.ps"
   }
   @TECHREPORT{Fen95,
    author     ="Peter Fenwick",
@@ -43,7 +43,7 @@
    year       =1995,
    number     =110,
    month      =Feb,
-   URL        ="http://www.cs.auckland.ac.nz/~peter-f/ftplink/TechRep110.ps"
+   URL        ="http://www.cs.auckland.ac.nz/~peter-f/FTPfiles/TechRep110.ps"
   }
   @ARTICLE{Mof99,
     author    ="Alistair Moffat",
diff --git a/libentcode/ecintrin.h b/libentcode/ecintrin.h
index d13dabd92..4c00596a6 100644
--- a/libentcode/ecintrin.h
+++ b/libentcode/ecintrin.h
@@ -67,5 +67,25 @@
 #else
 # define EC_ILOG(_x) (ec_ilog(_x))
 #endif
+#if __GNUC_PREREQ(3,4)
+# if INT_MAX>=9223372036854775807 /*int is at least 64 bits wide.*/
+#  define EC_CLZ64_0 (sizeof(unsigned)*CHAR_BIT)
+#  define EC_CLZ64(_x) (__builtin_clz(_x))
+# elif LONG_MAX>=9223372036854775807L /*long is at least 64 bits wide.*/
+#  define EC_CLZ64_0 (sizeof(unsigned long)*CHAR_BIT)
+#  define EC_CLZ64(_x) (__builtin_clzl(_x))
+# elif LLONG_MAX>=9223372036854775807LL /*long long is at least 64 bits wide.*/
+#  define EC_CLZ64_0 (sizeof(unsigned long long)*CHAR_BIT)
+#  define EC_CLZ64(_x) (__builtin_clzll(_x))
+# endif
+#endif
+#if defined(EC_CLZ64)
+/*__builtin_clz is undefined when _x==0 (gcc docs; same for the x86 BSR
+   instruction), so the &-!!(_x) mask special-cases that input to yield 0.
+  _x is expanded twice and so must not have side effects.*/
+# define EC_ILOG64(_x) ((EC_CLZ64_0-EC_CLZ64(_x))&-!!(_x))
+#else
+# define EC_ILOG64(_x) (ec_ilog64(_x))
+#endif
 
 #endif
diff --git a/libentcode/ectest.c b/libentcode/ectest.c
index a08222a52..c254722dd 100644
--- a/libentcode/ectest.c
+++ b/libentcode/ectest.c
@@ -7,6 +7,7 @@ int main(int _argc,char **_argv){
   ec_enc         enc;
   ec_dec         dec;
   ec_probmod     mod;
+  ec_uint64      sym64;
   int            ft;
   int            ftb;
   int            sym;
@@ -19,12 +20,14 @@ int main(int _argc,char **_argv){
   for(ft=0;ft<1024;ft++){
     for(i=0;i<ft;i++){
       ec_enc_uint(&enc,i,ft);
+      ec_enc_uint64(&enc,(ec_uint64)i<<30|i,(ec_uint64)ft<<30);
     }
   }
   /*Testing encoding of raw bit values.*/
   for(ftb=0;ftb<16;ftb++){
     for(i=0;i<(1<<ftb);i++){
       ec_enc_bits(&enc,i,ftb);
+      ec_enc_bits64(&enc,(ec_uint64)i<<30|i,ftb+30);
     }
   }
   for(sz=1;sz<256;sz++){
@@ -54,6 +57,11 @@ int main(int _argc,char **_argv){
         fprintf(stderr,"Decoded %i instead of %i with ft of %i.\n",sym,i,ft);
         return -1;
       }
+      sym64=ec_dec_uint64(&dec,(ec_uint64)ft<<30);
+      if(sym64!=((ec_uint64)i<<30|i)){
+        fprintf(stderr,"Decoded %lli instead of %lli with ft of %lli.\n",sym64,
+         (ec_uint64)i<<30|i,(ec_uint64)ft<<30);
+      }
     }
   }
   for(ftb=0;ftb<16;ftb++){
@@ -63,6 +71,11 @@ int main(int _argc,char **_argv){
         fprintf(stderr,"Decoded %i instead of %i with ftb of %i.\n",sym,i,ftb);
         return -1;
       }
+      sym64=ec_dec_bits64(&dec,ftb+30);
+      if(sym64!=((ec_uint64)i<<30|i)){
+        fprintf(stderr,"Decoded %lli instead of %lli with ftb of %i.\n",
+         sym64,(ec_uint64)i<<30|i,ftb+30);
+      }
     }
   }
   for(sz=1;sz<256;sz++){
diff --git a/libentcode/entcode.c b/libentcode/entcode.c
index a4e5100e7..3ace831f3 100644
--- a/libentcode/entcode.c
+++ b/libentcode/entcode.c
@@ -42,3 +42,31 @@ int ec_ilog(ec_uint32 _v){
   return ret;
 #endif
 }
+
+int ec_ilog64(ec_uint64 _v){ /*Bit length of _v; 0 when _v==0.*/
+#if defined(EC_CLZ64)
+  return EC_CLZ64_0-EC_CLZ64(_v)&-!!_v; /*The mask zeroes the _v==0 case.*/
+#else
+  ec_uint32 v;
+  int       ret;
+  int       m;
+  ret=!!_v; /*Any nonzero value occupies at least one bit.*/
+  m=!!(_v&0xFFFFFFFF00000000)<<5; /*32 if the upper half is in use, else 0.*/
+  v=(ec_uint32)(_v>>m); /*Keep the 32-bit half that holds the top set bit.*/
+  ret|=m;
+  m=!!(v&0xFFFF0000)<<4; /*Repeat the halving for 16, 8, 4, and 2 bits.*/
+  v>>=m;
+  ret|=m;
+  m=!!(v&0xFF00)<<3;
+  v>>=m;
+  ret|=m;
+  m=!!(v&0xF0)<<2;
+  v>>=m;
+  ret|=m;
+  m=!!(v&0xC)<<1;
+  v>>=m;
+  ret|=m;
+  ret+=!!(v&0x2); /*Add, not OR: bit 0 of ret is already taken by !!_v.*/
+  return ret;
+#endif
+}
diff --git a/libentcode/entcode.h b/libentcode/entcode.h
index 404a706dd..b42797f75 100644
--- a/libentcode/entcode.h
+++ b/libentcode/entcode.h
@@ -6,6 +6,7 @@
 
 
 typedef unsigned ec_uint32;
+typedef unsigned long long ec_uint64;
 typedef struct ec_byte_buffer ec_byte_buffer;
 
 
@@ -45,5 +46,6 @@ long ec_byte_bytes(ec_byte_buffer *_b);
 unsigned char *ec_byte_get_buffer(ec_byte_buffer *_b);
 
 int ec_ilog(ec_uint32 _v);
+int ec_ilog64(ec_uint64 _v);
 
 #endif
diff --git a/libentcode/entdec.c b/libentcode/entdec.c
index 9712155e4..3ec7cf534 100644
--- a/libentcode/entdec.c
+++ b/libentcode/entdec.c
@@ -96,10 +96,20 @@ ec_uint32 ec_dec_bits(ec_dec *_this,int _ftb){
   return t;
 }
 
+ec_uint64 ec_dec_bits64(ec_dec *_this,int _ftb){ /*Reads _ftb raw bits.*/
+  ec_uint64 t;
+  if(_ftb>32){ /*ec_dec_bits() is documented for at most 32 bits per call.*/
+    t=ec_dec_bits(_this,32); /*The top 32 of the requested bits come first.*/
+    _ftb-=32;
+  }
+  else t=0;
+  return t<<_ftb|ec_dec_bits(_this,_ftb); /*Append the remaining low bits.*/
+}
+
 ec_uint32 ec_dec_uint(ec_dec *_this,ec_uint32 _ft){
   ec_uint32 mask;
-  ec_uint32 ft;
   ec_uint32 t;
+  unsigned  ft;
   unsigned  s;
   int       ftb;
   t=0;
@@ -107,7 +117,7 @@ ec_uint32 ec_dec_uint(ec_dec *_this,ec_uint32 _ft){
   ftb=EC_ILOG(_ft);
   while(ftb>EC_UNIT_BITS){
     ftb-=EC_UNIT_BITS;
-    ft=(_ft>>ftb)+1;
+    ft=(unsigned)(_ft>>ftb)+1;
     s=ec_decode(_this,ft);
     ec_dec_update(_this,s,s+1,ft);
     t=t<<EC_UNIT_BITS|s;
@@ -116,8 +126,34 @@ ec_uint32 ec_dec_uint(ec_dec *_this,ec_uint32 _ft){
     _ft=_ft&mask;
   }
   _ft++;
-  s=ec_decode(_this,_ft);
-  ec_dec_update(_this,s,s+1,_ft);
+  s=ec_decode(_this,(unsigned)_ft);
+  ec_dec_update(_this,s,s+1,(unsigned)_ft);
+  t=t<<ftb|s;
+  return t;
+}
+
+ec_uint64 ec_dec_uint64(ec_dec *_this,ec_uint64 _ft){
+  ec_uint64 mask;
+  ec_uint64 t;
+  unsigned  ft;
+  unsigned  s;
+  int       ftb;
+  t=0;
+  _ft--; /*_ft becomes the largest value that can be decoded.*/
+  ftb=EC_ILOG64(_ft);
+  while(ftb>EC_UNIT_BITS){ /*Decode the top EC_UNIT_BITS bits per pass.*/
+    ftb-=EC_UNIT_BITS;
+    ft=(unsigned)(_ft>>ftb)+1; /*Count of values the leading chunk can take.*/
+    s=ec_decode(_this,ft);
+    ec_dec_update(_this,s,s+1,ft);
+    t=t<<EC_UNIT_BITS|s;
+    if(s<ft-1)return t<<ftb|ec_dec_bits64(_this,ftb); /*Rest is raw bits.*/
+    mask=((ec_uint64)1<<ftb)-1;
+    _ft=_ft&mask; /*Drop the chunk just decoded from the bound.*/
+  }
+  _ft++; /*Back to a count; it now fits in EC_UNIT_BITS bits plus one.*/
+  s=ec_decode(_this,(unsigned)_ft);
+  ec_dec_update(_this,s,s+1,(unsigned)_ft);
   t=t<<ftb|s;
   return t;
 }
diff --git a/libentcode/entdec.h b/libentcode/entdec.h
index ddcbb6f71..bedb55eaf 100644
--- a/libentcode/entdec.h
+++ b/libentcode/entdec.h
@@ -64,6 +64,13 @@ void ec_dec_update(ec_dec *_this,unsigned _fl,unsigned _fh,
         This must be at least one, and no more than 32.
   Return: The decoded bits.*/
 ec_uint32 ec_dec_bits(ec_dec *_this,int _ftb);
+/*Extracts a sequence of raw bits from the stream.
+  The bits must have been encoded with ec_enc_bits64().
+  No call to ec_dec_update() is necessary after this call.
+  _ftb: The number of bits to extract.
+        This must be at least one, and no more than 64.
+  Return: The decoded bits.*/
+ec_uint64 ec_dec_bits64(ec_dec *_this,int _ftb);
 /*Extracts a raw unsigned integer with a non-power-of-2 range from the stream.
   The bits must have been encoded with ec_enc_uint().
   No call to ec_dec_update() is necessary after this call.
@@ -71,5 +78,12 @@ ec_uint32 ec_dec_bits(ec_dec *_this,int _ftb);
        This must be at least one, and no more than 2**32-1.
   Return: The decoded bits.*/
 ec_uint32 ec_dec_uint(ec_dec *_this,ec_uint32 _ft);
+/*Extracts a raw unsigned integer with a non-power-of-2 range from the stream.
+  The bits must have been encoded with ec_enc_uint64().
+  No call to ec_dec_update() is necessary after this call.
+  _ft: The number of integers that can be decoded (one more than the max).
+       This must be at least one, and no more than 2**64-1.
+  Return: The decoded bits.*/
+ec_uint64 ec_dec_uint64(ec_dec *_this,ec_uint64 _ft);
 
 #endif
diff --git a/libentcode/entenc.c b/libentcode/entenc.c
index d6a392542..188aa4241 100644
--- a/libentcode/entenc.c
+++ b/libentcode/entenc.c
@@ -70,13 +70,24 @@ void ec_enc_bits(ec_enc *_this,ec_uint32 _fl,int _ftb){
     ec_encode(_this,fl,fl+1,EC_UNIT_MASK+1);
   }
   ft=1<<_ftb;
-  fl=_fl&ft-1;
+  fl=(unsigned)_fl&ft-1;
   ec_encode(_this,fl,fl+1,ft);
 }
 
+/*Writes _ftb raw bits of _fl; when _ftb>32 the top 32 of them go out first.*/
+void ec_enc_bits64(ec_enc *_this,ec_uint64 _fl,int _ftb){
+  ec_uint32 fl;
+  if(_ftb>32){ /*ec_enc_bits() is documented for at most 32 bits per call.*/
+    _ftb-=32;
+    fl=(ec_uint32)(_fl>>_ftb)&0xFFFFFFFF;
+    ec_enc_bits(_this,fl,32);
+  }
+  ec_enc_bits(_this,(ec_uint32)_fl,_ftb); /*The remaining low _ftb bits.*/
+}
+
 void ec_enc_uint(ec_enc *_this,ec_uint32 _fl,ec_uint32 _ft){
   ec_uint32 mask;
-  ec_uint32 ft;
+  unsigned  ft;
   unsigned  fl;
   int       ftb;
   _ft--;
@@ -96,3 +107,26 @@ void ec_enc_uint(ec_enc *_this,ec_uint32 _fl,ec_uint32 _ft){
   }
   ec_encode(_this,_fl,_fl+1,_ft+1);
 }
+
+void ec_enc_uint64(ec_enc *_this,ec_uint64 _fl,ec_uint64 _ft){
+  ec_uint64 mask;
+  unsigned  ft;
+  unsigned  fl;
+  int       ftb;
+  _ft--; /*_ft becomes the largest value that can be encoded.*/
+  ftb=EC_ILOG64(_ft);
+  while(ftb>EC_UNIT_BITS){ /*Code the top EC_UNIT_BITS bits per pass.*/
+    ftb-=EC_UNIT_BITS;
+    ft=(unsigned)(_ft>>ftb)+1; /*Count of values the leading chunk can take.*/
+    fl=(unsigned)(_fl>>ftb);
+    ec_encode(_this,fl,fl+1,ft);
+    if(fl<ft-1){ /*Chunk is below the bound's chunk: the rest is raw bits.*/
+      ec_enc_bits64(_this,_fl,ftb);
+      return;
+    }
+    mask=((ec_uint64)1<<ftb)-1;
+    _fl=_fl&mask; /*Drop the chunk just coded from both value and bound.*/
+    _ft=_ft&mask;
+  }
+  ec_encode(_this,_fl,_fl+1,_ft+1); /*_ft now fits in EC_UNIT_BITS bits.*/
+}
diff --git a/libentcode/entenc.h b/libentcode/entenc.h
index 04ed0c14d..1ba891cf9 100644
--- a/libentcode/entenc.h
+++ b/libentcode/entenc.h
@@ -47,11 +47,21 @@ void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _ft);
   _ftb: The number of bits to encode.
         This must be at least one, and no more than 32.*/
 void ec_enc_bits(ec_enc *_this,ec_uint32 _fl,int _ftb);
+/*Encodes a sequence of raw bits in the stream.
+  _fl:  The bits to encode.
+  _ftb: The number of bits to encode.
+        This must be at least one, and no more than 64.*/
+void ec_enc_bits64(ec_enc *_this,ec_uint64 _fl,int _ftb);
 /*Encodes a raw unsigned integer in the stream.
   _fl: The integer to encode.
   _ft: The number of integers that can be encoded (one more than the max).
        This must be at least one, and no more than 2**32-1.*/
 void ec_enc_uint(ec_enc *_this,ec_uint32 _fl,ec_uint32 _ft);
+/*Encodes a raw unsigned integer in the stream.
+  _fl: The integer to encode.
+  _ft: The number of integers that can be encoded (one more than the max).
+       This must be at least one, and no more than 2**64-1.*/
+void ec_enc_uint64(ec_enc *_this,ec_uint64 _fl,ec_uint64 _ft);
 
 /*Indicates that there are no more symbols to encode.
   All reamining output bytes are flushed to the output buffer.
diff --git a/libentcode/mfrngdec.c b/libentcode/mfrngdec.c
index 6a5eb604d..caf1410f0 100644
--- a/libentcode/mfrngdec.c
+++ b/libentcode/mfrngdec.c
@@ -114,7 +114,7 @@
    number=3,
    pages="256--294",
    month=Jul,
-   URL="http://dev.acm.org/pubs/citations/journals/tois/1998-16-3/p256-moffat/"
+   URL="http://www.stanford.edu/class/ee398/handouts/papers/Moffat98ArithmCoding.pdf"
   }
   @INPROCEEDINGS{SM98,
    author="Lang Stuiver and Alistair Moffat",
diff --git a/libentcode/mfrngenc.c b/libentcode/mfrngenc.c
index 31b4d607a..ec861414b 100644
--- a/libentcode/mfrngenc.c
+++ b/libentcode/mfrngenc.c
@@ -26,7 +26,7 @@
    number=3,
    pages="256--294",
    month=Jul,
-   URL="http://dev.acm.org/pubs/citations/journals/tois/1998-16-3/p256-moffat/"
+   URL="http://www.stanford.edu/class/ee398/handouts/papers/Moffat98ArithmCoding.pdf"
   }
   @INPROCEEDINGS{SM98,
    author="Lang Stuiver and Alistair Moffat",
diff --git a/libentcode/rangedec.c b/libentcode/rangedec.c
index 5b2d6e12a..76817472f 100644
--- a/libentcode/rangedec.c
+++ b/libentcode/rangedec.c
@@ -4,7 +4,7 @@
 
 
 
-/*A multiply-free range decoder.
+/*A range decoder.
   This is an entropy decoder based upon \cite{Mar79}, which is itself a
    rediscovery of the FIFO arithmetic code introduced by \cite{Pas76}.
   It is very similar to arithmetic encoding, except that encoding is done with
@@ -26,16 +26,8 @@
    encoding for efficiency actually re-discovers many of the principles
    behind range encoding, and presents a good theoretical analysis of them.
 
-  The coder is made multiply-free by replacing the standard multiply/divide
-   used to partition the current interval according to the total frequency
-   count.
-  The new partition function scales the count so that it differs from the size
-   of the interval by no more than a factor of two and then assigns each symbol
-   one or two code words in the interval.
-  For details see \cite{SM98}.
-
-  This coder also handles the end of the stream in a slightly more graceful
-   fashion than most arithmetic or range coders.
+  This coder handles the end of the stream in a slightly more graceful fashion
+   than most arithmetic or range coders.
   Once the final symbol has been encoded, the coder selects the code word with
    the shortest number of bits that still falls within the final interval.
   This method is not novel.
@@ -114,16 +106,7 @@
    number=3,
    pages="256--294",
    month=Jul,
-   URL="http://dev.acm.org/pubs/citations/journals/tois/1998-16-3/p256-moffat/"
-  }
-  @INPROCEEDINGS{SM98,
-   author="Lang Stuiver and Alistair Moffat",
-   title="Piecewise Integer Mapping for Arithmetic Coding",
-   booktitle="Proceedings of the {IEEE} Data Compression Conference",
-   pages="1--10",
-   address="Snowbird, UT",
-   month="Mar./Apr.",
-   year=1998
+   URL="http://www.stanford.edu/class/ee398/handouts/papers/Moffat98ArithmCoding.pdf"
   }*/
 
 
diff --git a/libentcode/rangeenc.c b/libentcode/rangeenc.c
index bb5384b4a..dcd9db5bb 100644
--- a/libentcode/rangeenc.c
+++ b/libentcode/rangeenc.c
@@ -4,9 +4,9 @@
 
 
 
-/*A multiply-free range encoder.
-  See mfrngdec.c and the references for implementation details
-   \cite{Mar79,MNW98,SM98}.
+/*A range encoder.
+  See rangedec.c and the references for implementation details
+   \cite{Mar79,MNW98}.
 
   @INPROCEEDINGS{Mar79,
    author="Martin, G.N.N.",
@@ -26,16 +26,7 @@
    number=3,
    pages="256--294",
    month=Jul,
-   URL="http://dev.acm.org/pubs/citations/journals/tois/1998-16-3/p256-moffat/"
-  }
-  @INPROCEEDINGS{SM98,
-   author="Lang Stuiver and Alistair Moffat",
-   title="Piecewise Integer Mapping for Arithmetic Coding",
-   booktitle="Proceedings of the {IEEE} Data Compression Conference",
-   pages="1--10",
-   address="Snowbird, UT",
-   month="Mar./Apr.",
-   year=1998
+   URL="http://www.stanford.edu/class/ee398/handouts/papers/Moffat98ArithmCoding.pdf"
   }*/
 
 
-- 
GitLab