From 30df6cf3f84c81eb0d3e6940670fda729e22d873 Mon Sep 17 00:00:00 2001
From: "Timothy B. Terriberry" <tterribe@xiph.org>
Date: Tue, 21 Dec 2010 08:42:26 -0800
Subject: [PATCH] Entropy coder clean-up.

This simplifies a good bit of the error handling, and should make it
 impossible to overrun the buffer in the encoder or decoder, while
 still allowing tell() to operate correctly after a bust.
The encoder now tries to keep the range coder data intact after a
 bust instead of corrupting it with extra bits data, though this is
 not a guarantee (too many extra bits may have already been flushed).
It also now correctly reports errors when the bust occurs merging the
 last byte of range coder and extra bits.

A number of abstraction barrier violations were cleaned up, as well.
This patch also includes a number of minor performance improvements:
 ec_{enc|dec}_bits() in particular should be much faster.

Finally, tf_select was changed to be coded with the range coder
 rather than extra bits, so that it is at the front of the packet
 (for unequal error protection robustness).
---
 libcelt/celt.c        |   6 +-
 libcelt/entcode.h     |  36 ++++++-----
 libcelt/entdec.c      |  50 ++++++---------
 libcelt/entdec.h      |  18 +++---
 libcelt/entenc.c      |  83 ++++++++++++------------
 libcelt/entenc.h      |  15 ++---
 libcelt/quant_bands.c |  16 +++--
 libcelt/rangedec.c    |  97 ++++++++++++++---------------
 libcelt/rangeenc.c    | 142 ++++++++++++++++++++++++------------------
 tests/ectest.c        |  21 ++++++-
 10 files changed, 262 insertions(+), 222 deletions(-)

diff --git a/libcelt/celt.c b/libcelt/celt.c
index a29d94cb..0e2f286f 100644
--- a/libcelt/celt.c
+++ b/libcelt/celt.c
@@ -600,7 +600,7 @@ static void tf_encode(int start, int end, int isTransient, int *tf_res, int LM,
       curr = tf_res[i];
    }
    if (LM!=0)
-      ec_enc_bits(enc, tf_select, 1);
+      ec_enc_bit_logp(enc, tf_select, 1);
    for (i=start;i<end;i++)
       tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+tf_res[i]];
    /*printf("%d %d ", isTransient, tf_select); for(i=0;i<end;i++)printf("%d ", tf_res[i]);printf("\n");*/
@@ -617,7 +617,7 @@ static void tf_decode(int start, int end, int C, int isTransient, int *tf_res, i
       curr = tf_res[i];
    }
    if (LM!=0)
-      tf_select = ec_dec_bits(dec, 1);
+      tf_select = ec_dec_bit_logp(dec, 1);
    else
       tf_select = 0;
    for (i=start;i<end;i++)
@@ -1953,7 +1953,7 @@ int celt_decode_with_ec_float(CELTDecoder * restrict st, const unsigned char *da
    deemphasis(out_syn, pcm, N, C, st->mode->preemph, st->preemph_memD);
    st->loss_count = 0;
    RESTORE_STACK;
-   if (ec_dec_get_error(dec))
+   if (ec_dec_tell(dec,0) > 8*len || ec_dec_get_error(dec))
       return CELT_CORRUPTED_DATA;
    else
       return CELT_OK;
diff --git a/libcelt/entcode.h b/libcelt/entcode.h
index bc01a58c..ea88d7ed 100644
--- a/libcelt/entcode.h
+++ b/libcelt/entcode.h
@@ -34,54 +34,56 @@
 #if !defined(_entcode_H)
 # define _entcode_H (1)
 # include <limits.h>
+# include <stddef.h>
 # include "ecintrin.h"
 
 
 
-typedef celt_int32 ec_int32;
-typedef celt_uint32 ec_uint32;
+typedef celt_int32            ec_int32;
+typedef celt_uint32           ec_uint32;
+typedef size_t                ec_window;
 typedef struct ec_byte_buffer ec_byte_buffer;
 
 
 
-/*The number of bits to code at a time when coding bits directly.*/
-# define EC_UNIT_BITS  (8)
-/*The mask for the given bits.*/
-# define EC_UNIT_MASK  ((1U<<EC_UNIT_BITS)-1)
+/*This must be at least 32 bits.*/
+# define EC_WINDOW_SIZE ((int)sizeof(ec_window)*CHAR_BIT)
+
+/*The number of bits to use for the range-coded part of unsigned integers.*/
+# define EC_UINT_BITS   (8)
 
 
 
 /*Simple libogg1-style buffer.*/
 struct ec_byte_buffer{
   unsigned char *buf;
-  unsigned char *ptr;
-  unsigned char *end_ptr;
+  ec_uint32      offs;
+  ec_uint32      end_offs;
   ec_uint32      storage;
 };
 
 /*Encoding functions.*/
 void ec_byte_writeinit_buffer(ec_byte_buffer *_b, unsigned char *_buf, ec_uint32 _size);
 void ec_byte_shrink(ec_byte_buffer *_b, ec_uint32 _size);
-int ec_byte_write1(ec_byte_buffer *_b,unsigned _value);
+int ec_byte_write(ec_byte_buffer *_b,unsigned _value);
 int ec_byte_write_at_end(ec_byte_buffer *_b,unsigned _value);
+int ec_byte_write_done(ec_byte_buffer *_b,int _start_bits_available,
+ unsigned _end_byte,int _end_bits_used);
 /*Decoding functions.*/
 void ec_byte_readinit(ec_byte_buffer *_b,unsigned char *_buf,ec_uint32 _bytes);
-unsigned char ec_byte_look_at_end(ec_byte_buffer *_b);
-int ec_byte_look4(ec_byte_buffer *_b,ec_uint32 *_val);
-void ec_byte_adv1(ec_byte_buffer *_b);
-void ec_byte_adv4(ec_byte_buffer *_b);
-int ec_byte_read1(ec_byte_buffer *_b);
+int ec_byte_read(ec_byte_buffer *_b);
+unsigned char ec_byte_read_from_end(ec_byte_buffer *_b);
 /*Shared functions.*/
 static inline void ec_byte_reset(ec_byte_buffer *_b){
-   _b->ptr=_b->buf;
+  _b->offs=_b->end_offs=0;
 }
 
 static inline ec_uint32 ec_byte_bytes(ec_byte_buffer *_b){
-   return _b->ptr-_b->buf;
+  return _b->offs;
 }
 
 static inline unsigned char *ec_byte_get_buffer(ec_byte_buffer *_b){
-   return _b->buf;
+  return _b->buf;
 }
 
 int ec_ilog(ec_uint32 _v);
diff --git a/libcelt/entdec.c b/libcelt/entdec.c
index d7f4aa7e..c8b7df41 100644
--- a/libcelt/entdec.c
+++ b/libcelt/entdec.c
@@ -39,50 +39,39 @@
 #include "arch.h"
 
 void ec_byte_readinit(ec_byte_buffer *_b,unsigned char *_buf,ec_uint32 _bytes){
-  _b->buf=_b->ptr=_buf;
+  _b->buf=_buf;
+  _b->offs=_b->end_offs=0;
   _b->storage=_bytes;
-  _b->end_ptr=_b->buf+_bytes-1;
 }
 
-unsigned char ec_byte_look_at_end(ec_byte_buffer *_b){
-  celt_assert2 (_b->end_ptr >= _b->buf, "Trying to read raw bits before the beginning of the stream");
-  return *(_b->end_ptr--);
+int ec_byte_read(ec_byte_buffer *_b){
+  return _b->offs<_b->storage?_b->buf[_b->offs++]:0;
 }
 
-void ec_byte_adv1(ec_byte_buffer *_b){
-  _b->ptr++;
-}
-
-int ec_byte_read1(ec_byte_buffer *_b){
-  ptrdiff_t endbyte;
-  endbyte=_b->ptr-_b->buf;
-  if(endbyte>=_b->storage)return -1;
-  else return *(_b->ptr++);
+unsigned char ec_byte_read_from_end(ec_byte_buffer *_b){
+  return _b->end_offs<_b->storage?_b->buf[_b->storage-++(_b->end_offs)]:0;
 }
 
 ec_uint32 ec_dec_uint(ec_dec *_this,ec_uint32 _ft){
-  unsigned  ft;
-  unsigned  s;
-  int       ftb;
+  unsigned ft;
+  unsigned s;
+  int      ftb;
   /*In order to optimize EC_ILOG(), it is undefined for the value 0.*/
   celt_assert(_ft>1);
   _ft--;
   ftb=EC_ILOG(_ft);
-  if(ftb>EC_UNIT_BITS){
+  if(ftb>EC_UINT_BITS){
     ec_uint32 t;
-    ftb-=EC_UNIT_BITS;
+    ftb-=EC_UINT_BITS;
     ft=(unsigned)(_ft>>ftb)+1;
     s=ec_decode(_this,ft);
     ec_dec_update(_this,s,s+1,ft);
-    t=s;
-    t = t<<ftb|ec_dec_bits(_this,ftb&(1<<ftb)-1);
-    if (t>_ft)
-    {
-      _this->error |= 1;
-      t = _ft;
-    }
-    return t;
-  } else {
+    t=s<<ftb|ec_dec_bits(_this,ftb);
+    if(t<=_ft)return t;
+    _this->error=1;
+    return _ft;
+  }
+  else{
     _ft++;
     s=ec_decode(_this,(unsigned)_ft);
     ec_dec_update(_this,s,s+1,(unsigned)_ft);
@@ -90,7 +79,6 @@ ec_uint32 ec_dec_uint(ec_dec *_this,ec_uint32 _ft){
   }
 }
 
-int ec_dec_get_error(ec_dec *_this)
-{
-  return _this->error || (ec_dec_tell(_this,0) > 8*_this->buf->storage);
+int ec_dec_get_error(ec_dec *_this){
+  return _this->error;
 }
diff --git a/libcelt/entdec.h b/libcelt/entdec.h
index f163b4d6..7617f711 100644
--- a/libcelt/entdec.h
+++ b/libcelt/entdec.h
@@ -31,6 +31,7 @@
 
 #if !defined(_entdec_H)
 # define _entdec_H (1)
+# include <limits.h>
 # include "entcode.h"
 
 
@@ -52,12 +53,13 @@ struct ec_dec{
    ec_uint32       dif;
    /*Normalization factor.*/
    ec_uint32       nrm;
-   /*Byte that will be written at the end*/
-   unsigned char   end_byte;
-   /*Number of valid bits in end_byte*/
-   int             end_bits_left;
-   int             nb_end_bits;
-   /*Nonzero if an error occurred*/
+   /*Bits that were written at the end.*/
+   ec_window       end_window;
+   /*Number of valid bits in end_window.*/
+   int             nend_bits;
+   /*The total number of whole bits read.*/
+   int             nbits_total;
+   /*Nonzero if an error occurred.*/
    int             error;
 };
 
@@ -101,7 +103,7 @@ void ec_dec_update(ec_dec *_this,unsigned _fl,unsigned _fh,
   The bits must have been encoded with ec_enc_bits().
   No call to ec_dec_update() is necessary after this call.
   _ftb: The number of bits to extract.
-        This must be at least one, and no more than 32.
+        This must be between 0 and 25, inclusive.
   Return: The decoded bits.*/
 ec_uint32 ec_dec_bits(ec_dec *_this,unsigned _ftb);
 /*Extracts a raw unsigned integer with a non-power-of-2 range from the stream.
@@ -137,7 +139,7 @@ int ec_dec_bit_logp(ec_dec *_this,unsigned _logp);
            rounding error is in the positive direction).*/
 ec_uint32 ec_dec_tell(ec_dec *_this,int _b);
 
-/*Returns a nonzero value if any error has been detected during decoding*/
+/*Return: A nonzero value if any error has been detected during decoding.*/
 int ec_dec_get_error(ec_dec *_this);
 
 #endif
diff --git a/libcelt/entenc.c b/libcelt/entenc.c
index 8b1e1e3a..d4dcda74 100644
--- a/libcelt/entenc.c
+++ b/libcelt/entenc.c
@@ -29,53 +29,60 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef HAVE_CONFIG_H
-#include "config.h"
+#if defined(HAVE_CONFIG_H)
+# include "config.h"
 #endif
-
 #include "os_support.h"
 #include "entenc.h"
 #include "arch.h"
 
 
-void ec_byte_writeinit_buffer(ec_byte_buffer *_b, unsigned char *_buf, ec_uint32 _size){
-  _b->ptr=_b->buf=_buf;
-  _b->end_ptr=_b->buf+_size-1;
+void ec_byte_writeinit_buffer(ec_byte_buffer *_b,
+ unsigned char *_buf,ec_uint32 _size){
+  _b->buf=_buf;
+  _b->end_offs=_b->offs=0;
   _b->storage=_size;
 }
 
-void ec_byte_shrink(ec_byte_buffer *_b, ec_uint32 _size){
-   int i;
-   int d;
-   int N;
-   d = _b->storage-_size;
-   N = _b->storage-(_b->end_ptr-_b->buf)-1;
-   /* Copy "raw bytes" */
-   _b->end_ptr=_b->buf+_size-1-N;
-   for (i=0;i<N;i++)
-      _b->end_ptr[i+1] = _b->end_ptr[i+1+d];
-   _b->storage=_size;
+void ec_byte_shrink(ec_byte_buffer *_b,ec_uint32 _size){
+  celt_assert(_b->offs+_b->end_offs<=_size);
+  CELT_MOVE(_b->buf+_size-_b->end_offs,
+   _b->buf+_b->storage-_b->end_offs,_b->end_offs);
+  _b->storage=_size;
 }
 
-int ec_byte_write1(ec_byte_buffer *_b,unsigned _value){
-  ptrdiff_t endbyte;
-  endbyte=_b->ptr-_b->buf;
-  if(endbyte>=_b->storage){
-    return 1;
-  } else {
-    *(_b->ptr++)=(unsigned char)_value;
-    return 0;
-  }
+int ec_byte_write(ec_byte_buffer *_b,unsigned _value){
+  if(_b->offs+_b->end_offs>=_b->storage)return -1;
+  _b->buf[_b->offs++]=(unsigned char)_value;
+  return 0;
 }
 
 int ec_byte_write_at_end(ec_byte_buffer *_b,unsigned _value){
-  if (_b->end_ptr < _b->ptr)
-  {
-    return 1;
-  } else {
-    *(_b->end_ptr--)=(unsigned char)_value;
-    return 0;
+  if(_b->offs+_b->end_offs>=_b->storage)return -1;
+  _b->buf[_b->storage-++(_b->end_offs)]=(unsigned char)_value;
+  return 0;
+}
+
+int ec_byte_write_done(ec_byte_buffer *_b,int _start_bits_available,
+ unsigned _end_byte,int _end_bits_used){
+  int ret;
+  CELT_MEMSET(_b->buf+_b->offs,0,_b->storage-_b->offs-_b->end_offs);
+  ret=0;
+  if(_end_bits_used>0){
+    if(_b->offs+_b->end_offs>=_b->storage){
+      /*If there's no range coder data at all, give up.*/
+      if(_b->end_offs>=_b->storage)return -1;
+      /*If we've busted, don't add too many extra bits to the last byte; it
+         would corrupt the range coder data, and that's more important.*/
+      if(_start_bits_available<_end_bits_used){
+        _end_bits_used=_start_bits_available;
+        _end_byte&=(1<_start_bits_available)-1;
+        ret=-1;
+      }
+    }
+    _b->buf[_b->storage-_b->end_offs-1]|=_end_byte;
   }
+  return ret;
 }
 
 void ec_enc_uint(ec_enc *_this,ec_uint32 _fl,ec_uint32 _ft){
@@ -86,18 +93,16 @@ void ec_enc_uint(ec_enc *_this,ec_uint32 _fl,ec_uint32 _ft){
   celt_assert(_ft>1);
   _ft--;
   ftb=EC_ILOG(_ft);
-  if(ftb>EC_UNIT_BITS){
-    ftb-=EC_UNIT_BITS;
+  if(ftb>EC_UINT_BITS){
+    ftb-=EC_UINT_BITS;
     ft=(_ft>>ftb)+1;
     fl=(unsigned)(_fl>>ftb);
     ec_encode(_this,fl,fl+1,ft);
-    ec_enc_bits(_this,_fl&(1<<ftb)-1,ftb);
-  } else {
-    ec_encode(_this,_fl,_fl+1,_ft+1);
+    ec_enc_bits(_this,_fl&((ec_uint32)1<<ftb)-1,ftb);
   }
+  else ec_encode(_this,_fl,_fl+1,_ft+1);
 }
 
-int ec_enc_get_error(ec_enc *_this)
-{
+int ec_enc_get_error(ec_enc *_this){
   return _this->error;
 }
diff --git a/libcelt/entenc.h b/libcelt/entenc.h
index 2b31f96a..b64ed819 100644
--- a/libcelt/entenc.h
+++ b/libcelt/entenc.h
@@ -52,12 +52,13 @@ struct ec_enc{
    ec_uint32       rng;
    /*The low end of the current range (inclusive).*/
    ec_uint32       low;
-   /*Byte that will be written at the end*/
-   unsigned char   end_byte;
-   /*Number of valid bits in end_byte*/
-   int             end_bits_left;
-   int             nb_end_bits;
-   /*Nonzero if an error occurred*/
+   /*Bits that will be written at the end.*/
+   ec_window       end_window;
+   /*Number of valid bits in end_window.*/
+   int             nend_bits;
+   /*The total number of whole bits written.*/
+   int             nbits_total;
+   /*Nonzero if an error occurred.*/
    int             error;
 };
 
@@ -84,7 +85,7 @@ void ec_encode_bin(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _bits);
 /*Encodes a sequence of raw bits in the stream.
   _fl:  The bits to encode.
   _ftb: The number of bits to encode.
-        This must be at least one, and no more than 32.*/
+        This must be between 0 and 25, inclusive.*/
 void ec_enc_bits(ec_enc *_this,ec_uint32 _fl,unsigned _ftb);
 
 /*Encodes a raw unsigned integer in the stream.
diff --git a/libcelt/quant_bands.c b/libcelt/quant_bands.c
index e894aff1..091d6f12 100644
--- a/libcelt/quant_bands.c
+++ b/libcelt/quant_bands.c
@@ -207,7 +207,6 @@ static void quant_coarse_energy_impl(const CELTMode *m, int start, int end,
          bits_left = budget-(int)ec_enc_tell(enc, 0)-3*C*(end-i);
          if (i!=start && bits_left < 30)
          {
-            qi = IMAX(-1,qi);
             if (bits_left < 24)
                qi = IMIN(1, qi);
             if (bits_left < 16)
@@ -274,6 +273,8 @@ void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd,
       ec_enc enc_intra_state;
       ec_byte_buffer buf_intra_state;
       int tell_intra;
+      ec_uint32 nstart_bytes;
+      ec_uint32 nintra_bytes;
       VARDECL(unsigned char, intra_bits);
 
       tell_intra = ec_enc_tell(enc, 3);
@@ -281,9 +282,13 @@ void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd,
       enc_intra_state = *enc;
       buf_intra_state = *(enc->buf);
 
-      ALLOC(intra_bits, buf_intra_state.ptr-buf_start_state.ptr, unsigned char);
+      nstart_bytes = ec_byte_bytes(&buf_start_state);
+      nintra_bytes = ec_byte_bytes(&buf_intra_state);
+      ALLOC(intra_bits, nintra_bytes-nstart_bytes, unsigned char);
       /* Copy bits from intra bit-stream */
-      CELT_COPY(intra_bits, buf_start_state.ptr, buf_intra_state.ptr-buf_start_state.ptr);
+      CELT_COPY(intra_bits,
+            ec_byte_get_buffer(&buf_intra_state) + nstart_bytes,
+            nintra_bytes - nstart_bytes);
 
       *enc = enc_start_state;
       *(enc->buf) = buf_start_state;
@@ -295,8 +300,9 @@ void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd,
       {
          *enc = enc_intra_state;
          *(enc->buf) = buf_intra_state;
-         /* Copy bits from to bit-stream */
-         CELT_COPY(buf_start_state.ptr, intra_bits, buf_intra_state.ptr-buf_start_state.ptr);
+         /* Copy intra bits to bit-stream */
+         CELT_COPY(ec_byte_get_buffer(&buf_intra_state) + nstart_bytes,
+               intra_bits, nintra_bytes - nstart_bytes);
          CELT_COPY(oldEBands, oldEBands_intra, C*end);
          CELT_COPY(error, error_intra, C*end);
       }
diff --git a/libcelt/rangedec.c b/libcelt/rangedec.c
index a1952b5f..9e3f41ed 100644
--- a/libcelt/rangedec.c
+++ b/libcelt/rangedec.c
@@ -97,35 +97,20 @@
   }*/
 
 
-/*Gets the next byte of input.
-  After all the bytes in the current packet have been consumed, and the extra
-   end code returned if needed, this function will continue to return zero each
-   time it is called.
-  Return: The next byte of input.*/
-static int ec_dec_in(ec_dec *_this){
-  int ret;
-  ret=ec_byte_read1(_this->buf);
-  if(ret<0){
-    ret=0;
-    /*Needed to keep oc_dec_tell() operating correctly.*/
-    ec_byte_adv1(_this->buf);
-  }
-  return ret;
-}
-
 /*Normalizes the contents of dif and rng so that rng lies entirely in the
    high-order symbol.*/
 static inline void ec_dec_normalize(ec_dec *_this){
   /*If the range is too small, rescale it and input some bits.*/
   while(_this->rng<=EC_CODE_BOT){
     int sym;
+    _this->nbits_total+=EC_SYM_BITS;
     _this->rng<<=EC_SYM_BITS;
     /*Use up the remaining bits from our last symbol.*/
-    sym=_this->rem<<EC_CODE_EXTRA;
+    sym=_this->rem;
     /*Read the next value from the input.*/
-    _this->rem=ec_dec_in(_this);
+    _this->rem=ec_byte_read(_this->buf);
     /*Take the rest of the bits we need from this new symbol.*/
-    sym|=_this->rem>>EC_SYM_BITS-EC_CODE_EXTRA;
+    sym=(sym<<EC_SYM_BITS|_this->rem)>>EC_SYM_BITS-EC_CODE_EXTRA;
     /*And subtract them from dif, capped to be less than EC_CODE_TOP.*/
     _this->dif=(_this->dif<<EC_SYM_BITS)+(EC_SYM_MAX&~sym)&EC_CODE_TOP-1;
   }
@@ -133,14 +118,17 @@ static inline void ec_dec_normalize(ec_dec *_this){
 
 void ec_dec_init(ec_dec *_this,ec_byte_buffer *_buf){
   _this->buf=_buf;
-  _this->rem=ec_dec_in(_this);
+  _this->rem=ec_byte_read(_buf);
   _this->rng=1U<<EC_CODE_EXTRA;
   _this->dif=_this->rng-1-(_this->rem>>EC_SYM_BITS-EC_CODE_EXTRA);
   /*Normalize the interval.*/
   ec_dec_normalize(_this);
-  _this->end_byte=0; /* Required for platforms that have chars > 8 bits */
-  _this->end_bits_left=0;
-  _this->nb_end_bits=0;
+  _this->end_window=0;
+  _this->nend_bits=0;
+  /*This is the offset from which ec_enc_tell() will subtract partial bits.
+    This must be after the initial ec_dec_normalize(), or you will have to
+     compensate for the bits that are read there.*/
+  _this->nbits_total=EC_CODE_BITS+1;
   _this->error=0;
 }
 
@@ -159,28 +147,11 @@ unsigned ec_decode_bin(ec_dec *_this,unsigned _bits){
    return (1<<_bits)-EC_MINI(s+1,1<<_bits);
 }
 
-unsigned ec_dec_bits(ec_dec *_this,unsigned bits){
-  unsigned value=0;
-  int count=0;
-  _this->nb_end_bits += bits;
-  while (bits>=_this->end_bits_left)
-  {
-    value |= _this->end_byte>>(8-_this->end_bits_left)<<count;
-    count += _this->end_bits_left;
-    bits -= _this->end_bits_left;
-    _this->end_byte=ec_byte_look_at_end(_this->buf);
-    _this->end_bits_left = 8;
-  }
-  value |= ((_this->end_byte>>(8-_this->end_bits_left))&((1<<bits)-1))<<count;
-  _this->end_bits_left -= bits;
-  return value;
-}
-
 void ec_dec_update(ec_dec *_this,unsigned _fl,unsigned _fh,unsigned _ft){
   ec_uint32 s;
-  s=IMUL32(_this->nrm,(_ft-_fh));
+  s=IMUL32(_this->nrm,_ft-_fh);
   _this->dif-=s;
-  _this->rng=_fl>0?IMUL32(_this->nrm,(_fh-_fl)):_this->rng-s;
+  _this->rng=_fl>0?IMUL32(_this->nrm,_fh-_fl):_this->rng-s;
   ec_dec_normalize(_this);
 }
 
@@ -237,17 +208,45 @@ int ec_dec_cdf(ec_dec *_this,const int *_cdf,unsigned _ftb){
   return val-1;
 }
 
+ec_uint32 ec_dec_bits(ec_dec *_this,unsigned _bits){
+  ec_window window;
+  int       available;
+  int       ret;
+  window=_this->end_window;
+  available=_this->nend_bits;
+  if(available<_bits){
+    do{
+      window|=(ec_window)ec_byte_read_from_end(_this->buf)<<available;
+      available+=EC_SYM_BITS;
+    }
+    while(available<=EC_WINDOW_SIZE-EC_SYM_BITS);
+  }
+  ret=(ec_uint32)window&((ec_uint32)1<<_bits)-1;
+  window>>=_bits;
+  available-=_bits;
+  _this->end_window=window;
+  _this->nend_bits=available;
+  _this->nbits_total+=_bits;
+  return ret;
+}
+
 ec_uint32 ec_dec_tell(ec_dec *_this,int _b){
+  ec_uint32 nbits;
   ec_uint32 r;
   int       l;
-  ec_uint32      nbits;
-  nbits=(ec_byte_bytes(_this->buf)-(EC_CODE_BITS+EC_SYM_BITS-1)/EC_SYM_BITS)*
-   EC_SYM_BITS;
   /*To handle the non-integral number of bits still left in the decoder state,
-     we compute the number of bits of low that must be encoded to ensure that
-     the value is inside the range for any possible subsequent bits.*/
-  nbits+=EC_CODE_BITS+1+_this->nb_end_bits;
-  nbits<<=_b;
+     we compute the worst-case number of bits of low that must be encoded to
+     ensure that the value is inside the range for any possible subsequent
+     bits.
+    The computation here is independent of low itself (the decoder does not
+     even track that value), even though the real number of bits used after
+     ec_enc_done() may be 1 smaller if rng is a power of two and the
+     corresponding trailing bits of low are all zeros.
+    If we did try to track that special case, then coding a value with a
+     probability of 1/(1<<n) might sometimes appear to use more than n bits.
+    This may help explain the surprising result that a newly initialized
+     decoder claims to have used 1 bit.*/
+  nbits=_this->nbits_total<<_b;
   l=EC_ILOG(_this->rng);
   r=_this->rng>>l-16;
   while(_b-->0){
diff --git a/libcelt/rangeenc.c b/libcelt/rangeenc.c
index e5040e70..5f276440 100644
--- a/libcelt/rangeenc.c
+++ b/libcelt/rangeenc.c
@@ -83,11 +83,11 @@ static void ec_enc_carry_out(ec_enc *_this,int _c){
     carry=_c>>EC_SYM_BITS;
     /*Don't output a byte on the first write.
       This compare should be taken care of by branch-prediction thereafter.*/
-    if(_this->rem>=0)_this->error|=ec_byte_write1(_this->buf,_this->rem+carry);
+    if(_this->rem>=0)_this->error|=ec_byte_write(_this->buf,_this->rem+carry);
     if(_this->ext>0){
       unsigned sym;
       sym=EC_SYM_MAX+carry&EC_SYM_MAX;
-      do _this->error|=ec_byte_write1(_this->buf,sym);
+      do _this->error|=ec_byte_write(_this->buf,sym);
       while(--(_this->ext)>0);
     }
     _this->rem=_c&EC_SYM_MAX;
@@ -102,6 +102,7 @@ static inline void ec_enc_normalize(ec_enc *_this){
     /*Move the next-to-high-order symbol into the high-order position.*/
     _this->low=_this->low<<EC_SYM_BITS&EC_CODE_TOP-1;
     _this->rng<<=EC_SYM_BITS;
+    _this->nbits_total+=EC_SYM_BITS;
   }
 }
 
@@ -111,9 +112,10 @@ void ec_enc_init(ec_enc *_this,ec_byte_buffer *_buf){
   _this->ext=0;
   _this->low=0;
   _this->rng=EC_CODE_TOP;
-  _this->end_byte=0;
-  _this->end_bits_left=8;
-  _this->nb_end_bits=0;
+  _this->end_window=0;
+  _this->nend_bits=0;
+  /*This is the offset from which ec_enc_tell() will subtract partial bits.*/
+  _this->nbits_total=EC_CODE_BITS+1;
   _this->error=0;
 }
 
@@ -129,69 +131,82 @@ void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _ft){
 }
 
 void ec_encode_bin(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _bits){
-   ec_uint32 r;
-   r=_this->rng>>_bits;
-   if(_fl>0){
-      _this->low+=_this->rng-IMUL32(r,((1<<_bits)-_fl));
-      _this->rng=IMUL32(r,(_fh-_fl));
-   }
-   else _this->rng-=IMUL32(r,((1<<_bits)-_fh));
-   ec_enc_normalize(_this);
+  ec_uint32 r;
+  r=_this->rng>>_bits;
+  if(_fl>0){
+    _this->low+=_this->rng-IMUL32(r,((1<<_bits)-_fl));
+    _this->rng=IMUL32(r,(_fh-_fl));
+  }
+  else _this->rng-=IMUL32(r,((1<<_bits)-_fh));
+  ec_enc_normalize(_this);
 }
 
 /*The probability of having a "one" is given in 1/65536.*/
 void ec_enc_bit_prob(ec_enc *_this,int _val,unsigned _prob){
-   ec_uint32 r;
-   ec_uint32 s;
-   ec_uint32 l;
-   r=_this->rng;
-   l=_this->low;
-   s=(r>>16)*_prob;
-   r-=s;
-   if(_val)_this->low=l+r;
-   _this->rng=_val?s:r;
-   ec_enc_normalize(_this);
+  ec_uint32 r;
+  ec_uint32 s;
+  ec_uint32 l;
+  r=_this->rng;
+  l=_this->low;
+  s=(r>>16)*_prob;
+  r-=s;
+  if(_val)_this->low=l+r;
+  _this->rng=_val?s:r;
+  ec_enc_normalize(_this);
 }
 
 /*The probability of having a "one" is 1/(1<<_logp).*/
 void ec_enc_bit_logp(ec_enc *_this,int _val,unsigned _logp){
-   ec_uint32 r;
-   ec_uint32 s;
-   ec_uint32 l;
-   r=_this->rng;
-   l=_this->low;
-   s=r>>_logp;
-   r-=s;
-   if(_val)_this->low=l+r;
-   _this->rng=_val?s:r;
-   ec_enc_normalize(_this);
+  ec_uint32 r;
+  ec_uint32 s;
+  ec_uint32 l;
+  r=_this->rng;
+  l=_this->low;
+  s=r>>_logp;
+  r-=s;
+  if(_val)_this->low=l+r;
+  _this->rng=_val?s:r;
+  ec_enc_normalize(_this);
 }
 
-void ec_enc_bits(ec_enc *_this,unsigned _fl,unsigned bits){
-  _this->nb_end_bits += bits;
-  while (bits >= _this->end_bits_left)
-  {
-    _this->end_byte |= (_fl<<(8-_this->end_bits_left)) & 0xff;
-    _fl >>= _this->end_bits_left;
-    _this->error|=ec_byte_write_at_end(_this->buf, _this->end_byte);
-    _this->end_byte = 0;
-    bits -= _this->end_bits_left;
-    _this->end_bits_left = 8;
+void ec_enc_bits(ec_enc *_this,ec_uint32 _fl,unsigned _bits){
+  ec_window window;
+  int       used;
+  window=_this->end_window;
+  used=_this->nend_bits;
+  if(used+_bits>EC_WINDOW_SIZE){
+    do{
+      _this->error|=
+       ec_byte_write_at_end(_this->buf,(unsigned)window&EC_SYM_MAX);
+      window>>=EC_SYM_BITS;
+      used-=EC_SYM_BITS;
+    }
+    while(used>=EC_SYM_BITS);
   }
-  _this->end_byte |= (_fl<<(8-_this->end_bits_left)) & 0xff;
-  _this->end_bits_left -= bits;
+  window|=(ec_window)_fl<<used;
+  used+=_bits;
+  _this->end_window=window;
+  _this->nend_bits=used;
+  _this->nbits_total+=_bits;
 }
 
 ec_uint32 ec_enc_tell(ec_enc *_this,int _b){
+  ec_uint32 nbits;
   ec_uint32 r;
   int       l;
-  ec_uint32      nbits;
-  nbits=(ec_byte_bytes(_this->buf)+(_this->rem>=0)+_this->ext)*EC_SYM_BITS;
-  /*To handle the non-integral number of bits still left in the encoder state,
-     we compute the number of bits of low that must be encoded to ensure that
-     the value is inside the range for any possible subsequent bits.*/
-  nbits+=EC_CODE_BITS+1+_this->nb_end_bits;
-  nbits<<=_b;
+  /*To handle the non-integral number of bits still left in the decoder state,
+     we compute the worst-case number of bits of low that must be encoded to
+     ensure that the value is inside the range for any possible subsequent
+     bits.
+    The computation here is independent of low itself (the decoder does not
+     even track that value), even though the real number of bits used after
+     ec_enc_done() may be 1 smaller if rng is a power of two and the
+     corresponding trailing bits of low are all zeros.
+    If we did try to track that special case, then coding a value with a
+     probability of 1/(1<<n) might sometimes appear to use more than n bits.
+    This may help explain the surprising result that a newly initialized
+     encoder claims to have used 1 bit.*/
+  nbits=_this->nbits_total<<_b;
   l=EC_ILOG(_this->rng);
   r=_this->rng>>l-16;
   while(_b-->0){
@@ -205,6 +220,8 @@ ec_uint32 ec_enc_tell(ec_enc *_this,int _b){
 }
 
 void ec_enc_done(ec_enc *_this){
+  ec_window window;
+  int       used;
   ec_uint32 msk;
   ec_uint32 end;
   int       l;
@@ -224,15 +241,16 @@ void ec_enc_done(ec_enc *_this){
     l-=EC_SYM_BITS;
   }
   /*If we have a buffered byte flush it into the output buffer.*/
-  if(_this->rem>=0||_this->ext>0){
-    ec_enc_carry_out(_this,0);
-    _this->rem=-1;
-  }
-  {
-    unsigned char *ptr = _this->buf->ptr;
-    while (ptr<= _this->buf->end_ptr)
-      *ptr++ = 0;
-    if (_this->end_bits_left != 8)
-      *_this->buf->end_ptr |= _this->end_byte;
+  if(_this->rem>=0||_this->ext>0)ec_enc_carry_out(_this,0);
+  /*If we have buffered extra bits, flush them as well.*/
+  window=_this->end_window;
+  used=_this->nend_bits;
+  while(used>=EC_SYM_BITS){
+    _this->error|=
+     ec_byte_write_at_end(_this->buf,(unsigned)window&EC_SYM_MAX);
+    window>>=EC_SYM_BITS;
+    used-=EC_SYM_BITS;
   }
+  /*Clear any excess space and add any remaining extra bits to the last byte.*/
+  if(!_this->error)_this->error=ec_byte_write_done(_this->buf,-l,window,used);
 }
diff --git a/tests/ectest.c b/tests/ectest.c
index 76c3f4e3..eebab329 100644
--- a/tests/ectest.c
+++ b/tests/ectest.c
@@ -77,7 +77,7 @@ int main(int _argc,char **_argv){
   fprintf(stderr,
    "Encoded %0.2lf bits of entropy to %0.2lf bits (%0.3lf%% wasted).\n",
    entropy,ldexp(nbits,-4),100*(nbits-ldexp(entropy,4))/nbits);
-  fprintf(stderr,"Packed to %li bytes.\n",(long)(buf.ptr-buf.buf));
+  fprintf(stderr,"Packed to %li bytes.\n",(long)ec_byte_bytes(&buf));
   ec_byte_readinit(&buf,ptr,DATA_SIZE);
   ec_dec_init(&dec,&buf);
   for(ft=2;ft<1024;ft++){
@@ -109,27 +109,36 @@ int main(int _argc,char **_argv){
   fprintf(stderr,"Testing random streams... Random seed: %u (%.4X)\n", seed, rand() % 65536);
   for(i=0;i<409600;i++){
     unsigned *data;
+    unsigned *tell;
     int       j;
     int tell_bits;
     int zeros;
     ft=rand()/((RAND_MAX>>(rand()%11))+1)+10;
     sz=rand()/((RAND_MAX>>(rand()%9))+1);
     data=(unsigned *)malloc(sz*sizeof(*data));
+    tell=(unsigned *)malloc((sz+1)*sizeof(*tell));
     ec_byte_writeinit_buffer(&buf, ptr, DATA_SIZE2);
     ec_enc_init(&enc,&buf);
     zeros = rand()%13==0;
+    tell[0]=ec_enc_tell(&enc, 3);
     for(j=0;j<sz;j++){
       if (zeros)
         data[j]=0;
       else
         data[j]=rand()%ft;
       ec_enc_uint(&enc,data[j],ft);
+      tell[j+1]=ec_enc_tell(&enc, 3);
     }
     if (rand()%2==0)
       while(ec_enc_tell(&enc, 0)%8 != 0)
         ec_enc_uint(&enc, rand()%2, 2);
     tell_bits = ec_enc_tell(&enc, 0);
     ec_enc_done(&enc);
+    if(tell_bits!=ec_enc_tell(&enc,0)){
+      fprintf(stderr,"tell() changed after ec_enc_done(): %i instead of %i (Random seed: %u)\n",
+       ec_enc_tell(&enc,0),tell_bits,seed);
+      ret=-1;
+    }
     if ((tell_bits+7)/8 < ec_byte_bytes(&buf))
     {
       fprintf (stderr, "tell() lied, there's %i bytes instead of %d (Random seed: %u)\n",
@@ -139,6 +148,11 @@ int main(int _argc,char **_argv){
     tell_bits -= 8*ec_byte_bytes(&buf);
     ec_byte_readinit(&buf,ptr,DATA_SIZE2);
     ec_dec_init(&dec,&buf);
+    if(ec_dec_tell(&dec,3)!=tell[0]){
+      fprintf(stderr,
+       "Tell mismatch between encoder and decoder at symbol %i: %i instead of %i (Random seed: %u).\n",
+       0,ec_dec_tell(&dec,3),tell[0],seed);
+    }
     for(j=0;j<sz;j++){
       sym=ec_dec_uint(&dec,ft);
       if(sym!=data[j]){
@@ -147,6 +161,11 @@ int main(int _argc,char **_argv){
          sym,data[j],ft,j,sz,seed);
         ret=-1;
       }
+      if(ec_dec_tell(&dec,3)!=tell[j+1]){
+        fprintf(stderr,
+         "Tell mismatch between encoder and decoder at symbol %i: %i instead of %i (Random seed: %u).\n",
+         j+1,ec_dec_tell(&dec,3),tell[j+1],seed);
+      }
     }
     free(data);
   }
-- 
GitLab