diff --git a/Makefile.am b/Makefile.am
index b090ca09ff255443f71cf00c2f6366a533f9d3c6..edbcc42eb2e35fe7aa3e29ddc848f1b764d90b32 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -20,9 +20,10 @@ if FIXED_POINT
 SILK_SOURCES += $(SILK_SOURCES_FIXED)
 else
 SILK_SOURCES += $(SILK_SOURCES_FLOAT)
-OPUS_SOURCES += $(OPUS_SOURCES_FLOAT)
 endif
 
+OPUS_SOURCES += $(OPUS_SOURCES_FLOAT)
+
 if CPU_ARM
 CELT_SOURCES += $(CELT_SOURCES_ARM)
 endif
diff --git a/celt/arch.h b/celt/arch.h
index 78e2635f006f39bddf94631a71e5d5cc72970e4f..f9c98567aa848944eae08fc25423c9d65db58c23 100644
--- a/celt/arch.h
+++ b/celt/arch.h
@@ -185,6 +185,7 @@ typedef float celt_ener;
 #define MAC16_32_Q15(c,a,b)     ((c)+(a)*(b))
 
 #define MULT16_16_Q11_32(a,b)     ((a)*(b))
+#define MULT16_16_Q11(a,b)     ((a)*(b))
 #define MULT16_16_Q13(a,b)     ((a)*(b))
 #define MULT16_16_Q14(a,b)     ((a)*(b))
 #define MULT16_16_Q15(a,b)     ((a)*(b))
diff --git a/celt/celt.h b/celt/celt.h
index 0911c72f721066a71d48f35a06d7b5bd9fb42fa0..cdb76c8b920c7477aed61e0f1b5068b7040b350e 100644
--- a/celt/celt.h
+++ b/celt/celt.h
@@ -52,11 +52,11 @@ extern "C" {
 
 typedef struct {
    int valid;
-   opus_val16 tonality;
-   opus_val16 tonality_slope;
-   opus_val16 noisiness;
-   opus_val16 activity;
-   opus_val16 music_prob;
+   float tonality;
+   float tonality_slope;
+   float noisiness;
+   float activity;
+   float music_prob;
    int        bandwidth;
 }AnalysisInfo;
 
@@ -109,10 +109,7 @@ typedef struct {
 #define OPUS_SET_LFE_REQUEST    10024
 #define OPUS_SET_LFE(x) OPUS_SET_LFE_REQUEST, __opus_check_int(x)
 
-#define OPUS_SET_ENERGY_SAVE_REQUEST    10026
-#define OPUS_SET_ENERGY_SAVE(x) OPUS_SET_ENERGY_SAVE_REQUEST, __opus_check_val16_ptr(x)
-
-#define OPUS_SET_ENERGY_MASK_REQUEST    10028
+#define OPUS_SET_ENERGY_MASK_REQUEST    10026
 #define OPUS_SET_ENERGY_MASK(x) OPUS_SET_ENERGY_MASK_REQUEST, __opus_check_val16_ptr(x)
 
 /* Encoder stuff */
@@ -193,6 +190,9 @@ extern const signed char tf_select_table[4][8];
 
 int resampling_factor(opus_int32 rate);
 
+void preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp,
+                        int N, int CC, int upsample, const opus_val16 *coef, celt_sig *mem, int clip);
+
 void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
       opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
       const opus_val16 *window, int overlap);
diff --git a/celt/celt_encoder.c b/celt/celt_encoder.c
index 31bea1bbaa8b9a9c172c7c12ba22bf824a0412fd..241fb3721e832b3d3c7c32bdbf3b109eca0f0011 100644
--- a/celt/celt_encoder.c
+++ b/celt/celt_encoder.c
@@ -111,7 +111,6 @@ struct OpusCustomEncoder {
    opus_val32 overlap_max;
    opus_val16 stereo_saving;
    int intensity;
-   opus_val16 *energy_save;
    opus_val16 *energy_mask;
    opus_val16 spec_avg;
 
@@ -452,7 +451,7 @@ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS
 }
 
 
-static void preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp,
+void preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp,
                         int N, int CC, int upsample, const opus_val16 *coef, celt_sig *mem, int clip)
 {
    int i;
@@ -744,7 +743,7 @@ static void tf_encode(int start, int end, int isTransient, int *tf_res, int LM,
 static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
       const opus_val16 *bandLogE, int end, int LM, int C, int N0,
       AnalysisInfo *analysis, opus_val16 *stereo_saving, opus_val16 tf_estimate,
-      int intensity)
+      int intensity, opus_val16 surround_trim)
 {
    int i;
    opus_val32 diff=0;
@@ -818,11 +817,12 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
    if (diff < -QCONST16(10.f, DB_SHIFT))
       trim_index++;
    trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), SHR16(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 ));
+   trim -= SHR16(surround_trim, DB_SHIFT-8);
    trim -= 2*SHR16(tf_estimate, 14-8);
-#ifndef FIXED_POINT
+#ifndef DISABLE_FLOAT_API
    if (analysis->valid)
    {
-      trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), 2*(analysis->tonality_slope+.05f)));
+      trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), QCONST16(2.f, 8)*(analysis->tonality_slope+.05f)));
    }
 #endif
 
@@ -877,7 +877,7 @@ static int stereo_analysis(const CELTMode *m, const celt_norm *X,
 static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 *bandLogE2,
       int nbEBands, int start, int end, int C, int *offsets, int lsb_depth, const opus_int16 *logN,
       int isTransient, int vbr, int constrained_vbr, const opus_int16 *eBands, int LM,
-      int effectiveBytes, opus_int32 *tot_boost_, int lfe)
+      int effectiveBytes, opus_int32 *tot_boost_, int lfe, opus_val16 *surround_dynalloc)
 {
    int i, c;
    opus_int32 tot_boost=0;
@@ -940,6 +940,8 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16
             follower[i] = MAX16(0, bandLogE[i]-follower[i]);
          }
       }
+      for (i=start;i<end;i++)
+         follower[i] = MAX16(follower[i], surround_dynalloc[i]);
       /* For non-transient CBR/CVBR frames, halve the dynalloc contribution */
       if ((!vbr || constrained_vbr)&&!isTransient)
       {
@@ -1140,7 +1142,7 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32
    target = base_target;
 
    /*printf("%f %f %f %f %d %d ", st->analysis.activity, st->analysis.tonality, tf_estimate, st->stereo_saving, tot_boost, coded_bands);*/
-#ifndef FIXED_POINT
+#ifndef DISABLE_FLOAT_API
    if (analysis->valid && analysis->activity<.4)
       target -= (opus_int32)((coded_bins<<BITRES)*(.4f-analysis->activity));
 #endif
@@ -1165,7 +1167,7 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32
                     QCONST16(0.02f,14) : QCONST16(0.04f,14);
    target += (opus_int32)SHL32(MULT16_32_Q15(tf_estimate-tf_calibration, target),1);
 
-#ifndef FIXED_POINT
+#ifndef DISABLE_FLOAT_API
    /* Apply tonality boost */
    if (analysis->valid && !lfe)
    {
@@ -1291,6 +1293,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
    int transient_got_disabled=0;
    opus_val16 surround_masking=0;
    opus_val16 temporal_vbr=0;
+   opus_val16 surround_trim = 0;
+   VARDECL(opus_val16, surround_dynalloc);
    ALLOC_STACK;
 
    mode = st->mode;
@@ -1526,37 +1530,83 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
       }
    }
    amp2Log2(mode, effEnd, st->end, bandE, bandLogE, C);
-   if (st->energy_save)
-   {
-      opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0;
-#ifdef FIXED_POINT
-      /* Compensate for the 1/8 gain we apply in the fixed-point downshift to avoid overflows. */
-      offset -= QCONST16(3.0f, DB_SHIFT);
-#endif
-      for(i=0;i<C*nbEBands;i++)
-         st->energy_save[i]=bandLogE[i]-offset;
-      st->energy_save=NULL;
-   }
+
+   ALLOC(surround_dynalloc, C*nbEBands, opus_val16);
+   for(i=0;i<st->end;i++)
+      surround_dynalloc[i] = 0;
    /* This computes how much masking takes place between surround channels */
-   if (st->energy_mask&&!st->lfe)
+   if (st->start==0&&st->energy_mask&&!st->lfe)
    {
+      int mask_end;
+      int midband;
+      int count_dynalloc;
       opus_val32 mask_avg=0;
-      opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0;
+      opus_val32 diff=0;
+      int count=0;
+      mask_end = st->lastCodedBands;
       for (c=0;c<C;c++)
       {
-         opus_val16 followE, followMask;
-         followE = followMask = -QCONST16(14.f, DB_SHIFT);
-         for(i=0;i<st->end;i++)
+         for(i=0;i<mask_end;i++)
          {
-            /* We use a simple follower to approximate the masking spreading function. */
-            followE = MAX16(followE-QCONST16(1.f, DB_SHIFT), bandLogE[nbEBands*c+i]-offset);
-            followMask = MAX16(followMask-QCONST16(1.f, DB_SHIFT), st->energy_mask[nbEBands*c+i]);
-            mask_avg += followE-followMask;
+            opus_val16 mask;
+            mask = MAX16(MIN16(st->energy_mask[nbEBands*c+i],
+                   QCONST16(.25f, DB_SHIFT)), -QCONST16(2.0f, DB_SHIFT));
+            if (mask > 0)
+               mask = HALF16(mask);
+            mask_avg += MULT16_16(mask, eBands[i+1]-eBands[i]);
+            count += eBands[i+1]-eBands[i];
+            diff += MULT16_16(mask, 1+2*i-mask_end);
          }
       }
-      surround_masking = DIV32_16(mask_avg,C*st->end) + QCONST16(.7f, DB_SHIFT);
-      surround_masking = MIN16(MAX16(surround_masking, -QCONST16(2.f, DB_SHIFT)), QCONST16(.2f, DB_SHIFT));
-      surround_masking -= HALF16(HALF16(surround_masking));
+      mask_avg = DIV32_16(mask_avg,count);
+      mask_avg += QCONST16(.2f, DB_SHIFT);
+      diff = diff*6/(C*(mask_end-1)*(mask_end+1)*mask_end);
+      /* Again, being conservative */
+      diff = HALF32(diff);
+      diff = MAX32(MIN32(diff, QCONST32(.031f, DB_SHIFT)), -QCONST32(.031f, DB_SHIFT));
+      /* Find the band that's in the middle of the coded spectrum */
+      for (midband=0;eBands[midband+1] < eBands[mask_end]/2;midband++);
+      count_dynalloc=0;
+      for(i=0;i<mask_end;i++)
+      {
+         opus_val32 lin;
+         opus_val16 unmask;
+         lin = mask_avg + diff*(i-midband);
+         if (C==2)
+            unmask = MAX16(st->energy_mask[i], st->energy_mask[nbEBands+i]);
+         else
+            unmask = st->energy_mask[i];
+         unmask = MIN16(unmask, QCONST16(.0f, DB_SHIFT));
+         unmask -= lin;
+         if (unmask > QCONST16(.25f, DB_SHIFT))
+         {
+            surround_dynalloc[i] = unmask - QCONST16(.25f, DB_SHIFT);
+            count_dynalloc++;
+         }
+      }
+      if (count_dynalloc>=3)
+      {
+         /* If we need dynalloc in many bands, it's probably because our
+            initial masking rate was too low. */
+         mask_avg += QCONST16(.25f, DB_SHIFT);
+         if (mask_avg>0)
+         {
+            /* Something went really wrong in the original calculations,
+               disabling masking. */
+            mask_avg = 0;
+            diff = 0;
+            for(i=0;i<mask_end;i++)
+               surround_dynalloc[i] = 0;
+         } else {
+            for(i=0;i<mask_end;i++)
+               surround_dynalloc[i] = MAX16(0, surround_dynalloc[i]-QCONST16(.25f, DB_SHIFT));
+         }
+      }
+      mask_avg += QCONST16(.2f, DB_SHIFT);
+      /* Convert to 1/64th units used for the trim */
+      surround_trim = 64*diff;
+      /*printf("%d %d ", mask_avg, surround_trim);*/
+      surround_masking = mask_avg;
    }
    /* Temporal VBR (but not for LFE) */
    if (!st->lfe)
@@ -1683,7 +1733,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
 
    maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, st->start, st->end, C, offsets,
          st->lsb_depth, mode->logN, isTransient, st->vbr, st->constrained_vbr,
-         eBands, LM, effectiveBytes, &tot_boost, st->lfe);
+         eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc);
    /* For LFE, everything interesting is in the first band */
    if (st->lfe)
       offsets[0] = IMIN(8, effectiveBytes/3);
@@ -1756,7 +1806,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
          alloc_trim = 5;
       else
          alloc_trim = alloc_trim_analysis(mode, X, bandLogE,
-            st->end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, st->intensity);
+            st->end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, st->intensity, surround_trim);
       ec_enc_icdf(enc, alloc_trim, trim_icdf, 7);
       tell = ec_tell_frac(enc);
    }
@@ -1859,7 +1909,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
    anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0;
    bits -= anti_collapse_rsv;
    signalBandwidth = st->end-1;
-#ifndef FIXED_POINT
+#ifndef DISABLE_FLOAT_API
    if (st->analysis.valid)
    {
       int min_bandwidth;
@@ -2261,12 +2311,6 @@ int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...)
           st->lfe = value;
       }
       break;
-      case OPUS_SET_ENERGY_SAVE_REQUEST:
-      {
-          opus_val16 *value = va_arg(ap, opus_val16*);
-          st->energy_save=value;
-      }
-      break;
       case OPUS_SET_ENERGY_MASK_REQUEST:
       {
           opus_val16 *value = va_arg(ap, opus_val16*);
diff --git a/celt/fixed_generic.h b/celt/fixed_generic.h
index 0e77976e83bb4549090eadd1678e0b647df55237..657e67c8e7b1883fa36ba4faa20860f4d58af292 100644
--- a/celt/fixed_generic.h
+++ b/celt/fixed_generic.h
@@ -116,6 +116,7 @@
 #define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15)))
 
 #define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11))
+#define MULT16_16_Q11(a,b) (SHR(MULT16_16((a),(b)),11))
 #define MULT16_16_Q13(a,b) (SHR(MULT16_16((a),(b)),13))
 #define MULT16_16_Q14(a,b) (SHR(MULT16_16((a),(b)),14))
 #define MULT16_16_Q15(a,b) (SHR(MULT16_16((a),(b)),15))
diff --git a/src/analysis.c b/src/analysis.c
index a9d2073fed6e88df2f667c47fa7c1cbb62c58fe8..53247df93241488a01fe83b9e6c8fddce99b9403 100644
--- a/src/analysis.c
+++ b/src/analysis.c
@@ -184,12 +184,12 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int
    for (;i<DETECT_SIZE;i++)
       psum += tonal->pspeech[i];
    psum = psum*tonal->music_confidence + (1-psum)*tonal->speech_confidence;
-   /*printf("%f %f\n", psum, info_out->music_prob);*/
+   /*printf("%f %f %f\n", psum, info_out->music_prob, info_out->tonality);*/
 
    info_out->music_prob = psum;
 }
 
-void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, const CELTMode *celt_mode, const void *x, int len, int offset, int C, int lsb_depth, downmix_func downmix)
+void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix)
 {
     int i, b;
     const kiss_fft_state *kfft;
@@ -234,7 +234,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
     kfft = celt_mode->mdct.kfft[0];
     if (tonal->count==0)
        tonal->mem_fill = 240;
-    downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, C);
+    downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, c1, c2, C);
     if (tonal->mem_fill+len < ANALYSIS_BUF_SIZE)
     {
        tonal->mem_fill += len;
@@ -253,14 +253,14 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
     for (i=0;i<N2;i++)
     {
        float w = analysis_window[i];
-       in[i].r = MULT16_16(w, tonal->inmem[i]);
-       in[i].i = MULT16_16(w, tonal->inmem[N2+i]);
-       in[N-i-1].r = MULT16_16(w, tonal->inmem[N-i-1]);
-       in[N-i-1].i = MULT16_16(w, tonal->inmem[N+N2-i-1]);
+       in[i].r = w*tonal->inmem[i];
+       in[i].i = w*tonal->inmem[N2+i];
+       in[N-i-1].r = w*tonal->inmem[N-i-1];
+       in[N-i-1].i = w*tonal->inmem[N+N2-i-1];
     }
     OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240);
     remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill);
-    downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, C);
+    downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C);
     tonal->mem_fill = 240 + remaining;
     opus_fft(kfft, in, out);
 
@@ -325,8 +325,12 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
        float stationarity;
        for (i=tbands[b];i<tbands[b+1];i++)
        {
-          float binE = out[i].r*out[i].r + out[N-i].r*out[N-i].r
-                     + out[i].i*out[i].i + out[N-i].i*out[N-i].i;
+          float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r
+                     + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i;
+#ifdef FIXED_POINT
+          /* FIXME: It's probably best to change the BFCC filter initial state instead */
+          binE *= 5.55e-17f;
+#endif
           E += binE;
           tE += binE*tonality[i];
           nE += binE*2.f*(.5f-noisiness[i]);
@@ -334,7 +338,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
        tonal->E[tonal->E_count][b] = E;
        frame_noisiness += nE/(1e-15f+E);
 
-       frame_loudness += celt_sqrt(E+1e-10f);
+       frame_loudness += sqrt(E+1e-10f);
        logE[b] = (float)log(E+1e-10f);
        tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01f);
        tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1f);
@@ -343,21 +347,21 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
           tonal->highE[b]+=.5f;
           tonal->lowE[b]-=.5f;
        }
-       relativeE += (logE[b]-tonal->lowE[b])/(EPSILON+tonal->highE[b]-tonal->lowE[b]);
+       relativeE += (logE[b]-tonal->lowE[b])/(1e-15+tonal->highE[b]-tonal->lowE[b]);
 
        L1=L2=0;
        for (i=0;i<NB_FRAMES;i++)
        {
-          L1 += celt_sqrt(tonal->E[i][b]);
+          L1 += sqrt(tonal->E[i][b]);
           L2 += tonal->E[i][b];
        }
 
-       stationarity = MIN16(0.99f,L1/celt_sqrt(EPSILON+NB_FRAMES*L2));
+       stationarity = MIN16(0.99f,L1/sqrt(1e-15+NB_FRAMES*L2));
        stationarity *= stationarity;
        stationarity *= stationarity;
        frame_stationarity += stationarity;
        /*band_tonality[b] = tE/(1e-15+E)*/;
-       band_tonality[b] = MAX16(tE/(EPSILON+E), stationarity*tonal->prev_band_tonality[b]);
+       band_tonality[b] = MAX16(tE/(1e-15+E), stationarity*tonal->prev_band_tonality[b]);
 #if 0
        if (b>=NB_TONAL_SKIP_BANDS)
        {
@@ -379,6 +383,9 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
     bandwidth = 0;
     maxE = 0;
     noise_floor = 5.7e-4f/(1<<(IMAX(0,lsb_depth-8)));
+#ifdef FIXED_POINT
+    noise_floor *= 1<<(15+SIG_SHIFT);
+#endif
     noise_floor *= noise_floor;
     for (b=0;b<NB_TOT_BANDS;b++)
     {
@@ -389,8 +396,8 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
        band_end = extra_bands[b+1];
        for (i=band_start;i<band_end;i++)
        {
-          float binE = out[i].r*out[i].r + out[N-i].r*out[N-i].r
-                     + out[i].i*out[i].i + out[N-i].i*out[N-i].i;
+          float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r
+                     + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i;
           E += binE;
        }
        maxE = MAX32(maxE, E);
@@ -469,14 +476,14 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
        tonal->mem[i] = BFCC[i];
     }
     for (i=0;i<9;i++)
-       features[11+i] = celt_sqrt(tonal->std[i]);
+       features[11+i] = sqrt(tonal->std[i]);
     features[20] = info->tonality;
     features[21] = info->activity;
     features[22] = frame_stationarity;
     features[23] = info->tonality_slope;
     features[24] = tonal->lowECount;
 
-#ifndef FIXED_POINT
+#ifndef DISABLE_FLOAT_API
     mlp_process(&net, features, frame_probs);
     frame_probs[0] = .5f*(frame_probs[0]+1);
     /* Curve fitting between the MLP probability and the actual probability */
@@ -611,44 +618,30 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
     RESTORE_STACK;
 }
 
-int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *pcm,
-                        const void *analysis_pcm, int frame_size, int variable_duration, int C, opus_int32 Fs, int bitrate_bps,
-                        int delay_compensation, int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info)
+void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm,
+                 int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs,
+                 int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info)
 {
    int offset;
    int pcm_len;
 
-   /* Avoid overflow/wrap-around of the analysis buffer */
-   frame_size = IMIN((DETECT_SIZE-5)*Fs/100, frame_size);
-
-   pcm_len = frame_size - analysis->analysis_offset;
-   offset = 0;
-   do {
-      tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, C, lsb_depth, downmix);
-      offset += 480;
-      pcm_len -= 480;
-   } while (pcm_len>0);
-   analysis->analysis_offset = frame_size;
-
-   if (variable_duration == OPUS_FRAMESIZE_VARIABLE && frame_size >= Fs/200)
+   if (analysis_pcm != NULL)
    {
-      int LM = 3;
-      LM = optimize_framesize((const opus_val16*)pcm, frame_size, C, Fs, bitrate_bps,
-            analysis->prev_tonality, analysis->subframe_mem, delay_compensation, downmix);
-      while ((Fs/400<<LM)>frame_size)
-         LM--;
-      frame_size = (Fs/400<<LM);
-   } else {
-      frame_size = frame_size_select(frame_size, variable_duration, Fs);
+      /* Avoid overflow/wrap-around of the analysis buffer */
+      analysis_frame_size = IMIN((DETECT_SIZE-5)*Fs/100, analysis_frame_size);
+
+      pcm_len = analysis_frame_size - analysis->analysis_offset;
+      offset = analysis->analysis_offset;
+      do {
+         tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix);
+         offset += 480;
+         pcm_len -= 480;
+      } while (pcm_len>0);
+      analysis->analysis_offset = analysis_frame_size;
+
+      analysis->analysis_offset -= frame_size;
    }
-   if (frame_size<0)
-      return -1;
-   analysis->analysis_offset -= frame_size;
 
-   /* Only perform analysis up to 20-ms frames. Longer ones will be split if
-      they're in CELT-only mode. */
    analysis_info->valid = 0;
    tonality_get_info(analysis, analysis_info, frame_size);
-
-   return frame_size;
 }
diff --git a/src/analysis.h b/src/analysis.h
index 8cd788832ceea0dd1c0fd48a144281cd30ddb185..cf37792daa42cdfffd02178d73af7d659162f448 100644
--- a/src/analysis.h
+++ b/src/analysis.h
@@ -42,7 +42,7 @@ typedef struct {
    float angle[240];
    float d_angle[240];
    float d2_angle[240];
-   float inmem[ANALYSIS_BUF_SIZE];
+   opus_val32 inmem[ANALYSIS_BUF_SIZE];
    int   mem_fill;                      /* number of usable samples in the buffer */
    float prev_band_tonality[NB_TBANDS];
    float prev_tonality;
@@ -79,12 +79,12 @@ typedef struct {
 } TonalityAnalysisState;
 
 void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info,
-     const CELTMode *celt_mode, const void *x, int len, int offset, int C, int lsb_depth, downmix_func downmix);
+     const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix);
 
 void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len);
 
-int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *pcm,
-                        const void *analysis_pcm, int frame_size, int variable_duration, int C, opus_int32 Fs, int bitrate_bps,
-                        int delay_compensation, int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info);
+void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm,
+                 int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs,
+                 int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info);
 
 #endif
diff --git a/src/mlp.c b/src/mlp.c
index 90e94a5fcb4cedd9f8560c11a5e231a6a89ceeeb..73b1d315b86b3a9372ec56b1def7a77e52e77e19 100644
--- a/src/mlp.c
+++ b/src/mlp.c
@@ -35,7 +35,7 @@
 #include "tansig_table.h"
 #define MAX_NEURONS 100
 
-#ifdef FIXED_POINT
+#if 0
 static inline opus_val16 tansig_approx(opus_val32 _x) /* Q19 */
 {
 	int i;
@@ -43,9 +43,9 @@ static inline opus_val16 tansig_approx(opus_val32 _x) /* Q19 */
 	/*double x, y;*/
 	opus_val16 dy, yy; /* Q14 */
 	/*x = 1.9073e-06*_x;*/
-	if (_x>=QCONST32(10,19))
+	if (_x>=QCONST32(8,19))
 		return QCONST32(1.,14);
-	if (_x<=-QCONST32(10,19))
+	if (_x<=-QCONST32(8,19))
 		return -QCONST32(1.,14);
 	xx = EXTRACT16(SHR32(_x, 8));
 	/*i = lrint(25*x);*/
@@ -62,11 +62,11 @@ static inline opus_val16 tansig_approx(opus_val32 _x) /* Q19 */
 }
 #else
 /*extern const float tansig_table[501];*/
-static inline opus_val16 tansig_approx(opus_val16 x)
+static inline float tansig_approx(float x)
 {
 	int i;
-	opus_val16 y, dy;
-	opus_val16 sign=1;
+	float y, dy;
+	float sign=1;
     if (x>=8)
         return 1;
     if (x<=-8)
@@ -85,6 +85,7 @@ static inline opus_val16 tansig_approx(opus_val16 x)
 }
 #endif
 
+#if 0
 void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out)
 {
 	int j;
@@ -108,4 +109,28 @@ void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out)
 		out[j] = tansig_approx(EXTRACT16(PSHR32(sum,17)));
 	}
 }
-
+#else
+void mlp_process(const MLP *m, const float *in, float *out)
+{
+    int j;
+    float hidden[MAX_NEURONS];
+    const float *W = m->weights;
+    /* Copy to tmp_in */
+    for (j=0;j<m->topo[1];j++)
+    {
+        int k;
+        float sum = *W++;
+        for (k=0;k<m->topo[0];k++)
+            sum = sum + in[k]**W++;
+        hidden[j] = tansig_approx(sum);
+    }
+    for (j=0;j<m->topo[2];j++)
+    {
+        int k;
+        float sum = *W++;
+        for (k=0;k<m->topo[1];k++)
+            sum = sum + hidden[k]**W++;
+        out[j] = tansig_approx(sum);
+    }
+}
+#endif
diff --git a/src/mlp.h b/src/mlp.h
index 68ff68d82ef2fe5907c1c4cb486fbc2dd2fb3595..86c8e0617d0461797e6e104c1b06c20fb62a4f64 100644
--- a/src/mlp.h
+++ b/src/mlp.h
@@ -33,9 +33,9 @@
 typedef struct {
 	int layers;
 	const int *topo;
-	const opus_val16 *weights;
+	const float *weights;
 } MLP;
 
-void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out);
+void mlp_process(const MLP *m, const float *in, float *out);
 
 #endif /* _MLP_H_ */
diff --git a/src/opus_encoder.c b/src/opus_encoder.c
index ac40edac7c742f1bf5528b0c30d398f8002421e5..c151fd078b08e862a93a1ecf688d193e97d22806 100644
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -95,10 +95,10 @@ struct OpusEncoder {
     int          silk_bw_switch;
     /* Sampling rate (at the API level) */
     int          first;
-    int          energy_masking;
+    opus_val16 * energy_masking;
     StereoWidthState width_mem;
     opus_val16   delay_buffer[MAX_ENCODER_BUFFER*2];
-#ifndef FIXED_POINT
+#ifndef DISABLE_FLOAT_API
     TonalityAnalysisState analysis;
     int          detected_bandwidth;
     int          analysis_offset;
@@ -201,7 +201,7 @@ int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int applicat
     st->silk_mode.payloadSize_ms            = 20;
     st->silk_mode.bitRate                   = 25000;
     st->silk_mode.packetLossPercentage      = 0;
-    st->silk_mode.complexity                = 10;
+    st->silk_mode.complexity                = 9;
     st->silk_mode.useInBandFEC              = 0;
     st->silk_mode.useDTX                    = 0;
     st->silk_mode.useCBR                    = 0;
@@ -212,7 +212,7 @@ int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int applicat
     if(err!=OPUS_OK)return OPUS_INTERNAL_ERROR;
 
     celt_encoder_ctl(celt_enc, CELT_SET_SIGNALLING(0));
-    celt_encoder_ctl(celt_enc, OPUS_SET_COMPLEXITY(10));
+    celt_encoder_ctl(celt_enc, OPUS_SET_COMPLEXITY(st->silk_mode.complexity));
 
     st->use_vbr = 1;
     /* Makes constrained VBR the default (safer for real-time use) */
@@ -551,7 +551,7 @@ static opus_int32 user_bitrate_to_bitrate(OpusEncoder *st, int frame_size, int m
     return st->user_bitrate_bps;
 }
 
-#ifndef FIXED_POINT
+#ifndef DISABLE_FLOAT_API
 /* Don't use more than 60 ms for the frame size analysis */
 #define MAX_DYNAMIC_FRAMESIZE 24
 /* Estimates how much the bitrate will be boosted based on the sub-frame energy */
@@ -685,32 +685,6 @@ static int transient_viterbi(const float *E, const float *E_1, int N, int frame_
    return best_state;
 }
 
-void downmix_float(const void *_x, float *sub, int subframe, int offset, int C)
-{
-   const float *x;
-   int c, j;
-   x = (const float *)_x;
-   for (j=0;j<subframe;j++)
-      sub[j] = x[(j+offset)*C];
-   for (c=1;c<C;c++)
-      for (j=0;j<subframe;j++)
-         sub[j] += x[(j+offset)*C+c];
-}
-
-void downmix_int(const void *_x, float *sub, int subframe, int offset, int C)
-{
-   const opus_int16 *x;
-   int c, j;
-   x = (const opus_int16 *)_x;
-   for (j=0;j<subframe;j++)
-      sub[j] = x[(j+offset)*C];
-   for (c=1;c<C;c++)
-      for (j=0;j<subframe;j++)
-         sub[j] += x[(j+offset)*C+c];
-   for (j=0;j<subframe;j++)
-      sub[j] *= (1.f/32768);
-}
-
 int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
                 int bitrate, opus_val16 tonality, opus_val32 *mem, int buffering,
                 downmix_func downmix)
@@ -723,10 +697,10 @@ int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
    int bestLM=0;
    int subframe;
    int pos;
-   VARDECL(opus_val16, sub);
+   VARDECL(opus_val32, sub);
 
    subframe = Fs/400;
-   ALLOC(sub, subframe, opus_val16);
+   ALLOC(sub, subframe, opus_val32);
    e[0]=mem[0];
    e_1[0]=1.f/(EPSILON+mem[0]);
    if (buffering)
@@ -754,7 +728,7 @@ int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
       int j;
       tmp=EPSILON;
 
-      downmix(x, sub, subframe, i*subframe, C);
+      downmix(x, sub, subframe, i*subframe, 0, -2, C);
       if (i==0)
          memx = sub[0];
       for (j=0;j<subframe;j++)
@@ -784,6 +758,76 @@ int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
 
 #endif
 
+#ifndef DISABLE_FLOAT_API
+void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C)
+{
+   const float *x;
+   opus_val32 scale;
+   int j;
+   x = (const float *)_x;
+   for (j=0;j<subframe;j++)
+      sub[j] = SCALEIN(x[(j+offset)*C+c1]);
+   if (c2>-1)
+   {
+      for (j=0;j<subframe;j++)
+         sub[j] += SCALEIN(x[(j+offset)*C+c2]);
+   } else if (c2==-2)
+   {
+      int c;
+      for (c=1;c<C;c++)
+      {
+         for (j=0;j<subframe;j++)
+            sub[j] += SCALEIN(x[(j+offset)*C+c]);
+      }
+   }
+#ifdef FIXED_POINT
+   scale = (1<<SIG_SHIFT);
+#else
+   scale = 1.f;
+#endif
+   if (C==-2)
+      scale /= C;
+   else
+      scale /= 2;
+   for (j=0;j<subframe;j++)
+      sub[j] *= scale;
+}
+#endif
+
+void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C)
+{
+   const opus_int16 *x;
+   opus_val32 scale;
+   int j;
+   x = (const opus_int16 *)_x;
+   for (j=0;j<subframe;j++)
+      sub[j] = x[(j+offset)*C+c1];
+   if (c2>-1)
+   {
+      for (j=0;j<subframe;j++)
+         sub[j] += x[(j+offset)*C+c2];
+   } else if (c2==-2)
+   {
+      int c;
+      for (c=1;c<C;c++)
+      {
+         for (j=0;j<subframe;j++)
+            sub[j] += x[(j+offset)*C+c];
+      }
+   }
+#ifdef FIXED_POINT
+   scale = (1<<SIG_SHIFT);
+#else
+   scale = 1.f/32768;
+#endif
+   if (C==-2)
+      scale /= C;
+   else
+      scale /= 2;
+   for (j=0;j<subframe;j++)
+      sub[j] *= scale;
+}
+
 opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs)
 {
    int new_size;
@@ -805,6 +849,29 @@ opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_
    return new_size;
 }
 
+opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size,
+      int variable_duration, int C, opus_int32 Fs, int bitrate_bps,
+      int delay_compensation, downmix_func downmix, opus_val32 *subframe_mem)
+{
+#ifndef DISABLE_FLOAT_API
+   if (variable_duration == OPUS_FRAMESIZE_VARIABLE && frame_size >= Fs/200)
+   {
+      int LM = 3;
+      LM = optimize_framesize(analysis_pcm, frame_size, C, Fs, bitrate_bps,
+            0, subframe_mem, delay_compensation, downmix);
+      while ((Fs/400<<LM)>frame_size)
+         LM--;
+      frame_size = (Fs/400<<LM);
+   } else
+#endif
+   {
+      frame_size = frame_size_select(frame_size, variable_duration, Fs);
+   }
+   if (frame_size<0)
+      return -1;
+   return frame_size;
+}
+
 opus_val16 compute_stereo_width(const opus_val16 *pcm, int frame_size, opus_int32 Fs, StereoWidthState *mem)
 {
    opus_val16 corr;
@@ -883,11 +950,8 @@ opus_val16 compute_stereo_width(const opus_val16 *pcm, int frame_size, opus_int3
 }
 
 opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
-                unsigned char *data, opus_int32 out_data_bytes, int lsb_depth
-#ifndef FIXED_POINT
-                , AnalysisInfo *analysis_info
-#endif
-                )
+                unsigned char *data, opus_int32 out_data_bytes, int lsb_depth,
+                const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, int analysis_channels, downmix_func downmix)
 {
     void *silk_enc;
     CELTEncoder *celt_enc;
@@ -916,6 +980,10 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
     opus_int32 max_data_bytes; /* Max number of bytes we're allowed to use */
     int total_buffer;
     opus_val16 stereo_width;
+    const CELTMode *celt_mode;
+    AnalysisInfo analysis_info;
+    int analysis_read_pos_bak=-1;
+    int analysis_read_subframe_bak=-1;
     VARDECL(opus_val16, tmp_prefill);
 
     ALLOC_STACK;
@@ -941,17 +1009,34 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
 
     lsb_depth = IMIN(lsb_depth, st->lsb_depth);
 
+    analysis_info.valid = 0;
+    celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode));
+#ifndef DISABLE_FLOAT_API
+#ifdef FIXED_POINT
+    if (st->silk_mode.complexity >= 10 && st->Fs==48000)
+#else
+    if (st->silk_mode.complexity >= 7 && st->Fs==48000)
+#endif
+    {
+       analysis_read_pos_bak = st->analysis.read_pos;
+       analysis_read_subframe_bak = st->analysis.read_subframe;
+       run_analysis(&st->analysis, celt_mode, analysis_pcm, analysis_size, frame_size,
+             c1, c2, analysis_channels, st->Fs,
+             lsb_depth, downmix, &analysis_info);
+    }
+#endif
+
     st->voice_ratio = -1;
 
-#ifndef FIXED_POINT
+#ifndef DISABLE_FLOAT_API
     st->detected_bandwidth = 0;
-    if (analysis_info->valid)
+    if (analysis_info.valid)
     {
        int analysis_bandwidth;
        if (st->signal_type == OPUS_AUTO)
-          st->voice_ratio = (int)floor(.5+100*(1-analysis_info->music_prob));
+          st->voice_ratio = (int)floor(.5+100*(1-analysis_info.music_prob));
 
-       analysis_bandwidth = analysis_info->bandwidth;
+       analysis_bandwidth = analysis_info.bandwidth;
        if (analysis_bandwidth<=12)
           st->detected_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
        else if (analysis_bandwidth<=14)
@@ -1281,6 +1366,11 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
        VARDECL(OpusRepacketizer, rp);
        opus_int32 bytes_per_frame;
 
+       if (analysis_read_pos_bak!= -1)
+       {
+          st->analysis.read_pos = analysis_read_pos_bak;
+          st->analysis.read_subframe = analysis_read_subframe_bak;
+       }
 
        nb_frames = frame_size > st->Fs/25 ? 3 : 2;
        bytes_per_frame = IMIN(1276,(out_data_bytes-3)/nb_frames);
@@ -1310,11 +1400,9 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
           /* When switching from SILK/Hybrid to CELT, only ask for a switch at the last frame */
           if (to_celt && i==nb_frames-1)
              st->user_forced_mode = MODE_CELT_ONLY;
-          tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50, tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth
-#ifndef FIXED_POINT
-                , analysis_info
-#endif
-                );
+          tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50,
+                tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth,
+                NULL, 0, c1, c2, analysis_channels, downmix);
           if (tmp_len<0)
           {
              RESTORE_STACK;
@@ -1419,6 +1507,45 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
             st->silk_mode.bitRate = total_bitRate;
         }
 
+        /* Surround masking for SILK */
+        if (st->energy_masking && st->use_vbr && !st->lfe)
+        {
+           opus_val32 mask_sum=0;
+           opus_val16 masking_depth;
+           opus_int32 rate_offset;
+           int c;
+           int end = 17;
+           opus_int16 srate = 16000;
+           if (st->bandwidth == OPUS_BANDWIDTH_NARROWBAND)
+           {
+              end = 13;
+              srate = 8000;
+           } else if (st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND)
+           {
+              end = 15;
+              srate = 12000;
+           }
+           for (c=0;c<st->channels;c++)
+           {
+              for(i=0;i<end;i++)
+              {
+                 opus_val16 mask;
+                 mask = MAX16(MIN16(st->energy_masking[21*c+i],
+                        QCONST16(.25f, DB_SHIFT)), -QCONST16(2.0f, DB_SHIFT));
+                 if (mask > 0)
+                    mask = HALF16(mask);
+                 mask_sum += mask;
+              }
+           }
+           /* Conservative rate reduction, we cut the masking in half */
+           masking_depth = HALF16(mask_sum / end*st->channels);
+           rate_offset = PSHR32(MULT16_16(srate, masking_depth), DB_SHIFT);
+           rate_offset = MAX32(rate_offset, -2*st->silk_mode.bitRate/3);
+           rate_offset += QCONST16(.4f, DB_SHIFT);
+           st->silk_mode.bitRate += rate_offset;
+           bytes_target += rate_offset * frame_size / (8 * st->Fs);
+        }
+
         st->silk_mode.payloadSize_ms = 1000 * frame_size / st->Fs;
         st->silk_mode.nChannelsAPI = st->channels;
         st->silk_mode.nChannelsInternal = st->stream_channels;
@@ -1476,9 +1603,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
         if (prefill)
         {
             opus_int32 zero=0;
-            const CELTMode *celt_mode;
             int prefill_offset;
-            celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode));
             /* Use a smooth onset for the SILK prefill to avoid the encoder trying to encode
                a discontinuity. The exact location is what we need to avoid leaving any "gap"
                in the audio when mixing with the redundant CELT frame. Here we can afford to
@@ -1589,12 +1714,12 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
             if (st->use_vbr)
             {
                 opus_int32 bonus=0;
-#ifndef FIXED_POINT
+#ifndef DISABLE_FLOAT_API
                 if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != st->Fs/50)
                 {
                    bonus = (60*st->stream_channels+40)*(st->Fs/frame_size-50);
-                   if (analysis_info->valid)
-                      bonus = (opus_int32)(bonus*(1.f+.5f*analysis_info->tonality));
+                   if (analysis_info.valid)
+                      bonus = (opus_int32)(bonus*(1.f+.5f*analysis_info.tonality));
                 }
 #endif
                 celt_encoder_ctl(celt_enc, OPUS_SET_VBR(1));
@@ -1625,9 +1750,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
     /* gain_fade() and stereo_fade() need to be after the buffer copying
        because we don't want any of this to affect the SILK part */
     if( st->prev_HB_gain < Q15ONE || HB_gain < Q15ONE ) {
-       const CELTMode *celt_mode;
-
-       celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode));
        gain_fade(pcm_buf, pcm_buf,
              st->prev_HB_gain, HB_gain, celt_mode->overlap, frame_size, st->channels, celt_mode->window, st->Fs);
     }
@@ -1638,9 +1760,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
         /* Apply stereo width reduction (at low bitrates) */
         if( st->hybrid_stereo_width_Q14 < (1 << 14) || st->silk_mode.stereoWidth_Q14 < (1 << 14) ) {
             opus_val16 g1, g2;
-            const CELTMode *celt_mode;
-
-            celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode));
             g1 = st->hybrid_stereo_width_Q14;
             g2 = (opus_val16)(st->silk_mode.stereoWidth_Q14);
 #ifdef FIXED_POINT
@@ -1697,9 +1816,9 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
        ec_enc_shrink(&enc, nb_compr_bytes);
     }
 
-#ifndef FIXED_POINT
+#ifndef DISABLE_FLOAT_API
     if (redundancy || st->mode != MODE_SILK_ONLY)
-       celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(analysis_info));
+       celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(&analysis_info));
 #endif
 
     /* 5 ms redundant frame for CELT->SILK */
@@ -1825,114 +1944,88 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
 #ifdef FIXED_POINT
 
 #ifndef DISABLE_FLOAT_API
-opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int frame_size,
+opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size,
       unsigned char *data, opus_int32 max_data_bytes)
 {
    int i, ret;
+   int frame_size;
+   int delay_compensation;
    VARDECL(opus_int16, in);
    ALLOC_STACK;
 
-   frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs);
-   if(frame_size<0)
-   {
-      RESTORE_STACK;
-      return OPUS_BAD_ARG;
-   }
+   if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+      delay_compensation = 0;
+   else
+      delay_compensation = st->delay_compensation;
+   frame_size = compute_frame_size(pcm, analysis_frame_size,
+         st->variable_duration, st->channels, st->Fs, st->bitrate_bps,
+         delay_compensation, downmix_float, st->analysis.subframe_mem);
 
    ALLOC(in, frame_size*st->channels, opus_int16);
 
    for (i=0;i<frame_size*st->channels;i++)
       in[i] = FLOAT2INT16(pcm[i]);
-   ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16);
+   ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_float);
    RESTORE_STACK;
    return ret;
 }
 #endif
 
-opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int frame_size,
+opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size,
                 unsigned char *data, opus_int32 out_data_bytes)
 {
-   frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs);
-   if(frame_size<0)
-   {
-      return OPUS_BAD_ARG;
-   }
-   return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16);
+   int frame_size;
+   int delay_compensation;
+   if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+      delay_compensation = 0;
+   else
+      delay_compensation = st->delay_compensation;
+   frame_size = compute_frame_size(pcm, analysis_frame_size,
+         st->variable_duration, st->channels, st->Fs, st->bitrate_bps,
+         delay_compensation, downmix_float, st->analysis.subframe_mem);
+   return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int);
 }
 
 #else
-opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int frame_size,
+opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size,
       unsigned char *data, opus_int32 max_data_bytes)
 {
    int i, ret;
-   const CELTMode *celt_mode;
+   int frame_size;
    int delay_compensation;
-   int lsb_depth;
    VARDECL(float, in);
-   AnalysisInfo analysis_info;
    ALLOC_STACK;
 
-   opus_encoder_ctl(st, CELT_GET_MODE(&celt_mode));
    if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
       delay_compensation = 0;
    else
       delay_compensation = st->delay_compensation;
-
-   lsb_depth = IMIN(16, st->lsb_depth);
-
-   analysis_info.valid = 0;
-   if (st->silk_mode.complexity >= 7 && st->Fs==48000)
-   {
-      frame_size = run_analysis(&st->analysis, celt_mode, pcm, pcm+st->channels*st->analysis.analysis_offset,
-            frame_size, st->variable_duration, st->channels, st->Fs, st->bitrate_bps, delay_compensation, lsb_depth, downmix_int, &analysis_info);
-   } else {
-      frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs);
-   }
-   if(frame_size<0)
-   {
-      RESTORE_STACK;
-      return OPUS_BAD_ARG;
-   }
+   frame_size = compute_frame_size(pcm, analysis_frame_size,
+         st->variable_duration, st->channels, st->Fs, st->bitrate_bps,
+         delay_compensation, downmix_float, st->analysis.subframe_mem);
 
    ALLOC(in, frame_size*st->channels, float);
 
    for (i=0;i<frame_size*st->channels;i++)
       in[i] = (1.0f/32768)*pcm[i];
-   ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, &analysis_info);
+   ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int);
    RESTORE_STACK;
    return ret;
 }
-opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int frame_size,
+opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size,
                       unsigned char *data, opus_int32 out_data_bytes)
 {
-   const CELTMode *celt_mode;
+   int frame_size;
    int delay_compensation;
-   int lsb_depth;
-   AnalysisInfo analysis_info;
-
-   opus_encoder_ctl(st, CELT_GET_MODE(&celt_mode));
    if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
       delay_compensation = 0;
    else
       delay_compensation = st->delay_compensation;
-
-   lsb_depth = IMIN(24, st->lsb_depth);
-
-   analysis_info.valid = 0;
-   if (st->silk_mode.complexity >= 7 && st->Fs==48000)
-   {
-      frame_size = run_analysis(&st->analysis, celt_mode, pcm, pcm+st->channels*st->analysis.analysis_offset,
-            frame_size, st->variable_duration, st->channels, st->Fs, st->bitrate_bps, delay_compensation, lsb_depth, downmix_float, &analysis_info);
-   } else {
-      frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs);
-   }
-   if(frame_size<0)
-   {
-      return OPUS_BAD_ARG;
-   }
-
-   return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24, &analysis_info);
-
+   frame_size = compute_frame_size(pcm, analysis_frame_size,
+         st->variable_duration, st->channels, st->Fs, st->bitrate_bps,
+         delay_compensation, downmix_float, st->analysis.subframe_mem);
+   return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24,
+                             pcm, analysis_frame_size, 0, -2, st->channels, downmix_float);
 }
 #endif
 
@@ -2349,20 +2442,10 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...)
             ret = celt_encoder_ctl(celt_enc, OPUS_SET_LFE(value));
         }
         break;
-        case OPUS_SET_ENERGY_SAVE_REQUEST:
-        {
-            opus_val16 *value = va_arg(ap, opus_val16*);
-            if (!value)
-            {
-               goto bad_arg;
-            }
-            ret = celt_encoder_ctl(celt_enc, OPUS_SET_ENERGY_SAVE(value));
-        }
-        break;
         case OPUS_SET_ENERGY_MASK_REQUEST:
         {
             opus_val16 *value = va_arg(ap, opus_val16*);
-            st->energy_masking = (value!=NULL);
+            st->energy_masking = value;
             ret = celt_encoder_ctl(celt_enc, OPUS_SET_ENERGY_MASK(value));
         }
         break;
diff --git a/src/opus_multistream_encoder.c b/src/opus_multistream_encoder.c
index 4cddbffd552495b8a1dfa0caa665b28d504d9cb3..31447bf9c19383890e96baaca2eac4c1a20e53a6 100644
--- a/src/opus_multistream_encoder.c
+++ b/src/opus_multistream_encoder.c
@@ -36,8 +36,11 @@
 #include <stdarg.h>
 #include "float_cast.h"
 #include "os_support.h"
-#include "analysis.h"
 #include "mathops.h"
+#include "mdct.h"
+#include "modes.h"
+#include "bands.h"
+#include "quant_bands.h"
 
 typedef struct {
    int nb_streams;
@@ -57,17 +60,65 @@ static const VorbisLayout vorbis_mappings[8] = {
       {5, 3, {0, 6, 1, 2, 3, 4, 5, 7}}, /* 8: 7.1 surround */
 };
 
+typedef void (*opus_copy_channel_in_func)(
+  opus_val16 *dst,
+  int dst_stride,
+  const void *src,
+  int src_stride,
+  int src_channel,
+  int frame_size
+);
+
 struct OpusMSEncoder {
-   TonalityAnalysisState analysis;
    ChannelLayout layout;
    int lfe_stream;
+   int application;
    int variable_duration;
    int surround;
    opus_int32 bitrate_bps;
    opus_val32 subframe_mem[3];
    /* Encoder states go here */
+   /* then opus_val32 window_mem[channels*120]; */
+   /* then opus_val32 preemph_mem[channels]; */
 };
 
+static opus_val32 *ms_get_preemph_mem(OpusMSEncoder *st)
+{
+   int s;
+   char *ptr;
+   int coupled_size, mono_size;
+
+   coupled_size = opus_encoder_get_size(2);
+   mono_size = opus_encoder_get_size(1);
+   ptr = (char*)st + align(sizeof(OpusMSEncoder));
+   for (s=0;s<st->layout.nb_streams;s++)
+   {
+      if (s < st->layout.nb_coupled_streams)
+         ptr += align(coupled_size);
+      else
+         ptr += align(mono_size);
+   }
+   return (opus_val32*)(ptr+st->layout.nb_channels*120*sizeof(opus_val32));
+}
+
+static opus_val32 *ms_get_window_mem(OpusMSEncoder *st)
+{
+   int s;
+   char *ptr;
+   int coupled_size, mono_size;
+
+   coupled_size = opus_encoder_get_size(2);
+   mono_size = opus_encoder_get_size(1);
+   ptr = (char*)st + align(sizeof(OpusMSEncoder));
+   for (s=0;s<st->layout.nb_streams;s++)
+   {
+      if (s < st->layout.nb_coupled_streams)
+         ptr += align(coupled_size);
+      else
+         ptr += align(mono_size);
+   }
+   return (opus_val32*)ptr;
+}
 
 static int validate_encoder_layout(const ChannelLayout *layout)
 {
@@ -88,6 +139,206 @@ static int validate_encoder_layout(const ChannelLayout *layout)
    return 1;
 }
 
+static void channel_pos(int channels, int pos[8])
+{
+   /* Position in the mix: 0 don't mix, 1: left, 2: center, 3:right */
+   if (channels==4)
+   {
+      pos[0]=1;
+      pos[1]=3;
+      pos[2]=1;
+      pos[3]=3;
+   } else if (channels==3||channels==5||channels==6)
+   {
+      pos[0]=1;
+      pos[1]=2;
+      pos[2]=3;
+      pos[3]=1;
+      pos[4]=3;
+      pos[5]=0;
+   } else if (channels==7)
+   {
+      pos[0]=1;
+      pos[1]=2;
+      pos[2]=3;
+      pos[3]=1;
+      pos[4]=3;
+      pos[5]=2;
+      pos[6]=0;
+   } else if (channels==8)
+   {
+      pos[0]=1;
+      pos[1]=2;
+      pos[2]=3;
+      pos[3]=1;
+      pos[4]=3;
+      pos[5]=1;
+      pos[6]=3;
+      pos[7]=0;
+   }
+}
+
+#if 1
+/* Computes a rough approximation of log2(2^a + 2^b) */
+static opus_val16 logSum(opus_val16 a, opus_val16 b)
+{
+   opus_val16 max;
+   opus_val32 diff;
+   opus_val16 frac;
+   static const opus_val16 diff_table[17] = {
+         QCONST16(0.5000000f, DB_SHIFT), QCONST16(0.2924813f, DB_SHIFT), QCONST16(0.1609640f, DB_SHIFT), QCONST16(0.0849625f, DB_SHIFT),
+         QCONST16(0.0437314f, DB_SHIFT), QCONST16(0.0221971f, DB_SHIFT), QCONST16(0.0111839f, DB_SHIFT), QCONST16(0.0056136f, DB_SHIFT),
+         QCONST16(0.0028123f, DB_SHIFT)
+   };
+   int low;
+   if (a>b)
+   {
+      max = a;
+      diff = SUB32(EXTEND32(a),EXTEND32(b));
+   } else {
+      max = b;
+      diff = SUB32(EXTEND32(b),EXTEND32(a));
+   }
+   if (diff >= QCONST16(8.f, DB_SHIFT))
+      return max;
+#ifdef FIXED_POINT
+   low = SHR32(diff, DB_SHIFT-1);
+   frac = SHL16(diff - SHL16(low, DB_SHIFT-1), 16-DB_SHIFT);
+#else
+   low = floor(2*diff);
+   frac = 2*diff - low;
+#endif
+   return max + diff_table[low] + MULT16_16_Q15(frac, SUB16(diff_table[low+1], diff_table[low]));
+}
+#else
+opus_val16 logSum(opus_val16 a, opus_val16 b)
+{
+   return log2(pow(4, a)+ pow(4, b))/2;
+}
+#endif
+
+void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *bandLogE, opus_val32 *mem, opus_val32 *preemph_mem,
+      int len, int overlap, int channels, int rate, opus_copy_channel_in_func copy_channel_in
+)
+{
+   int c;
+   int i;
+   int LM;
+   int pos[8] = {0};
+   int upsample;
+   int frame_size;
+   opus_val32 bandE[21];
+   opus_val16 maskLogE[3][21];
+   VARDECL(opus_val32, in);
+   VARDECL(opus_val16, x);
+   VARDECL(opus_val32, out);
+   SAVE_STACK;
+
+   upsample = resampling_factor(rate);
+   frame_size = len*upsample;
+
+   for (LM=0;LM<=celt_mode->maxLM;LM++)
+      if (celt_mode->shortMdctSize<<LM==frame_size)
+         break;
+
+   ALLOC(in, frame_size+overlap, opus_val32);
+   ALLOC(x, len, opus_val16);
+   ALLOC(freq, frame_size, opus_val32);
+
+   channel_pos(channels, pos);
+
+   for (c=0;c<3;c++)
+      for (i=0;i<21;i++)
+         maskLogE[c][i] = -QCONST16(28.f, DB_SHIFT);
+
+   for (c=0;c<channels;c++)
+   {
+      OPUS_COPY(in, mem+c*overlap, overlap);
+      (*copy_channel_in)(x, 1, pcm, channels, c, len);
+      preemphasis(x, in+overlap, frame_size, 1, upsample, celt_mode->preemph, preemph_mem+c, 0);
+      clt_mdct_forward(&celt_mode->mdct, in, freq, celt_mode->window, overlap, celt_mode->maxLM-LM, 1);
+      if (upsample != 1)
+      {
+         int bound = len;
+         for (i=0;i<bound;i++)
+            freq[i] *= upsample;
+         for (;i<frame_size;i++)
+            freq[i] = 0;
+      }
+
+      compute_band_energies(celt_mode, freq, bandE, 21, 1, 1<<LM);
+      amp2Log2(celt_mode, 21, 21, bandE, bandLogE+21*c, 1);
+      /* Apply spreading function with -6 dB/band going up and -12 dB/band going down. */
+      for (i=1;i<21;i++)
+         bandLogE[21*c+i] = MAX16(bandLogE[21*c+i], bandLogE[21*c+i-1]-QCONST16(1.f, DB_SHIFT));
+      for (i=19;i>=0;i--)
+         bandLogE[21*c+i] = MAX16(bandLogE[21*c+i], bandLogE[21*c+i+1]-QCONST16(2.f, DB_SHIFT));
+      if (pos[c]==1)
+      {
+         for (i=0;i<21;i++)
+            maskLogE[0][i] = logSum(maskLogE[0][i], bandLogE[21*c+i]);
+      } else if (pos[c]==3)
+      {
+         for (i=0;i<21;i++)
+            maskLogE[2][i] = logSum(maskLogE[2][i], bandLogE[21*c+i]);
+      } else if (pos[c]==2)
+      {
+         for (i=0;i<21;i++)
+         {
+            maskLogE[0][i] = logSum(maskLogE[0][i], bandLogE[21*c+i]-QCONST16(.5f, DB_SHIFT));
+            maskLogE[2][i] = logSum(maskLogE[2][i], bandLogE[21*c+i]-QCONST16(.5f, DB_SHIFT));
+         }
+      }
+#if 0
+      for (i=0;i<21;i++)
+         printf("%f ", bandLogE[21*c+i]);
+//#else
+      float sum=0;
+      for (i=0;i<21;i++)
+         sum += bandLogE[21*c+i];
+      printf("%f ", sum/21);
+#endif
+      OPUS_COPY(mem+c*overlap, in+frame_size, overlap);
+   }
+   for (i=0;i<21;i++)
+      maskLogE[1][i] = MIN32(maskLogE[0][i],maskLogE[2][i]);
+   for (c=0;c<3;c++)
+      for (i=0;i<21;i++)
+         maskLogE[c][i] += QCONST16(.5f, DB_SHIFT)*log2(2.f/(channels-1));
+#if 0
+   for (c=0;c<3;c++)
+   {
+      for (i=0;i<21;i++)
+         printf("%f ", maskLogE[c][i]);
+   }
+#endif
+   for (c=0;c<channels;c++)
+   {
+      opus_val16 *mask;
+      if (pos[c]!=0)
+      {
+         mask = &maskLogE[pos[c]-1][0];
+         for (i=0;i<21;i++)
+            bandLogE[21*c+i] = bandLogE[21*c+i] - mask[i];
+      } else {
+         for (i=0;i<21;i++)
+            bandLogE[21*c+i] = 0;
+      }
+#if 0
+      for (i=0;i<21;i++)
+         printf("%f ", bandLogE[21*c+i]);
+      printf("\n");
+#endif
+#if 0
+      float sum=0;
+      for (i=0;i<21;i++)
+         sum += bandLogE[21*c+i];
+      printf("%f ", sum/21);
+      printf("\n");
+#endif
+   }
+   RESTORE_STACK;
+}
 
 opus_int32 opus_multistream_encoder_get_size(int nb_streams, int nb_coupled_streams)
 {
@@ -132,7 +383,9 @@ opus_int32 opus_multistream_surround_encoder_get_size(int channels, int mapping_
       return 0;
    size = opus_multistream_encoder_get_size(nb_streams, nb_coupled_streams);
    if (channels>2)
-      size += align(opus_encoder_get_size(2));
+   {
+      size += channels*(120*sizeof(opus_val32) + sizeof(opus_val32));
+   }
    return size;
 }
 
@@ -161,10 +414,10 @@ static int opus_multistream_encoder_init_impl(
    st->layout.nb_streams = streams;
    st->layout.nb_coupled_streams = coupled_streams;
    st->subframe_mem[0]=st->subframe_mem[1]=st->subframe_mem[2]=0;
-   OPUS_CLEAR(&st->analysis,1);
    if (!surround)
       st->lfe_stream = -1;
    st->bitrate_bps = OPUS_AUTO;
+   st->application = application;
    st->variable_duration = OPUS_FRAMESIZE_ARG;
    for (i=0;i<st->layout.nb_channels;i++)
       st->layout.mapping[i] = mapping[i];
@@ -192,10 +445,8 @@ static int opus_multistream_encoder_init_impl(
    }
    if (surround)
    {
-      OpusEncoder *downmix_enc;
-      downmix_enc = (OpusEncoder*)ptr;
-      ret = opus_encoder_init(downmix_enc, Fs, 2, OPUS_APPLICATION_AUDIO);
-      if(ret!=OPUS_OK)return ret;
+      OPUS_CLEAR(ms_get_preemph_mem(st), channels);
+      OPUS_CLEAR(ms_get_window_mem(st), channels*120);
    }
    st->surround = surround;
    return OPUS_OK;
@@ -339,22 +590,6 @@ OpusMSEncoder *opus_multistream_surround_encoder_create(
    return st;
 }
 
-typedef void (*opus_copy_channel_in_func)(
-  opus_val16 *dst,
-  int dst_stride,
-  const void *src,
-  int src_stride,
-  int src_channel,
-  int frame_size
-);
-
-typedef void (*opus_surround_downmix_funct)(
-  opus_val16 *dst,
-  const void *src,
-  int channels,
-  int frame_size
-);
-
 static void surround_rate_allocation(
       OpusMSEncoder *st,
       opus_int32 *rate,
@@ -433,15 +668,11 @@ static int opus_multistream_encode_native
     OpusMSEncoder *st,
     opus_copy_channel_in_func copy_channel_in,
     const void *pcm,
-    int frame_size,
+    int analysis_frame_size,
     unsigned char *data,
     opus_int32 max_data_bytes,
     int lsb_depth,
-    opus_surround_downmix_funct surround_downmix
-#ifndef FIXED_POINT
-    , downmix_func downmix
-    , const void *pcm_analysis
-#endif
+    downmix_func downmix
 )
 {
    opus_int32 Fs;
@@ -451,31 +682,29 @@ static int opus_multistream_encode_native
    char *ptr;
    int tot_size;
    VARDECL(opus_val16, buf);
+   VARDECL(opus_val16, bandSMR);
    unsigned char tmp_data[MS_FRAME_TMP];
    OpusRepacketizer rp;
    opus_int32 complexity;
-#ifndef FIXED_POINT
-   AnalysisInfo analysis_info;
-#endif
    const CELTMode *celt_mode;
    opus_int32 bitrates[256];
    opus_val16 bandLogE[42];
-   opus_val16 bandLogE_mono[21];
+   opus_val32 *mem = NULL;
+   opus_val32 *preemph_mem=NULL;
+   int frame_size;
    ALLOC_STACK;
 
+   if (st->surround)
+   {
+      preemph_mem = ms_get_preemph_mem(st);
+      mem = ms_get_window_mem(st);
+   }
+
    ptr = (char*)st + align(sizeof(OpusMSEncoder));
    opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_SAMPLE_RATE(&Fs));
    opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_COMPLEXITY(&complexity));
    opus_encoder_ctl((OpusEncoder*)ptr, CELT_GET_MODE(&celt_mode));
 
-   if (400*frame_size < Fs)
-   {
-      RESTORE_STACK;
-      return OPUS_BAD_ARG;
-   }
-#ifndef FIXED_POINT
-   analysis_info.valid = 0;
-   if (complexity >= 7 && Fs==48000)
    {
       opus_int32 delay_compensation;
       int channels;
@@ -483,13 +712,15 @@ static int opus_multistream_encode_native
       channels = st->layout.nb_streams + st->layout.nb_coupled_streams;
       opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_LOOKAHEAD(&delay_compensation));
       delay_compensation -= Fs/400;
+      frame_size = compute_frame_size(pcm, analysis_frame_size,
+            st->variable_duration, channels, Fs, st->bitrate_bps,
+            delay_compensation, downmix, st->subframe_mem);
+   }
 
-      frame_size = run_analysis(&st->analysis, celt_mode, pcm, pcm_analysis,
-            frame_size, st->variable_duration, channels, Fs, st->bitrate_bps, delay_compensation, lsb_depth, downmix, &analysis_info);
-   } else
-#endif
+   if (400*frame_size < Fs)
    {
-      frame_size = frame_size_select(frame_size, st->variable_duration, Fs);
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
    }
    /* Validate frame_size before using it to allocate stack space.
       This mirrors the checks in opus_encode[_float](). */
@@ -504,42 +735,10 @@ static int opus_multistream_encode_native
    coupled_size = opus_encoder_get_size(2);
    mono_size = opus_encoder_get_size(1);
 
+   ALLOC(bandSMR, 21*st->layout.nb_channels, opus_val16);
    if (st->surround)
    {
-      int i;
-      unsigned char dummy[512];
-      /* Temporary kludge -- remove */
-      OpusEncoder *downmix_enc;
-
-      ptr = (char*)st + align(sizeof(OpusMSEncoder));
-      for (s=0;s<st->layout.nb_streams;s++)
-      {
-         if (s < st->layout.nb_coupled_streams)
-            ptr += align(coupled_size);
-         else
-            ptr += align(mono_size);
-      }
-      downmix_enc = (OpusEncoder*)ptr;
-      surround_downmix(buf, pcm, st->layout.nb_channels, frame_size);
-      opus_encoder_ctl(downmix_enc, OPUS_SET_ENERGY_SAVE(bandLogE));
-      opus_encoder_ctl(downmix_enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND));
-      opus_encoder_ctl(downmix_enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY));
-      opus_encoder_ctl(downmix_enc, OPUS_SET_FORCE_CHANNELS(2));
-      opus_encode_native(downmix_enc, buf, frame_size, dummy, 512, lsb_depth
-#ifndef FIXED_POINT
-            , &analysis_info
-#endif
-            );
-      /* Combines the left and right mask into a centre mask. We
-         use an approximation for the log of the sum of the energies. */
-      for(i=0;i<21;i++)
-      {
-         opus_val16 diff;
-         diff = ABS16(SUB16(bandLogE[i], bandLogE[21+i]));
-         diff = diff + HALF16(diff);
-         diff = SHR32(HALF32(celt_exp2(-diff)), 16-DB_SHIFT);
-         bandLogE_mono[i] = MAX16(bandLogE[i], bandLogE[21+i]) + diff;
-      }
+      surround_analysis(celt_mode, pcm, bandSMR, mem, preemph_mem, frame_size, 120, st->layout.nb_channels, Fs, copy_channel_in);
    }
 
    if (max_data_bytes < 4*st->layout.nb_streams-1)
@@ -563,10 +762,24 @@ static int opus_multistream_encode_native
       opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrates[s]));
       if (st->surround)
       {
-         opus_encoder_ctl(enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY));
-         opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND));
+         opus_int32 equiv_rate;
+         equiv_rate = st->bitrate_bps;
+         if (frame_size*50 < Fs)
+            equiv_rate -= 60*(Fs/frame_size - 50)*st->layout.nb_channels;
+         if (equiv_rate > 112000)
+            opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND));
+         else if (equiv_rate > 76000)
+            opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_SUPERWIDEBAND));
+         else if (equiv_rate > 48000)
+            opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_WIDEBAND));
+         else
+            opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_NARROWBAND));
          if (s < st->layout.nb_coupled_streams)
+         {
+            /* To preserve the spatial image, force stereo CELT on coupled streams */
+            opus_encoder_ctl(enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY));
             opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(2));
+         }
       }
    }
 
@@ -578,11 +791,13 @@ static int opus_multistream_encode_native
       OpusEncoder *enc;
       int len;
       int curr_max;
+      int c1, c2;
 
       opus_repacketizer_init(&rp);
       enc = (OpusEncoder*)ptr;
       if (s < st->layout.nb_coupled_streams)
       {
+         int i;
          int left, right;
          left = get_left_channel(&st->layout, s, -1);
          right = get_right_channel(&st->layout, s, -1);
@@ -591,28 +806,39 @@ static int opus_multistream_encode_native
          (*copy_channel_in)(buf+1, 2,
             pcm, st->layout.nb_channels, right, frame_size);
          ptr += align(coupled_size);
-         /* FIXME: This isn't correct for the coupled center channels in
-            6.1 surround configuration */
          if (st->surround)
-            opus_encoder_ctl(enc, OPUS_SET_ENERGY_MASK(bandLogE));
+         {
+            for (i=0;i<21;i++)
+            {
+               bandLogE[i] = bandSMR[21*left+i];
+               bandLogE[21+i] = bandSMR[21*right+i];
+            }
+         }
+         c1 = left;
+         c2 = right;
       } else {
+         int i;
          int chan = get_mono_channel(&st->layout, s, -1);
          (*copy_channel_in)(buf, 1,
             pcm, st->layout.nb_channels, chan, frame_size);
          ptr += align(mono_size);
          if (st->surround)
-            opus_encoder_ctl(enc, OPUS_SET_ENERGY_MASK(bandLogE_mono));
+         {
+            for (i=0;i<21;i++)
+               bandLogE[i] = bandSMR[21*chan+i];
+         }
+         c1 = chan;
+         c2 = -1;
       }
+      if (st->surround)
+         opus_encoder_ctl(enc, OPUS_SET_ENERGY_MASK(bandLogE));
       /* number of bytes left (+Toc) */
       curr_max = max_data_bytes - tot_size;
       /* Reserve three bytes for the last stream and four for the others */
       curr_max -= IMAX(0,4*(st->layout.nb_streams-s-1)-1);
       curr_max = IMIN(curr_max,MS_FRAME_TMP);
-      len = opus_encode_native(enc, buf, frame_size, tmp_data, curr_max, lsb_depth
-#ifndef FIXED_POINT
-            , &analysis_info
-#endif
-            );
+      len = opus_encode_native(enc, buf, frame_size, tmp_data, curr_max, lsb_depth,
+            pcm, analysis_frame_size, c1, c2, st->layout.nb_channels, downmix);
       if (len<0)
       {
          RESTORE_STACK;
@@ -626,50 +852,12 @@ static int opus_multistream_encode_native
       data += len;
       tot_size += len;
    }
+   /*printf("\n");*/
    RESTORE_STACK;
    return tot_size;
 
 }
 
-static void channel_pos(int channels, int pos[8])
-{
-   /* Position in the mix: 0 don't mix, 1: left, 2: center, 3:right */
-   if (channels==4)
-   {
-      pos[0]=1;
-      pos[1]=3;
-      pos[2]=1;
-      pos[3]=3;
-   } else if (channels==3||channels==5||channels==6)
-   {
-      pos[0]=1;
-      pos[1]=2;
-      pos[2]=3;
-      pos[3]=1;
-      pos[4]=3;
-      pos[5]=0;
-   } else if (channels==7)
-   {
-      pos[0]=1;
-      pos[1]=2;
-      pos[2]=3;
-      pos[3]=1;
-      pos[4]=3;
-      pos[5]=2;
-      pos[6]=0;
-   } else if (channels==8)
-   {
-      pos[0]=1;
-      pos[1]=2;
-      pos[2]=3;
-      pos[3]=1;
-      pos[4]=3;
-      pos[5]=1;
-      pos[6]=3;
-      pos[7]=0;
-   }
-}
-
 #if !defined(DISABLE_FLOAT_API)
 static void opus_copy_channel_in_float(
   opus_val16 *dst,
@@ -690,57 +878,6 @@ static void opus_copy_channel_in_float(
       dst[i*dst_stride] = float_src[i*src_stride+src_channel];
 #endif
 }
-
-static void opus_surround_downmix_float(
-  opus_val16 *dst,
-  const void *src,
-  int channels,
-  int frame_size
-)
-{
-   const float *float_src;
-   opus_int32 i;
-   int pos[8] = {0};
-   int c;
-   float_src = (const float *)src;
-
-   channel_pos(channels, pos);
-   for (i=0;i<2*frame_size;i++)
-      dst[i]=0;
-
-   for (c=0;c<channels;c++)
-   {
-      if (pos[c]==1)
-      {
-         for (i=0;i<frame_size;i++)
-#if defined(FIXED_POINT)
-            dst[2*i] += SHR16(FLOAT2INT16(float_src[i*channels+c]),3);
-#else
-            dst[2*i] += float_src[i*channels+c];
-#endif
-      } else if (pos[c]==3)
-      {
-         for (i=0;i<frame_size;i++)
-#if defined(FIXED_POINT)
-            dst[2*i+1] += SHR16(FLOAT2INT16(float_src[i*channels+c]),3);
-#else
-            dst[2*i+1] += float_src[i*channels+c];
-#endif
-      } else if (pos[c]==2)
-      {
-         for (i=0;i<frame_size;i++)
-         {
-#if defined(FIXED_POINT)
-            dst[2*i] += SHR32(MULT16_16(QCONST16(.70711f,15), FLOAT2INT16(float_src[i*channels+c])),3+15);
-            dst[2*i+1] += SHR32(MULT16_16(QCONST16(.70711f,15), FLOAT2INT16(float_src[i*channels+c])),3+15);
-#else
-            dst[2*i] += .707f*float_src[i*channels+c];
-            dst[2*i+1] += .707f*float_src[i*channels+c];
-#endif
-         }
-      }
-   }
-}
 #endif
 
 static void opus_copy_channel_in_short(
@@ -763,57 +900,6 @@ static void opus_copy_channel_in_short(
 #endif
 }
 
-static void opus_surround_downmix_short(
-  opus_val16 *dst,
-  const void *src,
-  int channels,
-  int frame_size
-)
-{
-   const opus_int16 *short_src;
-   opus_int32 i;
-   int pos[8] = {0};
-   int c;
-   short_src = (const opus_int16 *)src;
-
-   channel_pos(channels, pos);
-   for (i=0;i<2*frame_size;i++)
-      dst[i]=0;
-
-   for (c=0;c<channels;c++)
-   {
-      if (pos[c]==1)
-      {
-         for (i=0;i<frame_size;i++)
-#if defined(FIXED_POINT)
-            dst[2*i] += SHR16(short_src[i*channels+c],3);
-#else
-            dst[2*i] += (1/32768.f)*short_src[i*channels+c];
-#endif
-      } else if (pos[c]==3)
-      {
-         for (i=0;i<frame_size;i++)
-#if defined(FIXED_POINT)
-            dst[2*i+1] += SHR16(short_src[i*channels+c],3);
-#else
-            dst[2*i+1] += (1/32768.f)*short_src[i*channels+c];
-#endif
-      } else if (pos[c]==2)
-      {
-         for (i=0;i<frame_size;i++)
-         {
-#if defined(FIXED_POINT)
-            dst[2*i] += SHR32(MULT16_16(QCONST16(.70711f,15), short_src[i*channels+c]),3+15);
-            dst[2*i+1] += SHR32(MULT16_16(QCONST16(.70711f,15), short_src[i*channels+c]),3+15);
-#else
-            dst[2*i] += (.707f/32768.f)*short_src[i*channels+c];
-            dst[2*i+1] += (.707f/32768.f)*short_src[i*channels+c];
-#endif
-         }
-      }
-   }
-}
-
 
 #ifdef FIXED_POINT
 int opus_multistream_encode(
@@ -825,7 +911,7 @@ int opus_multistream_encode(
 )
 {
    return opus_multistream_encode_native(st, opus_copy_channel_in_short,
-      pcm, frame_size, data, max_data_bytes, 16, opus_surround_downmix_short);
+      pcm, frame_size, data, max_data_bytes, 16, downmix_int);
 }
 
 #ifndef DISABLE_FLOAT_API
@@ -838,7 +924,7 @@ int opus_multistream_encode_float(
 )
 {
    return opus_multistream_encode_native(st, opus_copy_channel_in_float,
-      pcm, frame_size, data, max_data_bytes, 16, opus_surround_downmix_float);
+      pcm, frame_size, data, max_data_bytes, 16, downmix_float);
 }
 #endif
 
@@ -853,9 +939,8 @@ int opus_multistream_encode_float
     opus_int32 max_data_bytes
 )
 {
-   int channels = st->layout.nb_streams + st->layout.nb_coupled_streams;
    return opus_multistream_encode_native(st, opus_copy_channel_in_float,
-      pcm, frame_size, data, max_data_bytes, 24, opus_surround_downmix_float, downmix_float, pcm+channels*st->analysis.analysis_offset);
+      pcm, frame_size, data, max_data_bytes, 24, downmix_float);
 }
 
 int opus_multistream_encode(
@@ -866,9 +951,8 @@ int opus_multistream_encode(
     opus_int32 max_data_bytes
 )
 {
-   int channels = st->layout.nb_streams + st->layout.nb_coupled_streams;
    return opus_multistream_encode_native(st, opus_copy_channel_in_short,
-      pcm, frame_size, data, max_data_bytes, 16, opus_surround_downmix_short, downmix_int, pcm+channels*st->analysis.analysis_offset);
+      pcm, frame_size, data, max_data_bytes, 16, downmix_int);
 }
 #endif
 
diff --git a/src/opus_private.h b/src/opus_private.h
index 9d8210b5f05441812946dfb360c13a447429e1e6..0e739ebb0b0ed5691425b9a73a78805a930b3e73 100644
--- a/src/opus_private.h
+++ b/src/opus_private.h
@@ -82,9 +82,9 @@ int get_mono_channel(const ChannelLayout *layout, int stream_id, int prev);
 #define OPUS_SET_FORCE_MODE_REQUEST    11002
 #define OPUS_SET_FORCE_MODE(x) OPUS_SET_FORCE_MODE_REQUEST, __opus_check_int(x)
 
-typedef void (*downmix_func)(const void *, float *, int, int, int);
-void downmix_float(const void *_x, float *sub, int subframe, int offset, int C);
-void downmix_int(const void *_x, float *sub, int subframe, int offset, int C);
+typedef void (*downmix_func)(const void *, opus_val32 *, int, int, int, int, int);
+void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C);
+void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C);
 
 int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
                 int bitrate, opus_val16 tonality, opus_val32 *mem, int buffering,
@@ -94,12 +94,13 @@ int encode_size(int size, unsigned char *data);
 
 opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs);
 
+opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size,
+      int variable_duration, int C, opus_int32 Fs, int bitrate_bps,
+      int delay_compensation, downmix_func downmix, opus_val32 *subframe_mem);
+
 opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
-      unsigned char *data, opus_int32 out_data_bytes, int lsb_depth
-#ifndef FIXED_POINT
-                , AnalysisInfo *analysis_info
-#endif
-      );
+      unsigned char *data, opus_int32 out_data_bytes, int lsb_depth,
+      const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, int analysis_channels, downmix_func downmix);
 
 int opus_decode_native(OpusDecoder *st, const unsigned char *data, opus_int32 len,
       opus_val16 *pcm, int frame_size, int decode_fec, int self_delimited,
diff --git a/src/tansig_table.h b/src/tansig_table.h
index 885ea3e8d7dbd35dace96078db39c53d808557a2..c76f844a72f428eb0c1abf87f399acc8074118ca 100644
--- a/src/tansig_table.h
+++ b/src/tansig_table.h
@@ -1,6 +1,6 @@
 /* This file is auto-generated by gen_tables */
 
-static const opus_val16 tansig_table[201] = {
+static const float tansig_table[201] = {
 0.000000f, 0.039979f, 0.079830f, 0.119427f, 0.158649f,
 0.197375f, 0.235496f, 0.272905f, 0.309507f, 0.345214f,
 0.379949f, 0.413644f, 0.446244f, 0.477700f, 0.507977f,