diff --git a/silk/control_codec.c b/silk/control_codec.c
index b1d3bab9fb1d10fbf0d902d073b12ff957a18025..64b109152641a3810cfd793462bbd3ac906ff1fb 100644
--- a/silk/control_codec.c
+++ b/silk/control_codec.c
@@ -37,6 +37,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #endif
 #include "tuning_parameters.h"
 
+
+static const int enc_delay_matrix[3][5] = { /* Encoder delay in samples, indexed [rateID(internal rate)][rateID(API rate)]; entries must not exceed MAX_ENCODER_DELAY */
+/* Rows: internal SILK rate (kHz). Columns: API rate (kHz) = 8  12  16  24  48 */
+/* 8 */   {5,  0,  3,  4,  8},
+/*12 */   {0,  6,  0,  0,  0},
+/*16 */   {4,  5, 11,  5, 18}
+};
+
 opus_int silk_setup_resamplers(
     silk_encoder_state_Fxx          *psEnc,             /* I/O                      */
     opus_int                         fs_kHz              /* I                        */
@@ -235,6 +243,9 @@ opus_int silk_setup_fs(
         psEnc->sCmn.TargetRate_bps = 0;         /* trigger new SNR computation */
     }
 
+    psEnc->sCmn.delay = enc_delay_matrix[rateID(fs_kHz*1000)][rateID(psEnc->sCmn.API_fs_Hz)];
+    silk_assert(psEnc->sCmn.delay <= MAX_ENCODER_DELAY);
+
     /* Set internal sampling frequency */
     silk_assert( fs_kHz == 8 || fs_kHz == 12 || fs_kHz == 16 );
     silk_assert( psEnc->sCmn.nb_subfr == 2 || psEnc->sCmn.nb_subfr == 4 );
diff --git a/silk/dec_API.c b/silk/dec_API.c
index 675bfb99657008185b51a4394da44efde7107155..5e676932e07a6e0beb76f3a2d2dc37a8fd85abd6 100644
--- a/silk/dec_API.c
+++ b/silk/dec_API.c
@@ -31,6 +31,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "API.h"
 #include "main.h"
 
+static const int dec_delay_matrix[3][5] = { /* Decoder delay in samples, indexed [rateID(internal rate)][rateID(API rate)]; entries must not exceed MAX_DECODER_DELAY */
+/* Rows: internal SILK rate (kHz). Columns: API rate (kHz) = 8  12  16  24  48 */
+/* 8 */   {3, 0, 2, 0, 0},
+/*12 */   {0, 8, 5, 7, 5},
+/*16 */   {0, 0, 8, 5, 5}
+};
+
+
 /************************/
 /* Decoder Super Struct */
 /************************/
@@ -82,12 +90,15 @@ opus_int silk_Decode(
 {
     opus_int   i, n, prev_fs_kHz, decode_only_middle = 0, ret = SILK_NO_ERROR;
     opus_int32 nSamplesOutDec, LBRR_symbol;
-    opus_int16 samplesOut1_tmp[ 2 ][ MAX_FS_KHZ * MAX_FRAME_LENGTH_MS + 2 ];
+    opus_int16 samplesOut1_tmp[ 2 ][ MAX_FS_KHZ * MAX_FRAME_LENGTH_MS + 2 + MAX_DECODER_DELAY ];
     opus_int16 samplesOut2_tmp[ MAX_API_FS_KHZ * MAX_FRAME_LENGTH_MS ];
     opus_int32 MS_pred_Q13[ 2 ] = { 0 };
     opus_int16 *resample_out_ptr;
     silk_decoder *psDec = ( silk_decoder * )decState;
     silk_decoder_state *channel_state = psDec->channel_state;
+    int delay;
+
+    delay = channel_state[ 0 ].delay;
 
     /**********************************/
     /* Test if first frame in payload */
@@ -106,6 +117,7 @@ opus_int silk_Decode(
         ret += silk_init_decoder( &channel_state[ 1 ] );
         if( psDec->nChannelsAPI == 2 ) {
             silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) );
+            silk_memcpy( &channel_state[ 1 ].delayBuf, &channel_state[ 0 ].delayBuf, MAX_DECODER_DELAY*sizeof(opus_int16));
         }
     }
 
@@ -143,9 +155,12 @@ opus_int silk_Decode(
 
     /* Initialize resampler when switching internal or external sampling frequency */
     if( prev_fs_kHz != channel_state[ 0 ].fs_kHz || channel_state[ 0 ].prev_API_sampleRate != decControl->API_sampleRate ) {
+        channel_state[ 0 ].delay = dec_delay_matrix[rateID(silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ))][rateID(decControl->API_sampleRate)];
+        silk_assert(channel_state[ 0 ].delay <= MAX_DECODER_DELAY);
         ret = silk_resampler_init( &channel_state[ 0 ].resampler_state, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ), decControl->API_sampleRate );
         if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) {
             silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) );
+            channel_state[ 1 ].delay = channel_state[ 0 ].delay;
         }
     }
     channel_state[ 0 ].prev_API_sampleRate = decControl->API_sampleRate;
@@ -230,19 +245,19 @@ opus_int silk_Decode(
     /* Call decoder for one frame */
     for( n = 0; n < decControl->nChannelsInternal; n++ ) {
         if( n == 0 || decode_only_middle == 0 ) {
-            ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag );
+            ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 + delay ], &nSamplesOutDec, lostFlag );
         } else {
-            silk_memset( &samplesOut1_tmp[ n ][ 2 ], 0, nSamplesOutDec * sizeof( opus_int16 ) );
+            silk_memset( &samplesOut1_tmp[ n ][ 2 + delay ], 0, nSamplesOutDec * sizeof( opus_int16 ) );
         }
     }
 
     if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) {
         /* Convert Mid/Side to Left/Right */
-        silk_stereo_MS_to_LR( &psDec->sStereo, samplesOut1_tmp[ 0 ], samplesOut1_tmp[ 1 ], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec );
+        silk_stereo_MS_to_LR( &psDec->sStereo, &samplesOut1_tmp[ 0 ][delay], &samplesOut1_tmp[ 1 ][delay], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec );
     } else {
         /* Buffering */
-        silk_memcpy( samplesOut1_tmp[ 0 ], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) );
-        silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec ], 2 * sizeof( opus_int16 ) );
+        silk_memcpy( &samplesOut1_tmp[ 0 ][delay], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) );
+        silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec + delay ], 2 * sizeof( opus_int16 ) );
     }
 
     /* Number of output samples */
@@ -256,8 +271,11 @@ opus_int silk_Decode(
     }
 
     for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) {
+
+        silk_memcpy(&samplesOut1_tmp[ n ][ 1 ], &channel_state[ n ].delayBuf[ MAX_DECODER_DELAY-delay ], delay*sizeof(opus_int16));
         /* Resample decoded signal to API_sampleRate */
         ret += silk_resampler( &channel_state[ n ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ n ][ 1 ], nSamplesOutDec );
+        silk_memcpy(channel_state[ n ].delayBuf, &samplesOut1_tmp[ n ][ 1 + nSamplesOutDec + delay - MAX_DECODER_DELAY ], MAX_DECODER_DELAY*sizeof(opus_int16));
 
         /* Interleave if stereo output and stereo stream */
         if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) {
diff --git a/silk/define.h b/silk/define.h
index dcfeb1d311a1e3e19227760708878bae59dcdaf0..c7cbdcf8f9b01752b835438dc9da3e31b88332ca 100644
--- a/silk/define.h
+++ b/silk/define.h
@@ -86,6 +86,9 @@ extern "C"
 #define MAX_FRAME_LENGTH_MS                     ( SUB_FRAME_LENGTH_MS * MAX_NB_SUBFR )
 #define MAX_FRAME_LENGTH                        ( MAX_FRAME_LENGTH_MS * MAX_FS_KHZ )
 
+#define MAX_ENCODER_DELAY                       18 /* Upper bound on the encoder delay (samples); also the length of the encoder delayBuf */
+#define MAX_DECODER_DELAY                        8 /* Upper bound on the decoder delay (samples); also the length of the decoder delayBuf */
+
 /* Milliseconds of lookahead for pitch analysis */
 #define LA_PITCH_MS                             2
 #define LA_PITCH_MAX                            ( LA_PITCH_MS * MAX_FS_KHZ )
diff --git a/silk/enc_API.c b/silk/enc_API.c
index 0fe945b6abe4de05a976ce7aea9c895f2976eae4..403aeccea640a942ef07fb7ad8ae62059113ec37 100644
--- a/silk/enc_API.c
+++ b/silk/enc_API.c
@@ -138,8 +138,8 @@ opus_int silk_Encode(
     opus_int   speech_act_thr_for_switch_Q8;
     opus_int32 TargetRate_bps, MStargetRates_bps[ 2 ], channelRate_bps, LBRR_symbol;
     silk_encoder *psEnc = ( silk_encoder * )encState;
-    opus_int16 buf[ MAX_FRAME_LENGTH_MS * MAX_API_FS_KHZ ];
-    opus_int transition;
+    opus_int16 buf[ MAX_FRAME_LENGTH_MS * MAX_API_FS_KHZ + MAX_ENCODER_DELAY];
+    opus_int transition, delay;
 
     psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded = psEnc->state_Fxx[ 1 ].sCmn.nFramesEncoded = 0;
 
@@ -222,6 +222,7 @@ opus_int silk_Encode(
     }
     silk_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );
 
+    delay = psEnc->state_Fxx[ 0 ].sCmn.delay;
     /* Input buffering/resampling and encoding */
     while( 1 ) {
         nSamplesToBuffer  = psEnc->state_Fxx[ 0 ].sCmn.frame_length - psEnc->state_Fxx[ 0 ].sCmn.inputBufIx;
@@ -231,12 +232,15 @@ opus_int silk_Encode(
         if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 2 ) {
             int id = psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded;
             for( n = 0; n < nSamplesFromInput; n++ ) {
-                buf[ n ] = samplesIn[ 2 * n ];
+                    buf[ n+delay ] = samplesIn[ 2 * n ];
             }
+            silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[MAX_ENCODER_DELAY-delay], delay*sizeof(opus_int16));
             /* Making sure to start both resamplers from the same state when switching from mono to stereo */
             if(psEnc->nPrevChannelsInternal == 1 && id==0) {
-               silk_memcpy(&psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof(psEnc->state_Fxx[ 1 ].sCmn.resampler_state));
+               silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof(psEnc->state_Fxx[ 1 ].sCmn.resampler_state));
+               silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.delayBuf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf, MAX_ENCODER_DELAY*sizeof(opus_int16));
             }
+            silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf+nSamplesFromInput+delay-MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));
 
             ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
                 &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
@@ -245,23 +249,31 @@ opus_int silk_Encode(
             nSamplesToBuffer  = psEnc->state_Fxx[ 1 ].sCmn.frame_length - psEnc->state_Fxx[ 1 ].sCmn.inputBufIx;
             nSamplesToBuffer  = silk_min( nSamplesToBuffer, 10 * nBlocksOf10ms * psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );
             for( n = 0; n < nSamplesFromInput; n++ ) {
-                buf[ n ] = samplesIn[ 2 * n + 1 ];
+                    buf[ n+delay ] = samplesIn[ 2 * n + 1 ];
             }
+            silk_memcpy(buf, &psEnc->state_Fxx[ 1 ].sCmn.delayBuf[MAX_ENCODER_DELAY-delay], delay*sizeof(opus_int16));
             ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state,
                 &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
+            silk_memcpy(psEnc->state_Fxx[ 1 ].sCmn.delayBuf, buf+nSamplesFromInput+delay-MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));
+
             psEnc->state_Fxx[ 1 ].sCmn.inputBufIx += nSamplesToBuffer;
         } else if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 1 ) {
             /* Combine left and right channels before resampling */
             for( n = 0; n < nSamplesFromInput; n++ ) {
-                buf[ n ] = (opus_int16)silk_RSHIFT_ROUND( samplesIn[ 2 * n ] + samplesIn[ 2 * n + 1 ],  1 );
+                buf[ n+delay ] = (opus_int16)silk_RSHIFT_ROUND( samplesIn[ 2 * n ] + samplesIn[ 2 * n + 1 ],  1 );
             }
+            silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[MAX_ENCODER_DELAY-delay], delay*sizeof(opus_int16));
             ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
                 &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
+            silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf+nSamplesFromInput+delay-MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));
             psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
         } else {
             silk_assert( encControl->nChannelsAPI == 1 && encControl->nChannelsInternal == 1 );
+            silk_memcpy(buf+delay, samplesIn, nSamplesFromInput*sizeof(opus_int16));
+            silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[MAX_ENCODER_DELAY-delay], delay*sizeof(opus_int16));
             ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
-                &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], samplesIn, nSamplesFromInput );
+                &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
+            silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf+nSamplesFromInput+delay-MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));
             psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
         }
 
diff --git a/silk/main.h b/silk/main.h
index d7ed22ceb2a0ef442a901ce8fd5706799f28f04f..981c7cab05fcb27801f52a117b55c774c8544600 100644
--- a/silk/main.h
+++ b/silk/main.h
@@ -43,6 +43,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 /* Uncomment the next line to force a fixed internal sampling rate (independent of what bitrate is used */
 /*#define FORCE_INTERNAL_FS_KHZ       16*/
 
+/* Map a sampling rate in Hz from [8000, 12000, 16000, 24000, 48000] to a table index [0,1,2,3,4]. R is evaluated several times, so pass an expression without side effects; the result is only meaningful for these five rates. */
+#define rateID(R) ( ( ( ((R)>>12) - ((R)>16000) ) >> ((R)>24000) ) - 1 )
 
 /* Convert Left/Right stereo signal to adaptive Mid/Side representation */
 void silk_stereo_LR_to_MS(
diff --git a/silk/structs.h b/silk/structs.h
index eda8173b7d8b0658f9ddcff45e85c43c7fdede67..70c81baeebff1cacc1f7aee7169225fbb32253a3 100644
--- a/silk/structs.h
+++ b/silk/structs.h
@@ -149,6 +149,7 @@ typedef struct {
     opus_int                         minInternal_fs_Hz;              /* Minimum internal sampling frequency (Hz)                             */
     opus_int                         desiredInternal_fs_Hz;          /* Soft request for internal sampling frequency (Hz)                    */
     opus_int                         fs_kHz;                         /* Internal sampling frequency (kHz)                                    */
+    opus_int                         delay;                          /* Number of samples of delay to apply */
     opus_int                         nb_subfr;                       /* Number of 5 ms subframes in a frame                                  */
     opus_int                         frame_length;                   /* Frame length (samples)                                               */
     opus_int                         subfr_length;                   /* Subframe length (samples)                                            */
@@ -192,6 +193,7 @@ typedef struct {
 
     /* Input/output buffering */
     opus_int16                       inputBuf[ MAX_FRAME_LENGTH + 2 ]; /* Buffer containing input signal                                   */
+    opus_int16                       delayBuf[MAX_ENCODER_DELAY];
     opus_int                         inputBufIx;
     opus_int                         nFramesPerPacket;
     opus_int                         nFramesEncoded;                 /* Number of frames analyzed in current packet                          */
@@ -257,6 +259,8 @@ typedef struct {
     opus_int32       sLPC_Q14[ MAX_SUB_FRAME_LENGTH + MAX_LPC_ORDER ];
     opus_int32       exc_Q10[ MAX_FRAME_LENGTH ];
     opus_int16       outBuf[ 2 * MAX_FRAME_LENGTH ];             /* Buffer for output signal                                             */
+    opus_int16       delayBuf[ MAX_DECODER_DELAY ];              /* Buffer for delaying the SILK output prior to resampling              */
+    opus_int         delay;                                      /* How much decoder delay to add                                        */
     opus_int         lagPrev;                                    /* Previous Lag                                                         */
     opus_int8        LastGainIndex;                              /* Previous gain index                                                  */
     opus_int         fs_kHz;                                     /* Sampling frequency in kHz                                            */
diff --git a/src/opus_encoder.c b/src/opus_encoder.c
index ad3279f8813f929e2c901d29a5378af4dc1aac50..1a6e38f14c0f89bddb6eaca4d4f541cd58d9a195 100644
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -123,6 +123,11 @@ static const opus_int32 mode_thresholds[2][2] = {
       {  48000,      24000}, /* mono */
       {  48000,      24000}, /* stereo */
 };
+
+static const int celt_delay_table[5] = { /* Amount added to st->delay_compensation at init, indexed by rateID(API rate in Hz) */
+/* API rate (kHz): 8  12  16  24  48 */
+      10, 16, 21, 27, 55
+};
 int opus_encoder_get_size(int channels)
 {
     int silkEncSizeBytes, celtEncSizeBytes;
@@ -202,14 +207,8 @@ int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int applicat
     st->encoder_buffer = st->Fs/100;
 
     st->delay_compensation = st->Fs/400;
-    /* This part is meant to compensate for the resampler delay as a function
-       of the API sampling rate */
-    if (st->Fs == 48000)
-        st->delay_compensation += 23;
-    else if (st->Fs == 24000)
-       st->delay_compensation += 15;
-    else
-       st->delay_compensation += 2;
+
+    st->delay_compensation += celt_delay_table[rateID(st->Fs)];
 
     st->hybrid_stereo_width_Q14             = 1 << 14;
     st->variable_HP_smth2_Q15 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 );
@@ -486,7 +485,7 @@ int opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
     }
 #endif
 
-    if (st->stream_channels == 1 && st->prev_channels ==2 && st->silk_mode.toMono==0) 
+    if (st->stream_channels == 1 && st->prev_channels ==2 && st->silk_mode.toMono==0)
     {
        /* Delay stereo->mono transition by two frames so that SILK can do a smooth downmix */
        st->silk_mode.toMono = 1;