diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index f52adfc972dae2f193914053fa5ada6bf51fc5ba..a666d1d1d87f52679c76d650defcc603959ee2b8 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -170,9 +170,9 @@ struct buf_2d {
 };
 
 struct macroblockd_plane {
-  DECLARE_ALIGNED(16, int16_t,  qcoeff[64 * 64]);
-  DECLARE_ALIGNED(16, int16_t,  dqcoeff[64 * 64]);
-  DECLARE_ALIGNED(16, uint16_t, eobs[256]);
+  int16_t *qcoeff;
+  int16_t *dqcoeff;
+  uint16_t *eobs;
   PLANE_TYPE plane_type;
   int subsampling_x;
   int subsampling_y;
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index aad400aa1a408e2c62bf7a6654fb34c2dd420ff8..218fdd81314558bfca9dd37913637676ec9b8d34 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -42,6 +42,9 @@ typedef struct TileWorkerData {
   vp9_reader bit_reader;
   DECLARE_ALIGNED(16, MACROBLOCKD, xd);
   DECLARE_ALIGNED(16, unsigned char, token_cache[1024]);
+  DECLARE_ALIGNED(16, int16_t,  qcoeff[MAX_MB_PLANE][64 * 64]);
+  DECLARE_ALIGNED(16, int16_t,  dqcoeff[MAX_MB_PLANE][64 * 64]);
+  DECLARE_ALIGNED(16, uint16_t, eobs[MAX_MB_PLANE][256]);
 } TileWorkerData;
 
 static int read_be32(const uint8_t *p) {
@@ -931,6 +934,19 @@ static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) {
   return end;
 }
 
+// Points tile_data->xd's planes at tile_data's own buffers; zeroes dqcoeff.
+static void setup_tile_macroblockd(TileWorkerData *const tile_data) {
+  struct macroblockd_plane *const pd = tile_data->xd.plane;
+  int i;
+  for (i = 0; i < MAX_MB_PLANE; ++i) {
+    pd[i].qcoeff  = tile_data->qcoeff[i];
+    pd[i].dqcoeff = tile_data->dqcoeff[i];
+    pd[i].eobs    = tile_data->eobs[i];
+    // Size is taken from the backing array so it tracks its declaration.
+    vpx_memset(pd[i].dqcoeff, 0, sizeof(tile_data->dqcoeff[i]));
+  }
+}
+
 static int tile_worker_hook(void *arg1, void *arg2) {
   TileWorkerData *tile_data = (TileWorkerData*)arg1;
   const TileInfo *const tile = (TileInfo*)arg2;
@@ -1008,6 +1024,7 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) {
       setup_token_decoder(data, data_end, size, &cm->error,
                           &tile_data->bit_reader);
       setup_tile_context(pbi, &tile_data->xd, 0, tile_col);
+      setup_tile_macroblockd(tile_data);
 
       worker->had_error = 0;
       if (i == num_workers - 1 || tile_col == tile_cols - 1) {
@@ -1319,7 +1336,7 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
   cm->fc = cm->frame_contexts[cm->frame_context_idx];
   vp9_zero(cm->counts);
   for (i = 0; i < MAX_MB_PLANE; ++i)
-    vp9_zero(xd->plane[i].dqcoeff);
+    vpx_memset(xd->plane[i].dqcoeff, 0, 64 * 64 * sizeof(int16_t));
 
   xd->corrupted = 0;
   new_fb->corrupted = read_compressed_header(pbi, data, first_partition_size);
diff --git a/vp9/decoder/vp9_onyxd_if.c b/vp9/decoder/vp9_onyxd_if.c
index 5f970a3d5636645f35d03e03594ed7f6b3836d91..cb45d3702e89ca969b0ad51eb21693abe373cfc0 100644
--- a/vp9/decoder/vp9_onyxd_if.c
+++ b/vp9/decoder/vp9_onyxd_if.c
@@ -107,6 +107,18 @@ void vp9_initialize_dec() {
   }
 }
 
+// Points every plane of pbi->mb at the coefficient arrays held in pbi.
+static void init_macroblockd(VP9D_COMP *const pbi) {
+  struct macroblockd_plane *const planes = pbi->mb.plane;
+  int plane;
+
+  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+    planes[plane].qcoeff = pbi->qcoeff[plane];
+    planes[plane].dqcoeff = pbi->dqcoeff[plane];
+    planes[plane].eobs = pbi->eobs[plane];
+  }
+}
+
 VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf) {
   VP9D_COMP *const pbi = vpx_memalign(32, sizeof(VP9D_COMP));
   VP9_COMMON *const cm = pbi ? &pbi->common : NULL;
@@ -141,6 +153,8 @@ VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf) {
   cm->error.setjmp = 0;
   pbi->decoded_key_frame = 0;
 
+  init_macroblockd(pbi);
+
   vp9_worker_init(&pbi->lf_worker);
 
   return pbi;
diff --git a/vp9/decoder/vp9_onyxd_int.h b/vp9/decoder/vp9_onyxd_int.h
index 7c4c9db36f86f3c34246ff6076c1af2c53ba7f43..7ad05e6b297dae24277198c8950e08ce16cdce7e 100644
--- a/vp9/decoder/vp9_onyxd_int.h
+++ b/vp9/decoder/vp9_onyxd_int.h
@@ -22,6 +22,10 @@ typedef struct VP9Decompressor {
 
   DECLARE_ALIGNED(16, VP9_COMMON, common);
 
+  DECLARE_ALIGNED(16, int16_t,  qcoeff[MAX_MB_PLANE][64 * 64]);
+  DECLARE_ALIGNED(16, int16_t,  dqcoeff[MAX_MB_PLANE][64 * 64]);
+  DECLARE_ALIGNED(16, uint16_t, eobs[MAX_MB_PLANE][256]);
+
   VP9D_CONFIG oxcf;
 
   const uint8_t *source;
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index f922f900ad42eec2cf8678b596ef33004ec6ff1b..4f1357a2d7888addb25823653a89ab21cfea4744 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -1524,6 +1524,17 @@ static void free_pick_mode_context(MACROBLOCK *x) {
   }
 }
 
+// Points every plane of cpi->mb.e_mbd at the coefficient arrays held in cpi.
+static void init_macroblock(VP9_COMP *const cpi) {
+  struct macroblockd_plane *const planes = cpi->mb.e_mbd.plane;
+  int plane;
+  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+    planes[plane].qcoeff = cpi->qcoeff[plane];
+    planes[plane].dqcoeff = cpi->dqcoeff[plane];
+    planes[plane].eobs = cpi->eobs[plane];
+  }
+}
+
 VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
   int i, j;
   volatile union {
@@ -1562,6 +1573,8 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
 
   init_pick_mode_context(cpi);
 
+  init_macroblock(cpi);
+
   cm->current_video_frame   = 0;
   cpi->kf_overspend_bits            = 0;
   cpi->kf_bitrate_adjustment        = 0;
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 9429c7fedd284e34079abacbc1ecd97d1423cff9..839a92b7e335f77a1b23a9ea3fe6dc011c1990ab 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -313,6 +313,10 @@ typedef struct VP9_COMP {
   VP9_CONFIG oxcf;
   struct rdcost_block_args rdcost_stack;
 
+  DECLARE_ALIGNED(16, int16_t,  qcoeff[MAX_MB_PLANE][64 * 64]);
+  DECLARE_ALIGNED(16, int16_t,  dqcoeff[MAX_MB_PLANE][64 * 64]);
+  DECLARE_ALIGNED(16, uint16_t, eobs[MAX_MB_PLANE][256]);
+
   struct lookahead_ctx    *lookahead;
   struct lookahead_entry  *source;
 #if CONFIG_MULTIPLE_ARF