diff --git a/configure b/configure
index 2b0722df1617de93c110676690a027cfbc1ab458..01b073fd1bfabb08b242ab1ac8d97f20029f0e15 100755
--- a/configure
+++ b/configure
@@ -273,6 +273,7 @@ EXPERIMENT_LIST="
     warped_motion
     entropy
     bidir_pred
+    bitstream_debug
 "
 CONFIG_LIST="
     dependency_tracking
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c
index 6c2cc6fa0731cec577b87d06f2a61487cfbd9049..a5f7030dbc040c7ebee70f7f6172acb182a3e3e1 100644
--- a/vp10/encoder/bitstream.c
+++ b/vp10/encoder/bitstream.c
@@ -18,6 +18,7 @@
 #include "vpx_mem/vpx_mem.h"
 #include "vpx_ports/mem_ops.h"
 #include "vpx_ports/system_state.h"
+#include "vpx_util/debug_util.h"
 
 #include "vp10/common/entropy.h"
 #include "vp10/common/entropymode.h"
@@ -3583,6 +3584,10 @@ void vp10_pack_bitstream(VP10_COMP *const cpi, uint8_t *dst, size_t *size) {
   VP10_COMMON *const cm = &cpi->common;
   const int have_tiles = cm->tile_cols * cm->tile_rows > 1;
 
+#if CONFIG_BITSTREAM_DEBUG
+  bitstream_queue_reset_write();
+#endif
+
   // Write the uncompressed header
   write_uncompressed_header(cpi, &wb);
 
diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c
index 4b8dbc350f27a592392105635738ac25ff8560e0..1767ad1976b63a611b1f8d3d04405a289352e78b 100644
--- a/vp10/encoder/encoder.c
+++ b/vp10/encoder/encoder.c
@@ -59,6 +59,7 @@
 #include "vpx_ports/system_state.h"
 #include "vpx_ports/vpx_timer.h"
 #include "vpx_scale/vpx_scale.h"
+#include "vpx_util/debug_util.h"
 
 #define AM_SEGMENT_ID_INACTIVE 7
 #define AM_SEGMENT_ID_ACTIVE 0
@@ -5419,6 +5420,12 @@ int vp10_get_compressed_data(VP10_COMP *cpi, unsigned int *frame_flags,
 #endif  // CONFIG_EXT_REFS
   int i;
 
+#if CONFIG_BITSTREAM_DEBUG
+  assert(cpi->oxcf.max_threads == 0 &&
+         "bitstream debug tool does not support multithreading");
+  bitstream_queue_record_write();
+#endif
+
   vpx_usec_timer_start(&cmptimer);
 
   vp10_set_high_precision_mv(cpi, ALTREF_HIGH_PRECISION_MV);
diff --git a/vpx_dsp/bitreader.h b/vpx_dsp/bitreader.h
index 6ee2a58632c5e4c5a2c7574f9cc430be74aee37a..10124545392dbf00dd0036de5cf176a535df1966 100644
--- a/vpx_dsp/bitreader.h
+++ b/vpx_dsp/bitreader.h
@@ -11,14 +11,21 @@
 #ifndef VPX_DSP_BITREADER_H_
 #define VPX_DSP_BITREADER_H_
 
-#include <stddef.h>
 #include <limits.h>
+#include <stddef.h>
 
 #include "./vpx_config.h"
+
+#if CONFIG_BITSTREAM_DEBUG
+#include <assert.h>
+#include <stdio.h>
+#endif  // CONFIG_BITSTREAM_DEBUG
+
 #include "vpx_ports/mem.h"
 #include "vpx/vp8dx.h"
 #include "vpx/vpx_integer.h"
 #include "vpx_dsp/prob.h"
+#include "vpx_util/debug_util.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -103,6 +110,22 @@ static INLINE int vpx_read(vpx_reader *r, int prob) {
   r->count = count;
   r->range = range;
 
+#if CONFIG_BITSTREAM_DEBUG
+  {
+    int ref_bit, ref_prob;
+    const int queue_r = bitstream_queue_get_read();
+    bitstream_queue_pop(&ref_bit, &ref_prob);
+    if (prob != ref_prob) {
+      fprintf(stderr, "prob error, prob %d ref_prob %d queue_r %d\n", prob,
+              ref_prob, queue_r);
+      assert(0);
+    }
+    if ((int)bit != ref_bit) {
+      fprintf(stderr, "bit error, bit %d ref_bit %d\n", bit, ref_bit);
+      assert(0);
+    }
+  }
+#endif  // CONFIG_BITSTREAM_DEBUG
   return bit;
 }
 
diff --git a/vpx_dsp/bitwriter.c b/vpx_dsp/bitwriter.c
index 81e28b309f573e2cabb1b6c29f9324655eacca86..0abe35125e43edcbeaca6216c8b23ff18f1ef303 100644
--- a/vpx_dsp/bitwriter.c
+++ b/vpx_dsp/bitwriter.c
@@ -24,8 +24,16 @@ void vpx_start_encode(vpx_writer *br, uint8_t *source) {
 void vpx_stop_encode(vpx_writer *br) {
   int i;
 
+#if CONFIG_BITSTREAM_DEBUG
+  bitstream_queue_set_skip_write(1);
+#endif  // CONFIG_BITSTREAM_DEBUG
+
   for (i = 0; i < 32; i++) vpx_write_bit(br, 0);
 
+#if CONFIG_BITSTREAM_DEBUG
+  bitstream_queue_set_skip_write(0);
+#endif  // CONFIG_BITSTREAM_DEBUG
+
   // Ensure there's no ambigous collision with any index marker bytes
   if ((br->buffer[br->pos - 1] & 0xe0) == 0xc0) br->buffer[br->pos++] = 0;
 }
diff --git a/vpx_dsp/bitwriter.h b/vpx_dsp/bitwriter.h
index 41040cf93549829d36297293d1a655a6b52ac09d..5e952a80b16f4ef1d74499f311ecd36fe75140d0 100644
--- a/vpx_dsp/bitwriter.h
+++ b/vpx_dsp/bitwriter.h
@@ -12,8 +12,8 @@
 #define VPX_DSP_BITWRITER_H_
 
 #include "vpx_ports/mem.h"
-
 #include "vpx_dsp/prob.h"
+#include "vpx_util/debug_util.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -37,6 +37,10 @@ static INLINE void vpx_write(vpx_writer *br, int bit, int probability) {
   unsigned int lowvalue = br->lowvalue;
   register int shift;
 
+#if CONFIG_BITSTREAM_DEBUG
+  bitstream_queue_push(bit, probability);
+#endif  // CONFIG_BITSTREAM_DEBUG
+
   split = 1 + (((range - 1) * probability) >> 8);
 
   range = split;
diff --git a/vpx_util/debug_util.c b/vpx_util/debug_util.c
new file mode 100644
index 0000000000000000000000000000000000000000..6db1e0aa8ae15595b408ce54cfd70a8af9dab668
--- /dev/null
+++ b/vpx_util/debug_util.c
@@ -0,0 +1,81 @@
+/*
+ *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_util/debug_util.h"
+#include <assert.h>
+#include <stdio.h>
+#if CONFIG_BITSTREAM_DEBUG
+// Number of slots in the circular queue of (bit, probability) pairs. A push
+// that makes the write index catch up with the read index is an overflow.
+#define QUEUE_MAX_SIZE 2000000
+static int result_queue[QUEUE_MAX_SIZE];  // bit values, in push order
+static int prob_queue[QUEUE_MAX_SIZE];    // probability paired with each bit
+static int queue_r = 0;                   // index of the next entry to pop
+static int queue_w = 0;                   // index of the next slot to fill
+// Write index saved by bitstream_queue_record_write(); -1 means none saved.
+static int queue_prev_w = -1;
+static int skip_r = 0;  // nonzero: bitstream_queue_pop() is a no-op
+static int skip_w = 0;  // nonzero: bitstream_queue_push() is a no-op
+
+// Turns dropping of pushed entries on (nonzero) or off (zero).
+void bitstream_queue_set_skip_write(int skip) { skip_w = skip; }
+
+// Turns skipping of pops on (nonzero) or off (zero).
+void bitstream_queue_set_skip_read(int skip) { skip_r = skip; }
+
+// Saves the current write index so that bitstream_queue_reset_write() can
+// later rewind to it.
+void bitstream_queue_record_write(void) { queue_prev_w = queue_w; }
+
+// Rewinds the write index to the one saved by bitstream_queue_record_write().
+// Does nothing when no index has been saved yet: the -1 sentinel must never
+// be used as an array index by a subsequent push.
+void bitstream_queue_reset_write(void) {
+  if (queue_prev_w >= 0) queue_w = queue_prev_w;
+}
+
+// Returns the index at which the next push will store its entry.
+int bitstream_queue_get_write(void) { return queue_w; }
+
+// Returns the index of the entry the next pop will return.
+int bitstream_queue_get_read(void) { return queue_r; }
+
+// Copies the oldest queued bit and probability into *result and *prob and
+// advances the read index. An empty queue is reported as an underflow on
+// stderr, which is unbuffered, so the message is visible before assert(0)
+// fires. While read skipping is on, nothing happens and *result / *prob are
+// left untouched.
+void bitstream_queue_pop(int *result, int *prob) {
+  if (skip_r) return;
+  if (queue_w == queue_r) {
+    fprintf(stderr, "buffer underflow queue_w %d queue_r %d\n", queue_w,
+            queue_r);
+    assert(0);
+  }
+  *result = result_queue[queue_r];
+  *prob = prob_queue[queue_r];
+  queue_r = (queue_r + 1) % QUEUE_MAX_SIZE;
+}
+
+// Appends a bit and its probability to the queue and advances the write
+// index. If the write index wraps around onto the read index, an overflow is
+// reported on stderr. While write skipping is on, the entry is dropped.
+void bitstream_queue_push(int result, int prob) {
+  if (skip_w) return;
+  result_queue[queue_w] = result;
+  prob_queue[queue_w] = prob;
+  queue_w = (queue_w + 1) % QUEUE_MAX_SIZE;
+  if (queue_w == queue_r) {
+    fprintf(stderr, "buffer overflow queue_w %d queue_r %d\n", queue_w,
+            queue_r);
+    assert(0);
+  }
+}
+#endif  // CONFIG_BITSTREAM_DEBUG
diff --git a/vpx_util/debug_util.h b/vpx_util/debug_util.h
new file mode 100644
index 0000000000000000000000000000000000000000..0438be4869fdeee956fbf23e465d821e859367c8
--- /dev/null
+++ b/vpx_util/debug_util.h
@@ -0,0 +1,43 @@
+/*
+ *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_UTIL_DEBUG_UTIL_H_
+#define VPX_UTIL_DEBUG_UTIL_H_
+
+#include "./vpx_config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if CONFIG_BITSTREAM_DEBUG
+/* Debug tool for pinpointing bitstream mismatches. On the encoder side, each
+ * bit and its probability are pushed into a queue before the bit is written
+ * to the arithmetic coder. On the decoder side, whenever a bit is read from
+ * the arithmetic coder, the reference bit and probability are popped from the
+ * queue and compared; any mismatch is reported as an error. Breaking on that
+ * error and using gdb's backtrace identifies the module responsible. The
+ * queue is a set of file-scope statics, so the encoder and decoder must run
+ * in the same process and single-threaded. */
+int bitstream_queue_get_write(void);               // current write index
+int bitstream_queue_get_read(void);                // current read index
+void bitstream_queue_record_write(void);           // save write index
+void bitstream_queue_reset_write(void);            // rewind to saved index
+void bitstream_queue_pop(int *result, int *prob);  // no-op if skip_read set
+void bitstream_queue_push(int result, int prob);   // no-op if skip_write set
+void bitstream_queue_set_skip_write(int skip);     // nonzero disables push
+void bitstream_queue_set_skip_read(int skip);      // nonzero disables pop
+#endif  // CONFIG_BITSTREAM_DEBUG
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VPX_UTIL_DEBUG_UTIL_H_
diff --git a/vpx_util/vpx_util.mk b/vpx_util/vpx_util.mk
index c0ef8d3362917dd193c97a7e649f874d2e9f0c3f..480e61f957b45724399931420f6ea4bdf47d0261 100644
--- a/vpx_util/vpx_util.mk
+++ b/vpx_util/vpx_util.mk
@@ -11,4 +11,6 @@
 UTIL_SRCS-yes += vpx_util.mk
 UTIL_SRCS-yes += vpx_thread.c
 UTIL_SRCS-yes += vpx_thread.h
+UTIL_SRCS-yes += debug_util.c
+UTIL_SRCS-yes += debug_util.h
 UTIL_SRCS-yes += endian_inl.h