diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h
index a1b5683781ddf87a2f31cf3ca204b6492c9be688..d5139f7d1622e1e2e44c632edfbbefed6cf755b1 100644
--- a/vp10/common/blockd.h
+++ b/vp10/common/blockd.h
@@ -45,6 +45,7 @@ typedef enum {
 #endif  // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
 
 #define MAXTXLEN 32
+#define CU_SIZE  64
 
 static INLINE int is_inter_mode(PREDICTION_MODE mode) {
 #if CONFIG_EXT_INTER
@@ -55,6 +56,23 @@ static INLINE int is_inter_mode(PREDICTION_MODE mode) {
 }
 
 #if CONFIG_EXT_INTER
+#define WEDGE_BITS_SML    3
+#define WEDGE_BITS_MED    4
+#define WEDGE_BITS_BIG    5
+#define WEDGE_NONE       -1
+#define WEDGE_WEIGHT_BITS 6
+
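+// Number of bits used to signal the wedge index for a given block size;
+// the wedge codebook thus holds (1 << bits) patterns. Sub-8x8 blocks
+// return 0, which disables wedge prediction for them.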
+static INLINE int get_wedge_bits(BLOCK_SIZE sb_type) {
+  if (sb_type < BLOCK_8X8)
+    return 0;
+  else if (sb_type == BLOCK_8X8)
+    return WEDGE_BITS_SML;
+  else if (sb_type <= BLOCK_32X32)
+    return WEDGE_BITS_MED;
+  else
+    return WEDGE_BITS_BIG;
+}
+
 static INLINE int is_inter_singleref_mode(PREDICTION_MODE mode) {
   return mode >= NEARESTMV && mode <= NEWFROMNEARMV;
 }
@@ -69,6 +87,11 @@ static INLINE int have_newmv_in_inter_mode(PREDICTION_MODE mode) {
           mode == NEAREST_NEWMV || mode == NEW_NEARESTMV ||
           mode == NEAR_NEWMV || mode == NEW_NEARMV);
 }
+#else
+
+static INLINE int have_newmv_in_inter_mode(PREDICTION_MODE mode) {
+  return (mode == NEWMV);
+}
 #endif  // CONFIG_EXT_INTER
 
 /* For keyframes, intra block modes are predicted by the (already decoded)
@@ -172,6 +195,12 @@ typedef struct {
 #if CONFIG_EXT_INTER
   PREDICTION_MODE interintra_mode;
   PREDICTION_MODE interintra_uv_mode;
+  // TODO(debargha): Consolidate these flags
+  int use_wedge_interintra;
+  int interintra_wedge_index;
+  int interintra_uv_wedge_index;
+  int use_wedge_interinter;
+  int interinter_wedge_index;
 #endif  // CONFIG_EXT_INTER
 
 #if CONFIG_OBMC
@@ -203,12 +232,6 @@ static INLINE int has_second_ref(const MB_MODE_INFO *mbmi) {
   return mbmi->ref_frame[1] > INTRA_FRAME;
 }
 
-#if CONFIG_OBMC
-static INLINE int is_obmc_allowed(const MB_MODE_INFO *mbmi) {
-  return (mbmi->sb_type >= BLOCK_8X8);
-}
-#endif  // CONFIG_OBMC
-
 PREDICTION_MODE vp10_left_block_mode(const MODE_INFO *cur_mi,
                                     const MODE_INFO *left_mi, int b);
 
@@ -647,6 +670,23 @@ static INLINE int is_interintra_pred(const MB_MODE_INFO *mbmi) {
 }
 #endif  // CONFIG_EXT_INTER
 
+#if CONFIG_OBMC
+static INLINE int is_obmc_allowed(const MB_MODE_INFO *mbmi) {
+  return (mbmi->sb_type >= BLOCK_8X8);
+}
+
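+// A neighbor predictor may be overlapped (for OBMC) only if it is inter
+// coded and, with CONFIG_EXT_INTER, does not itself use a wedge compound
+// or inter-intra prediction.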
+static INLINE int is_neighbor_overlappable(const MB_MODE_INFO *mbmi) {
+#if CONFIG_EXT_INTER
+  return (is_inter_block(mbmi) &&
+          !(has_second_ref(mbmi) && get_wedge_bits(mbmi->sb_type) &&
+            mbmi->use_wedge_interinter) &&
+          !(is_interintra_pred(mbmi)));
+#else
+  return (is_inter_block(mbmi));
+#endif  // CONFIG_EXT_INTER
+}
+#endif  // CONFIG_OBMC
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/vp10/common/entropymode.c b/vp10/common/entropymode.c
index e4c27a777c4d938f0279888080b67f138783ee07..d5c8f4485c9f4f0e345b15d4587afd7d0e8f4cbd 100644
--- a/vp10/common/entropymode.c
+++ b/vp10/common/entropymode.c
@@ -10,6 +10,7 @@
 
 #include "vpx_mem/vpx_mem.h"
 
+#include "vp10/common/reconinter.h"
 #include "vp10/common/onyxc_int.h"
 #include "vp10/common/seg_common.h"
 
@@ -190,8 +191,8 @@ static const vpx_prob default_drl_prob[DRL_MODE_CONTEXTS] = {
 
 #if CONFIG_EXT_INTER
 static const vpx_prob default_new2mv_prob = 180;
-#endif
-#endif
+#endif  // CONFIG_EXT_INTER
+#endif  // CONFIG_REF_MV
 
 static const vpx_prob default_inter_mode_probs[INTER_MODE_CONTEXTS]
                                               [INTER_MODES - 1] = {
@@ -230,6 +231,14 @@ static const vpx_prob default_inter_compound_mode_probs
 static const vpx_prob default_interintra_prob[BLOCK_SIZES] = {
   192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
 };
+
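+// The default probability 192/256 is a mild prior against signalling a
+// wedge; per-frame adaptation refines it from the counted usage.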
+static const vpx_prob default_wedge_interintra_prob[BLOCK_SIZES] = {
+  192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+};
+
+static const vpx_prob default_wedge_interinter_prob[BLOCK_SIZES] = {
+  192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+};
 #endif  // CONFIG_EXT_INTER
 
 #if CONFIG_OBMC
@@ -1337,6 +1346,8 @@ static void init_mode_probs(FRAME_CONTEXT *fc) {
 #if CONFIG_EXT_INTER
   vp10_copy(fc->inter_compound_mode_probs, default_inter_compound_mode_probs);
   vp10_copy(fc->interintra_prob, default_interintra_prob);
+  vp10_copy(fc->wedge_interintra_prob, default_wedge_interintra_prob);
+  vp10_copy(fc->wedge_interinter_prob, default_wedge_interinter_prob);
 #endif  // CONFIG_EXT_INTER
 #if CONFIG_SUPERTX
   vp10_copy(fc->supertx_prob, default_supertx_prob);
@@ -1445,12 +1456,21 @@ void vp10_adapt_inter_frame_probs(VP10_COMMON *cm) {
                          pre_fc->inter_compound_mode_probs[i],
                          counts->inter_compound_mode[i],
                          fc->inter_compound_mode_probs[i]);
-
   for (i = 0; i < BLOCK_SIZES; ++i) {
     if (is_interintra_allowed_bsize(i))
       fc->interintra_prob[i] = mode_mv_merge_probs(pre_fc->interintra_prob[i],
                                                    counts->interintra[i]);
   }
+  for (i = 0; i < BLOCK_SIZES; ++i) {
+    if (is_interintra_allowed_bsize(i) && get_wedge_bits(i))
+      fc->wedge_interintra_prob[i] = mode_mv_merge_probs(
+          pre_fc->wedge_interintra_prob[i], counts->wedge_interintra[i]);
+  }
+  for (i = 0; i < BLOCK_SIZES; ++i) {
+    if (get_wedge_bits(i))
+      fc->wedge_interinter_prob[i] = mode_mv_merge_probs(
+          pre_fc->wedge_interinter_prob[i], counts->wedge_interinter[i]);
+  }
 #endif  // CONFIG_EXT_INTER
 
   for (i = 0; i < BLOCK_SIZE_GROUPS; i++)
diff --git a/vp10/common/entropymode.h b/vp10/common/entropymode.h
index d9858b3d3985edfe7dbf5bd2f2c80ae53118da4c..b208dcf4af082dfa14f40e1e795d6037d114b163 100644
--- a/vp10/common/entropymode.h
+++ b/vp10/common/entropymode.h
@@ -74,13 +74,15 @@ typedef struct frame_contexts {
 #if CONFIG_EXT_INTER
   vpx_prob new2mv_prob;
 #endif  // CONFIG_EXT_INTER
-#endif
+#endif  // CONFIG_REF_MV
 
   vpx_prob inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1];
 #if CONFIG_EXT_INTER
   vpx_prob inter_compound_mode_probs[INTER_MODE_CONTEXTS]
                                     [INTER_COMPOUND_MODES - 1];
   vpx_prob interintra_prob[BLOCK_SIZES];
+  vpx_prob wedge_interintra_prob[BLOCK_SIZES];
+  vpx_prob wedge_interinter_prob[BLOCK_SIZES];
 #endif  // CONFIG_EXT_INTER
 #if CONFIG_OBMC
   vpx_prob obmc_prob[BLOCK_SIZES];
@@ -143,6 +145,8 @@ typedef struct FRAME_COUNTS {
 #if CONFIG_EXT_INTER
   unsigned int inter_compound_mode[INTER_MODE_CONTEXTS][INTER_COMPOUND_MODES];
   unsigned int interintra[BLOCK_SIZES][2];
+  unsigned int wedge_interintra[BLOCK_SIZES][2];
+  unsigned int wedge_interinter[BLOCK_SIZES][2];
 #endif  // CONFIG_EXT_INTER
 #if CONFIG_OBMC
   unsigned int obmc[BLOCK_SIZES][2];
diff --git a/vp10/common/enums.h b/vp10/common/enums.h
index 4e3a5b174f21b00696931e882e9df7646f944ea1..87bcc8a71cce5934dd9d0ff3418d6b70d68f576e 100644
--- a/vp10/common/enums.h
+++ b/vp10/common/enums.h
@@ -41,21 +41,32 @@ typedef enum BITSTREAM_PROFILE {
   MAX_PROFILES
 } BITSTREAM_PROFILE;
 
-#define BLOCK_4X4     0
-#define BLOCK_4X8     1
-#define BLOCK_8X4     2
-#define BLOCK_8X8     3
-#define BLOCK_8X16    4
-#define BLOCK_16X8    5
-#define BLOCK_16X16   6
-#define BLOCK_16X32   7
-#define BLOCK_32X16   8
-#define BLOCK_32X32   9
-#define BLOCK_32X64  10
-#define BLOCK_64X32  11
-#define BLOCK_64X64  12
-#define BLOCK_SIZES  13
-#define BLOCK_INVALID BLOCK_SIZES
+#define BLOCK_4X4      0
+#define BLOCK_4X8      1
+#define BLOCK_8X4      2
+#define BLOCK_8X8      3
+#define BLOCK_8X16     4
+#define BLOCK_16X8     5
+#define BLOCK_16X16    6
+#define BLOCK_16X32    7
+#define BLOCK_32X16    8
+#define BLOCK_32X32    9
+#define BLOCK_32X64   10
+#define BLOCK_64X32   11
+#define BLOCK_64X64   12
+
+#if CONFIG_EXT_PARTITION
+#define BLOCK_64X128  13
+#define BLOCK_128X64  14
+#define BLOCK_128X128 15
+#define BLOCK_SIZES   16
+#else
+#define BLOCK_SIZES   13
+#endif  // CONFIG_EXT_PARTITION
+
+#define BLOCK_INVALID (BLOCK_SIZES)
+#define BLOCK_LARGEST (BLOCK_SIZES - 1)
+
 typedef uint8_t BLOCK_SIZE;
 
 typedef enum PARTITION_TYPE {
diff --git a/vp10/common/reconinter.c b/vp10/common/reconinter.c
index 72e6ae0a75a636499622909e927c462bdd116f8e..174ff8074b9eb22680a6fdc5e4ca21d5c1e85a7e 100644
--- a/vp10/common/reconinter.c
+++ b/vp10/common/reconinter.c
@@ -22,9 +22,490 @@
 #include "vp10/common/onyxc_int.h"
 #endif  // CONFIG_OBMC
 
-// TODO(geza.lore) Update this when the extended coding unit size experiment
-// have been ported.
-#define CU_SIZE 64
+#if CONFIG_EXT_INTER
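+// Maps the signed distance m of a pixel from the wedge boundary to a
+// blending weight in [0, 1 << WEDGE_WEIGHT_BITS]: 0 well inside one
+// region, 64 (full weight) well inside the other, with a smooth ramp in
+// between (m == 0, on the boundary, maps to an equal blend of 32).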
+static int get_masked_weight(int m) {
+  #define SMOOTHER_LEN  32
+  static const uint8_t smoothfn[2 * SMOOTHER_LEN + 1] = {
+    0,  0,  0,  0,  0,  0,  0,  0,
+    0,  0,  0,  0,  0,  1,  1,  1,
+    1,  1,  2,  2,  3,  4,  5,  6,
+    8,  9, 12, 14, 17, 21, 24, 28,
+    32,
+    36, 40, 43, 47, 50, 52, 55, 56,
+    58, 59, 60, 61, 62, 62, 63, 63,
+    63, 63, 63, 64, 64, 64, 64, 64,
+    64, 64, 64, 64, 64, 64, 64, 64,
+  };
+  if (m < -SMOOTHER_LEN)
+    return 0;
+  else if (m > SMOOTHER_LEN)
+    return (1 << WEDGE_WEIGHT_BITS);
+  else
+    return smoothfn[m + SMOOTHER_LEN];
+}
+
+// [negative][transpose][reverse]
+DECLARE_ALIGNED(16, static uint8_t,
+                wedge_mask_obl[2][2][2][MASK_MASTER_SIZE * MASK_MASTER_SIZE]);
+// [negative][transpose]
+DECLARE_ALIGNED(16, static uint8_t,
+                wedge_mask_str[2][2][MASK_MASTER_SIZE * MASK_MASTER_SIZE]);
+
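+// Fills the oblique and straight master masks (and their complements)
+// once at init; all block-size specific wedge masks are later read in
+// place from these MASK_MASTER_SIZE x MASK_MASTER_SIZE arrays.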
+void vp10_init_wedge_masks(void) {
+  int i, j;
+  const int w = MASK_MASTER_SIZE;
+  const int h = MASK_MASTER_SIZE;
+  const int stride = MASK_MASTER_STRIDE;
+  const int a[4] = {2, 1, 2, 2};
+  for (i = 0; i < h; ++i)
+    for (j = 0; j < w; ++j) {
+      int x = (2 * j + 1 - (a[2] * w) / 2);
+      int y = (2 * i + 1 - (a[3] * h) / 2);
+      int m = (a[0] * x + a[1] * y) / 2;
+      wedge_mask_obl[0][0][0][i * stride + j] =
+          wedge_mask_obl[0][1][0][j * stride + i] =
+          wedge_mask_obl[0][0][1][i * stride + w - 1 - j] =
+          wedge_mask_obl[0][1][1][(w - 1 - j) * stride + i] =
+          get_masked_weight(m);
+      wedge_mask_obl[1][0][0][i * stride + j] =
+          wedge_mask_obl[1][1][0][j * stride + i] =
+          wedge_mask_obl[1][0][1][i * stride + w - 1 - j] =
+          wedge_mask_obl[1][1][1][(w - 1 - j) * stride + i] =
+          (1 << WEDGE_WEIGHT_BITS) - get_masked_weight(m);
+      wedge_mask_str[0][0][i * stride + j] =
+          wedge_mask_str[0][1][j * stride + i] =
+          get_masked_weight(x);
+      wedge_mask_str[1][0][i * stride + j] =
+          wedge_mask_str[1][1][j * stride + i] =
+          (1 << WEDGE_WEIGHT_BITS) - get_masked_weight(x);
+    }
+}
+
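+// Returns a pointer into a master mask such that reading h x w bytes with
+// stride MASK_MASTER_STRIDE yields a wedge mask whose boundary passes
+// through (a[2] * w / 4, a[3] * h / 4); no per-block copy is made.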
+static const uint8_t *get_wedge_mask_inplace(const int *a,
+                                             int h, int w) {
+  const int woff = (a[2] * w) >> 2;
+  const int hoff = (a[3] * h) >> 2;
+  const int oblique = (abs(a[0]) + abs(a[1]) == 3);
+  const uint8_t *master;
+  int transpose, reverse, negative;
+  if (oblique) {
+    negative = (a[0] < 0);
+    transpose = (abs(a[0]) == 1);
+    reverse = (a[0] < 0) ^ (a[1] < 0);
+  } else {
+    negative = (a[0] < 0 || a[1] < 0);
+    transpose = (a[0] == 0);
+    reverse = 0;
+  }
+  master = (oblique ?
+            wedge_mask_obl[negative][transpose][reverse] :
+            wedge_mask_str[negative][transpose]) +
+      MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) +
+      MASK_MASTER_SIZE / 2 - woff;
+  return master;
+}
+
+// Equation of line: f(x, y) = a[0]*(x - a[2]*w/4) + a[1]*(y - a[3]*h/4) = 0
+// The soft mask is obtained by computing f(x, y) and then calling
+// get_masked_weight(f(x, y)).
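+// For example, a = {-1, 2, 2, 2} describes the boundary
+//   -(x - w/2) + 2*(y - h/2) = 0,
+// an oblique edge of slope 1/2 through the block center; negating a[0]
+// and a[1] selects the complementary wedge.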
+static const int wedge_params_sml[1 << WEDGE_BITS_SML][4] = {
+  {-1,  2, 2, 2},
+  { 1, -2, 2, 2},
+  {-2,  1, 2, 2},
+  { 2, -1, 2, 2},
+  {-2, -1, 2, 2},
+  { 2,  1, 2, 2},
+  {-1, -2, 2, 2},
+  { 1,  2, 2, 2},
+};
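+
+// The medium and big codebooks below all begin with the eight small-set
+// wedges, so the first (1 << WEDGE_BITS_SML) indices mean the same thing
+// at every block size.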
+
+static const int wedge_params_med_hgtw[1 << WEDGE_BITS_MED][4] = {
+  {-1,  2, 2, 2},
+  { 1, -2, 2, 2},
+  {-2,  1, 2, 2},
+  { 2, -1, 2, 2},
+  {-2, -1, 2, 2},
+  { 2,  1, 2, 2},
+  {-1, -2, 2, 2},
+  { 1,  2, 2, 2},
+
+  {-1,  2, 2, 1},
+  { 1, -2, 2, 1},
+  {-1,  2, 2, 3},
+  { 1, -2, 2, 3},
+  {-1, -2, 2, 1},
+  { 1,  2, 2, 1},
+  {-1, -2, 2, 3},
+  { 1,  2, 2, 3},
+};
+
+static const int wedge_params_med_hltw[1 << WEDGE_BITS_MED][4] = {
+  {-1,  2, 2, 2},
+  { 1, -2, 2, 2},
+  {-2,  1, 2, 2},
+  { 2, -1, 2, 2},
+  {-2, -1, 2, 2},
+  { 2,  1, 2, 2},
+  {-1, -2, 2, 2},
+  { 1,  2, 2, 2},
+
+  {-2,  1, 1, 2},
+  { 2, -1, 1, 2},
+  {-2,  1, 3, 2},
+  { 2, -1, 3, 2},
+  {-2, -1, 1, 2},
+  { 2,  1, 1, 2},
+  {-2, -1, 3, 2},
+  { 2,  1, 3, 2},
+};
+
+static const int wedge_params_med_heqw[1 << WEDGE_BITS_MED][4] = {
+  {-1,  2, 2, 2},
+  { 1, -2, 2, 2},
+  {-2,  1, 2, 2},
+  { 2, -1, 2, 2},
+  {-2, -1, 2, 2},
+  { 2,  1, 2, 2},
+  {-1, -2, 2, 2},
+  { 1,  2, 2, 2},
+
+  { 0, -2, 0, 1},
+  { 0,  2, 0, 1},
+  { 0, -2, 0, 3},
+  { 0,  2, 0, 3},
+  {-2,  0, 1, 0},
+  { 2,  0, 1, 0},
+  {-2,  0, 3, 0},
+  { 2,  0, 3, 0},
+};
+
+static const int wedge_params_big_hgtw[1 << WEDGE_BITS_BIG][4] = {
+  {-1,  2, 2, 2},
+  { 1, -2, 2, 2},
+  {-2,  1, 2, 2},
+  { 2, -1, 2, 2},
+  {-2, -1, 2, 2},
+  { 2,  1, 2, 2},
+  {-1, -2, 2, 2},
+  { 1,  2, 2, 2},
+
+  {-1,  2, 2, 1},
+  { 1, -2, 2, 1},
+  {-1,  2, 2, 3},
+  { 1, -2, 2, 3},
+  {-1, -2, 2, 1},
+  { 1,  2, 2, 1},
+  {-1, -2, 2, 3},
+  { 1,  2, 2, 3},
+
+  {-2,  1, 1, 2},
+  { 2, -1, 1, 2},
+  {-2,  1, 3, 2},
+  { 2, -1, 3, 2},
+  {-2, -1, 1, 2},
+  { 2,  1, 1, 2},
+  {-2, -1, 3, 2},
+  { 2,  1, 3, 2},
+
+  { 0, -2, 0, 1},
+  { 0,  2, 0, 1},
+  { 0, -2, 0, 2},
+  { 0,  2, 0, 2},
+  { 0, -2, 0, 3},
+  { 0,  2, 0, 3},
+  {-2,  0, 2, 0},
+  { 2,  0, 2, 0},
+};
+
+static const int wedge_params_big_hltw[1 << WEDGE_BITS_BIG][4] = {
+  {-1,  2, 2, 2},
+  { 1, -2, 2, 2},
+  {-2,  1, 2, 2},
+  { 2, -1, 2, 2},
+  {-2, -1, 2, 2},
+  { 2,  1, 2, 2},
+  {-1, -2, 2, 2},
+  { 1,  2, 2, 2},
+
+  {-1,  2, 2, 1},
+  { 1, -2, 2, 1},
+  {-1,  2, 2, 3},
+  { 1, -2, 2, 3},
+  {-1, -2, 2, 1},
+  { 1,  2, 2, 1},
+  {-1, -2, 2, 3},
+  { 1,  2, 2, 3},
+
+  {-2,  1, 1, 2},
+  { 2, -1, 1, 2},
+  {-2,  1, 3, 2},
+  { 2, -1, 3, 2},
+  {-2, -1, 1, 2},
+  { 2,  1, 1, 2},
+  {-2, -1, 3, 2},
+  { 2,  1, 3, 2},
+
+  { 0, -2, 0, 2},
+  { 0,  2, 0, 2},
+  {-2,  0, 1, 0},
+  { 2,  0, 1, 0},
+  {-2,  0, 2, 0},
+  { 2,  0, 2, 0},
+  {-2,  0, 3, 0},
+  { 2,  0, 3, 0},
+};
+
+static const int wedge_params_big_heqw[1 << WEDGE_BITS_BIG][4] = {
+  {-1,  2, 2, 2},
+  { 1, -2, 2, 2},
+  {-2,  1, 2, 2},
+  { 2, -1, 2, 2},
+  {-2, -1, 2, 2},
+  { 2,  1, 2, 2},
+  {-1, -2, 2, 2},
+  { 1,  2, 2, 2},
+
+  {-1,  2, 2, 1},
+  { 1, -2, 2, 1},
+  {-1,  2, 2, 3},
+  { 1, -2, 2, 3},
+  {-1, -2, 2, 1},
+  { 1,  2, 2, 1},
+  {-1, -2, 2, 3},
+  { 1,  2, 2, 3},
+
+  {-2,  1, 1, 2},
+  { 2, -1, 1, 2},
+  {-2,  1, 3, 2},
+  { 2, -1, 3, 2},
+  {-2, -1, 1, 2},
+  { 2,  1, 1, 2},
+  {-2, -1, 3, 2},
+  { 2,  1, 3, 2},
+
+  { 0, -2, 0, 1},
+  { 0,  2, 0, 1},
+  { 0, -2, 0, 3},
+  { 0,  2, 0, 3},
+  {-2,  0, 1, 0},
+  { 2,  0, 1, 0},
+  {-2,  0, 3, 0},
+  { 2,  0, 3, 0},
+};
+
+static const int *get_wedge_params(int wedge_index,
+                                   BLOCK_SIZE sb_type,
+                                   int h, int w) {
+  const int *a = NULL;
+  const int wedge_bits = get_wedge_bits(sb_type);
+
+  if (wedge_index == WEDGE_NONE)
+    return NULL;
+
+  if (wedge_bits == WEDGE_BITS_SML) {
+    a = wedge_params_sml[wedge_index];
+  } else if (wedge_bits == WEDGE_BITS_MED) {
+    if (h > w)
+      a = wedge_params_med_hgtw[wedge_index];
+    else if (h < w)
+      a = wedge_params_med_hltw[wedge_index];
+    else
+      a = wedge_params_med_heqw[wedge_index];
+  } else if (wedge_bits == WEDGE_BITS_BIG) {
+    if (h > w)
+      a = wedge_params_big_hgtw[wedge_index];
+    else if (h < w)
+      a = wedge_params_big_hltw[wedge_index];
+    else
+      a = wedge_params_big_heqw[wedge_index];
+  } else {
+    assert(0);
+  }
+  return a;
+}
+
+const uint8_t *vp10_get_soft_mask(int wedge_index,
+                                  BLOCK_SIZE sb_type,
+                                  int h, int w) {
+  const int *a = get_wedge_params(wedge_index, sb_type, h, w);
+  if (a) {
+    return get_wedge_mask_inplace(a, h, w);
+  } else {
+    return NULL;
+  }
+}
+
+#if CONFIG_SUPERTX
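+// Supertx variant: the prediction block may not coincide with the block
+// that owns the wedge, so the returned mask pointer is shifted by the
+// (wedge_offset_x, wedge_offset_y) pixel offset between the two.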
+const uint8_t *get_soft_mask_extend(int wedge_index, int plane,
+                                    BLOCK_SIZE sb_type,
+                                    int wedge_offset_y,
+                                    int wedge_offset_x) {
+  int subh = (plane ? 2 : 4) << b_height_log2_lookup[sb_type];
+  int subw = (plane ? 2 : 4) << b_width_log2_lookup[sb_type];
+  const int *a = get_wedge_params(wedge_index, sb_type, subh, subw);
+  if (a) {
+    const uint8_t *mask = get_wedge_mask_inplace(a, subh, subw);
+    mask -= (wedge_offset_x + wedge_offset_y * MASK_MASTER_STRIDE);
+    return mask;
+  } else {
+    return NULL;
+  }
+}
+
+static void build_masked_compound_extend(uint8_t *dst, int dst_stride,
+                                         uint8_t *dst2, int dst2_stride,
+                                         int plane,
+                                         int wedge_index, BLOCK_SIZE sb_type,
+                                         int wedge_offset_y, int wedge_offset_x,
+                                         int h, int w) {
+  int i, j;
+  const uint8_t *mask = get_soft_mask_extend(
+      wedge_index, plane, sb_type, wedge_offset_y, wedge_offset_x);
+  for (i = 0; i < h; ++i)
+    for (j = 0; j < w; ++j) {
+      int m = mask[i * MASK_MASTER_STRIDE + j];
+      dst[i * dst_stride + j] = (dst[i * dst_stride + j] * m +
+                                 dst2[i * dst2_stride + j] *
+                                 ((1 << WEDGE_WEIGHT_BITS) - m) +
+                                 (1 << (WEDGE_WEIGHT_BITS - 1))) >>
+                                 WEDGE_WEIGHT_BITS;
+    }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static void build_masked_compound_extend_highbd(
+    uint8_t *dst_8, int dst_stride,
+    uint8_t *dst2_8, int dst2_stride, int plane,
+    int wedge_index, BLOCK_SIZE sb_type,
+    int wedge_offset_y, int wedge_offset_x,
+    int h, int w) {
+  int i, j;
+  const uint8_t *mask = get_soft_mask_extend(
+      wedge_index, plane, sb_type, wedge_offset_y, wedge_offset_x);
+  uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8);
+  uint16_t *dst2 = CONVERT_TO_SHORTPTR(dst2_8);
+  for (i = 0; i < h; ++i)
+    for (j = 0; j < w; ++j) {
+      int m = mask[i * MASK_MASTER_STRIDE + j];
+      dst[i * dst_stride + j] = (dst[i * dst_stride + j] * m +
+                                 dst2[i * dst2_stride + j] *
+                                 ((1 << WEDGE_WEIGHT_BITS) - m) +
+                                 (1 << (WEDGE_WEIGHT_BITS - 1))) >>
+                                 WEDGE_WEIGHT_BITS;
+    }
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+#else   // CONFIG_SUPERTX
+
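+// Blends the two inter predictors under the soft wedge mask m in [0, 64]:
+//   dst = (dst * m + dst2 * (64 - m) + 32) >> WEDGE_WEIGHT_BITS,
+// i.e. a rounded per-pixel convex combination in 6-bit fixed point.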
+static void build_masked_compound(uint8_t *dst, int dst_stride,
+                                  uint8_t *dst2, int dst2_stride,
+                                  int wedge_index, BLOCK_SIZE sb_type,
+                                  int h, int w) {
+  int i, j;
+  const uint8_t *mask = vp10_get_soft_mask(wedge_index, sb_type, h, w);
+  for (i = 0; i < h; ++i)
+    for (j = 0; j < w; ++j) {
+      int m = mask[i * MASK_MASTER_STRIDE + j];
+      dst[i * dst_stride + j] = (dst[i * dst_stride + j] * m +
+                                 dst2[i * dst2_stride + j] *
+                                 ((1 << WEDGE_WEIGHT_BITS) - m) +
+                                 (1 << (WEDGE_WEIGHT_BITS - 1))) >>
+                                 WEDGE_WEIGHT_BITS;
+    }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static void build_masked_compound_highbd(uint8_t *dst_8, int dst_stride,
+                                         uint8_t *dst2_8, int dst2_stride,
+                                         int wedge_index, BLOCK_SIZE sb_type,
+                                         int h, int w) {
+  int i, j;
+  const uint8_t *mask = vp10_get_soft_mask(wedge_index, sb_type, h, w);
+  uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8);
+  uint16_t *dst2 = CONVERT_TO_SHORTPTR(dst2_8);
+  for (i = 0; i < h; ++i)
+    for (j = 0; j < w; ++j) {
+      int m = mask[i * MASK_MASTER_STRIDE + j];
+      dst[i * dst_stride + j] = (dst[i * dst_stride + j] * m +
+                                 dst2[i * dst2_stride + j] *
+                                 ((1 << WEDGE_WEIGHT_BITS) - m) +
+                                 (1 << (WEDGE_WEIGHT_BITS - 1))) >>
+                                 WEDGE_WEIGHT_BITS;
+    }
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif  // CONFIG_SUPERTX
+
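+// Builds the second (ref == 1) inter predictor into a CU_SIZE-strided
+// temporary buffer, then blends it into dst, which already holds the
+// first predictor, using the block's inter-inter wedge mask.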
+void vp10_make_masked_inter_predictor(
+    const uint8_t *pre,
+    int pre_stride,
+    uint8_t *dst,
+    int dst_stride,
+    const int subpel_x,
+    const int subpel_y,
+    const struct scale_factors *sf,
+    int w, int h,
+    const INTERP_FILTER interp_filter,
+    int xs, int ys,
+#if CONFIG_SUPERTX
+    int plane, int wedge_offset_x, int wedge_offset_y,
+#endif  // CONFIG_SUPERTX
+    const MACROBLOCKD *xd) {
+  const MODE_INFO *mi = xd->mi[0];
+#if CONFIG_VP9_HIGHBITDEPTH
+  uint8_t tmp_dst_[2 * CU_SIZE * CU_SIZE];
+  uint8_t *tmp_dst =
+      (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ?
+      CONVERT_TO_BYTEPTR(tmp_dst_) : tmp_dst_;
+  vp10_make_inter_predictor(pre, pre_stride, tmp_dst, CU_SIZE,
+                            subpel_x, subpel_y, sf, w, h, 0,
+                            interp_filter, xs, ys, xd);
+#if CONFIG_SUPERTX
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+    build_masked_compound_extend_highbd(
+        dst, dst_stride, tmp_dst, CU_SIZE, plane,
+        mi->mbmi.interinter_wedge_index,
+        mi->mbmi.sb_type,
+        wedge_offset_y, wedge_offset_x, h, w);
+  else
+    build_masked_compound_extend(
+        dst, dst_stride, tmp_dst, CU_SIZE, plane,
+        mi->mbmi.interinter_wedge_index,
+        mi->mbmi.sb_type,
+        wedge_offset_y, wedge_offset_x, h, w);
+#else
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+    build_masked_compound_highbd(
+        dst, dst_stride, tmp_dst, CU_SIZE,
+        mi->mbmi.interinter_wedge_index,
+        mi->mbmi.sb_type, h, w);
+  else
+    build_masked_compound(
+        dst, dst_stride, tmp_dst, CU_SIZE,
+        mi->mbmi.interinter_wedge_index,
+        mi->mbmi.sb_type, h, w);
+#endif  // CONFIG_SUPERTX
+#else   // CONFIG_VP9_HIGHBITDEPTH
+  uint8_t tmp_dst[CU_SIZE * CU_SIZE];
+  vp10_make_inter_predictor(pre, pre_stride, tmp_dst, CU_SIZE,
+                            subpel_x, subpel_y, sf, w, h, 0,
+                            interp_filter, xs, ys, xd);
+#if CONFIG_SUPERTX
+  build_masked_compound_extend(
+      dst, dst_stride, tmp_dst, CU_SIZE, plane,
+      mi->mbmi.interinter_wedge_index,
+      mi->mbmi.sb_type,
+      wedge_offset_y, wedge_offset_x, h, w);
+#else
+  build_masked_compound(
+      dst, dst_stride, tmp_dst, CU_SIZE,
+      mi->mbmi.interinter_wedge_index,
+      mi->mbmi.sb_type, h, w);
+#endif  // CONFIG_SUPERTX
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+}
+#endif  // CONFIG_EXT_INTER
 
 #if CONFIG_VP9_HIGHBITDEPTH
 void vp10_highbd_build_inter_predictor(const uint8_t *src, int src_stride,
@@ -44,7 +525,7 @@ void vp10_highbd_build_inter_predictor(const uint8_t *src, int src_stride,
 
   src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS);
 
-  high_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
+  highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
                        sf, w, h, ref, interp_filter, sf->x_step_q4,
                        sf->y_step_q4, bd);
 }
@@ -78,6 +559,9 @@ void build_inter_predictors(MACROBLOCKD *xd, int plane,
                             int block,
                             int bw, int bh,
                             int x, int y, int w, int h,
+#if CONFIG_SUPERTX && CONFIG_EXT_INTER
+                            int wedge_offset_x, int wedge_offset_y,
+#endif  // CONFIG_SUPERTX && CONFIG_EXT_INTER
                             int mi_x, int mi_y) {
   struct macroblockd_plane *const pd = &xd->plane[plane];
 #if CONFIG_OBMC
@@ -129,19 +613,22 @@ void build_inter_predictors(MACROBLOCKD *xd, int plane,
     pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride
            + (scaled_mv.col >> SUBPEL_BITS);
 
-#if CONFIG_VP9_HIGHBITDEPTH
-    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-      high_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
-                           subpel_x, subpel_y, sf, w, h, ref,
-                           interp_filter, xs, ys, xd->bd);
-    } else {
-      inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
-                      subpel_x, subpel_y, sf, w, h, ref, interp_filter, xs, ys);
-    }
-#else
-    inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
-                    subpel_x, subpel_y, sf, w, h, ref, interp_filter, xs, ys);
-#endif  // CONFIG_VP9_HIGHBITDEPTH
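+    // Only the second predictor of a compound is masked: ref == 0 is
+    // built normally, and the masked ref == 1 pass blends over it.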
+#if CONFIG_EXT_INTER
+    if (ref && get_wedge_bits(mi->mbmi.sb_type) &&
+        mi->mbmi.use_wedge_interinter)
+      vp10_make_masked_inter_predictor(
+          pre, pre_buf->stride, dst, dst_buf->stride,
+          subpel_x, subpel_y, sf, w, h,
+          interp_filter, xs, ys,
+#if CONFIG_SUPERTX
+          plane, wedge_offset_x, wedge_offset_y,
+#endif  // CONFIG_SUPERTX
+          xd);
+    else
+#endif  // CONFIG_EXT_INTER
+      vp10_make_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
+                                subpel_x, subpel_y, sf, w, h, ref,
+                                interp_filter, xs, ys, xd);
   }
 }
 
@@ -222,14 +709,22 @@ static void build_inter_predictors_for_planes(MACROBLOCKD *xd, BLOCK_SIZE bsize,
                                   0, 0,
 #endif  // CONFIG_OBMC
                                   y * 2 + x, bw, bh,
-                                  4 * x, 4 * y, pw, ph, mi_x, mi_y);
+                                  4 * x, 4 * y, pw, ph,
+#if CONFIG_SUPERTX && CONFIG_EXT_INTER
+                                  0, 0,
+#endif  // CONFIG_SUPERTX && CONFIG_EXT_INTER
+                                  mi_x, mi_y);
     } else {
       build_inter_predictors(xd, plane,
 #if CONFIG_OBMC
                              0, 0,
 #endif  // CONFIG_OBMC
                              0, bw, bh,
-                             0, 0, bw, bh, mi_x, mi_y);
+                             0, 0, bw, bh,
+#if CONFIG_SUPERTX && CONFIG_EXT_INTER
+                             0, 0,
+#endif  // CONFIG_SUPERTX && CONFIG_EXT_INTER
+                             mi_x, mi_y);
     }
   }
 }
@@ -524,9 +1019,13 @@ void vp10_build_masked_inter_predictor_complex(
   (void) xd;
 }
 
-void vp10_build_inter_predictors_sb_sub8x8(MACROBLOCKD *xd,
-                                           int mi_row, int mi_col,
-                                           BLOCK_SIZE bsize, int block) {
+void vp10_build_inter_predictors_sb_sub8x8_extend(
+    MACROBLOCKD *xd,
+#if CONFIG_EXT_INTER
+    int mi_row_ori, int mi_col_ori,
+#endif  // CONFIG_EXT_INTER
+    int mi_row, int mi_col,
+    BLOCK_SIZE bsize, int block) {
   // Prediction function used in supertx:
   // Use the mv at current block (which is less than 8x8)
   // to get prediction of a block located at (mi_row, mi_col) at size of bsize
@@ -535,6 +1034,10 @@ void vp10_build_inter_predictors_sb_sub8x8(MACROBLOCKD *xd,
   int plane;
   const int mi_x = mi_col * MI_SIZE;
   const int mi_y = mi_row * MI_SIZE;
+#if CONFIG_EXT_INTER
+  const int wedge_offset_x = (mi_col_ori - mi_col) * MI_SIZE;
+  const int wedge_offset_y = (mi_row_ori - mi_row) * MI_SIZE;
+#endif  // CONFIG_EXT_INTER
 
   // For sub8x8 uv:
   // Skip uv prediction in supertx except the first block (block = 0)
@@ -554,6 +1057,10 @@ void vp10_build_inter_predictors_sb_sub8x8(MACROBLOCKD *xd,
 #endif  // CONFIG_OBMC
                            block, bw, bh,
                            0, 0, bw, bh,
+#if CONFIG_EXT_INTER
+                           wedge_offset_x >> (xd->plane[plane].subsampling_x),
+                           wedge_offset_y >> (xd->plane[plane].subsampling_y),
+#endif  // CONFIG_EXT_INTER
                            mi_x, mi_y);
   }
 #if CONFIG_EXT_INTER
@@ -568,6 +1075,59 @@ void vp10_build_inter_predictors_sb_sub8x8(MACROBLOCKD *xd,
                                      bsize);
 #endif  // CONFIG_EXT_INTER
 }
+
+void vp10_build_inter_predictors_sb_extend(MACROBLOCKD *xd,
+#if CONFIG_EXT_INTER
+                                           int mi_row_ori, int mi_col_ori,
+#endif  // CONFIG_EXT_INTER
+                                           int mi_row, int mi_col,
+                                           BLOCK_SIZE bsize) {
+  int plane;
+  const int mi_x = mi_col * MI_SIZE;
+  const int mi_y = mi_row * MI_SIZE;
+#if CONFIG_EXT_INTER
+  const int wedge_offset_x = (mi_col_ori - mi_col) * MI_SIZE;
+  const int wedge_offset_y = (mi_row_ori - mi_row) * MI_SIZE;
+#endif  // CONFIG_EXT_INTER
+  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+    const BLOCK_SIZE plane_bsize = get_plane_block_size(
+        bsize, &xd->plane[plane]);
+    const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
+    const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
+    const int bw = 4 * num_4x4_w;
+    const int bh = 4 * num_4x4_h;
+
+    if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) {
+      int i = 0, x, y;
+      assert(bsize == BLOCK_8X8);
+      for (y = 0; y < num_4x4_h; ++y)
+        for (x = 0; x < num_4x4_w; ++x)
+           build_inter_predictors(
+               xd, plane,
+#if CONFIG_OBMC
+               0, 0,
+#endif  // CONFIG_OBMC
+               i++, bw, bh, 4 * x, 4 * y, 4, 4,
+#if CONFIG_EXT_INTER
+               wedge_offset_x >> (xd->plane[plane].subsampling_x),
+               wedge_offset_y >> (xd->plane[plane].subsampling_y),
+#endif  // CONFIG_EXT_INTER
+               mi_x, mi_y);
+    } else {
+      build_inter_predictors(
+          xd, plane,
+#if CONFIG_OBMC
+          0, 0,
+#endif  // CONFIG_OBMC
+          0, bw, bh, 0, 0, bw, bh,
+#if CONFIG_EXT_INTER
+          wedge_offset_x >> (xd->plane[plane].subsampling_x),
+          wedge_offset_y >> (xd->plane[plane].subsampling_y),
+#endif  // CONFIG_EXT_INTER
+          mi_x, mi_y);
+    }
+  }
+}
 #endif  // CONFIG_SUPERTX
 
 #if CONFIG_OBMC
@@ -695,7 +1255,7 @@ void vp10_build_obmc_inter_prediction(VP10_COMMON *cm,
     mi_step = VPXMIN(xd->n8_w,
                      num_8x8_blocks_wide_lookup[above_mbmi->sb_type]);
 
-    if (!is_inter_block(above_mbmi))
+    if (!is_neighbor_overlappable(above_mbmi))
       continue;
 
     overlap = (above_mbmi->skip) ?
@@ -761,7 +1321,7 @@ void vp10_build_obmc_inter_prediction(VP10_COMMON *cm,
     mi_step = VPXMIN(xd->n8_h,
                      num_8x8_blocks_high_lookup[left_mbmi->sb_type]);
 
-    if (!is_inter_block(left_mbmi))
+    if (!is_neighbor_overlappable(left_mbmi))
       continue;
 
     overlap = (left_mbmi->skip) ?
@@ -816,6 +1376,9 @@ void vp10_build_obmc_inter_prediction(VP10_COMMON *cm,
 
 #if CONFIG_EXT_INTER
 static void combine_interintra(PREDICTION_MODE mode,
+                               int use_wedge_interintra,
+                               int wedge_index,
+                               BLOCK_SIZE bsize,
                                BLOCK_SIZE plane_bsize,
                                uint8_t *comppred,
                                int compstride,
@@ -846,12 +1409,26 @@ static void combine_interintra(PREDICTION_MODE mode,
                     size == 8  ? 8 : 16);
   int i, j;
 
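+  // With a wedge, intra and inter are blended under the same soft mask as
+  // inter-inter compounds (the mask weight applies to the intra predictor
+  // here) instead of the mode-dependent 1-D fade below.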
+  if (use_wedge_interintra && get_wedge_bits(bsize)) {
+    const uint8_t *mask = vp10_get_soft_mask(wedge_index, bsize, bh, bw);
+    for (i = 0; i < bh; ++i) {
+      for (j = 0; j < bw; ++j) {
+        int m = mask[i * MASK_MASTER_STRIDE + j];
+        comppred[i * compstride + j] =
+            (intrapred[i * intrastride + j] * m +
+             interpred[i * interstride + j] * ((1 << WEDGE_WEIGHT_BITS) - m) +
+             (1 << (WEDGE_WEIGHT_BITS - 1))) >> WEDGE_WEIGHT_BITS;
+      }
+    }
+    return;
+  }
+
   switch (mode) {
     case V_PRED:
       for (i = 0; i < bh; ++i) {
         for (j = 0; j < bw; ++j) {
           int scale = weights1d[i * size_scale];
-            comppred[i * compstride + j] =
+          comppred[i * compstride + j] =
               ((scale_max - scale) * interpred[i * interstride + j] +
                scale * intrapred[i * intrastride + j] + scale_round)
                >> scale_bits;
@@ -939,6 +1516,9 @@ static void combine_interintra(PREDICTION_MODE mode,
 
 #if CONFIG_VP9_HIGHBITDEPTH
 static void combine_interintra_highbd(PREDICTION_MODE mode,
+                                      int use_wedge_interintra,
+                                      int wedge_index,
+                                      BLOCK_SIZE bsize,
                                       BLOCK_SIZE plane_bsize,
                                       uint8_t *comppred8,
                                       int compstride,
@@ -973,12 +1553,26 @@ static void combine_interintra_highbd(PREDICTION_MODE mode,
   uint16_t *intrapred = CONVERT_TO_SHORTPTR(intrapred8);
   (void) bd;
 
+  if (use_wedge_interintra && get_wedge_bits(bsize)) {
+    const uint8_t *mask = vp10_get_soft_mask(wedge_index, bsize, bh, bw);
+    for (i = 0; i < bh; ++i) {
+      for (j = 0; j < bw; ++j) {
+        int m = mask[i * MASK_MASTER_STRIDE + j];
+        comppred[i * compstride + j] =
+            (intrapred[i * intrastride + j] * m +
+             interpred[i * interstride + j] * ((1 << WEDGE_WEIGHT_BITS) - m) +
+             (1 << (WEDGE_WEIGHT_BITS - 1))) >> WEDGE_WEIGHT_BITS;
+      }
+    }
+    return;
+  }
+
   switch (mode) {
     case V_PRED:
       for (i = 0; i < bh; ++i) {
         for (j = 0; j < bw; ++j) {
           int scale = weights1d[i * size_scale];
-            comppred[i * compstride + j] =
+          comppred[i * compstride + j] =
               ((scale_max - scale) * interpred[i * interstride + j] +
                scale * intrapred[i * intrastride + j] + scale_round)
               >> scale_bits;
@@ -1119,6 +1713,9 @@ void vp10_build_interintra_predictors_sby(MACROBLOCKD *xd,
         CONVERT_TO_BYTEPTR(intrapredictor), bw,
         xd->mi[0]->mbmi.interintra_mode, bsize, 0);
     combine_interintra_highbd(xd->mi[0]->mbmi.interintra_mode,
+                              xd->mi[0]->mbmi.use_wedge_interintra,
+                              xd->mi[0]->mbmi.interintra_wedge_index,
+                              bsize,
                               bsize,
                               xd->plane[0].dst.buf, xd->plane[0].dst.stride,
                               ypred, ystride,
@@ -1133,6 +1730,9 @@ void vp10_build_interintra_predictors_sby(MACROBLOCKD *xd,
         intrapredictor, bw,
         xd->mi[0]->mbmi.interintra_mode, bsize, 0);
     combine_interintra(xd->mi[0]->mbmi.interintra_mode,
+                       xd->mi[0]->mbmi.use_wedge_interintra,
+                       xd->mi[0]->mbmi.interintra_wedge_index,
+                       bsize,
                        bsize,
                        xd->plane[0].dst.buf, xd->plane[0].dst.stride,
                        ypred, ystride, intrapredictor, bw);
@@ -1155,6 +1755,9 @@ void vp10_build_interintra_predictors_sbc(MACROBLOCKD *xd,
         CONVERT_TO_BYTEPTR(uintrapredictor), bw,
         xd->mi[0]->mbmi.interintra_uv_mode, bsize, plane);
     combine_interintra_highbd(xd->mi[0]->mbmi.interintra_uv_mode,
+                              xd->mi[0]->mbmi.use_wedge_interintra,
+                              xd->mi[0]->mbmi.interintra_uv_wedge_index,
+                              bsize,
                               uvbsize,
                               xd->plane[plane].dst.buf,
                               xd->plane[plane].dst.stride,
@@ -1168,8 +1771,11 @@ void vp10_build_interintra_predictors_sbc(MACROBLOCKD *xd,
     build_intra_predictors_for_interintra(
         xd, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride,
         uintrapredictor, bw,
-        xd->mi[0]->mbmi.interintra_uv_mode, bsize, 1);
+        xd->mi[0]->mbmi.interintra_uv_mode, bsize, plane);
     combine_interintra(xd->mi[0]->mbmi.interintra_uv_mode,
+                       xd->mi[0]->mbmi.use_wedge_interintra,
+                       xd->mi[0]->mbmi.interintra_uv_wedge_index,
+                       bsize,
                        uvbsize,
                        xd->plane[plane].dst.buf,
                        xd->plane[plane].dst.stride,
@@ -1196,4 +1802,271 @@ void vp10_build_interintra_predictors(MACROBLOCKD *xd,
   vp10_build_interintra_predictors_sbuv(xd, upred, vpred,
                                         ustride, vstride, bsize);
 }
+
+// Builds the inter-predictor for the single ref case
+// for use in the encoder to search the wedges efficiently.
+static void build_inter_predictors_single_buf(MACROBLOCKD *xd, int plane,
+                                              int block,
+                                              int bw, int bh,
+                                              int x, int y, int w, int h,
+                                              int mi_x, int mi_y,
+                                              int ref,
+                                              uint8_t *const ext_dst,
+                                              int ext_dst_stride) {
+  struct macroblockd_plane *const pd = &xd->plane[plane];
+  const MODE_INFO *mi = xd->mi[0];
+  const INTERP_FILTER interp_filter = mi->mbmi.interp_filter;
+
+  const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
+  struct buf_2d *const pre_buf = &pd->pre[ref];
+#if CONFIG_VP9_HIGHBITDEPTH
+  uint8_t *const dst =
+      (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH ?
+      CONVERT_TO_BYTEPTR(ext_dst) : ext_dst) + ext_dst_stride * y + x;
+#else
+  uint8_t *const dst = ext_dst + ext_dst_stride * y + x;
+#endif
+  const MV mv = mi->mbmi.sb_type < BLOCK_8X8
+      ? average_split_mvs(pd, mi, ref, block)
+      : mi->mbmi.mv[ref].as_mv;
+
+  // TODO(jkoleszar): This clamping is done in the incorrect place for the
+  // scaling case. It needs to be done on the scaled MV, not the pre-scaling
+  // MV. Note however that it performs the subsampling aware scaling so
+  // that the result is always q4.
+  // The mv precision here is MV_PRECISION_Q4.
+  const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, &mv, bw, bh,
+                                             pd->subsampling_x,
+                                             pd->subsampling_y);
+
+  uint8_t *pre;
+  MV32 scaled_mv;
+  int xs, ys, subpel_x, subpel_y;
+  const int is_scaled = vp10_is_scaled(sf);
+
+  if (is_scaled) {
+    pre = pre_buf->buf + scaled_buffer_offset(x, y, pre_buf->stride, sf);
+    scaled_mv = vp10_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf);
+    xs = sf->x_step_q4;
+    ys = sf->y_step_q4;
+  } else {
+    pre = pre_buf->buf + (y * pre_buf->stride + x);
+    scaled_mv.row = mv_q4.row;
+    scaled_mv.col = mv_q4.col;
+    xs = ys = 16;
+  }
+
+  subpel_x = scaled_mv.col & SUBPEL_MASK;
+  subpel_y = scaled_mv.row & SUBPEL_MASK;
+  pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride
+      + (scaled_mv.col >> SUBPEL_BITS);
+
+  vp10_make_inter_predictor(pre, pre_buf->stride, dst, ext_dst_stride,
+                            subpel_x, subpel_y, sf, w, h, 0,
+                            interp_filter, xs, ys, xd);
+}
+
+void vp10_build_inter_predictors_for_planes_single_buf(
+    MACROBLOCKD *xd, BLOCK_SIZE bsize,
+    int mi_row, int mi_col, int ref,
+    uint8_t *ext_dst[3], int ext_dst_stride[3]) {
+  const int plane_from = 0;
+  const int plane_to = 2;
+  int plane;
+  const int mi_x = mi_col * MI_SIZE;
+  const int mi_y = mi_row * MI_SIZE;
+  for (plane = plane_from; plane <= plane_to; ++plane) {
+    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize,
+                                                        &xd->plane[plane]);
+    const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
+    const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
+    const int bw = 4 * num_4x4_w;
+    const int bh = 4 * num_4x4_h;
+
+    if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) {
+      int i = 0, x, y;
+      assert(bsize == BLOCK_8X8);
+      for (y = 0; y < num_4x4_h; ++y)
+        for (x = 0; x < num_4x4_w; ++x)
+          build_inter_predictors_single_buf(xd, plane,
+                                            i++, bw, bh,
+                                            4 * x, 4 * y, 4, 4,
+                                            mi_x, mi_y, ref,
+                                            ext_dst[plane],
+                                            ext_dst_stride[plane]);
+    } else {
+      build_inter_predictors_single_buf(xd, plane,
+                                        0, bw, bh,
+                                        0, 0, bw, bh,
+                                        mi_x, mi_y, ref,
+                                        ext_dst[plane],
+                                        ext_dst_stride[plane]);
+    }
+  }
+}
+
+static void build_wedge_inter_predictor_from_buf(MACROBLOCKD *xd, int plane,
+                                                 int block, int bw, int bh,
+                                                 int x, int y, int w, int h,
+#if CONFIG_SUPERTX
+                                                 int wedge_offset_x,
+                                                 int wedge_offset_y,
+#endif  // CONFIG_SUPERTX
+                                                 int mi_x, int mi_y,
+                                                 uint8_t *ext_dst0,
+                                                 int ext_dst_stride0,
+                                                 uint8_t *ext_dst1,
+                                                 int ext_dst_stride1) {
+  struct macroblockd_plane *const pd = &xd->plane[plane];
+  const MODE_INFO *mi = xd->mi[0];
+  const int is_compound = has_second_ref(&mi->mbmi);
+  int ref;
+  (void) block;
+  (void) bw;
+  (void) bh;
+  (void) mi_x;
+  (void) mi_y;
+
+  for (ref = 0; ref < 1 + is_compound; ++ref) {
+    struct buf_2d *const dst_buf = &pd->dst;
+    uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
+
+    if (ref && get_wedge_bits(mi->mbmi.sb_type)
+        && mi->mbmi.use_wedge_interinter) {
+#if CONFIG_VP9_HIGHBITDEPTH
+      uint8_t tmp_dst_[2 * CU_SIZE * CU_SIZE];
+      uint8_t *tmp_dst =
+          (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ?
+          CONVERT_TO_BYTEPTR(tmp_dst_) : tmp_dst_;
+#else
+      uint8_t tmp_dst[CU_SIZE * CU_SIZE];
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+        int k;
+        for (k = 0; k < h; ++k)
+          memcpy(tmp_dst_ + 2 * CU_SIZE * k,
+                 ext_dst1 + ext_dst_stride1 * 2 * k, w * 2);
+      } else {
+        int k;
+        for (k = 0; k < h; ++k)
+          memcpy(tmp_dst_ + CU_SIZE * k,
+                 ext_dst1 + ext_dst_stride1 * k, w);
+      }
+#else
+      {
+        int k;
+        for (k = 0; k < h; ++k)
+          memcpy(tmp_dst + CU_SIZE * k,
+                 ext_dst1 + ext_dst_stride1 * k, w);
+      }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+#if CONFIG_SUPERTX
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+        build_masked_compound_extend_highbd(
+            dst, dst_buf->stride, tmp_dst, CU_SIZE, plane,
+            mi->mbmi.interinter_wedge_index,
+            mi->mbmi.sb_type,
+            wedge_offset_y, wedge_offset_x, h, w);
+      } else {
+        build_masked_compound_extend(
+            dst, dst_buf->stride, tmp_dst, CU_SIZE, plane,
+            mi->mbmi.interinter_wedge_index,
+            mi->mbmi.sb_type,
+            wedge_offset_y, wedge_offset_x, h, w);
+      }
+#else
+      build_masked_compound_extend(dst, dst_buf->stride, tmp_dst,
+                                   CU_SIZE, plane,
+                                   mi->mbmi.interinter_wedge_index,
+                                   mi->mbmi.sb_type,
+                                   wedge_offset_y, wedge_offset_x, h, w);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#else   // CONFIG_SUPERTX
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+        build_masked_compound_highbd(dst, dst_buf->stride, tmp_dst,
+                                     CU_SIZE,
+                                     mi->mbmi.interinter_wedge_index,
+                                     mi->mbmi.sb_type, h, w);
+      else
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+        build_masked_compound(dst, dst_buf->stride, tmp_dst, CU_SIZE,
+                              mi->mbmi.interinter_wedge_index,
+                              mi->mbmi.sb_type, h, w);
+#endif  // CONFIG_SUPERTX
+    } else {
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+        int k;
+        for (k = 0; k < h; ++k)
+          memcpy(CONVERT_TO_SHORTPTR(dst + dst_buf->stride * k),
+                 ext_dst0 + ext_dst_stride0 * 2 * k, w * 2);
+      } else {
+        int k;
+        for (k = 0; k < h; ++k)
+          memcpy(dst + dst_buf->stride * k,
+                 ext_dst0 + ext_dst_stride0 * k, w);
+      }
+#else
+      {
+        int k;
+        for (k = 0; k < h; ++k)
+          memcpy(dst + dst_buf->stride * k,
+                 ext_dst0 + ext_dst_stride0 * k, w);
+      }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+    }
+  }
+}
+
+void vp10_build_wedge_inter_predictor_from_buf(
+    MACROBLOCKD *xd, BLOCK_SIZE bsize,
+    int mi_row, int mi_col,
+    uint8_t *ext_dst0[3], int ext_dst_stride0[3],
+    uint8_t *ext_dst1[3], int ext_dst_stride1[3]) {
+  const int plane_from = 0;
+  const int plane_to = 2;
+  int plane;
+  const int mi_x = mi_col * MI_SIZE;
+  const int mi_y = mi_row * MI_SIZE;
+  for (plane = plane_from; plane <= plane_to; ++plane) {
+    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize,
+                                                        &xd->plane[plane]);
+    const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
+    const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
+    const int bw = 4 * num_4x4_w;
+    const int bh = 4 * num_4x4_h;
+
+    if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) {
+      int i = 0, x, y;
+      assert(bsize == BLOCK_8X8);
+      for (y = 0; y < num_4x4_h; ++y)
+        for (x = 0; x < num_4x4_w; ++x)
+          build_wedge_inter_predictor_from_buf(xd, plane, i++, bw, bh,
+                                               4 * x, 4 * y, 4, 4,
+#if CONFIG_SUPERTX
+                                               0, 0,
+#endif
+                                               mi_x, mi_y,
+                                               ext_dst0[plane],
+                                               ext_dst_stride0[plane],
+                                               ext_dst1[plane],
+                                               ext_dst_stride1[plane]);
+    } else {
+      build_wedge_inter_predictor_from_buf(xd, plane, 0, bw, bh,
+                                           0, 0, bw, bh,
+#if CONFIG_SUPERTX
+                                           0, 0,
+#endif
+                                           mi_x, mi_y,
+                                           ext_dst0[plane],
+                                           ext_dst_stride0[plane],
+                                           ext_dst1[plane],
+                                           ext_dst_stride1[plane]);
+    }
+  }
+}
 #endif  // CONFIG_EXT_INTER
diff --git a/vp10/common/reconinter.h b/vp10/common/reconinter.h
index 0e7fa4c48cad1e468131d0e60a0e9229a7a547b9..c6e89df8bac571b2c9f43b8100d6aa0a49b22c47 100644
--- a/vp10/common/reconinter.h
+++ b/vp10/common/reconinter.h
@@ -62,14 +62,14 @@ static INLINE void inter_predictor(const uint8_t *src, int src_stride,
 }
 
 #if CONFIG_VP9_HIGHBITDEPTH
-static INLINE void high_inter_predictor(const uint8_t *src, int src_stride,
-                                        uint8_t *dst, int dst_stride,
-                                        const int subpel_x,
-                                        const int subpel_y,
-                                        const struct scale_factors *sf,
-                                        int w, int h, int ref,
-                                        const INTERP_FILTER interp_filter,
-                                        int xs, int ys, int bd) {
+static INLINE void highbd_inter_predictor(const uint8_t *src, int src_stride,
+                                          uint8_t *dst, int dst_stride,
+                                          const int subpel_x,
+                                          const int subpel_y,
+                                          const struct scale_factors *sf,
+                                          int w, int h, int ref,
+                                          const INTERP_FILTER interp_filter,
+                                          int xs, int ys, int bd) {
   InterpFilterParams interp_filter_params =
       vp10_get_interp_filter_params(interp_filter);
   if (interp_filter_params.taps == SUBPEL_TAPS) {
@@ -105,6 +105,61 @@ static INLINE void high_inter_predictor(const uint8_t *src, int src_stride,
 }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
+void build_inter_predictors(MACROBLOCKD *xd, int plane,
+#if CONFIG_OBMC
+                            int mi_col_offset, int mi_row_offset,
+#endif  // CONFIG_OBMC
+                            int block,
+                            int bw, int bh,
+                            int x, int y, int w, int h,
+#if CONFIG_SUPERTX && CONFIG_EXT_INTER
+                            int wedge_offset_x, int wedge_offset_y,
+#endif  // CONFIG_SUPERTX && CONFIG_EXT_INTER
+                            int mi_x, int mi_y);
+
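+// Thin dispatch wrapper: selects the high bit-depth or 8-bit predictor
+// based on the current buffer's YV12_FLAG_HIGHBITDEPTH flag.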
+static INLINE void vp10_make_inter_predictor(
+    const uint8_t *src,
+    int src_stride,
+    uint8_t *dst,
+    int dst_stride,
+    const int subpel_x,
+    const int subpel_y,
+    const struct scale_factors *sf,
+    int w, int h, int ref,
+    const INTERP_FILTER interp_filter,
+    int xs, int ys,
+    const MACROBLOCKD *xd) {
+  (void) xd;
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+    highbd_inter_predictor(src, src_stride, dst, dst_stride,
+                           subpel_x, subpel_y, sf, w, h, ref,
+                           interp_filter, xs, ys, xd->bd);
+  else
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+    inter_predictor(src, src_stride, dst, dst_stride,
+                    subpel_x, subpel_y, sf, w, h, ref,
+                    interp_filter, xs, ys);
+}
+
+#if CONFIG_EXT_INTER
+void vp10_make_masked_inter_predictor(
+    const uint8_t *pre,
+    int pre_stride,
+    uint8_t *dst,
+    int dst_stride,
+    const int subpel_x,
+    const int subpel_y,
+    const struct scale_factors *sf,
+    int w, int h,
+    const INTERP_FILTER interp_filter,
+    int xs, int ys,
+#if CONFIG_SUPERTX
+    int plane, int wedge_offset_x, int wedge_offset_y,
+#endif  // CONFIG_SUPERTX
+    const MACROBLOCKD *xd);
+#endif  // CONFIG_EXT_INTER
+
 static INLINE int round_mv_comp_q4(int value) {
   return (value < 0 ? value - 2 : value + 2) / 4;
 }
@@ -114,10 +169,10 @@ static MV mi_mv_pred_q4(const MODE_INFO *mi, int idx) {
                               mi->bmi[1].as_mv[idx].as_mv.row +
                               mi->bmi[2].as_mv[idx].as_mv.row +
                               mi->bmi[3].as_mv[idx].as_mv.row),
-             round_mv_comp_q4(mi->bmi[0].as_mv[idx].as_mv.col +
-                              mi->bmi[1].as_mv[idx].as_mv.col +
-                              mi->bmi[2].as_mv[idx].as_mv.col +
-                              mi->bmi[3].as_mv[idx].as_mv.col) };
+     round_mv_comp_q4(mi->bmi[0].as_mv[idx].as_mv.col +
+                      mi->bmi[1].as_mv[idx].as_mv.col +
+                      mi->bmi[2].as_mv[idx].as_mv.col +
+                      mi->bmi[3].as_mv[idx].as_mv.col) };
   return res;
 }
 
@@ -183,15 +238,6 @@ static INLINE MV average_split_mvs(const struct macroblockd_plane *pd,
   return res;
 }
 
-void build_inter_predictors(MACROBLOCKD *xd, int plane,
-#if CONFIG_OBMC
-                            int mi_col_offset, int mi_row_offset,
-#endif  // CONFIG_OBMC
-                            int block,
-                            int bw, int bh,
-                            int x, int y, int w, int h,
-                            int mi_x, int mi_y);
-
 void vp10_build_inter_predictor_sub8x8(MACROBLOCKD *xd, int plane,
                                        int i, int ir, int ic,
                                        int mi_row, int mi_col);
@@ -209,9 +255,21 @@ void vp10_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col,
                                     BLOCK_SIZE bsize);
 
 #if CONFIG_SUPERTX
-void vp10_build_inter_predictors_sb_sub8x8(MACROBLOCKD *xd,
-                                           int mi_row, int mi_col,
-                                           BLOCK_SIZE bsize, int block);
+void vp10_build_inter_predictors_sb_sub8x8_extend(
+    MACROBLOCKD *xd,
+#if CONFIG_EXT_INTER
+    int mi_row_ori, int mi_col_ori,
+#endif  // CONFIG_EXT_INTER
+    int mi_row, int mi_col,
+    BLOCK_SIZE bsize, int block);
+
+void vp10_build_inter_predictors_sb_extend(
+    MACROBLOCKD *xd,
+#if CONFIG_EXT_INTER
+    int mi_row_ori, int mi_col_ori,
+#endif  // CONFIG_EXT_INTER
+    int mi_row, int mi_col,
+    BLOCK_SIZE bsize);
 struct macroblockd_plane;
 void vp10_build_masked_inter_predictor_complex(
     MACROBLOCKD *xd,
@@ -219,7 +277,6 @@ void vp10_build_masked_inter_predictor_complex(
     const struct macroblockd_plane *pd, int mi_row, int mi_col,
     int mi_row_ori, int mi_col_ori, BLOCK_SIZE bsize, BLOCK_SIZE top_bsize,
     PARTITION_TYPE partition, int plane);
-
 #endif  // CONFIG_SUPERTX
 
 void vp10_build_inter_predictor(const uint8_t *src, int src_stride,
@@ -376,6 +433,15 @@ void vp10_build_obmc_inter_prediction(VP10_COMMON *cm,
 #endif  // CONFIG_OBMC
 
 #if CONFIG_EXT_INTER
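+// The master masks are twice the size of the largest coding unit so that
+// masks for all supported block sizes and wedge offsets can be read in
+// place from them.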
+#define MASK_MASTER_SIZE   (2 * CU_SIZE)
+#define MASK_MASTER_STRIDE (2 * CU_SIZE)
+
+void vp10_init_wedge_masks(void);
+
+const uint8_t *vp10_get_soft_mask(int wedge_index,
+                                  BLOCK_SIZE sb_type,
+                                  int h, int w);
+
 void vp10_build_interintra_predictors(MACROBLOCKD *xd,
                                       uint8_t *ypred,
                                       uint8_t *upred,
@@ -398,6 +464,17 @@ void vp10_build_interintra_predictors_sbuv(MACROBLOCKD *xd,
                                            uint8_t *vpred,
                                            int ustride, int vstride,
                                            BLOCK_SIZE bsize);
+
+// Encoder only
+void vp10_build_inter_predictors_for_planes_single_buf(
+    MACROBLOCKD *xd, BLOCK_SIZE bsize,
+    int mi_row, int mi_col, int ref,
+    uint8_t *ext_dst[3], int ext_dst_stride[3]);
+void vp10_build_wedge_inter_predictor_from_buf(
+    MACROBLOCKD *xd, BLOCK_SIZE bsize,
+    int mi_row, int mi_col,
+    uint8_t *ext_dst0[3], int ext_dst_stride0[3],
+    uint8_t *ext_dst1[3], int ext_dst_stride1[3]);
 #endif  // CONFIG_EXT_INTER
 
 #ifdef __cplusplus
diff --git a/vp10/common/thread_common.c b/vp10/common/thread_common.c
index ecc971a7c21d4609b09428af4d59071ae74e5e54..963eed166d278532da2dd0d30e0cf2da53ba6b57 100644
--- a/vp10/common/thread_common.c
+++ b/vp10/common/thread_common.c
@@ -412,6 +412,13 @@ void vp10_accumulate_frame_counts(VP10_COMMON *cm, FRAME_COUNTS *counts,
   for (i = 0; i < BLOCK_SIZES; i++)
     for (j = 0; j < 2; j++)
       cm->counts.interintra[i][j] += counts->interintra[i][j];
+
+  for (i = 0; i < BLOCK_SIZES; i++)
+    for (j = 0; j < 2; j++)
+      cm->counts.wedge_interintra[i][j] += counts->wedge_interintra[i][j];
+  for (i = 0; i < BLOCK_SIZES; i++)
+    for (j = 0; j < 2; j++)
+      cm->counts.wedge_interinter[i][j] += counts->wedge_interinter[i][j];
 #endif  // CONFIG_EXT_INTER
 
   for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
diff --git a/vp10/common/vp10_rtcd_defs.pl b/vp10/common/vp10_rtcd_defs.pl
index 2344ce2b2dd73ff9b23838ddc2c32fe2b2d1acbd..1e2ef58bdb3f4ced68e75662c854168dbb29fa66 100644
--- a/vp10/common/vp10_rtcd_defs.pl
+++ b/vp10/common/vp10_rtcd_defs.pl
@@ -12,7 +12,7 @@ struct macroblockd;
 
 /* Encoder forward decls */
 struct macroblock;
-struct vp9_variance_vtable;
+struct vp10_variance_vtable;
 struct search_site_config;
 struct mv;
 union int_mv;
@@ -614,15 +614,15 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
 #
 # Motion search
 #
-add_proto qw/int vp10_full_search_sad/, "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv";
+add_proto qw/int vp10_full_search_sad/, "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp10_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv";
 specialize qw/vp10_full_search_sad sse3 sse4_1/;
 $vp10_full_search_sad_sse3=vp10_full_search_sadx3;
 $vp10_full_search_sad_sse4_1=vp10_full_search_sadx8;
 
-add_proto qw/int vp10_diamond_search_sad/, "const struct macroblock *x, const struct search_site_config *cfg,  struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
+add_proto qw/int vp10_diamond_search_sad/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp10_variance_vtable *fn_ptr, const struct mv *center_mv";
 specialize qw/vp10_diamond_search_sad/;
 
-add_proto qw/int vp10_full_range_search/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
+add_proto qw/int vp10_full_range_search/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp10_variance_vtable *fn_ptr, const struct mv *center_mv";
 specialize qw/vp10_full_range_search/;
 
 add_proto qw/void vp10_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c
index 64ac3ccf3b5539a0b1c99f71a09a32200c5c7d83..98d291087f7a0fea7ea6e3e361f378e0c86a9233 100644
--- a/vp10/decoder/decodeframe.c
+++ b/vp10/decoder/decodeframe.c
@@ -540,10 +540,10 @@ static void build_mc_border(const uint8_t *src, int src_stride,
 }
 
 #if CONFIG_VP9_HIGHBITDEPTH
-static void high_build_mc_border(const uint8_t *src8, int src_stride,
-                                 uint16_t *dst, int dst_stride,
-                                 int x, int y, int b_w, int b_h,
-                                 int w, int h) {
+static void build_mc_border_highbd(const uint8_t *src8, int src_stride,
+                                   uint16_t *dst, int dst_stride,
+                                   int x, int y, int b_w, int b_h,
+                                   int w, int h) {
   // Get a pointer to the start of the real data for this row.
   const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
   const uint16_t *ref_row = src - x - y * src_stride;
@@ -585,39 +585,54 @@ static void high_build_mc_border(const uint8_t *src8, int src_stride,
   } while (--b_h);
 }
 
-static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride,
-                               int x0, int y0, int b_w, int b_h,
-                               int frame_width, int frame_height,
-                               int border_offset,
-                               uint8_t *const dst, int dst_buf_stride,
-                               int subpel_x, int subpel_y,
-                               const INTERP_FILTER interp_filter,
-                               const struct scale_factors *sf,
-                               MACROBLOCKD *xd,
-                               int w, int h, int ref, int xs, int ys) {
-  DECLARE_ALIGNED(16, uint16_t, mc_buf_high[80 * 2 * 80 * 2]);
+static void extend_and_predict_highbd(const uint8_t *buf_ptr1,
+                                      int pre_buf_stride,
+                                      int x0, int y0, int b_w, int b_h,
+                                      int frame_width, int frame_height,
+                                      int border_offset,
+                                      uint8_t *const dst, int dst_buf_stride,
+                                      int subpel_x, int subpel_y,
+                                      const INTERP_FILTER interp_filter,
+                                      const struct scale_factors *sf,
+#if CONFIG_EXT_INTER && CONFIG_SUPERTX
+                                      int plane,
+                                      int wedge_offset_x, int wedge_offset_y,
+#endif  // CONFIG_EXT_INTER && CONFIG_SUPERTX
+                                      MACROBLOCKD *xd,
+                                      int w, int h, int ref, int xs, int ys) {
+  DECLARE_ALIGNED(16, uint16_t,
+                  mc_buf_high[(CU_SIZE + 16) * 2 * (CU_SIZE + 16) * 2]);
   const uint8_t *buf_ptr;
 
   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-    high_build_mc_border(buf_ptr1, pre_buf_stride, mc_buf_high, b_w,
-                         x0, y0, b_w, b_h, frame_width, frame_height);
+    build_mc_border_highbd(buf_ptr1, pre_buf_stride, mc_buf_high, b_w,
+                           x0, y0, b_w, b_h, frame_width, frame_height);
     buf_ptr = CONVERT_TO_BYTEPTR(mc_buf_high) + border_offset;
   } else {
     build_mc_border(buf_ptr1, pre_buf_stride, (uint8_t *)mc_buf_high, b_w,
                     x0, y0, b_w, b_h, frame_width, frame_height);
     buf_ptr = ((uint8_t *)mc_buf_high) + border_offset;
   }
-
-  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-    high_inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x,
-                         subpel_y, sf, w, h, ref, interp_filter,
-                         xs, ys, xd->bd);
-  } else {
-    inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x,
-                    subpel_y, sf, w, h, ref, interp_filter, xs, ys);
-  }
+#if CONFIG_EXT_INTER
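+  // For the second (ref == 1) prediction of a wedge-coded compound block,
+  // blend into the first prediction under the wedge mask instead of
+  // averaging.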
+  if (ref && get_wedge_bits(xd->mi[0]->mbmi.sb_type) &&
+      xd->mi[0]->mbmi.use_wedge_interinter)
+    vp10_make_masked_inter_predictor(
+        buf_ptr, b_w, dst, dst_buf_stride,
+        subpel_x, subpel_y, sf, w, h,
+        interp_filter, xs, ys,
+#if CONFIG_SUPERTX
+        plane, wedge_offset_x, wedge_offset_y,
+#endif  // CONFIG_SUPERTX
+        xd);
+  else
+#endif  // CONFIG_EXT_INTER
+    vp10_make_inter_predictor(buf_ptr, b_w, dst, dst_buf_stride,
+                              subpel_x, subpel_y, sf, w, h, ref,
+                              interp_filter, xs, ys, xd);
 }
+
 #else
+
 static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride,
                                int x0, int y0, int b_w, int b_h,
                                int frame_width, int frame_height,
@@ -626,21 +641,48 @@ static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride,
                                int subpel_x, int subpel_y,
                                const INTERP_FILTER interp_filter,
                                const struct scale_factors *sf,
+#if CONFIG_EXT_INTER && CONFIG_SUPERTX
+                               int plane,
+                               int wedge_offset_x, int wedge_offset_y,
+#endif  // CONFIG_EXT_INTER && CONFIG_SUPERTX
+                               MACROBLOCKD *xd,
                                int w, int h, int ref, int xs, int ys) {
-  DECLARE_ALIGNED(16, uint8_t, mc_buf[80 * 2 * 80 * 2]);
+  DECLARE_ALIGNED(16, uint8_t, mc_buf[(CU_SIZE + 16) * 2 * (CU_SIZE + 16) * 2]);
   const uint8_t *buf_ptr;
 
   build_mc_border(buf_ptr1, pre_buf_stride, mc_buf, b_w,
                   x0, y0, b_w, b_h, frame_width, frame_height);
   buf_ptr = mc_buf + border_offset;
-
-  inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x,
-                  subpel_y, sf, w, h, ref, interp_filter, xs, ys);
+#if CONFIG_EXT_INTER
+  if (ref && get_wedge_bits(xd->mi[0]->mbmi.sb_type) &&
+      xd->mi[0]->mbmi.use_wedge_interinter)
+    vp10_make_masked_inter_predictor(
+        buf_ptr, b_w, dst, dst_buf_stride,
+        subpel_x, subpel_y, sf, w, h,
+        interp_filter, xs, ys,
+#if CONFIG_SUPERTX
+        plane, wedge_offset_x, wedge_offset_y,
+#endif  // CONFIG_SUPERTX
+        xd);
+  else
+#endif  // CONFIG_EXT_INTER
+    vp10_make_inter_predictor(buf_ptr, b_w, dst, dst_buf_stride,
+                              subpel_x, subpel_y, sf, w, h, ref,
+                              interp_filter, xs, ys, xd);
 }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
-static void dec_build_inter_predictors(VP10Decoder *const pbi, MACROBLOCKD *xd,
-                                       int plane, int bw, int bh, int x,
-                                       int y, int w, int h, int mi_x, int mi_y,
+
+static void dec_build_inter_predictors(VP10Decoder *const pbi,
+                                       MACROBLOCKD *xd, int plane,
+#if CONFIG_OBMC
+                                       int mi_col_offset, int mi_row_offset,
+#endif  // CONFIG_OBMC
+                                       int bw, int bh,
+                                       int x, int y, int w, int h,
+#if CONFIG_EXT_INTER && CONFIG_SUPERTX
+                                       int wedge_offset_x, int wedge_offset_y,
+#endif  // CONFIG_EXT_INTER && CONFIG_SUPERTX
+                                       int mi_x, int mi_y,
                                        const INTERP_FILTER interp_filter,
                                        const struct scale_factors *sf,
                                        struct buf_2d *pre_buf,
@@ -655,9 +697,20 @@ static void dec_build_inter_predictors(VP10Decoder *const pbi, MACROBLOCKD *xd,
   int xs, ys, x0, y0, x0_16, y0_16, frame_width, frame_height,
       buf_stride, subpel_x, subpel_y;
   uint8_t *ref_frame, *buf_ptr;
+#if CONFIG_EXT_INTER
+#if CONFIG_OBMC
+  const MODE_INFO *mi = xd->mi[mi_col_offset + xd->mi_stride * mi_row_offset];
+#else
+  const MODE_INFO *mi = xd->mi[0];
+#endif  // CONFIG_OBMC
+#endif  // CONFIG_EXT_INTER
 #if CONFIG_EXT_INTERP
   const int i_filter = IsInterpolatingFilter(interp_filter);
 #endif  // CONFIG_EXT_INTERP
+#if CONFIG_OBMC
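+  // Quiet unused-parameter warnings when the offsets are not referenced.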
+  (void) mi_col_offset;
+  (void) mi_row_offset;
+#endif  // CONFIG_OBMC
 
   // Get reference frame pointer, width and height.
   if (plane == 0) {
@@ -777,15 +830,27 @@ static void dec_build_inter_predictors(VP10Decoder *const pbi, MACROBLOCKD *xd,
       const int border_offset = y_pad * (filter_size / 2 - 1) * b_w +
                                 x_pad * (filter_size / 2 - 1);
 
+#if CONFIG_VP9_HIGHBITDEPTH
+      extend_and_predict_highbd(buf_ptr1, buf_stride, x0, y0, b_w, b_h,
+                                frame_width, frame_height, border_offset,
+                                dst, dst_buf->stride,
+                                subpel_x, subpel_y,
+                                interp_filter, sf,
+#if CONFIG_EXT_INTER && CONFIG_SUPERTX
+                                plane, wedge_offset_x, wedge_offset_y,
+#endif  // CONFIG_EXT_INTER && CONFIG_SUPERTX
+                                xd, w, h, ref, xs, ys);
+#else
       extend_and_predict(buf_ptr1, buf_stride, x0, y0, b_w, b_h,
                          frame_width, frame_height, border_offset,
                          dst, dst_buf->stride,
                          subpel_x, subpel_y,
                          interp_filter, sf,
-#if CONFIG_VP9_HIGHBITDEPTH
-                         xd,
-#endif
-                         w, h, ref, xs, ys);
+#if CONFIG_EXT_INTER && CONFIG_SUPERTX
+                         plane, wedge_offset_x, wedge_offset_y,
+#endif  // CONFIG_EXT_INTER && CONFIG_SUPERTX
+                         xd, w, h, ref, xs, ys);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
       return;
     }
   } else {
@@ -797,29 +862,44 @@ static void dec_build_inter_predictors(VP10Decoder *const pbi, MACROBLOCKD *xd,
                              VPXMAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1));
      }
   }
-#if CONFIG_VP9_HIGHBITDEPTH
-  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-    high_inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x,
-                         subpel_y, sf, w, h, ref, interp_filter,
-                         xs, ys, xd->bd);
+#if CONFIG_EXT_INTER
+  if (ref && get_wedge_bits(mi->mbmi.sb_type) &&
+      mi->mbmi.use_wedge_interinter) {
+    vp10_make_masked_inter_predictor(
+        buf_ptr, buf_stride, dst, dst_buf->stride,
+        subpel_x, subpel_y, sf, w, h,
+        interp_filter, xs, ys,
+#if CONFIG_SUPERTX
+        plane, wedge_offset_x, wedge_offset_y,
+#endif  // CONFIG_SUPERTX
+        xd);
   } else {
-    inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x,
-                    subpel_y, sf, w, h, ref, interp_filter, xs, ys);
+    vp10_make_inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride,
+                              subpel_x, subpel_y, sf, w, h, ref,
+                              interp_filter, xs, ys, xd);
   }
 #else
-  inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x,
-                  subpel_y, sf, w, h, ref, interp_filter, xs, ys);
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+  vp10_make_inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride,
+                            subpel_x, subpel_y, sf, w, h, ref,
+                            interp_filter, xs, ys, xd);
+#endif  // CONFIG_EXT_INTER
 }
 #endif  // (CONFIG_SUPERTX || CONFIG_OBMC)
 
 #if CONFIG_SUPERTX
-static void dec_build_inter_predictors_sb(VP10Decoder *const pbi,
-                                          MACROBLOCKD *xd,
-                                          int mi_row, int mi_col) {
+static void dec_build_inter_predictors_sb_extend(
+    VP10Decoder *const pbi, MACROBLOCKD *xd,
+#if CONFIG_EXT_INTER
+    int mi_row_ori, int mi_col_ori,
+#endif  // CONFIG_EXT_INTER
+    int mi_row, int mi_col) {
   int plane;
   const int mi_x = mi_col * MI_SIZE;
   const int mi_y = mi_row * MI_SIZE;
+#if CONFIG_EXT_INTER
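+  // Pixel offset of the original block inside the extended prediction
+  // region; it anchors the wedge mask at the original block position.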
+  const int wedge_offset_x = (mi_col_ori - mi_col) * MI_SIZE;
+  const int wedge_offset_y = (mi_row_ori - mi_row) * MI_SIZE;
+#endif  // CONFIG_EXT_INTER
   const MODE_INFO *mi = xd->mi[0];
   const INTERP_FILTER interp_filter = mi->mbmi.interp_filter;
   const BLOCK_SIZE sb_type = mi->mbmi.sb_type;
@@ -827,6 +907,7 @@ static void dec_build_inter_predictors_sb(VP10Decoder *const pbi,
 
   for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
     struct macroblockd_plane *const pd = &xd->plane[plane];
+
     struct buf_2d *const dst_buf = &pd->dst;
     const int num_4x4_w = pd->n4_w;
     const int num_4x4_h = pd->n4_h;
@@ -855,24 +936,44 @@ static void dec_build_inter_predictors_sb(VP10Decoder *const pbi,
         for (y = 0; y < num_4x4_h; ++y) {
           for (x = 0; x < num_4x4_w; ++x) {
             const MV mv = average_split_mvs(pd, mi, ref, y * 2 + x);
-            dec_build_inter_predictors(pbi, xd, plane, n4w_x4, n4h_x4,
-                                       4 * x, 4 * y, pw, ph, mi_x, mi_y,
-                                       interp_filter, sf, pre_buf, dst_buf,
-                                       &mv, ref_frame_buf, is_scaled, ref);
+            dec_build_inter_predictors(
+                pbi, xd, plane,
+#if CONFIG_OBMC
+                0, 0,
+#endif  // CONFIG_OBMC
+                n4w_x4, n4h_x4,
+                4 * x, 4 * y, pw, ph,
+#if CONFIG_EXT_INTER
+                wedge_offset_x >> (pd->subsampling_x),
+                wedge_offset_y >> (pd->subsampling_y),
+#endif  // CONFIG_EXT_INTER
+                mi_x, mi_y,
+                interp_filter, sf, pre_buf, dst_buf,
+                &mv, ref_frame_buf, is_scaled, ref);
           }
         }
       } else {
         const MV mv = mi->mbmi.mv[ref].as_mv;
-        dec_build_inter_predictors(pbi, xd, plane, n4w_x4, n4h_x4,
-                                   0, 0, n4w_x4, n4h_x4, mi_x, mi_y,
-                                   interp_filter, sf, pre_buf, dst_buf,
-                                   &mv, ref_frame_buf,
-                                   is_scaled, ref);
+        dec_build_inter_predictors(
+            pbi, xd, plane,
+#if CONFIG_OBMC
+            0, 0,
+#endif  // CONFIG_OBMC
+            n4w_x4, n4h_x4,
+            0, 0, n4w_x4, n4h_x4,
+#if CONFIG_EXT_INTER
+            wedge_offset_x >> (pd->subsampling_x),
+            wedge_offset_y >> (pd->subsampling_y),
+#endif  // CONFIG_EXT_INTER
+            mi_x, mi_y,
+            interp_filter, sf, pre_buf, dst_buf,
+            &mv, ref_frame_buf,
+            is_scaled, ref);
       }
     }
   }
 #if CONFIG_EXT_INTER
-  if (is_interintra_pred(&xd->mi[0]->mbmi))
+  if (is_interintra_pred(&mi->mbmi))
     vp10_build_interintra_predictors(xd,
                                      xd->plane[0].dst.buf,
                                      xd->plane[1].dst.buf,
@@ -884,15 +985,23 @@ static void dec_build_inter_predictors_sb(VP10Decoder *const pbi,
 #endif  // CONFIG_EXT_INTER
 }
 
-static void dec_build_inter_predictors_sb_sub8x8(VP10Decoder *const pbi,
-                                                 MACROBLOCKD *xd,
-                                                 int mi_row, int mi_col,
-                                                 int block) {
+static void dec_build_inter_predictors_sb_sub8x8_extend(
+    VP10Decoder *const pbi,
+    MACROBLOCKD *xd,
+#if CONFIG_EXT_INTER
+    int mi_row_ori, int mi_col_ori,
+#endif  // CONFIG_EXT_INTER
+    int mi_row, int mi_col,
+    int block) {
   // Prediction function used in supertx:
   // Use the mv at current block (which is less than 8x8)
   int plane;
   const int mi_x = mi_col * MI_SIZE;
   const int mi_y = mi_row * MI_SIZE;
+#if CONFIG_EXT_INTER
+  const int wedge_offset_x = (mi_col_ori - mi_col) * MI_SIZE;
+  const int wedge_offset_y = (mi_row_ori - mi_row) * MI_SIZE;
+#endif  // CONFIG_EXT_INTER
   const MODE_INFO *mi = xd->mi[0];
   const INTERP_FILTER interp_filter = mi->mbmi.interp_filter;
   const int is_compound = has_second_ref(&mi->mbmi);
@@ -919,14 +1028,23 @@ static void dec_build_inter_predictors_sb_sub8x8(VP10Decoder *const pbi,
       RefCntBuffer *const ref_frame_buf = &pool->frame_bufs[idx];
       const int is_scaled = vp10_is_scaled(sf);
       const MV mv = average_split_mvs(pd, mi, ref, block);
-      dec_build_inter_predictors(pbi, xd, plane, n4w_x4, n4h_x4,
-                                 0, 0, n4w_x4, n4h_x4, mi_x, mi_y,
+      dec_build_inter_predictors(pbi, xd, plane,
+#if CONFIG_OBMC
+                                 0, 0,
+#endif  // CONFIG_OBMC
+                                 n4w_x4, n4h_x4,
+                                 0, 0, n4w_x4, n4h_x4,
+#if CONFIG_EXT_INTER
+                                 wedge_offset_x >> (pd->subsampling_x),
+                                 wedge_offset_y >> (pd->subsampling_y),
+#endif  // CONFIG_EXT_INTER
+                                 mi_x, mi_y,
                                  interp_filter, sf, pre_buf, dst_buf,
                                  &mv, ref_frame_buf, is_scaled, ref);
     }
   }
 #if CONFIG_EXT_INTER
-  if (is_interintra_pred(&xd->mi[0]->mbmi))
+  if (is_interintra_pred(&mi->mbmi))
     vp10_build_interintra_predictors(xd,
                                      xd->plane[0].dst.buf,
                                      xd->plane[1].dst.buf,
@@ -934,7 +1052,7 @@ static void dec_build_inter_predictors_sb_sub8x8(VP10Decoder *const pbi,
                                      xd->plane[0].dst.stride,
                                      xd->plane[1].dst.stride,
                                      xd->plane[2].dst.stride,
-                                     xd->mi[0]->mbmi.sb_type);
+                                     mi->mbmi.sb_type);
 #endif  // CONFIG_EXT_INTER
 }
 #endif  // CONFIG_SUPERTX
@@ -964,7 +1082,7 @@ static void dec_build_prediction_by_above_preds(VP10Decoder *const pbi,
 
     mi_step = VPXMIN(xd->n8_w, num_8x8_blocks_wide_lookup[sb_type]);
 
-    if (!is_inter_block(mbmi))
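+    // Neighbors coded with wedge compound or inter-intra prediction are
+    // not overlappable and are skipped.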
+    if (!is_neighbor_overlappable(mbmi))
       continue;
 
     for (j = 0; j < MAX_MB_PLANE; ++j) {
@@ -1021,15 +1139,27 @@ static void dec_build_prediction_by_above_preds(VP10Decoder *const pbi,
                   && y == 0 && !pd->subsampling_y)
                 continue;
 
-              dec_build_inter_predictors(pbi, xd, j, bw, bh,
-                                         4 * x, 0, pw, bh, mi_x, mi_y,
+              dec_build_inter_predictors(pbi, xd, j,
+                                         mi_col_offset, mi_row_offset,
+                                         bw, bh,
+                                         4 * x, 0, pw, bh,
+#if CONFIG_EXT_INTER && CONFIG_SUPERTX
+                                         0, 0,
+#endif  // CONFIG_EXT_INTER && CONFIG_SUPERTX
+                                         mi_x, mi_y,
                                          interp_filter, sf, pre_buf, dst_buf,
                                          &mv, ref_frame_buf, is_scaled, ref);
             }
         } else {
           const MV mv = mi->mbmi.mv[ref].as_mv;
-          dec_build_inter_predictors(pbi, xd, j, bw, bh,
-                                     0, 0, bw, bh, mi_x, mi_y, interp_filter,
+          dec_build_inter_predictors(pbi, xd, j,
+                                     mi_col_offset, mi_row_offset,
+                                     bw, bh,
+                                     0, 0, bw, bh,
+#if CONFIG_EXT_INTER && CONFIG_SUPERTX
+                                     0, 0,
+#endif  // CONFIG_EXT_INTER && CONFIG_SUPERTX
+                                     mi_x, mi_y, interp_filter,
                                      sf, pre_buf, dst_buf, &mv, ref_frame_buf,
                                      is_scaled, ref);
         }
@@ -1065,7 +1195,7 @@ static void dec_build_prediction_by_left_preds(VP10Decoder *const pbi,
 
     mi_step = VPXMIN(xd->n8_h, num_8x8_blocks_high_lookup[sb_type]);
 
-    if (!is_inter_block(mbmi))
+    if (!is_neighbor_overlappable(mbmi))
       continue;
 
     for (j = 0; j < MAX_MB_PLANE; ++j) {
@@ -1123,15 +1253,31 @@ static void dec_build_prediction_by_left_preds(VP10Decoder *const pbi,
                   && x == 0 && !pd->subsampling_x)
                 continue;
 
-              dec_build_inter_predictors(pbi, xd, j, bw, bh,
-                                         0, 4 * y, bw, ph, mi_x, mi_y,
+              dec_build_inter_predictors(pbi, xd, j,
+#if CONFIG_OBMC
+                                         mi_col_offset, mi_row_offset,
+#endif  // CONFIG_OBMC
+                                         bw, bh,
+                                         0, 4 * y, bw, ph,
+#if CONFIG_EXT_INTER && CONFIG_SUPERTX
+                                         0, 0,
+#endif  // CONFIG_EXT_INTER && CONFIG_SUPERTX
+                                         mi_x, mi_y,
                                          interp_filter, sf, pre_buf, dst_buf,
                                          &mv, ref_frame_buf, is_scaled, ref);
             }
         } else {
           const MV mv = mi->mbmi.mv[ref].as_mv;
-          dec_build_inter_predictors(pbi, xd, j, bw, bh,
-                                     0, 0, bw, bh, mi_x, mi_y, interp_filter,
+          dec_build_inter_predictors(pbi, xd, j,
+#if CONFIG_OBMC
+                                     mi_col_offset, mi_row_offset,
+#endif  // CONFIG_OBMC
+                                     bw, bh,
+                                     0, 0, bw, bh,
+#if CONFIG_EXT_INTER && CONFIG_SUPERTX
+                                     0, 0,
+#endif  // CONFIG_EXT_INTER && CONFIG_SUPERTX
+                                     mi_x, mi_y, interp_filter,
                                      sf, pre_buf, dst_buf, &mv, ref_frame_buf,
                                      is_scaled, ref);
         }
@@ -1274,8 +1420,7 @@ static void set_offsets_topblock(VP10_COMMON *const cm, MACROBLOCKD *const xd,
 
 static void set_param_topblock(VP10_COMMON *const cm,  MACROBLOCKD *const xd,
                                BLOCK_SIZE bsize, int mi_row, int mi_col,
-                               int txfm,
-                               int skip) {
+                               int txfm, int skip) {
   const int bw = num_8x8_blocks_wide_lookup[bsize];
   const int bh = num_8x8_blocks_high_lookup[bsize];
   const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col);
@@ -1367,10 +1512,19 @@ static void dec_predict_b_extend(
                          (c >> xd->plane[2].subsampling_x);
 
   if (!b_sub8x8)
-    dec_build_inter_predictors_sb(pbi, xd, mi_row_pred, mi_col_pred);
+    dec_build_inter_predictors_sb_extend(
+        pbi, xd,
+#if CONFIG_EXT_INTER
+        mi_row_ori, mi_col_ori,
+#endif  // CONFIG_EXT_INTER
+        mi_row_pred, mi_col_pred);
   else
-    dec_build_inter_predictors_sb_sub8x8(pbi, xd, mi_row_pred, mi_col_pred,
-                                         block);
+    dec_build_inter_predictors_sb_sub8x8_extend(
+        pbi, xd,
+#if CONFIG_EXT_INTER
+        mi_row_ori, mi_col_ori,
+#endif  // CONFIG_EXT_INTER
+        mi_row_pred, mi_col_pred, block);
 }
 
 static void dec_extend_dir(VP10Decoder *const pbi, MACROBLOCKD *const xd,
@@ -1872,38 +2026,43 @@ static void decode_block(VP10Decoder *const pbi, MACROBLOCKD *const xd,
 #if CONFIG_OBMC
       if (mbmi->obmc) {
 #if CONFIG_VP9_HIGHBITDEPTH
-        DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * 64 * 64]);
-        DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * 64 * 64]);
+        DECLARE_ALIGNED(16, uint8_t,
+                        tmp_buf1[2 * MAX_MB_PLANE * CU_SIZE * CU_SIZE]);
+        DECLARE_ALIGNED(16, uint8_t,
+                        tmp_buf2[2 * MAX_MB_PLANE * CU_SIZE * CU_SIZE]);
 #else
-        DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * 64 * 64]);
-        DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * 64 * 64]);
+        DECLARE_ALIGNED(16, uint8_t,
+                        tmp_buf1[MAX_MB_PLANE * CU_SIZE * CU_SIZE]);
+        DECLARE_ALIGNED(16, uint8_t,
+                        tmp_buf2[MAX_MB_PLANE * CU_SIZE * CU_SIZE]);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
         uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE];
-        int dst_stride1[MAX_MB_PLANE] = {64, 64, 64};
-        int dst_stride2[MAX_MB_PLANE] = {64, 64, 64};
+        int dst_stride1[MAX_MB_PLANE] = {CU_SIZE, CU_SIZE, CU_SIZE};
+        int dst_stride2[MAX_MB_PLANE] = {CU_SIZE, CU_SIZE, CU_SIZE};
 
         assert(mbmi->sb_type >= BLOCK_8X8);
 #if CONFIG_VP9_HIGHBITDEPTH
         if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
           int len = sizeof(uint16_t);
           dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1);
-          dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + 4096 * len);
-          dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 8192 * len);
+          dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + CU_SIZE * CU_SIZE * len);
+          dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 +
+                                           CU_SIZE * CU_SIZE * 2 * len);
           dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2);
-          dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + 4096 * len);
-          dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 8192 * len);
+          dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + CU_SIZE * CU_SIZE * len);
+          dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 +
+                                           CU_SIZE * CU_SIZE * 2 * len);
         } else {
 #endif  // CONFIG_VP9_HIGHBITDEPTH
-        dst_buf1[0] = tmp_buf1;
-        dst_buf1[1] = tmp_buf1 + 4096;
-        dst_buf1[2] = tmp_buf1 + 8192;
-        dst_buf2[0] = tmp_buf2;
-        dst_buf2[1] = tmp_buf2 + 4096;
-        dst_buf2[2] = tmp_buf2 + 8192;
+          dst_buf1[0] = tmp_buf1;
+          dst_buf1[1] = tmp_buf1 + CU_SIZE * CU_SIZE;
+          dst_buf1[2] = tmp_buf1 + CU_SIZE * CU_SIZE * 2;
+          dst_buf2[0] = tmp_buf2;
+          dst_buf2[1] = tmp_buf2 + CU_SIZE * CU_SIZE;
+          dst_buf2[2] = tmp_buf2 + CU_SIZE * CU_SIZE * 2;
 #if CONFIG_VP9_HIGHBITDEPTH
         }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
-
         dec_build_prediction_by_above_preds(pbi, xd, mi_row, mi_col,
                                             dst_buf1, dst_stride1);
         dec_build_prediction_by_left_preds(pbi, xd, mi_row, mi_col,
@@ -3591,13 +3750,24 @@ static int read_compressed_header(VP10Decoder *pbi, const uint8_t *data,
 
 #if CONFIG_EXT_INTER
     read_inter_compound_mode_probs(fc, &r);
-
     if (cm->reference_mode != COMPOUND_REFERENCE) {
       for (i = 0; i < BLOCK_SIZES; i++) {
         if (is_interintra_allowed_bsize(i)) {
           vp10_diff_update_prob(&r, &fc->interintra_prob[i]);
         }
       }
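+      // Wedge probabilities are updated only for block sizes that support
+      // wedge prediction.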
+      for (i = 0; i < BLOCK_SIZES; i++) {
+        if (is_interintra_allowed_bsize(i) && get_wedge_bits(i)) {
+          vp10_diff_update_prob(&r, &fc->wedge_interintra_prob[i]);
+        }
+      }
+    }
+    if (cm->reference_mode != SINGLE_REFERENCE) {
+      for (i = 0; i < BLOCK_SIZES; i++) {
+        if (get_wedge_bits(i)) {
+          vp10_diff_update_prob(&r, &fc->wedge_interinter_prob[i]);
+        }
+      }
     }
 #endif  // CONFIG_EXT_INTER
 
@@ -3666,6 +3836,10 @@ static void debug_check_frame_counts(const VP10_COMMON *const cm) {
                  sizeof(cm->counts.inter_compound_mode)));
   assert(!memcmp(cm->counts.interintra, zero_counts.interintra,
                  sizeof(cm->counts.interintra)));
+  assert(!memcmp(cm->counts.wedge_interintra, zero_counts.wedge_interintra,
+                 sizeof(cm->counts.wedge_interintra)));
+  assert(!memcmp(cm->counts.wedge_interinter, zero_counts.wedge_interinter,
+                 sizeof(cm->counts.wedge_interinter)));
 #endif  // CONFIG_EXT_INTER
 #if CONFIG_OBMC
   assert(!memcmp(cm->counts.obmc, zero_counts.obmc,
diff --git a/vp10/decoder/decodemv.c b/vp10/decoder/decodemv.c
index fccd3c880b80d2231419579b5603399495fd0bb2..a42d08bc54c434bb13283d183a03108fe30a80e4 100644
--- a/vp10/decoder/decodemv.c
+++ b/vp10/decoder/decodemv.c
@@ -1185,10 +1185,11 @@ static void read_inter_block_mode_info(VP10Decoder *const pbi,
   }
 
 #if CONFIG_OBMC
+  mbmi->obmc = 0;
 #if CONFIG_SUPERTX
   if (!supertx_enabled)
 #endif  // CONFIG_SUPERTX
-  mbmi->obmc = read_is_obmc_block(cm, xd, r);
+    mbmi->obmc = read_is_obmc_block(cm, xd, r);
 #endif  // CONFIG_OBMC
 
 #if CONFIG_REF_MV
@@ -1432,7 +1433,12 @@ static void read_inter_block_mode_info(VP10Decoder *const pbi,
   }
 
 #if CONFIG_EXT_INTER
+  mbmi->use_wedge_interintra = 0;
+  mbmi->use_wedge_interinter = 0;
   if (cm->reference_mode != COMPOUND_REFERENCE &&
+#if CONFIG_OBMC
+      !(is_obmc_allowed(mbmi) && mbmi->obmc) &&
+#endif  // CONFIG_OBMC
 #if CONFIG_SUPERTX
       !supertx_enabled &&
 #endif
@@ -1444,19 +1450,42 @@ static void read_inter_block_mode_info(VP10Decoder *const pbi,
     if (interintra) {
       const PREDICTION_MODE interintra_mode =
           read_intra_mode_y(cm, xd, r, size_group_lookup[bsize]);
-
       mbmi->ref_frame[1] = INTRA_FRAME;
       mbmi->interintra_mode = interintra_mode;
       mbmi->interintra_uv_mode = interintra_mode;
 #if CONFIG_EXT_INTRA
-      // TODO(debargha|geza.lore):
-      // Should we use ext_intra modes for interintra?
       mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0;
       mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
       mbmi->angle_delta[0] = 0;
       mbmi->angle_delta[1] = 0;
       mbmi->intra_filter = INTRA_FILTER_LINEAR;
 #endif  // CONFIG_EXT_INTRA
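+      // The inter-intra predictor may additionally be blended through a
+      // wedge mask.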
+      if (get_wedge_bits(bsize)) {
+        mbmi->use_wedge_interintra =
+            vpx_read(r, cm->fc->wedge_interintra_prob[bsize]);
+        if (xd->counts)
+          xd->counts->wedge_interintra[bsize][mbmi->use_wedge_interintra]++;
+        if (mbmi->use_wedge_interintra) {
+          mbmi->interintra_wedge_index =
+              mbmi->interintra_uv_wedge_index =
+                  vpx_read_literal(r, get_wedge_bits(bsize));
+        }
+      }
+    }
+  }
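+  // Wedge compound prediction: signalled per block for compound modes,
+  // never together with OBMC.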
+  if (cm->reference_mode != SINGLE_REFERENCE &&
+      is_inter_compound_mode(mbmi->mode) &&
+#if CONFIG_OBMC
+      !(is_obmc_allowed(mbmi) && mbmi->obmc) &&
+#endif  // CONFIG_OBMC
+      get_wedge_bits(bsize)) {
+    mbmi->use_wedge_interinter =
+        vpx_read(r, cm->fc->wedge_interinter_prob[bsize]);
+    if (xd->counts)
+      xd->counts->wedge_interinter[bsize][mbmi->use_wedge_interinter]++;
+    if (mbmi->use_wedge_interinter) {
+      mbmi->interinter_wedge_index =
+          vpx_read_literal(r, get_wedge_bits(bsize));
     }
   }
 #endif  // CONFIG_EXT_INTER
diff --git a/vp10/decoder/decoder.c b/vp10/decoder/decoder.c
index 35c53df6d5902e8fa0f6d3d362902d9fd93b1d77..d3fee853aa1cec3de65751b6efe985b7103fc2d6 100644
--- a/vp10/decoder/decoder.c
+++ b/vp10/decoder/decoder.c
@@ -30,6 +30,7 @@
 #include "vp10/common/postproc.h"
 #endif
 #include "vp10/common/quant_common.h"
+#include "vp10/common/reconinter.h"
 #include "vp10/common/reconintra.h"
 
 #include "vp10/decoder/decodeframe.h"
@@ -44,6 +45,9 @@ static void initialize_dec(void) {
     vpx_dsp_rtcd();
     vpx_scale_rtcd();
     vp10_init_intra_predictors();
+#if CONFIG_EXT_INTER
+    vp10_init_wedge_masks();
+#endif  // CONFIG_EXT_INTER
     init_done = 1;
   }
 }
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c
index 2603b6b48659d9f91a3ea235d944a0edd8b9da72..ed9d2a9e2ce18b06256348fd644d5d418aac41e0 100644
--- a/vp10/encoder/bitstream.c
+++ b/vp10/encoder/bitstream.c
@@ -997,12 +997,13 @@ static void pack_inter_mode_mvs(VP10_COMP *cpi, const MODE_INFO *mi,
   } else {
     int16_t mode_ctx = mbmi_ext->mode_context[mbmi->ref_frame[0]];
     write_ref_frames(cm, xd, w);
+
 #if CONFIG_OBMC
 #if CONFIG_SUPERTX
     if (!supertx_enabled)
 #endif  // CONFIG_SUPERTX
-    if (is_obmc_allowed(mbmi))
-      vpx_write(w, mbmi->obmc, cm->fc->obmc_prob[bsize]);
+      if (is_obmc_allowed(mbmi))
+        vpx_write(w, mbmi->obmc, cm->fc->obmc_prob[bsize]);
 #endif  // CONFIG_OBMC
 
 #if CONFIG_REF_MV
@@ -1052,8 +1053,8 @@ static void pack_inter_mode_mvs(VP10_COMP *cpi, const MODE_INFO *mi,
 #if CONFIG_EXT_INTER
           if (!is_compound)
 #endif  // CONFIG_EXT_INTER
-          mode_ctx = vp10_mode_context_analyzer(mbmi_ext->mode_context,
-                                                mbmi->ref_frame, bsize, j);
+            mode_ctx = vp10_mode_context_analyzer(mbmi_ext->mode_context,
+                                                  mbmi->ref_frame, bsize, j);
 #endif
 #if CONFIG_EXT_INTER
           if (is_inter_compound_mode(b_mode))
@@ -1162,6 +1163,9 @@ static void pack_inter_mode_mvs(VP10_COMP *cpi, const MODE_INFO *mi,
 
 #if CONFIG_EXT_INTER
     if (cpi->common.reference_mode != COMPOUND_REFERENCE &&
+#if CONFIG_OBMC
+        !(is_obmc_allowed(mbmi) && mbmi->obmc) &&
+#endif  // CONFIG_OBMC
 #if CONFIG_SUPERTX
         !supertx_enabled &&
 #endif  // CONFIG_SUPERTX
@@ -1172,8 +1176,28 @@ static void pack_inter_mode_mvs(VP10_COMP *cpi, const MODE_INFO *mi,
         write_intra_mode(w, mbmi->interintra_mode,
                          cm->fc->y_mode_prob[size_group_lookup[bsize]]);
         assert(mbmi->interintra_mode == mbmi->interintra_uv_mode);
+        if (get_wedge_bits(bsize)) {
+          vpx_write(w, mbmi->use_wedge_interintra,
+                    cm->fc->wedge_interintra_prob[bsize]);
+          if (mbmi->use_wedge_interintra) {
+            vpx_write_literal(w, mbmi->interintra_wedge_index,
+                              get_wedge_bits(bsize));
+          }
+        }
       }
     }
+    if (cpi->common.reference_mode != SINGLE_REFERENCE &&
+        is_inter_compound_mode(mbmi->mode) &&
+#if CONFIG_OBMC
+        !(is_obmc_allowed(mbmi) && mbmi->obmc) &&
+#endif  // CONFIG_OBMC
+        get_wedge_bits(bsize)) {
+      vpx_write(w, mbmi->use_wedge_interinter,
+                cm->fc->wedge_interinter_prob[bsize]);
+      if (mbmi->use_wedge_interinter)
+        vpx_write_literal(w, mbmi->interinter_wedge_index,
+                          get_wedge_bits(bsize));
+    }
 #endif  // CONFIG_EXT_INTER
 
 #if CONFIG_EXT_INTERP
@@ -2467,6 +2491,19 @@ static size_t write_compressed_header(VP10_COMP *cpi, uint8_t *data) {
                                      cm->counts.interintra[i]);
         }
       }
+      for (i = 0; i < BLOCK_SIZES; i++) {
+        if (is_interintra_allowed_bsize(i) && get_wedge_bits(i))
+          vp10_cond_prob_diff_update(&header_bc,
+                                     &fc->wedge_interintra_prob[i],
+                                     cm->counts.wedge_interintra[i]);
+      }
+    }
+    if (cm->reference_mode != SINGLE_REFERENCE) {
+      for (i = 0; i < BLOCK_SIZES; i++)
+        if (get_wedge_bits(i))
+          vp10_cond_prob_diff_update(&header_bc,
+                                     &fc->wedge_interinter_prob[i],
+                                     cm->counts.wedge_interinter[i]);
     }
 #endif  // CONFIG_EXT_INTER
 
diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c
index 8c7af63f06339d41b8b2b0ad20577a03c959bbf7..bea01575df4608e5a58c68116533ba78b623b943 100644
--- a/vp10/encoder/encodeframe.c
+++ b/vp10/encoder/encodeframe.c
@@ -61,6 +61,9 @@ static int check_intra_sb(VP10_COMP *cpi, const TileInfo *const tile,
                           int mi_row, int mi_col, BLOCK_SIZE bsize,
                           PC_TREE *pc_tree);
 static void predict_superblock(VP10_COMP *cpi, ThreadData *td,
+#if CONFIG_EXT_INTER
+                               int mi_row_ori, int mi_col_ori,
+#endif  // CONFIG_EXT_INTER
                                int mi_row_pred, int mi_col_pred,
                                BLOCK_SIZE bsize_pred, int b_sub8x8, int block);
 static int check_supertx_sb(BLOCK_SIZE bsize, TX_SIZE supertx_size,
@@ -1290,6 +1293,10 @@ static void update_state_supertx(VP10_COMP *cpi, ThreadData *td,
         mbmi->inter_tx_size[(idy << 3) + idx] = mbmi->tx_size;
   }
 #endif  // CONFIG_VAR_TX
+#if CONFIG_OBMC
+  // Turn OBMC off for supertx
+  mbmi->obmc = 0;
+#endif  // CONFIG_OBMC
 
   if (!output_enabled)
     return;
@@ -1801,29 +1808,43 @@ static void update_stats(VP10_COMMON *cm, ThreadData *td
                               [ref0 != GOLDEN_FRAME]++;
 #endif  // CONFIG_EXT_REFS
         }
+
 #if CONFIG_OBMC
 #if CONFIG_SUPERTX
         if (!supertx_enabled)
 #endif  // CONFIG_SUPERTX
-        if (is_obmc_allowed(mbmi))
-          counts->obmc[mbmi->sb_type][mbmi->obmc]++;
+          if (is_obmc_allowed(mbmi))
+            counts->obmc[mbmi->sb_type][mbmi->obmc]++;
 #endif  // CONFIG_OBMC
       }
     }
 
 #if CONFIG_EXT_INTER
     if (cm->reference_mode != COMPOUND_REFERENCE &&
+#if CONFIG_OBMC
+        !(is_obmc_allowed(mbmi) && mbmi->obmc) &&
+#endif  // CONFIG_OBMC
 #if CONFIG_SUPERTX
-       !supertx_enabled &&
+        !supertx_enabled &&
 #endif
-       is_interintra_allowed(mbmi)) {
+        is_interintra_allowed(mbmi)) {
       if (mbmi->ref_frame[1] == INTRA_FRAME) {
         counts->y_mode[size_group_lookup[bsize]][mbmi->interintra_mode]++;
         counts->interintra[bsize][1]++;
+        if (get_wedge_bits(bsize))
+          counts->wedge_interintra[bsize][mbmi->use_wedge_interintra]++;
       } else {
         counts->interintra[bsize][0]++;
       }
     }
+    if (cm->reference_mode != SINGLE_REFERENCE &&
+        is_inter_compound_mode(mbmi->mode) &&
+#if CONFIG_OBMC
+        !(is_obmc_allowed(mbmi) && mbmi->obmc) &&
+#endif  // CONFIG_OBMC
+        get_wedge_bits(bsize)) {
+      counts->wedge_interinter[bsize][mbmi->use_wedge_interinter]++;
+    }
 #endif  // CONFIG_EXT_INTER
 
     if (inter_block &&
@@ -4458,15 +4479,19 @@ static void encode_superblock(VP10_COMP *cpi, ThreadData *td,
 #if CONFIG_OBMC
     if (mbmi->obmc) {
 #if CONFIG_VP9_HIGHBITDEPTH
-      DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * 64 * 64]);
-      DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * 64 * 64]);
+      DECLARE_ALIGNED(16, uint8_t,
+                      tmp_buf1[2 * MAX_MB_PLANE * CU_SIZE * CU_SIZE]);
+      DECLARE_ALIGNED(16, uint8_t,
+                      tmp_buf2[2 * MAX_MB_PLANE * CU_SIZE * CU_SIZE]);
 #else
-      DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * 64 * 64]);
-      DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * 64 * 64]);
+      DECLARE_ALIGNED(16, uint8_t,
+                      tmp_buf1[MAX_MB_PLANE * CU_SIZE * CU_SIZE]);
+      DECLARE_ALIGNED(16, uint8_t,
+                      tmp_buf2[MAX_MB_PLANE * CU_SIZE * CU_SIZE]);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
       uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE];
-      int dst_stride1[MAX_MB_PLANE] = {64, 64, 64};
-      int dst_stride2[MAX_MB_PLANE] = {64, 64, 64};
+      int dst_stride1[MAX_MB_PLANE] = {CU_SIZE, CU_SIZE, CU_SIZE};
+      int dst_stride2[MAX_MB_PLANE] = {CU_SIZE, CU_SIZE, CU_SIZE};
 
       assert(mbmi->sb_type >= BLOCK_8X8);
 
@@ -4474,23 +4499,24 @@ static void encode_superblock(VP10_COMP *cpi, ThreadData *td,
       if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
         int len = sizeof(uint16_t);
         dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1);
-        dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + 4096 * len);
-        dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 8192 * len);
+        dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + CU_SIZE * CU_SIZE * len);
+        dst_buf1[2] = CONVERT_TO_BYTEPTR(
+            tmp_buf1 + CU_SIZE * CU_SIZE * 2 * len);
         dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2);
-        dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + 4096 * len);
-        dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 8192 * len);
+        dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + CU_SIZE * CU_SIZE * len);
+        dst_buf2[2] = CONVERT_TO_BYTEPTR(
+            tmp_buf2 + CU_SIZE * CU_SIZE * 2 * len);
       } else {
 #endif  // CONFIG_VP9_HIGHBITDEPTH
       dst_buf1[0] = tmp_buf1;
-      dst_buf1[1] = tmp_buf1 + 4096;
-      dst_buf1[2] = tmp_buf1 + 8192;
+      dst_buf1[1] = tmp_buf1 + CU_SIZE * CU_SIZE;
+      dst_buf1[2] = tmp_buf1 + CU_SIZE * CU_SIZE * 2;
       dst_buf2[0] = tmp_buf2;
-      dst_buf2[1] = tmp_buf2 + 4096;
-      dst_buf2[2] = tmp_buf2 + 8192;
+      dst_buf2[1] = tmp_buf2 + CU_SIZE * CU_SIZE;
+      dst_buf2[2] = tmp_buf2 + CU_SIZE * CU_SIZE * 2;
 #if CONFIG_VP9_HIGHBITDEPTH
       }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
-
       vp10_build_prediction_by_above_preds(cpi, xd, mi_row, mi_col, dst_buf1,
                                            dst_stride1);
       vp10_build_prediction_by_left_preds(cpi, xd, mi_row, mi_col, dst_buf2,
@@ -4501,7 +4527,6 @@ static void encode_superblock(VP10_COMP *cpi, ThreadData *td,
                                        dst_buf1, dst_stride1,
                                        dst_buf2, dst_stride2);
     }
-
 #endif  // CONFIG_OBMC
 
     vp10_encode_sb(x, VPXMAX(bsize, BLOCK_8X8));
@@ -4695,6 +4720,9 @@ static int check_supertx_sb(BLOCK_SIZE bsize, TX_SIZE supertx_size,
 }
 
 static void predict_superblock(VP10_COMP *cpi, ThreadData *td,
+#if CONFIG_EXT_INTER
+                               int mi_row_ori, int mi_col_ori,
+#endif  // CONFIG_EXT_INTER
                                int mi_row_pred, int mi_col_pred,
                                BLOCK_SIZE bsize_pred, int b_sub8x8, int block) {
   // Used in supertx
@@ -4719,10 +4747,19 @@ static void predict_superblock(VP10_COMP *cpi, ThreadData *td,
   }
 
   if (!b_sub8x8)
-    vp10_build_inter_predictors_sb(xd, mi_row_pred, mi_col_pred, bsize_pred);
+    vp10_build_inter_predictors_sb_extend(
+        xd,
+#if CONFIG_EXT_INTER
+        mi_row_ori, mi_col_ori,
+#endif  // CONFIG_EXT_INTER
+        mi_row_pred, mi_col_pred, bsize_pred);
   else
-    vp10_build_inter_predictors_sb_sub8x8(xd, mi_row_pred, mi_col_pred,
-                                          bsize_pred, block);
+    vp10_build_inter_predictors_sb_sub8x8_extend(
+        xd,
+#if CONFIG_EXT_INTER
+        mi_row_ori, mi_col_ori,
+#endif  // CONFIG_EXT_INTER
+        mi_row_pred, mi_col_pred, bsize_pred, block);
 }
 
 static void predict_b_extend(VP10_COMP *cpi, ThreadData *td,
@@ -4773,6 +4810,9 @@ static void predict_b_extend(VP10_COMP *cpi, ThreadData *td,
                          (c >> xd->plane[2].subsampling_x);
 
   predict_superblock(cpi, td,
+#if CONFIG_EXT_INTER
+                     mi_row_ori, mi_col_ori,
+#endif  // CONFIG_EXT_INTER
                      mi_row_pred, mi_col_pred, bsize_pred,
                      b_sub8x8, block);
 
diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c
index ac8d2770c9a68896d52c40ba139d5d160cbed0ef..34dd8d54b145ea20bde0ddf44828e62822d9a7a0 100644
--- a/vp10/encoder/encoder.c
+++ b/vp10/encoder/encoder.c
@@ -349,6 +349,9 @@ void vp10_initialize_enc(void) {
     vp10_entropy_mv_init();
     vp10_temporal_filter_init();
     vp10_encode_token_init();
+#if CONFIG_EXT_INTER
+    vp10_init_wedge_masks();
+#endif  // CONFIG_EXT_INTER
     init_done = 1;
   }
 }
@@ -1038,6 +1041,19 @@ static void fnname##_bits12(const uint8_t *src_ptr, \
   sad_array[i] >>= 4; \
 }
 
+#if CONFIG_EXT_PARTITION
+MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad128x128)
+MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad128x128_avg)
+MAKE_BFP_SAD3_WRAPPER(vpx_highbd_sad128x128x3)
+MAKE_BFP_SAD8_WRAPPER(vpx_highbd_sad128x128x8)
+MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad128x128x4d)
+MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad128x64)
+MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad128x64_avg)
+MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad128x64x4d)
+MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad64x128)
+MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad64x128_avg)
+MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad64x128x4d)
+#endif  // CONFIG_EXT_PARTITION
 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x16)
 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x16_avg)
 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x16x4d)
@@ -1094,6 +1110,61 @@ MAKE_BFP_SAD3_WRAPPER(vpx_highbd_sad4x4x3)
 MAKE_BFP_SAD8_WRAPPER(vpx_highbd_sad4x4x8)
 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad4x4x4d)
 
+#if CONFIG_EXT_INTER
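+// Masked (wedge) SAD/variance function pointers for high bitdepth.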
+#define HIGHBD_MBFP(BT, MSDF, MVF, MSVF) \
+  cpi->fn_ptr[BT].msdf = MSDF;           \
+  cpi->fn_ptr[BT].mvf  = MVF;            \
+  cpi->fn_ptr[BT].msvf = MSVF;
+
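+// Wrap the masked SAD functions per bitdepth; 10- and 12-bit results are
+// scaled back (>> 2, >> 4) to stay comparable with 8-bit costs.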
+#define MAKE_MBFP_SAD_WRAPPER(fnname) \
+static unsigned int fnname##_bits8(const uint8_t *src_ptr, \
+                                   int source_stride, \
+                                   const uint8_t *ref_ptr, \
+                                   int ref_stride, \
+                                   const uint8_t *m, \
+                                   int m_stride) {  \
+  return fnname(src_ptr, source_stride, ref_ptr, ref_stride, \
+                m, m_stride); \
+} \
+static unsigned int fnname##_bits10(const uint8_t *src_ptr, \
+                                    int source_stride, \
+                                    const uint8_t *ref_ptr, \
+                                    int ref_stride, \
+                                    const uint8_t *m, \
+                                    int m_stride) {  \
+  return fnname(src_ptr, source_stride, ref_ptr, ref_stride, \
+                m, m_stride) >> 2; \
+} \
+static unsigned int fnname##_bits12(const uint8_t *src_ptr, \
+                                    int source_stride, \
+                                    const uint8_t *ref_ptr, \
+                                    int ref_stride, \
+                                    const uint8_t *m, \
+                                    int m_stride) {  \
+  return fnname(src_ptr, source_stride, ref_ptr, ref_stride, \
+                m, m_stride) >> 4; \
+}
+
+#if CONFIG_EXT_PARTITION
+MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad128x128)
+MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad128x64)
+MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad64x128)
+#endif  // CONFIG_EXT_PARTITION
+MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad64x64)
+MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad64x32)
+MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad32x64)
+MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad32x32)
+MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad32x16)
+MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad16x32)
+MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad16x16)
+MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad16x8)
+MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad8x16)
+MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad8x8)
+MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad8x4)
+MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad4x8)
+MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad4x4)
+#endif  // CONFIG_EXT_INTER
+
 static void  highbd_set_var_fns(VP10_COMP *const cpi) {
   VP10_COMMON *const cm = &cpi->common;
   if (cm->use_highbitdepth) {
@@ -1228,6 +1299,107 @@ static void  highbd_set_var_fns(VP10_COMP *const cpi) {
                    vpx_highbd_sad4x4x3_bits8,
                    vpx_highbd_sad4x4x8_bits8,
                    vpx_highbd_sad4x4x4d_bits8)
+
+#if CONFIG_EXT_PARTITION
+        HIGHBD_BFP(BLOCK_128X128,
+                   vpx_highbd_sad128x128_bits8,
+                   vpx_highbd_sad128x128_avg_bits8,
+                   vpx_highbd_8_variance128x128,
+                   vpx_highbd_8_sub_pixel_variance128x128,
+                   vpx_highbd_8_sub_pixel_avg_variance128x128,
+                   vpx_highbd_sad128x128x3_bits8,
+                   vpx_highbd_sad128x128x8_bits8,
+                   vpx_highbd_sad128x128x4d_bits8)
+
+        HIGHBD_BFP(BLOCK_128X64,
+                   vpx_highbd_sad128x64_bits8,
+                   vpx_highbd_sad128x64_avg_bits8,
+                   vpx_highbd_8_variance128x64,
+                   vpx_highbd_8_sub_pixel_variance128x64,
+                   vpx_highbd_8_sub_pixel_avg_variance128x64,
+                   NULL,
+                   NULL,
+                   vpx_highbd_sad128x64x4d_bits8)
+
+        HIGHBD_BFP(BLOCK_64X128,
+                   vpx_highbd_sad64x128_bits8,
+                   vpx_highbd_sad64x128_avg_bits8,
+                   vpx_highbd_8_variance64x128,
+                   vpx_highbd_8_sub_pixel_variance64x128,
+                   vpx_highbd_8_sub_pixel_avg_variance64x128,
+                   NULL,
+                   NULL,
+                   vpx_highbd_sad64x128x4d_bits8)
+#endif  // CONFIG_EXT_PARTITION
+
+#if CONFIG_EXT_INTER
+#if CONFIG_EXT_PARTITION
+        HIGHBD_MBFP(BLOCK_128X128,
+                    vpx_highbd_masked_sad128x128_bits8,
+                    vpx_highbd_masked_variance128x128,
+                    vpx_highbd_masked_sub_pixel_variance128x128)
+        HIGHBD_MBFP(BLOCK_128X64,
+                    vpx_highbd_masked_sad128x64_bits8,
+                    vpx_highbd_masked_variance128x64,
+                    vpx_highbd_masked_sub_pixel_variance128x64)
+        HIGHBD_MBFP(BLOCK_64X128,
+                    vpx_highbd_masked_sad64x128_bits8,
+                    vpx_highbd_masked_variance64x128,
+                    vpx_highbd_masked_sub_pixel_variance64x128)
+#endif  // CONFIG_EXT_PARTITION
+        HIGHBD_MBFP(BLOCK_64X64,
+                    vpx_highbd_masked_sad64x64_bits8,
+                    vpx_highbd_masked_variance64x64,
+                    vpx_highbd_masked_sub_pixel_variance64x64)
+        HIGHBD_MBFP(BLOCK_64X32,
+                    vpx_highbd_masked_sad64x32_bits8,
+                    vpx_highbd_masked_variance64x32,
+                    vpx_highbd_masked_sub_pixel_variance64x32)
+        HIGHBD_MBFP(BLOCK_32X64,
+                    vpx_highbd_masked_sad32x64_bits8,
+                    vpx_highbd_masked_variance32x64,
+                    vpx_highbd_masked_sub_pixel_variance32x64)
+        HIGHBD_MBFP(BLOCK_32X32,
+                    vpx_highbd_masked_sad32x32_bits8,
+                    vpx_highbd_masked_variance32x32,
+                    vpx_highbd_masked_sub_pixel_variance32x32)
+        HIGHBD_MBFP(BLOCK_32X16,
+                    vpx_highbd_masked_sad32x16_bits8,
+                    vpx_highbd_masked_variance32x16,
+                    vpx_highbd_masked_sub_pixel_variance32x16)
+        HIGHBD_MBFP(BLOCK_16X32,
+                    vpx_highbd_masked_sad16x32_bits8,
+                    vpx_highbd_masked_variance16x32,
+                    vpx_highbd_masked_sub_pixel_variance16x32)
+        HIGHBD_MBFP(BLOCK_16X16,
+                    vpx_highbd_masked_sad16x16_bits8,
+                    vpx_highbd_masked_variance16x16,
+                    vpx_highbd_masked_sub_pixel_variance16x16)
+        HIGHBD_MBFP(BLOCK_8X16,
+                    vpx_highbd_masked_sad8x16_bits8,
+                    vpx_highbd_masked_variance8x16,
+                    vpx_highbd_masked_sub_pixel_variance8x16)
+        HIGHBD_MBFP(BLOCK_16X8,
+                    vpx_highbd_masked_sad16x8_bits8,
+                    vpx_highbd_masked_variance16x8,
+                    vpx_highbd_masked_sub_pixel_variance16x8)
+        HIGHBD_MBFP(BLOCK_8X8,
+                    vpx_highbd_masked_sad8x8_bits8,
+                    vpx_highbd_masked_variance8x8,
+                    vpx_highbd_masked_sub_pixel_variance8x8)
+        HIGHBD_MBFP(BLOCK_4X8,
+                    vpx_highbd_masked_sad4x8_bits8,
+                    vpx_highbd_masked_variance4x8,
+                    vpx_highbd_masked_sub_pixel_variance4x8)
+        HIGHBD_MBFP(BLOCK_8X4,
+                    vpx_highbd_masked_sad8x4_bits8,
+                    vpx_highbd_masked_variance8x4,
+                    vpx_highbd_masked_sub_pixel_variance8x4)
+        HIGHBD_MBFP(BLOCK_4X4,
+                    vpx_highbd_masked_sad4x4_bits8,
+                    vpx_highbd_masked_variance4x4,
+                    vpx_highbd_masked_sub_pixel_variance4x4)
+#endif  // CONFIG_EXT_INTER
         break;
 
       case VPX_BITS_10:
@@ -1360,6 +1532,107 @@ static void  highbd_set_var_fns(VP10_COMP *const cpi) {
                    vpx_highbd_sad4x4x3_bits10,
                    vpx_highbd_sad4x4x8_bits10,
                    vpx_highbd_sad4x4x4d_bits10)
+
+#if CONFIG_EXT_PARTITION
+        HIGHBD_BFP(BLOCK_128X128,
+                   vpx_highbd_sad128x128_bits10,
+                   vpx_highbd_sad128x128_avg_bits10,
+                   vpx_highbd_10_variance128x128,
+                   vpx_highbd_10_sub_pixel_variance128x128,
+                   vpx_highbd_10_sub_pixel_avg_variance128x128,
+                   vpx_highbd_sad128x128x3_bits10,
+                   vpx_highbd_sad128x128x8_bits10,
+                   vpx_highbd_sad128x128x4d_bits10)
+
+        HIGHBD_BFP(BLOCK_128X64,
+                   vpx_highbd_sad128x64_bits10,
+                   vpx_highbd_sad128x64_avg_bits10,
+                   vpx_highbd_10_variance128x64,
+                   vpx_highbd_10_sub_pixel_variance128x64,
+                   vpx_highbd_10_sub_pixel_avg_variance128x64,
+                   NULL,
+                   NULL,
+                   vpx_highbd_sad128x64x4d_bits10)
+
+        HIGHBD_BFP(BLOCK_64X128,
+                   vpx_highbd_sad64x128_bits10,
+                   vpx_highbd_sad64x128_avg_bits10,
+                   vpx_highbd_10_variance64x128,
+                   vpx_highbd_10_sub_pixel_variance64x128,
+                   vpx_highbd_10_sub_pixel_avg_variance64x128,
+                   NULL,
+                   NULL,
+                   vpx_highbd_sad64x128x4d_bits10)
+#endif  // CONFIG_EXT_PARTITION
+
+#if CONFIG_EXT_INTER
+#if CONFIG_EXT_PARTITION
+        HIGHBD_MBFP(BLOCK_128X128,
+                    vpx_highbd_masked_sad128x128_bits10,
+                    vpx_highbd_10_masked_variance128x128,
+                    vpx_highbd_10_masked_sub_pixel_variance128x128)
+        HIGHBD_MBFP(BLOCK_128X64,
+                    vpx_highbd_masked_sad128x64_bits10,
+                    vpx_highbd_10_masked_variance128x64,
+                    vpx_highbd_10_masked_sub_pixel_variance128x64)
+        HIGHBD_MBFP(BLOCK_64X128,
+                    vpx_highbd_masked_sad64x128_bits10,
+                    vpx_highbd_10_masked_variance64x128,
+                    vpx_highbd_10_masked_sub_pixel_variance64x128)
+#endif  // CONFIG_EXT_PARTITION
+        HIGHBD_MBFP(BLOCK_64X64,
+                    vpx_highbd_masked_sad64x64_bits10,
+                    vpx_highbd_10_masked_variance64x64,
+                    vpx_highbd_10_masked_sub_pixel_variance64x64)
+        HIGHBD_MBFP(BLOCK_64X32,
+                    vpx_highbd_masked_sad64x32_bits10,
+                    vpx_highbd_10_masked_variance64x32,
+                    vpx_highbd_10_masked_sub_pixel_variance64x32)
+        HIGHBD_MBFP(BLOCK_32X64,
+                    vpx_highbd_masked_sad32x64_bits10,
+                    vpx_highbd_10_masked_variance32x64,
+                    vpx_highbd_10_masked_sub_pixel_variance32x64)
+        HIGHBD_MBFP(BLOCK_32X32,
+                    vpx_highbd_masked_sad32x32_bits10,
+                    vpx_highbd_10_masked_variance32x32,
+                    vpx_highbd_10_masked_sub_pixel_variance32x32)
+        HIGHBD_MBFP(BLOCK_32X16,
+                    vpx_highbd_masked_sad32x16_bits10,
+                    vpx_highbd_10_masked_variance32x16,
+                    vpx_highbd_10_masked_sub_pixel_variance32x16)
+        HIGHBD_MBFP(BLOCK_16X32,
+                    vpx_highbd_masked_sad16x32_bits10,
+                    vpx_highbd_10_masked_variance16x32,
+                    vpx_highbd_10_masked_sub_pixel_variance16x32)
+        HIGHBD_MBFP(BLOCK_16X16,
+                    vpx_highbd_masked_sad16x16_bits10,
+                    vpx_highbd_10_masked_variance16x16,
+                    vpx_highbd_10_masked_sub_pixel_variance16x16)
+        HIGHBD_MBFP(BLOCK_8X16,
+                    vpx_highbd_masked_sad8x16_bits10,
+                    vpx_highbd_10_masked_variance8x16,
+                    vpx_highbd_10_masked_sub_pixel_variance8x16)
+        HIGHBD_MBFP(BLOCK_16X8,
+                    vpx_highbd_masked_sad16x8_bits10,
+                    vpx_highbd_10_masked_variance16x8,
+                    vpx_highbd_10_masked_sub_pixel_variance16x8)
+        HIGHBD_MBFP(BLOCK_8X8,
+                    vpx_highbd_masked_sad8x8_bits10,
+                    vpx_highbd_10_masked_variance8x8,
+                    vpx_highbd_10_masked_sub_pixel_variance8x8)
+        HIGHBD_MBFP(BLOCK_4X8,
+                    vpx_highbd_masked_sad4x8_bits10,
+                    vpx_highbd_10_masked_variance4x8,
+                    vpx_highbd_10_masked_sub_pixel_variance4x8)
+        HIGHBD_MBFP(BLOCK_8X4,
+                    vpx_highbd_masked_sad8x4_bits10,
+                    vpx_highbd_10_masked_variance8x4,
+                    vpx_highbd_10_masked_sub_pixel_variance8x4)
+        HIGHBD_MBFP(BLOCK_4X4,
+                    vpx_highbd_masked_sad4x4_bits10,
+                    vpx_highbd_10_masked_variance4x4,
+                    vpx_highbd_10_masked_sub_pixel_variance4x4)
+#endif  // CONFIG_EXT_INTER
         break;
 
       case VPX_BITS_12:
@@ -1492,6 +1765,107 @@ static void  highbd_set_var_fns(VP10_COMP *const cpi) {
                    vpx_highbd_sad4x4x3_bits12,
                    vpx_highbd_sad4x4x8_bits12,
                    vpx_highbd_sad4x4x4d_bits12)
+
+#if CONFIG_EXT_PARTITION
+        HIGHBD_BFP(BLOCK_128X128,
+                   vpx_highbd_sad128x128_bits12,
+                   vpx_highbd_sad128x128_avg_bits12,
+                   vpx_highbd_12_variance128x128,
+                   vpx_highbd_12_sub_pixel_variance128x128,
+                   vpx_highbd_12_sub_pixel_avg_variance128x128,
+                   vpx_highbd_sad128x128x3_bits12,
+                   vpx_highbd_sad128x128x8_bits12,
+                   vpx_highbd_sad128x128x4d_bits12)
+
+        HIGHBD_BFP(BLOCK_128X64,
+                   vpx_highbd_sad128x64_bits12,
+                   vpx_highbd_sad128x64_avg_bits12,
+                   vpx_highbd_12_variance128x64,
+                   vpx_highbd_12_sub_pixel_variance128x64,
+                   vpx_highbd_12_sub_pixel_avg_variance128x64,
+                   NULL,
+                   NULL,
+                   vpx_highbd_sad128x64x4d_bits12)
+
+        HIGHBD_BFP(BLOCK_64X128,
+                   vpx_highbd_sad64x128_bits12,
+                   vpx_highbd_sad64x128_avg_bits12,
+                   vpx_highbd_12_variance64x128,
+                   vpx_highbd_12_sub_pixel_variance64x128,
+                   vpx_highbd_12_sub_pixel_avg_variance64x128,
+                   NULL,
+                   NULL,
+                   vpx_highbd_sad64x128x4d_bits12)
+#endif  // CONFIG_EXT_PARTITION
+
+#if CONFIG_EXT_INTER
+#if CONFIG_EXT_PARTITION
+        HIGHBD_MBFP(BLOCK_128X128,
+                    vpx_highbd_masked_sad128x128_bits12,
+                    vpx_highbd_12_masked_variance128x128,
+                    vpx_highbd_12_masked_sub_pixel_variance128x128)
+        HIGHBD_MBFP(BLOCK_128X64,
+                    vpx_highbd_masked_sad128x64_bits12,
+                    vpx_highbd_12_masked_variance128x64,
+                    vpx_highbd_12_masked_sub_pixel_variance128x64)
+        HIGHBD_MBFP(BLOCK_64X128,
+                    vpx_highbd_masked_sad64x128_bits12,
+                    vpx_highbd_12_masked_variance64x128,
+                    vpx_highbd_12_masked_sub_pixel_variance64x128)
+#endif  // CONFIG_EXT_PARTITION
+        HIGHBD_MBFP(BLOCK_64X64,
+                    vpx_highbd_masked_sad64x64_bits12,
+                    vpx_highbd_12_masked_variance64x64,
+                    vpx_highbd_12_masked_sub_pixel_variance64x64)
+        HIGHBD_MBFP(BLOCK_64X32,
+                    vpx_highbd_masked_sad64x32_bits12,
+                    vpx_highbd_12_masked_variance64x32,
+                    vpx_highbd_12_masked_sub_pixel_variance64x32)
+        HIGHBD_MBFP(BLOCK_32X64,
+                    vpx_highbd_masked_sad32x64_bits12,
+                    vpx_highbd_12_masked_variance32x64,
+                    vpx_highbd_12_masked_sub_pixel_variance32x64)
+        HIGHBD_MBFP(BLOCK_32X32,
+                    vpx_highbd_masked_sad32x32_bits12,
+                    vpx_highbd_12_masked_variance32x32,
+                    vpx_highbd_12_masked_sub_pixel_variance32x32)
+        HIGHBD_MBFP(BLOCK_32X16,
+                    vpx_highbd_masked_sad32x16_bits12,
+                    vpx_highbd_12_masked_variance32x16,
+                    vpx_highbd_12_masked_sub_pixel_variance32x16)
+        HIGHBD_MBFP(BLOCK_16X32,
+                    vpx_highbd_masked_sad16x32_bits12,
+                    vpx_highbd_12_masked_variance16x32,
+                    vpx_highbd_12_masked_sub_pixel_variance16x32)
+        HIGHBD_MBFP(BLOCK_16X16,
+                    vpx_highbd_masked_sad16x16_bits12,
+                    vpx_highbd_12_masked_variance16x16,
+                    vpx_highbd_12_masked_sub_pixel_variance16x16)
+        HIGHBD_MBFP(BLOCK_8X16,
+                    vpx_highbd_masked_sad8x16_bits12,
+                    vpx_highbd_12_masked_variance8x16,
+                    vpx_highbd_12_masked_sub_pixel_variance8x16)
+        HIGHBD_MBFP(BLOCK_16X8,
+                    vpx_highbd_masked_sad16x8_bits12,
+                    vpx_highbd_12_masked_variance16x8,
+                    vpx_highbd_12_masked_sub_pixel_variance16x8)
+        HIGHBD_MBFP(BLOCK_8X8,
+                    vpx_highbd_masked_sad8x8_bits12,
+                    vpx_highbd_12_masked_variance8x8,
+                    vpx_highbd_12_masked_sub_pixel_variance8x8)
+        HIGHBD_MBFP(BLOCK_4X8,
+                    vpx_highbd_masked_sad4x8_bits12,
+                    vpx_highbd_12_masked_variance4x8,
+                    vpx_highbd_12_masked_sub_pixel_variance4x8)
+        HIGHBD_MBFP(BLOCK_8X4,
+                    vpx_highbd_masked_sad8x4_bits12,
+                    vpx_highbd_12_masked_variance8x4,
+                    vpx_highbd_12_masked_sub_pixel_variance8x4)
+        HIGHBD_MBFP(BLOCK_4X4,
+                    vpx_highbd_masked_sad4x4_bits12,
+                    vpx_highbd_12_masked_variance4x4,
+                    vpx_highbd_12_masked_sub_pixel_variance4x4)
+#endif  // CONFIG_EXT_INTER
         break;
 
       default:
@@ -1912,6 +2286,21 @@ VP10_COMP *vp10_create_compressor(VP10EncoderConfig *oxcf,
     cpi->fn_ptr[BT].sdx8f          = SDX8F; \
     cpi->fn_ptr[BT].sdx4df         = SDX4DF;
 
+#if CONFIG_EXT_PARTITION
+  BFP(BLOCK_128X128, vpx_sad128x128, vpx_sad128x128_avg,
+      vpx_variance128x128, vpx_sub_pixel_variance128x128,
+      vpx_sub_pixel_avg_variance128x128, vpx_sad128x128x3, vpx_sad128x128x8,
+      vpx_sad128x128x4d)
+
+  BFP(BLOCK_128X64, vpx_sad128x64, vpx_sad128x64_avg,
+      vpx_variance128x64, vpx_sub_pixel_variance128x64,
+      vpx_sub_pixel_avg_variance128x64, NULL, NULL, vpx_sad128x64x4d)
+
+  BFP(BLOCK_64X128, vpx_sad64x128, vpx_sad64x128_avg,
+      vpx_variance64x128, vpx_sub_pixel_variance64x128,
+      vpx_sub_pixel_avg_variance64x128, NULL, NULL, vpx_sad64x128x4d)
+#endif  // CONFIG_EXT_PARTITION
+
   BFP(BLOCK_32X16, vpx_sad32x16, vpx_sad32x16_avg,
       vpx_variance32x16, vpx_sub_pixel_variance32x16,
       vpx_sub_pixel_avg_variance32x16, NULL, NULL, vpx_sad32x16x4d)
@@ -1971,6 +2360,48 @@ VP10_COMP *vp10_create_compressor(VP10EncoderConfig *oxcf,
       vpx_sub_pixel_avg_variance4x4,
       vpx_sad4x4x3, vpx_sad4x4x8, vpx_sad4x4x4d)
 
+#if CONFIG_EXT_INTER
+#define MBFP(BT, MSDF, MVF, MSVF)         \
+  cpi->fn_ptr[BT].msdf            = MSDF; \
+  cpi->fn_ptr[BT].mvf             = MVF;  \
+  cpi->fn_ptr[BT].msvf            = MSVF;
+
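+  // MBFP wires up the masked (wedge) variants of the block error metrics:
+  // msdf = masked SAD, mvf = masked variance, msvf = masked sub-pixel
+  // variance. The masked motion search in mcomp.c reaches them through
+  // cpi->fn_ptr[bsize].
+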
+#if CONFIG_EXT_PARTITION
+  MBFP(BLOCK_128X128, vpx_masked_sad128x128, vpx_masked_variance128x128,
+       vpx_masked_sub_pixel_variance128x128)
+  MBFP(BLOCK_128X64, vpx_masked_sad128x64, vpx_masked_variance128x64,
+       vpx_masked_sub_pixel_variance128x64)
+  MBFP(BLOCK_64X128, vpx_masked_sad64x128, vpx_masked_variance64x128,
+       vpx_masked_sub_pixel_variance64x128)
+#endif  // CONFIG_EXT_PARTITION
+  MBFP(BLOCK_64X64, vpx_masked_sad64x64, vpx_masked_variance64x64,
+       vpx_masked_sub_pixel_variance64x64)
+  MBFP(BLOCK_64X32, vpx_masked_sad64x32, vpx_masked_variance64x32,
+       vpx_masked_sub_pixel_variance64x32)
+  MBFP(BLOCK_32X64, vpx_masked_sad32x64, vpx_masked_variance32x64,
+       vpx_masked_sub_pixel_variance32x64)
+  MBFP(BLOCK_32X32, vpx_masked_sad32x32, vpx_masked_variance32x32,
+       vpx_masked_sub_pixel_variance32x32)
+  MBFP(BLOCK_32X16, vpx_masked_sad32x16, vpx_masked_variance32x16,
+       vpx_masked_sub_pixel_variance32x16)
+  MBFP(BLOCK_16X32, vpx_masked_sad16x32, vpx_masked_variance16x32,
+       vpx_masked_sub_pixel_variance16x32)
+  MBFP(BLOCK_16X16, vpx_masked_sad16x16, vpx_masked_variance16x16,
+       vpx_masked_sub_pixel_variance16x16)
+  MBFP(BLOCK_16X8, vpx_masked_sad16x8, vpx_masked_variance16x8,
+       vpx_masked_sub_pixel_variance16x8)
+  MBFP(BLOCK_8X16, vpx_masked_sad8x16, vpx_masked_variance8x16,
+       vpx_masked_sub_pixel_variance8x16)
+  MBFP(BLOCK_8X8, vpx_masked_sad8x8, vpx_masked_variance8x8,
+       vpx_masked_sub_pixel_variance8x8)
+  MBFP(BLOCK_4X8, vpx_masked_sad4x8, vpx_masked_variance4x8,
+       vpx_masked_sub_pixel_variance4x8)
+  MBFP(BLOCK_8X4, vpx_masked_sad8x4, vpx_masked_variance8x4,
+       vpx_masked_sub_pixel_variance8x4)
+  MBFP(BLOCK_4X4, vpx_masked_sad4x4, vpx_masked_variance4x4,
+       vpx_masked_sub_pixel_variance4x4)
+#endif  // CONFIG_EXT_INTER
+
 #if CONFIG_VP9_HIGHBITDEPTH
   highbd_set_var_fns(cpi);
 #endif
diff --git a/vp10/encoder/encoder.h b/vp10/encoder/encoder.h
index efde0fc315d43b7d04ebf42d86229c56187d54ed..49cac0cfd4f548de90bdda978aaa0f9ec5382634 100644
--- a/vp10/encoder/encoder.h
+++ b/vp10/encoder/encoder.h
@@ -422,7 +422,7 @@ typedef struct VP10_COMP {
   fractional_mv_step_fp *find_fractional_mv_step;
   vp10_full_search_fn_t full_search_sad;
   vp10_diamond_search_fn_t diamond_search_sad;
-  vp9_variance_fn_ptr_t fn_ptr[BLOCK_SIZES];
+  vp10_variance_fn_ptr_t fn_ptr[BLOCK_SIZES];
   uint64_t time_receive_data;
   uint64_t time_compress_data;
   uint64_t time_pick_lpf;
diff --git a/vp10/encoder/firstpass.c b/vp10/encoder/firstpass.c
index 0404e277b731d735958a7398a46adf7e98ce0c82..dd3e4378a8dac0f791a17164fbfc74d27179a8f5 100644
--- a/vp10/encoder/firstpass.c
+++ b/vp10/encoder/firstpass.c
@@ -388,7 +388,7 @@ static void first_pass_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
   MV ref_mv_full = {ref_mv->row >> 3, ref_mv->col >> 3};
   int num00, tmp_err, n;
   const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
-  vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[bsize];
+  vp10_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[bsize];
   const int new_mv_mode_penalty = NEW_MV_MODE_PENALTY;
 
   int step_param = 3;
diff --git a/vp10/encoder/mbgraph.c b/vp10/encoder/mbgraph.c
index 1f467b811bebb6400d8f79734dda6b3254f9ffde..32ff0faf67f465efbf2cbffdd47985bae9b0517e 100644
--- a/vp10/encoder/mbgraph.c
+++ b/vp10/encoder/mbgraph.c
@@ -31,7 +31,7 @@ static unsigned int do_16x16_motion_iteration(VP10_COMP *cpi,
   MACROBLOCK *const x = &cpi->td.mb;
   MACROBLOCKD *const xd = &x->e_mbd;
   const MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
-  const vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16];
+  const vp10_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16];
 
   const int tmp_col_min = x->mv_col_min;
   const int tmp_col_max = x->mv_col_max;
diff --git a/vp10/encoder/mcomp.c b/vp10/encoder/mcomp.c
index 8949f76bc985beeefe4371bcb8972633b69563dd..2c9397640df9ff66886f88d61e45a85c151fd58c 100644
--- a/vp10/encoder/mcomp.c
+++ b/vp10/encoder/mcomp.c
@@ -354,7 +354,7 @@ static unsigned int setup_center_error(const MACROBLOCKD *xd,
                                        const MV *bestmv,
                                        const MV *ref_mv,
                                        int error_per_bit,
-                                       const vp9_variance_fn_ptr_t *vfp,
+                                       const vp10_variance_fn_ptr_t *vfp,
                                        const uint8_t *const src,
                                        const int src_stride,
                                        const uint8_t *const y,
@@ -430,7 +430,7 @@ int vp10_find_best_sub_pixel_tree_pruned_evenmore(
     MV *bestmv, const MV *ref_mv,
     int allow_hp,
     int error_per_bit,
-    const vp9_variance_fn_ptr_t *vfp,
+    const vp10_variance_fn_ptr_t *vfp,
     int forced_stop,
     int iters_per_step,
     int *cost_list,
@@ -516,7 +516,7 @@ int vp10_find_best_sub_pixel_tree_pruned_more(const MACROBLOCK *x,
                                              MV *bestmv, const MV *ref_mv,
                                              int allow_hp,
                                              int error_per_bit,
-                                             const vp9_variance_fn_ptr_t *vfp,
+                                             const vp10_variance_fn_ptr_t *vfp,
                                              int forced_stop,
                                              int iters_per_step,
                                              int *cost_list,
@@ -599,7 +599,7 @@ int vp10_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x,
                                         MV *bestmv, const MV *ref_mv,
                                         int allow_hp,
                                         int error_per_bit,
-                                        const vp9_variance_fn_ptr_t *vfp,
+                                        const vp10_variance_fn_ptr_t *vfp,
                                         int forced_stop,
                                         int iters_per_step,
                                         int *cost_list,
@@ -748,7 +748,7 @@ static void highbd_upsampled_pred(uint16_t *comp_pred,
 #endif
 
 static int upsampled_pref_error(const MACROBLOCKD *xd,
-                                const vp9_variance_fn_ptr_t *vfp,
+                                const vp10_variance_fn_ptr_t *vfp,
                                 const uint8_t *const src, const int src_stride,
                                 const uint8_t *const y, int y_stride,
                                 const uint8_t *second_pred,
@@ -786,7 +786,7 @@ return besterr;
 
 static unsigned int upsampled_setup_center_error(
     const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv,
-    int error_per_bit, const vp9_variance_fn_ptr_t *vfp,
+    int error_per_bit, const vp10_variance_fn_ptr_t *vfp,
     const uint8_t *const src, const int src_stride,
     const uint8_t *const y, int y_stride, const uint8_t *second_pred,
     int w, int h, int offset, int *mvjcost, int *mvcost[2],
@@ -804,7 +804,7 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x,
                                  MV *bestmv, const MV *ref_mv,
                                  int allow_hp,
                                  int error_per_bit,
-                                 const vp9_variance_fn_ptr_t *vfp,
+                                 const vp10_variance_fn_ptr_t *vfp,
                                  int forced_stop,
                                  int iters_per_step,
                                  int *cost_list,
@@ -1037,7 +1037,7 @@ static INLINE int is_mv_in(const MACROBLOCK *x, const MV *mv) {
 static INLINE void calc_int_cost_list(const MACROBLOCK *x,
                                       const MV *ref_mv,
                                       int sadpb,
-                                      const vp9_variance_fn_ptr_t *fn_ptr,
+                                      const vp10_variance_fn_ptr_t *fn_ptr,
                                       const MV *best_mv,
                                       int *cost_list) {
   static const MV neighbors[4] = {{0, -1}, {1, 0}, {0, 1}, {-1, 0}};
@@ -1095,7 +1095,7 @@ static int vp10_pattern_search(const MACROBLOCK *x,
                               int sad_per_bit,
                               int do_init_search,
                               int *cost_list,
-                              const vp9_variance_fn_ptr_t *vfp,
+                              const vp10_variance_fn_ptr_t *vfp,
                               int use_mvcost,
                               const MV *center_mv,
                               MV *best_mv,
@@ -1270,7 +1270,7 @@ static int vp10_pattern_search_sad(const MACROBLOCK *x,
                                   int sad_per_bit,
                                   int do_init_search,
                                   int *cost_list,
-                                  const vp9_variance_fn_ptr_t *vfp,
+                                  const vp10_variance_fn_ptr_t *vfp,
                                   int use_mvcost,
                                   const MV *center_mv,
                                   MV *best_mv,
@@ -1552,7 +1552,7 @@ static int vp10_pattern_search_sad(const MACROBLOCK *x,
 
 int vp10_get_mvpred_var(const MACROBLOCK *x,
                        const MV *best_mv, const MV *center_mv,
-                       const vp9_variance_fn_ptr_t *vfp,
+                       const vp10_variance_fn_ptr_t *vfp,
                        int use_mvcost) {
   const MACROBLOCKD *const xd = &x->e_mbd;
   const struct buf_2d *const what = &x->plane[0].src;
@@ -1569,7 +1569,7 @@ int vp10_get_mvpred_var(const MACROBLOCK *x,
 int vp10_get_mvpred_av_var(const MACROBLOCK *x,
                           const MV *best_mv, const MV *center_mv,
                           const uint8_t *second_pred,
-                          const vp9_variance_fn_ptr_t *vfp,
+                          const vp10_variance_fn_ptr_t *vfp,
                           int use_mvcost) {
   const MACROBLOCKD *const xd = &x->e_mbd;
   const struct buf_2d *const what = &x->plane[0].src;
@@ -1589,7 +1589,7 @@ int vp10_hex_search(const MACROBLOCK *x,
                    int sad_per_bit,
                    int do_init_search,
                    int *cost_list,
-                   const vp9_variance_fn_ptr_t *vfp,
+                   const vp10_variance_fn_ptr_t *vfp,
                    int use_mvcost,
                    const MV *center_mv, MV *best_mv) {
   // First scale has 8-closest points, the rest have 6 points in hex shape
@@ -1624,7 +1624,7 @@ int vp10_bigdia_search(const MACROBLOCK *x,
                       int sad_per_bit,
                       int do_init_search,
                       int *cost_list,
-                      const vp9_variance_fn_ptr_t *vfp,
+                      const vp10_variance_fn_ptr_t *vfp,
                       int use_mvcost,
                       const MV *center_mv,
                       MV *best_mv) {
@@ -1666,7 +1666,7 @@ int vp10_square_search(const MACROBLOCK *x,
                       int sad_per_bit,
                       int do_init_search,
                       int *cost_list,
-                      const vp9_variance_fn_ptr_t *vfp,
+                      const vp10_variance_fn_ptr_t *vfp,
                       int use_mvcost,
                       const MV *center_mv,
                       MV *best_mv) {
@@ -1708,7 +1708,7 @@ int vp10_fast_hex_search(const MACROBLOCK *x,
                         int sad_per_bit,
                         int do_init_search,  // must be zero for fast_hex
                         int *cost_list,
-                        const vp9_variance_fn_ptr_t *vfp,
+                        const vp10_variance_fn_ptr_t *vfp,
                         int use_mvcost,
                         const MV *center_mv,
                         MV *best_mv) {
@@ -1723,7 +1723,7 @@ int vp10_fast_dia_search(const MACROBLOCK *x,
                         int sad_per_bit,
                         int do_init_search,
                         int *cost_list,
-                        const vp9_variance_fn_ptr_t *vfp,
+                        const vp10_variance_fn_ptr_t *vfp,
                         int use_mvcost,
                         const MV *center_mv,
                         MV *best_mv) {
@@ -1739,7 +1739,7 @@ int vp10_fast_dia_search(const MACROBLOCK *x,
 static int exhuastive_mesh_search(const MACROBLOCK *x,
                                   MV *ref_mv, MV *best_mv,
                                   int range, int step, int sad_per_bit,
-                                  const vp9_variance_fn_ptr_t *fn_ptr,
+                                  const vp10_variance_fn_ptr_t *fn_ptr,
                                   const MV *center_mv) {
   const MACROBLOCKD *const xd = &x->e_mbd;
   const struct buf_2d *const what = &x->plane[0].src;
@@ -1822,11 +1822,11 @@ static int exhuastive_mesh_search(const MACROBLOCK *x,
 }
 
 int vp10_diamond_search_sad_c(const MACROBLOCK *x,
-                             const search_site_config *cfg,
-                             MV *ref_mv, MV *best_mv, int search_param,
-                             int sad_per_bit, int *num00,
-                             const vp9_variance_fn_ptr_t *fn_ptr,
-                             const MV *center_mv) {
+                              const search_site_config *cfg,
+                              MV *ref_mv, MV *best_mv, int search_param,
+                              int sad_per_bit, int *num00,
+                              const vp10_variance_fn_ptr_t *fn_ptr,
+                              const MV *center_mv) {
   int i, j, step;
 
   const MACROBLOCKD *const xd = &x->e_mbd;
@@ -2179,7 +2179,7 @@ int vp10_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x,
                            MV *mvp_full, int step_param,
                            int sadpb, int further_steps, int do_refine,
                            int *cost_list,
-                           const vp9_variance_fn_ptr_t *fn_ptr,
+                           const vp10_variance_fn_ptr_t *fn_ptr,
                            const MV *ref_mv, MV *dst_mv) {
   MV temp_mv;
   int thissme, n, num00 = 0;
@@ -2246,7 +2246,7 @@ int vp10_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x,
 // according to the encode speed profile.
 static int full_pixel_exhaustive(VP10_COMP *cpi, MACROBLOCK *x,
                                  MV *centre_mv_full, int sadpb,  int *cost_list,
-                                 const vp9_variance_fn_ptr_t *fn_ptr,
+                                 const vp10_variance_fn_ptr_t *fn_ptr,
                                  const MV *ref_mv, MV *dst_mv) {
   const SPEED_FEATURES *const sf = &cpi->sf;
   MV temp_mv = {centre_mv_full->row, centre_mv_full->col};
@@ -2305,7 +2305,7 @@ static int full_pixel_exhaustive(VP10_COMP *cpi, MACROBLOCK *x,
 
 int vp10_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
                           int sad_per_bit, int distance,
-                          const vp9_variance_fn_ptr_t *fn_ptr,
+                          const vp10_variance_fn_ptr_t *fn_ptr,
                           const MV *center_mv, MV *best_mv) {
   int r, c;
   const MACROBLOCKD *const xd = &x->e_mbd;
@@ -2338,7 +2338,7 @@ int vp10_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
 
 int vp10_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
                           int sad_per_bit, int distance,
-                          const vp9_variance_fn_ptr_t *fn_ptr,
+                          const vp10_variance_fn_ptr_t *fn_ptr,
                           const MV *center_mv, MV *best_mv) {
   int r;
   const MACROBLOCKD *const xd = &x->e_mbd;
@@ -2403,7 +2403,7 @@ int vp10_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
 
 int vp10_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
                           int sad_per_bit, int distance,
-                          const vp9_variance_fn_ptr_t *fn_ptr,
+                          const vp10_variance_fn_ptr_t *fn_ptr,
                           const MV *center_mv, MV *best_mv) {
   int r;
   const MACROBLOCKD *const xd = &x->e_mbd;
@@ -2493,7 +2493,7 @@ int vp10_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
 int vp10_refining_search_sad(const MACROBLOCK *x,
                             MV *ref_mv, int error_per_bit,
                             int search_range,
-                            const vp9_variance_fn_ptr_t *fn_ptr,
+                            const vp10_variance_fn_ptr_t *fn_ptr,
                             const MV *center_mv) {
   const MACROBLOCKD *const xd = &x->e_mbd;
   const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
@@ -2572,7 +2572,7 @@ int vp10_refining_search_sad(const MACROBLOCK *x,
 int vp10_refining_search_8p_c(const MACROBLOCK *x,
                              MV *ref_mv, int error_per_bit,
                              int search_range,
-                             const vp9_variance_fn_ptr_t *fn_ptr,
+                             const vp10_variance_fn_ptr_t *fn_ptr,
                              const MV *center_mv,
                              const uint8_t *second_pred) {
   const MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0},
@@ -2636,7 +2636,7 @@ int vp10_full_pixel_search(VP10_COMP *cpi, MACROBLOCK *x,
                           int var_max, int rd) {
   const SPEED_FEATURES *const sf = &cpi->sf;
   const SEARCH_METHODS method = sf->mv.search_method;
-  vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
+  vp10_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
   int var = 0;
   if (cost_list) {
     cost_list[0] = INT_MAX;
@@ -2707,3 +2707,354 @@ int vp10_full_pixel_search(VP10_COMP *cpi, MACROBLOCK *x,
 
   return var;
 }
+
+#if CONFIG_EXT_INTER
+/* returns the masked sub-pixel variance error at position (r, c) */
+#define DIST(r, c) \
+    vfp->msvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
+              src_stride, mask, mask_stride, &sse)
+
+/* estimates the rate cost (weighted by error_per_bit) of coding the mv (r, c) */
+
+#define MVC(r, c)                                       \
+    (mvcost ?                                           \
+     ((mvjcost[((r) != rr) * 2 + ((c) != rc)] +         \
+       mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \
+      error_per_bit + 4096) >> 13 : 0)
+
+/* checks if (r, c) has a better score than the previous best */
+#define CHECK_BETTER(v, r, c) \
+  if (c >= minc && c <= maxc && r >= minr && r <= maxr) {              \
+    thismse = (DIST(r, c));                                            \
+    if ((v = MVC(r, c) + thismse) < besterr) {                         \
+      besterr = v;                                                     \
+      br = r;                                                          \
+      bc = c;                                                          \
+      *distortion = thismse;                                           \
+      *sse1 = sse;                                                     \
+    }                                                                  \
+  } else {                                                             \
+    v = INT_MAX;                                                       \
+  }
+
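+// DIST, MVC and CHECK_BETTER plug into the generic FIRST_LEVEL_CHECKS and
+// SECOND_LEVEL_CHECKS macros defined earlier in this file, so the masked
+// search follows the same half -> quarter -> eighth pel descent as
+// vp10_find_best_sub_pixel_tree; e.g. CHECK_BETTER(left, tr, tc - hstep)
+// probes one position to the left of the current best.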
+int vp10_find_best_masked_sub_pixel_tree(const MACROBLOCK *x,
+                                         const uint8_t *mask, int mask_stride,
+                                         MV *bestmv, const MV *ref_mv,
+                                         int allow_hp,
+                                         int error_per_bit,
+                                         const vp10_variance_fn_ptr_t *vfp,
+                                         int forced_stop,
+                                         int iters_per_step,
+                                         int *mvjcost, int *mvcost[2],
+                                         int *distortion,
+                                         unsigned int *sse1, int is_second) {
+  const uint8_t *const z = x->plane[0].src.buf;
+  const int src_stride = x->plane[0].src.stride;
+  const MACROBLOCKD *xd = &x->e_mbd;
+  unsigned int besterr = INT_MAX;
+  unsigned int sse;
+  unsigned int whichdir;
+  int thismse;
+  unsigned int halfiters = iters_per_step;
+  unsigned int quarteriters = iters_per_step;
+  unsigned int eighthiters = iters_per_step;
+
+  const int y_stride = xd->plane[0].pre[is_second].stride;
+  const int offset = bestmv->row * y_stride + bestmv->col;
+  const uint8_t *const y = xd->plane[0].pre[is_second].buf;
+
+  int rr = ref_mv->row;
+  int rc = ref_mv->col;
+  int br = bestmv->row * 8;
+  int bc = bestmv->col * 8;
+  int hstep = 4;
+  const int minc = VPXMAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
+  const int maxc = VPXMIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
+  const int minr = VPXMAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
+  const int maxr = VPXMIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
+
+  int tr = br;
+  int tc = bc;
+
+  // central mv
+  bestmv->row *= 8;
+  bestmv->col *= 8;
+
+  // calculate central point error
+  besterr = vfp->mvf(y + offset, y_stride, z, src_stride, mask, mask_stride,
+                     sse1);
+  *distortion = besterr;
+  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
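+  // At this point besterr is the masked prediction error (mvf) plus the
+  // rate term: mv_err_cost() weights the bits needed to code the MV residual
+  // against ref_mv by error_per_bit.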
+
+  // 1/2 pel
+  FIRST_LEVEL_CHECKS;
+  if (halfiters > 1) {
+    SECOND_LEVEL_CHECKS;
+  }
+  tr = br;
+  tc = bc;
+
+  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
+  if (forced_stop != 2) {
+    hstep >>= 1;
+    FIRST_LEVEL_CHECKS;
+    if (quarteriters > 1) {
+      SECOND_LEVEL_CHECKS;
+    }
+    tr = br;
+    tc = bc;
+  }
+
+  if (allow_hp && vp10_use_mv_hp(ref_mv) && forced_stop == 0) {
+    hstep >>= 1;
+    FIRST_LEVEL_CHECKS;
+    if (eighthiters > 1) {
+      SECOND_LEVEL_CHECKS;
+    }
+    tr = br;
+    tc = bc;
+  }
+  // These lines ensure static analysis doesn't warn that
+  // tr and tc aren't used after the above point.
+  (void) tr;
+  (void) tc;
+
+  bestmv->row = br;
+  bestmv->col = bc;
+
+  if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
+      (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
+    return INT_MAX;
+
+  return besterr;
+}
+
+#undef DIST
+#undef MVC
+#undef CHECK_BETTER
+
+static int get_masked_mvpred_var(const MACROBLOCK *x,
+                                 const uint8_t *mask, int mask_stride,
+                                 const MV *best_mv, const MV *center_mv,
+                                 const vp10_variance_fn_ptr_t *vfp,
+                                 int use_mvcost, int is_second) {
+  const MACROBLOCKD *const xd = &x->e_mbd;
+  const struct buf_2d *const what = &x->plane[0].src;
+  const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
+  const MV mv = {best_mv->row * 8, best_mv->col * 8};
+  unsigned int unused;
+
+  return vfp->mvf(what->buf, what->stride,
+                  get_buf_from_mv(in_what, best_mv), in_what->stride,
+                  mask, mask_stride, &unused) +
+      (use_mvcost ?  mv_err_cost(&mv, center_mv, x->nmvjointcost,
+                                 x->mvcost, x->errorperbit) : 0);
+}
+
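+// Greedy full-pel refinement: probe the four cross neighbours of ref_mv with
+// the masked SAD and move to the best improving one, stopping after
+// search_range rounds or as soon as no neighbour improves.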
+static int masked_refining_search_sad(const MACROBLOCK *x,
+                                      const uint8_t *mask, int mask_stride,
+                                      MV *ref_mv, int error_per_bit,
+                                      int search_range,
+                                      const vp10_variance_fn_ptr_t *fn_ptr,
+                                      const MV *center_mv, int is_second) {
+  const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
+  const MACROBLOCKD *const xd = &x->e_mbd;
+  const struct buf_2d *const what = &x->plane[0].src;
+  const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
+  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
+  unsigned int best_sad = fn_ptr->msdf(what->buf, what->stride,
+                                       get_buf_from_mv(in_what, ref_mv),
+                                       in_what->stride, mask, mask_stride) +
+      mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
+  int i, j;
+
+  for (i = 0; i < search_range; i++) {
+    int best_site = -1;
+
+    for (j = 0; j < 4; j++) {
+      const MV mv = {ref_mv->row + neighbors[j].row,
+                     ref_mv->col + neighbors[j].col};
+      if (is_mv_in(x, &mv)) {
+        unsigned int sad = fn_ptr->msdf(what->buf, what->stride,
+            get_buf_from_mv(in_what, &mv), in_what->stride, mask, mask_stride);
+        if (sad < best_sad) {
+          sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
+          if (sad < best_sad) {
+            best_sad = sad;
+            best_site = j;
+          }
+        }
+      }
+    }
+
+    if (best_site == -1) {
+      break;
+    } else {
+      ref_mv->row += neighbors[best_site].row;
+      ref_mv->col += neighbors[best_site].col;
+    }
+  }
+  return best_sad;
+}
+
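+// Masked counterpart of vp10_diamond_search_sad_c: the same step pattern from
+// cfg->ss (index 0 is the centre point, hence i starts at 1 below), but with
+// distortion from the masked SAD fn_ptr->msdf, so only pixels selected by the
+// wedge mask contribute.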
+static int masked_diamond_search_sad(const MACROBLOCK *x,
+                                     const search_site_config *cfg,
+                                     const uint8_t *mask, int mask_stride,
+                                     MV *ref_mv, MV *best_mv,
+                                     int search_param,
+                                     int sad_per_bit, int *num00,
+                                     const vp10_variance_fn_ptr_t *fn_ptr,
+                                     const MV *center_mv, int is_second) {
+  const MACROBLOCKD *const xd = &x->e_mbd;
+  const struct buf_2d *const what = &x->plane[0].src;
+  const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
+  // search_param determines the length of the initial step and hence the
+  // number of iterations:
+  //   0 = initial step (MAX_FIRST_STEP) pel,
+  //   1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel, etc.
+  const search_site *const ss = &cfg->ss[search_param * cfg->searches_per_step];
+  const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
+  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
+  const uint8_t *best_address, *in_what_ref;
+  int best_sad = INT_MAX;
+  int best_site = 0;
+  int last_site = 0;
+  int i, j, step;
+
+  clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
+  in_what_ref = get_buf_from_mv(in_what, ref_mv);
+  best_address = in_what_ref;
+  *num00 = 0;
+  *best_mv = *ref_mv;
+
+  // Check the starting position
+  best_sad = fn_ptr->msdf(what->buf, what->stride,
+                          best_address, in_what->stride,
+                          mask, mask_stride) +
+      mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
+
+  i = 1;
+
+  for (step = 0; step < tot_steps; step++) {
+    for (j = 0; j < cfg->searches_per_step; j++) {
+      const MV mv = {best_mv->row + ss[i].mv.row,
+                     best_mv->col + ss[i].mv.col};
+      if (is_mv_in(x, &mv)) {
+        int sad = fn_ptr->msdf(what->buf, what->stride,
+                               best_address + ss[i].offset, in_what->stride,
+                               mask, mask_stride);
+        if (sad < best_sad) {
+          sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
+          if (sad < best_sad) {
+            best_sad = sad;
+            best_site = i;
+          }
+        }
+      }
+
+      i++;
+    }
+
+    if (best_site != last_site) {
+      best_mv->row += ss[best_site].mv.row;
+      best_mv->col += ss[best_site].mv.col;
+      best_address += ss[best_site].offset;
+      last_site = best_site;
+#if defined(NEW_DIAMOND_SEARCH)
+      while (1) {
+        const MV this_mv = {best_mv->row + ss[best_site].mv.row,
+                            best_mv->col + ss[best_site].mv.col};
+        if (is_mv_in(x, &this_mv)) {
+          int sad = fn_ptr->msdf(what->buf, what->stride,
+                                 best_address + ss[best_site].offset,
+                                 in_what->stride, mask, mask_stride);
+          if (sad < best_sad) {
+            sad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
+            if (sad < best_sad) {
+              best_sad = sad;
+              best_mv->row += ss[best_site].mv.row;
+              best_mv->col += ss[best_site].mv.col;
+              best_address += ss[best_site].offset;
+              continue;
+            }
+          }
+        }
+        break;
+      }
+#endif  // NEW_DIAMOND_SEARCH
+    } else if (best_address == in_what_ref) {
+      (*num00)++;
+    }
+  }
+  return best_sad;
+}
+
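+// Full-pel driver for the masked search: repeats the diamond search with
+// progressively larger step_param (i.e. smaller first steps), re-scoring each
+// candidate with get_masked_mvpred_var(), and optionally finishes with the
+// 1-away refining search above.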
+int vp10_masked_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x,
+                                   const uint8_t *mask, int mask_stride,
+                                   MV *mvp_full, int step_param,
+                                   int sadpb, int further_steps, int do_refine,
+                                   const vp10_variance_fn_ptr_t *fn_ptr,
+                                   const MV *ref_mv, MV *dst_mv,
+                                   int is_second) {
+  MV temp_mv;
+  int thissme, n, num00 = 0;
+  int bestsme = masked_diamond_search_sad(x, &cpi->ss_cfg,
+                                          mask, mask_stride,
+                                          mvp_full, &temp_mv,
+                                          step_param, sadpb, &n,
+                                          fn_ptr, ref_mv, is_second);
+  if (bestsme < INT_MAX)
+    bestsme = get_masked_mvpred_var(x, mask, mask_stride, &temp_mv, ref_mv,
+                                    fn_ptr, 1, is_second);
+  *dst_mv = temp_mv;
+
+  // If there won't be more n-step search, check to see if refining search is
+  // needed.
+  if (n > further_steps)
+    do_refine = 0;
+
+  while (n < further_steps) {
+    ++n;
+
+    if (num00) {
+      num00--;
+    } else {
+      thissme = masked_diamond_search_sad(x, &cpi->ss_cfg,
+                                          mask, mask_stride,
+                                          mvp_full, &temp_mv,
+                                          step_param + n, sadpb, &num00,
+                                          fn_ptr, ref_mv, is_second);
+      if (thissme < INT_MAX)
+        thissme = get_masked_mvpred_var(x, mask, mask_stride,
+                                        &temp_mv, ref_mv, fn_ptr, 1,
+                                        is_second);
+
+      // check to see if refining search is needed.
+      if (num00 > further_steps - n)
+        do_refine = 0;
+
+      if (thissme < bestsme) {
+        bestsme = thissme;
+        *dst_mv = temp_mv;
+      }
+    }
+  }
+
+  // final 1-away diamond refining search
+  if (do_refine) {
+    const int search_range = 8;
+    MV best_mv = *dst_mv;
+    thissme = masked_refining_search_sad(x, mask, mask_stride,
+                                         &best_mv, sadpb, search_range,
+                                         fn_ptr, ref_mv, is_second);
+    if (thissme < INT_MAX)
+      thissme = get_masked_mvpred_var(x, mask, mask_stride,
+                                      &best_mv, ref_mv, fn_ptr, 1,
+                                      is_second);
+    if (thissme < bestsme) {
+      bestsme = thissme;
+      *dst_mv = best_mv;
+    }
+  }
+  return bestsme;
+}
+#endif  // CONFIG_EXT_INTER
diff --git a/vp10/encoder/mcomp.h b/vp10/encoder/mcomp.h
index 3063b996e7b095a6fd22a95bd687af20d3880575..a430c76c2d9e795bfdc84b91b2c631db76d4ff37 100644
--- a/vp10/encoder/mcomp.h
+++ b/vp10/encoder/mcomp.h
@@ -53,12 +53,12 @@ int vp10_mv_bit_cost(const MV *mv, const MV *ref,
 // Utility to compute variance + MV rate cost for a given MV
 int vp10_get_mvpred_var(const MACROBLOCK *x,
                        const MV *best_mv, const MV *center_mv,
-                       const vp9_variance_fn_ptr_t *vfp,
+                       const vp10_variance_fn_ptr_t *vfp,
                        int use_mvcost);
 int vp10_get_mvpred_av_var(const MACROBLOCK *x,
                           const MV *best_mv, const MV *center_mv,
                           const uint8_t *second_pred,
-                          const vp9_variance_fn_ptr_t *vfp,
+                          const vp10_variance_fn_ptr_t *vfp,
                           int use_mvcost);
 
 struct VP10_COMP;
@@ -69,7 +69,7 @@ int vp10_init_search_range(int size);
 int vp10_refining_search_sad(const struct macroblock *x,
                             struct mv *ref_mv,
                             int sad_per_bit, int distance,
-                            const struct vp9_variance_vtable *fn_ptr,
+                            const vp10_variance_fn_ptr_t *fn_ptr,
                             const struct mv *center_mv);
 
 // Runs sequence of diamond searches in smaller steps for RD.
@@ -77,7 +77,7 @@ int vp10_full_pixel_diamond(const struct VP10_COMP *cpi, MACROBLOCK *x,
                            MV *mvp_full, int step_param,
                            int sadpb, int further_steps, int do_refine,
                            int *cost_list,
-                           const vp9_variance_fn_ptr_t *fn_ptr,
+                           const vp10_variance_fn_ptr_t *fn_ptr,
                            const MV *ref_mv, MV *dst_mv);
 
 // Perform integral projection based motion estimation.
@@ -93,7 +93,7 @@ typedef int (integer_mv_pattern_search_fn) (
     int error_per_bit,
     int do_init_search,
     int *cost_list,
-    const vp9_variance_fn_ptr_t *vf,
+    const vp10_variance_fn_ptr_t *vf,
     int use_mvcost,
     const MV *center_mv,
     MV *best_mv);
@@ -109,7 +109,7 @@ typedef int (fractional_mv_step_fp) (
     MV *bestmv, const MV *ref_mv,
     int allow_hp,
     int error_per_bit,
-    const vp9_variance_fn_ptr_t *vfp,
+    const vp10_variance_fn_ptr_t *vfp,
     int forced_stop,  // 0 - full, 1 - qtr only, 2 - half only
     int iters_per_step,
     int *cost_list,
@@ -130,13 +130,13 @@ extern fractional_mv_step_fp vp10_find_best_sub_pixel_tree_pruned_evenmore;
 typedef int (*vp10_full_search_fn_t)(const MACROBLOCK *x,
                                     const MV *ref_mv, int sad_per_bit,
                                     int distance,
-                                    const vp9_variance_fn_ptr_t *fn_ptr,
+                                    const vp10_variance_fn_ptr_t *fn_ptr,
                                     const MV *center_mv, MV *best_mv);
 
 typedef int (*vp10_refining_search_fn_t)(const MACROBLOCK *x,
                                         MV *ref_mv, int sad_per_bit,
                                         int distance,
-                                        const vp9_variance_fn_ptr_t *fn_ptr,
+                                        const vp10_variance_fn_ptr_t *fn_ptr,
                                         const MV *center_mv);
 
 typedef int (*vp10_diamond_search_fn_t)(const MACROBLOCK *x,
@@ -144,13 +144,13 @@ typedef int (*vp10_diamond_search_fn_t)(const MACROBLOCK *x,
                                        MV *ref_mv, MV *best_mv,
                                        int search_param, int sad_per_bit,
                                        int *num00,
-                                       const vp9_variance_fn_ptr_t *fn_ptr,
+                                       const vp10_variance_fn_ptr_t *fn_ptr,
                                        const MV *center_mv);
 
 int vp10_refining_search_8p_c(const MACROBLOCK *x,
                              MV *ref_mv, int error_per_bit,
                              int search_range,
-                             const vp9_variance_fn_ptr_t *fn_ptr,
+                             const vp10_variance_fn_ptr_t *fn_ptr,
                              const MV *center_mv, const uint8_t *second_pred);
 
 struct VP10_COMP;
@@ -162,6 +162,26 @@ int vp10_full_pixel_search(struct VP10_COMP *cpi, MACROBLOCK *x,
                           const MV *ref_mv, MV *tmp_mv,
                           int var_max, int rd);
 
+#if CONFIG_EXT_INTER
+int vp10_find_best_masked_sub_pixel_tree(const MACROBLOCK *x,
+                                         const uint8_t *mask, int mask_stride,
+                                         MV *bestmv, const MV *ref_mv,
+                                         int allow_hp,
+                                         int error_per_bit,
+                                         const vp10_variance_fn_ptr_t *vfp,
+                                         int forced_stop,
+                                         int iters_per_step,
+                                         int *mvjcost, int *mvcost[2],
+                                         int *distortion,
+                                         unsigned int *sse1, int is_second);
+int vp10_masked_full_pixel_diamond(const struct VP10_COMP *cpi, MACROBLOCK *x,
+                                   const uint8_t *mask, int mask_stride,
+                                   MV *mvp_full, int step_param,
+                                   int sadpb, int further_steps, int do_refine,
+                                   const vp10_variance_fn_ptr_t *fn_ptr,
+                                   const MV *ref_mv, MV *dst_mv,
+                                   int is_second);
+#endif  // CONFIG_EXT_INTER
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index 66261eaf993b9e885c9a1ca7c555c39f947678d2..c65bdf13fa55a24c3fa7707deebe6fed2912408a 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -44,10 +44,6 @@
 #include "vp10/encoder/rdopt.h"
 #include "vp10/encoder/aq_variance.h"
 
-// TODO(geza.lore) Update this when the extended coding unit size experiment
-// have been ported.
-#define CU_SIZE 64
-
 #if CONFIG_EXT_REFS
 
 #define LAST_FRAME_MODE_MASK    ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \
@@ -4315,8 +4311,8 @@ static void joint_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
     if (bsize >= BLOCK_8X8)
 #endif  // CONFIG_EXT_INTER
     *rate_mv += vp10_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
-                                &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv,
-                                x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
+                                 &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv,
+                                 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
 #if CONFIG_EXT_INTER
     else
       *rate_mv += vp10_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
@@ -5117,6 +5113,7 @@ static void single_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
 #else
   int ref = mbmi->ref_frame[0];
   MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
+  int ref_idx = 0;
 #endif  // CONFIG_EXT_INTER
 
   int tmp_col_min = x->mv_col_min;
@@ -5143,9 +5140,9 @@ static void single_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
     // match the resolution of the current frame, allowing the existing
     // motion search code to be used without additional modifications.
     for (i = 0; i < MAX_MB_PLANE; i++)
-      backup_yv12[i] = xd->plane[i].pre[0];
+      backup_yv12[i] = xd->plane[i].pre[ref_idx];
 
-    vp10_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
+    vp10_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL);
   }
 
   vp10_set_mv_search_range(x, &ref_mv);
@@ -5189,7 +5186,7 @@ static void single_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
           if (scaled_ref_frame) {
             int i;
             for (i = 0; i < MAX_MB_PLANE; ++i)
-              xd->plane[i].pre[0] = backup_yv12[i];
+              xd->plane[i].pre[ref_idx] = backup_yv12[i];
           }
           return;
         }
@@ -5203,8 +5200,8 @@ static void single_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
   mvp_full.row >>= 3;
 
   bestsme = vp10_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,
-                                  cond_cost_list(cpi, cost_list),
-                                  &ref_mv, &tmp_mv->as_mv, INT_MAX, 1);
+                                   cond_cost_list(cpi, cost_list),
+                                   &ref_mv, &tmp_mv->as_mv, INT_MAX, 1);
 
   x->mv_col_min = tmp_col_min;
   x->mv_col_max = tmp_col_max;
@@ -5218,11 +5215,11 @@ static void single_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
     const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
     // Use up-sampled reference frames.
     struct macroblockd_plane *const pd = &xd->plane[0];
-    struct buf_2d backup_pred = pd->pre[0];
+    struct buf_2d backup_pred = pd->pre[ref_idx];
     const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref);
 
     // Set pred for Y plane
-    setup_pred_plane(&pd->pre[0], upsampled_ref->y_buffer,
+    setup_pred_plane(&pd->pre[ref_idx], upsampled_ref->y_buffer,
                      upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3),
                      NULL, pd->subsampling_x, pd->subsampling_y);
 
@@ -5238,7 +5235,7 @@ static void single_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
                                            pw, ph, 1);
 
     // Restore the reference frames.
-    pd->pre[0] = backup_pred;
+    pd->pre[ref_idx] = backup_pred;
 #else
     cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
                                  cm->allow_high_precision_mv,
@@ -5260,7 +5257,7 @@ static void single_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
   if (scaled_ref_frame) {
     int i;
     for (i = 0; i < MAX_MB_PLANE; i++)
-      xd->plane[i].pre[0] = backup_yv12[i];
+      xd->plane[i].pre[ref_idx] = backup_yv12[i];
   }
 }
 
@@ -5274,6 +5271,176 @@ static INLINE void restore_dst_buf(MACROBLOCKD *xd,
   }
 }
 
+#if CONFIG_EXT_INTER
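+// Motion search for a single reference of a wedge-predicted block. The setup
+// mirrors single_motion_search(), but distortion is measured through the
+// wedge mask: vp10_masked_full_pixel_diamond() for full pel, then
+// vp10_find_best_masked_sub_pixel_tree() for sub pel.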
+static void do_masked_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
+                                    const uint8_t *mask, int mask_stride,
+                                    BLOCK_SIZE bsize,
+                                    int mi_row, int mi_col,
+                                    int_mv *tmp_mv, int *rate_mv,
+                                    int ref_idx,
+                                    int mv_idx) {
+  MACROBLOCKD *xd = &x->e_mbd;
+  const VP10_COMMON *cm = &cpi->common;
+  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+  struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
+  int bestsme = INT_MAX;
+  int step_param;
+  int sadpb = x->sadperbit16;
+  MV mvp_full;
+  int ref = mbmi->ref_frame[ref_idx];
+  MV ref_mv = x->mbmi_ext->ref_mvs[ref][mv_idx].as_mv;
+
+  int tmp_col_min = x->mv_col_min;
+  int tmp_col_max = x->mv_col_max;
+  int tmp_row_min = x->mv_row_min;
+  int tmp_row_max = x->mv_row_max;
+
+  const YV12_BUFFER_CONFIG *scaled_ref_frame =
+      vp10_get_scaled_ref_frame(cpi, ref);
+
+  MV pred_mv[3];
+  pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv;
+  pred_mv[1] = x->mbmi_ext->ref_mvs[ref][1].as_mv;
+  pred_mv[2] = x->pred_mv[ref];
+
+#if CONFIG_REF_MV
+  vp10_set_mvcost(x, ref);
+#endif
+
+  if (scaled_ref_frame) {
+    int i;
+    // Swap out the reference frame for a version that's been scaled to
+    // match the resolution of the current frame, allowing the existing
+    // motion search code to be used without additional modifications.
+    for (i = 0; i < MAX_MB_PLANE; i++)
+      backup_yv12[i] = xd->plane[i].pre[ref_idx];
+
+    vp10_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL);
+  }
+
+  vp10_set_mv_search_range(x, &ref_mv);
+
+  // Work out the size of the first step in the mv step search.
+  // 0 here is maximum length first step. 1 is MAX >> 1 etc.
+  if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
+    // Take wtd average of the step_params based on the last frame's
+    // max mv magnitude and that based on the best ref mvs of the current
+    // block for the given reference.
+    step_param = (vp10_init_search_range(x->max_mv_context[ref]) +
+                  cpi->mv_step_param) / 2;
+  } else {
+    step_param = cpi->mv_step_param;
+  }
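+  // e.g. vp10_init_search_range() returning 8 with cpi->mv_step_param == 4
+  // gives an initial step_param of 6.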
+
+  // TODO(debargha): is show_frame needed here?
+  if (cpi->sf.adaptive_motion_search && bsize < BLOCK_LARGEST &&
+      cm->show_frame) {
+    int boffset = 2 * (b_width_log2_lookup[BLOCK_LARGEST] -
+          VPXMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
+    step_param = VPXMAX(step_param, boffset);
+  }
+
+  if (cpi->sf.adaptive_motion_search) {
+    int bwl = b_width_log2_lookup[bsize];
+    int bhl = b_height_log2_lookup[bsize];
+    int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);
+
+    if (tlevel < 5)
+      step_param += 2;
+
+    // prev_mv_sad is not setup for dynamically scaled frames.
+    if (cpi->oxcf.resize_mode != RESIZE_DYNAMIC) {
+      int i;
+      for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
+        if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
+          x->pred_mv[ref].row = 0;
+          x->pred_mv[ref].col = 0;
+          tmp_mv->as_int = INVALID_MV;
+
+          if (scaled_ref_frame) {
+            int i;
+            for (i = 0; i < MAX_MB_PLANE; ++i)
+              xd->plane[i].pre[ref_idx] = backup_yv12[i];
+          }
+          return;
+        }
+      }
+    }
+  }
+
+  mvp_full = pred_mv[x->mv_best_ref_index[ref]];
+
+  mvp_full.col >>= 3;
+  mvp_full.row >>= 3;
+
+  bestsme = vp10_masked_full_pixel_diamond(cpi, x, mask, mask_stride,
+                                           &mvp_full, step_param, sadpb,
+                                           MAX_MVSEARCH_STEPS - 1 - step_param,
+                                           1, &cpi->fn_ptr[bsize],
+                                           &ref_mv, &tmp_mv->as_mv, ref_idx);
+
+  x->mv_col_min = tmp_col_min;
+  x->mv_col_max = tmp_col_max;
+  x->mv_row_min = tmp_row_min;
+  x->mv_row_max = tmp_row_max;
+
+  if (bestsme < INT_MAX) {
+    int dis;  /* TODO: use dis in distortion calculation later. */
+    vp10_find_best_masked_sub_pixel_tree(x, mask, mask_stride,
+                                         &tmp_mv->as_mv, &ref_mv,
+                                         cm->allow_high_precision_mv,
+                                         x->errorperbit,
+                                         &cpi->fn_ptr[bsize],
+                                         cpi->sf.mv.subpel_force_stop,
+                                         cpi->sf.mv.subpel_iters_per_step,
+                                         x->nmvjointcost, x->mvcost,
+                                         &dis, &x->pred_sse[ref], ref_idx);
+  }
+  *rate_mv = vp10_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
+                              x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
+
+  if (cpi->sf.adaptive_motion_search && cm->show_frame)
+    x->pred_mv[ref] = tmp_mv->as_mv;
+
+  if (scaled_ref_frame) {
+    int i;
+    for (i = 0; i < MAX_MB_PLANE; i++)
+      xd->plane[i].pre[ref_idx] = backup_yv12[i];
+  }
+}
+
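+// Runs the masked search for one or both sides of a wedge compound: ref 0 is
+// searched against the wedge mask and ref 1 against its complement, so each
+// motion vector is optimized only over the region it actually predicts.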
+static void do_masked_motion_search_indexed(VP10_COMP *cpi, MACROBLOCK *x,
+                                            int wedge_index,
+                                            BLOCK_SIZE bsize,
+                                            int mi_row, int mi_col,
+                                            int_mv *tmp_mv, int *rate_mv,
+                                            int mv_idx[2],
+                                            int which) {
+  // NOTE: which == 0: search ref frame 0 only; 1: ref frame 1 only; 2: both
+  MACROBLOCKD *xd = &x->e_mbd;
+  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+  BLOCK_SIZE sb_type = mbmi->sb_type;
+  int w = (4 << b_width_log2_lookup[sb_type]);
+  int h = (4 << b_height_log2_lookup[sb_type]);
+  const uint8_t *mask;
+  const int mask_stride = MASK_MASTER_STRIDE;
+  mask = vp10_get_soft_mask(wedge_index, sb_type, h, w);
+
+  if (which == 0 || which == 2)
+    do_masked_motion_search(cpi, x, mask, mask_stride, bsize,
+                            mi_row, mi_col, &tmp_mv[0], &rate_mv[0],
+                            0, mv_idx[0]);
+
+  if (which == 1 || which == 2) {
+    // get the complementary (negative) mask for the second reference
+    mask = vp10_get_soft_mask(wedge_index ^ 1, sb_type, h, w);
+    do_masked_motion_search(cpi, x, mask, mask_stride, bsize,
+                            mi_row, mi_col, &tmp_mv[1], &rate_mv[1],
+                            1, mv_idx[1]);
+  }
+}
+#endif  // CONFIG_EXT_INTER
+
 // In some situations we want to discount the apparent cost of a new motion
 // vector. Where there is a subtle motion field and especially where there is
 // low spatial complexity then it can be hard to cover the cost of a new motion
@@ -5306,6 +5473,7 @@ static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
                xd->mb_to_top_edge - LEFT_TOP_MARGIN,
                xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
 }
+
 static INTERP_FILTER predict_interp_filter(const VP10_COMP *cpi,
                                            const MACROBLOCK *x,
                                            const BLOCK_SIZE bsize,
@@ -5434,6 +5602,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
                                  int_mv single_newmvs[2][MAX_REF_FRAMES],
                                  int single_newmvs_rate[2][MAX_REF_FRAMES],
                                  int *compmode_interintra_cost,
+                                 int *compmode_wedge_cost,
 #else
                                  int_mv single_newmv[MAX_REF_FRAMES],
 #endif  // CONFIG_EXT_INTER
@@ -5454,41 +5623,47 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
   int refs[2] = { mbmi->ref_frame[0],
     (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
   int_mv cur_mv[2];
+  int rate_mv = 0;
 #if CONFIG_EXT_INTER
   int mv_idx = (this_mode == NEWFROMNEARMV) ? 1 : 0;
   int_mv single_newmv[MAX_REF_FRAMES];
   const int * const intra_mode_cost =
     cpi->mbmode_cost[size_group_lookup[bsize]];
   const int is_comp_interintra_pred = (mbmi->ref_frame[1] == INTRA_FRAME);
+  const int tmp_buf_sz = CU_SIZE * CU_SIZE;
 #if CONFIG_REF_MV
   uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
 #endif
 #endif  // CONFIG_EXT_INTER
 #if CONFIG_VP9_HIGHBITDEPTH
-  DECLARE_ALIGNED(16, uint16_t, tmp_buf16[MAX_MB_PLANE * 64 * 64]);
+  DECLARE_ALIGNED(16, uint16_t, tmp_buf16[MAX_MB_PLANE * CU_SIZE * CU_SIZE]);
   uint8_t *tmp_buf;
 #else
-  DECLARE_ALIGNED(16, uint8_t, tmp_buf[MAX_MB_PLANE * 64 * 64]);
+  DECLARE_ALIGNED(16, uint8_t, tmp_buf[MAX_MB_PLANE * CU_SIZE * CU_SIZE]);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
+
+#if CONFIG_OBMC
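+  // OBMC is not combined with compound inter-intra prediction.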
+  int allow_obmc =
 #if CONFIG_EXT_INTER
-  const int tmp_buf_sz = CU_SIZE * CU_SIZE;
+      !is_comp_interintra_pred &&
 #endif  // CONFIG_EXT_INTER
-#if CONFIG_OBMC
-  int allow_obmc = is_obmc_allowed(mbmi);
+      is_obmc_allowed(mbmi);
   int best_obmc_flag = 0;
 #if CONFIG_VP9_HIGHBITDEPTH
-  DECLARE_ALIGNED(16, uint16_t, tmp_buf1_16[MAX_MB_PLANE * 64 * 64]);
-  DECLARE_ALIGNED(16, uint16_t, tmp_buf2_16[MAX_MB_PLANE * 64 * 64]);
+  DECLARE_ALIGNED(16, uint16_t, tmp_buf1_16[MAX_MB_PLANE * CU_SIZE * CU_SIZE]);
+  DECLARE_ALIGNED(16, uint16_t, tmp_buf2_16[MAX_MB_PLANE * CU_SIZE * CU_SIZE]);
   uint8_t *tmp_buf1, *tmp_buf2;
   uint8_t *obmc_tmp_buf1[3];
   uint8_t *obmc_tmp_buf2[3];
 #else
-  DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * 64 * 64]);
-  DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * 64 * 64]);
-  uint8_t *obmc_tmp_buf1[3] = {tmp_buf1, tmp_buf1 + 4096, tmp_buf1 + 8192};
-  uint8_t *obmc_tmp_buf2[3] = {tmp_buf2, tmp_buf2 + 4096, tmp_buf2 + 8192};
+  DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * CU_SIZE * CU_SIZE]);
+  DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * CU_SIZE * CU_SIZE]);
+  uint8_t *obmc_tmp_buf1[3] = {tmp_buf1, tmp_buf1 + CU_SIZE * CU_SIZE,
+                               tmp_buf1 + CU_SIZE * CU_SIZE * 2};
+  uint8_t *obmc_tmp_buf2[3] = {tmp_buf2, tmp_buf2 + CU_SIZE * CU_SIZE,
+                               tmp_buf2 + CU_SIZE * CU_SIZE * 2};
 #endif  // CONFIG_VP9_HIGHBITDEPTH
-  int obmc_tmp_stride[3] = {64, 64, 64};
+  int obmc_tmp_stride[3] = {CU_SIZE, CU_SIZE, CU_SIZE};
 
   uint8_t skip_txfm_bestfilter[2][MAX_MB_PLANE << 2] = {{0}, {0}};
   int64_t bsse_bestfilter[2][MAX_MB_PLANE << 2] = {{0}, {0}};
@@ -5507,6 +5682,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
   unsigned int best_pred_var = UINT_MAX;
   MB_MODE_INFO best_mbmi;
 #endif  // CONFIG_OBMC
+
   int pred_exists = 0;
   int intpel_mv;
   int64_t rd, tmp_rd, best_rd = INT64_MAX;
@@ -5525,6 +5701,9 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
 
 #if CONFIG_EXT_INTER
   *compmode_interintra_cost = 0;
+  mbmi->use_wedge_interintra = 0;
+  *compmode_wedge_cost = 0;
+  mbmi->use_wedge_interinter = 0;
 
   // is_comp_interintra_pred implies !is_comp_pred
   assert(!is_comp_interintra_pred || (!is_comp_pred));
@@ -5575,12 +5754,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
       return INT64_MAX;
   }
 
-#if CONFIG_EXT_INTER
   if (have_newmv_in_inter_mode(this_mode)) {
-#else
-  if (this_mode == NEWMV) {
-#endif  // CONFIG_EXT_INTER
-    int rate_mv;
     if (is_comp_pred) {
 #if CONFIG_EXT_INTER
       for (i = 0; i < 2; ++i) {
@@ -5767,7 +5941,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
     }
   }
 #endif  // CONFIG_EXT_INTER
-#endif
+#endif  // CONFIG_REF_MV
 
   // do first prediction into the destination buffer. Do the next
   // prediction into a temporary buffer. Then keep track of which one
@@ -5805,10 +5979,11 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
 
   if (RDCOST(x->rdmult, x->rddiv, *rate2, 0) > ref_best_rd &&
 #if CONFIG_EXT_INTER
-      mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV)
+      mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV
 #else
-      mbmi->mode != NEARESTMV)
+      mbmi->mode != NEARESTMV
 #endif  // CONFIG_EXT_INTER
+     )
     return INT64_MAX;
 
   pred_exists = 0;
@@ -6001,6 +6176,8 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
         skip_sse_sb = tmp_skip_sse;
         memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
         memcpy(bsse, x->bsse, sizeof(bsse));
+      } else {
+        pred_exists = 0;
       }
     }
     restore_dst_buf(xd, orig_dst, orig_dst_stride);
@@ -6012,12 +6189,169 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
   rs = cm->interp_filter == SWITCHABLE ? vp10_get_switchable_rate(cpi, xd) : 0;
 
 #if CONFIG_EXT_INTER
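+  // Wedge compound prediction: estimate the RD cost without a wedge, then
+  // search all wedge partitions for this block size using the model-based
+  // rate/distortion estimate (model_rd_for_sb) rather than full RDO.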
+  if (is_comp_pred && get_wedge_bits(bsize)) {
+    int wedge_index, best_wedge_index = WEDGE_NONE, rs;
+    int rate_sum;
+    int64_t dist_sum;
+    int64_t best_rd_nowedge = INT64_MAX;
+    int64_t best_rd_wedge = INT64_MAX;
+    int wedge_types;
+    int tmp_skip_txfm_sb;
+    int64_t tmp_skip_sse_sb;
+    rs = vp10_cost_bit(cm->fc->wedge_interinter_prob[bsize], 0);
+    vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
+    model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum,
+                    &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
+    rd = RDCOST(x->rdmult, x->rddiv, rs + rate_mv + rate_sum, dist_sum);
+    best_rd_nowedge = rd;
+    mbmi->use_wedge_interinter = 1;
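+    // The wedge index is sent as get_wedge_bits(bsize) raw bits; one bit
+    // costs 256 in the vp10_cost_bit scale.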
+    rs = get_wedge_bits(bsize) * 256 +
+        vp10_cost_bit(cm->fc->wedge_interinter_prob[bsize], 1);
+    wedge_types = (1 << get_wedge_bits(bsize));
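+    // For new-MV modes, pick the best wedge with the current MVs first,
+    // then refine the new MV(s) with a masked motion search and keep the
+    // refinement only if it lowers the modeled RD cost.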
+    if (have_newmv_in_inter_mode(this_mode)) {
+      int_mv tmp_mv[2];
+      int rate_mvs[2], tmp_rate_mv = 0;
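+      // Per-plane predictor buffers, sized 2x per pixel so they can also
+      // hold 16-bit samples in the high bitdepth path.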
+      uint8_t pred0[2 * CU_SIZE * CU_SIZE * 3];
+      uint8_t pred1[2 * CU_SIZE * CU_SIZE * 3];
+      uint8_t *preds0[3] = {pred0,
+                            pred0 + 2 * CU_SIZE * CU_SIZE,
+                            pred0 + 4 * CU_SIZE * CU_SIZE};
+      uint8_t *preds1[3] = {pred1,
+                            pred1 + 2 * CU_SIZE * CU_SIZE,
+                            pred1 + 4 * CU_SIZE * CU_SIZE};
+      int strides[3] = {CU_SIZE, CU_SIZE, CU_SIZE};
+      vp10_build_inter_predictors_for_planes_single_buf(
+          xd, bsize, mi_row, mi_col, 0, preds0, strides);
+      vp10_build_inter_predictors_for_planes_single_buf(
+          xd, bsize, mi_row, mi_col, 1, preds1, strides);
+
+      for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
+        mbmi->interinter_wedge_index = wedge_index;
+        vp10_build_wedge_inter_predictor_from_buf(xd, bsize, mi_row, mi_col,
+                                                  preds0, strides,
+                                                  preds1, strides);
+        model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum,
+                        &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
+        rd = RDCOST(x->rdmult, x->rddiv, rs + rate_mv + rate_sum, dist_sum);
+        if (rd < best_rd_wedge) {
+          best_wedge_index = wedge_index;
+          best_rd_wedge = rd;
+        }
+      }
+      mbmi->interinter_wedge_index = best_wedge_index;
+      if (this_mode == NEW_NEWMV) {
+        int mv_idxs[2] = {0, 0};
+        do_masked_motion_search_indexed(cpi, x, mbmi->interinter_wedge_index,
+                                        bsize, mi_row, mi_col, tmp_mv, rate_mvs,
+                                        mv_idxs, 2);
+        tmp_rate_mv = rate_mvs[0] + rate_mvs[1];
+        mbmi->mv[0].as_int = tmp_mv[0].as_int;
+        mbmi->mv[1].as_int = tmp_mv[1].as_int;
+      } else if (this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV) {
+        int mv_idxs[2] = {0, 0};
+        do_masked_motion_search_indexed(cpi, x, mbmi->interinter_wedge_index,
+                                        bsize, mi_row, mi_col, tmp_mv, rate_mvs,
+                                        mv_idxs, 0);
+        tmp_rate_mv = rate_mvs[0];
+        mbmi->mv[0].as_int = tmp_mv[0].as_int;
+      } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
+        int mv_idxs[2] = {0, 0};
+        do_masked_motion_search_indexed(cpi, x, mbmi->interinter_wedge_index,
+                                        bsize, mi_row, mi_col, tmp_mv, rate_mvs,
+                                        mv_idxs, 1);
+        tmp_rate_mv = rate_mvs[1];
+        mbmi->mv[1].as_int = tmp_mv[1].as_int;
+      }
+      vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
+      model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum,
+                      &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
+      rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate_mv + rate_sum, dist_sum);
+      if (rd < best_rd_wedge) {
+        best_rd_wedge = rd;
+      } else {
+        mbmi->mv[0].as_int = cur_mv[0].as_int;
+        mbmi->mv[1].as_int = cur_mv[1].as_int;
+        tmp_rate_mv = rate_mv;
+      }
+      if (best_rd_wedge < best_rd_nowedge) {
+        mbmi->use_wedge_interinter = 1;
+        mbmi->interinter_wedge_index = best_wedge_index;
+        xd->mi[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int;
+        xd->mi[0]->bmi[0].as_mv[1].as_int = mbmi->mv[1].as_int;
+        *rate2 += tmp_rate_mv - rate_mv;
+        rate_mv = tmp_rate_mv;
+      } else {
+        mbmi->use_wedge_interinter = 0;
+        mbmi->mv[0].as_int = cur_mv[0].as_int;
+        mbmi->mv[1].as_int = cur_mv[1].as_int;
+      }
+    } else {
+      uint8_t pred0[2 * CU_SIZE * CU_SIZE * 3];
+      uint8_t pred1[2 * CU_SIZE * CU_SIZE * 3];
+      uint8_t *preds0[3] = {pred0,
+                            pred0 + 2 * CU_SIZE * CU_SIZE,
+                            pred0 + 4 * CU_SIZE * CU_SIZE};
+      uint8_t *preds1[3] = {pred1,
+                            pred1 + 2 * CU_SIZE * CU_SIZE,
+                            pred1 + 4 * CU_SIZE * CU_SIZE};
+      int strides[3] = {CU_SIZE, CU_SIZE, CU_SIZE};
+      vp10_build_inter_predictors_for_planes_single_buf(
+          xd, bsize, mi_row, mi_col, 0, preds0, strides);
+      vp10_build_inter_predictors_for_planes_single_buf(
+          xd, bsize, mi_row, mi_col, 1, preds1, strides);
+      for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
+        mbmi->interinter_wedge_index = wedge_index;
+        vp10_build_wedge_inter_predictor_from_buf(xd, bsize, mi_row, mi_col,
+                                                  preds0, strides,
+                                                  preds1, strides);
+        model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum,
+                        &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
+        rd = RDCOST(x->rdmult, x->rddiv, rs + rate_mv + rate_sum, dist_sum);
+        if (rd < best_rd_wedge) {
+          best_wedge_index = wedge_index;
+          best_rd_wedge = rd;
+        }
+      }
+      if (best_rd_wedge < best_rd_nowedge) {
+        mbmi->use_wedge_interinter = 1;
+        mbmi->interinter_wedge_index = best_wedge_index;
+      } else {
+        mbmi->use_wedge_interinter = 0;
+      }
+    }
+#if CONFIG_OBMC
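+    // Wedge compound prediction and OBMC are mutually exclusive.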
+    if (mbmi->use_wedge_interinter)
+      allow_obmc = 0;
+#endif  // CONFIG_OBMC
+    if (ref_best_rd < INT64_MAX &&
+        VPXMIN(best_rd_wedge, best_rd_nowedge) / 2 > ref_best_rd)
+      return INT64_MAX;
+
+    pred_exists = 0;
+    tmp_rd = VPXMIN(best_rd_wedge, best_rd_nowedge);
+    if (mbmi->use_wedge_interinter)
+      *compmode_wedge_cost = get_wedge_bits(bsize) * 256 +
+          vp10_cost_bit(cm->fc->wedge_interinter_prob[bsize], 1);
+    else
+      *compmode_wedge_cost =
+          vp10_cost_bit(cm->fc->wedge_interinter_prob[bsize], 0);
+  }
+
   if (is_comp_interintra_pred) {
     PREDICTION_MODE interintra_mode, best_interintra_mode = DC_PRED;
     int64_t best_interintra_rd = INT64_MAX;
     int rmode, rate_sum;
     int64_t dist_sum;
     int j;
+    int wedge_bits, wedge_types, wedge_index, best_wedge_index = WEDGE_NONE;
+    int64_t best_interintra_rd_nowedge = INT64_MAX;
+    int64_t best_interintra_rd_wedge = INT64_MAX;
+    int rwedge;
+    int bw = 4 << b_width_log2_lookup[mbmi->sb_type],
+        bh = 4 << b_height_log2_lookup[mbmi->sb_type];
+    int_mv tmp_mv;
+    int tmp_rate_mv = 0;
     mbmi->ref_frame[1] = NONE;
     for (j = 0; j < MAX_MB_PLANE; j++) {
       xd->plane[j].dst.buf = tmp_buf + j * tmp_buf_sz;
@@ -6033,16 +6367,16 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
       mbmi->interintra_uv_mode = interintra_mode;
       rmode = intra_mode_cost[mbmi->interintra_mode];
       vp10_build_interintra_predictors(xd,
-                                      tmp_buf,
-                                      tmp_buf + tmp_buf_sz,
-                                      tmp_buf + 2 * tmp_buf_sz,
-                                      CU_SIZE,
-                                      CU_SIZE,
-                                      CU_SIZE,
-                                      bsize);
+                                       tmp_buf,
+                                       tmp_buf + tmp_buf_sz,
+                                       tmp_buf + 2 * tmp_buf_sz,
+                                       CU_SIZE,
+                                       CU_SIZE,
+                                       CU_SIZE,
+                                       bsize);
       model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum,
                       &skip_txfm_sb, &skip_sse_sb);
-      rd = RDCOST(x->rdmult, x->rddiv, rmode + rate_sum, dist_sum);
+      rd = RDCOST(x->rdmult, x->rddiv, rate_mv + rmode + rate_sum, dist_sum);
       if (rd < best_interintra_rd) {
         best_interintra_rd = rd;
         best_interintra_mode = interintra_mode;
@@ -6054,17 +6388,112 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
         best_interintra_rd / 2 > ref_best_rd) {
       return INT64_MAX;
     }
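+    // Search wedge partitions for inter-intra prediction and compare
+    // against the best non-wedge inter-intra mode found above.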
+    wedge_bits = get_wedge_bits(bsize);
+    rmode = intra_mode_cost[mbmi->interintra_mode];
+    if (wedge_bits) {
+      vp10_build_interintra_predictors(xd,
+                                       tmp_buf,
+                                       tmp_buf + tmp_buf_sz,
+                                       tmp_buf + 2 * tmp_buf_sz,
+                                       CU_SIZE,
+                                       CU_SIZE,
+                                       CU_SIZE,
+                                       bsize);
+      model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum,
+                      &skip_txfm_sb, &skip_sse_sb);
+      rwedge = vp10_cost_bit(cm->fc->wedge_interintra_prob[bsize], 0);
+      rd = RDCOST(x->rdmult, x->rddiv,
+                  rmode + rate_mv + rwedge + rate_sum, dist_sum);
+      best_interintra_rd_nowedge = rd;
+
+      mbmi->use_wedge_interintra = 1;
+      rwedge = wedge_bits * 256 +
+          vp10_cost_bit(cm->fc->wedge_interintra_prob[bsize], 1);
+      wedge_types = (1 << wedge_bits);
+      for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
+        mbmi->interintra_wedge_index = wedge_index;
+        mbmi->interintra_uv_wedge_index = wedge_index;
+        vp10_build_interintra_predictors(xd,
+                                         tmp_buf,
+                                         tmp_buf + tmp_buf_sz,
+                                         tmp_buf + 2 * tmp_buf_sz,
+                                         CU_SIZE,
+                                         CU_SIZE,
+                                         CU_SIZE,
+                                         bsize);
+        model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum,
+                        &skip_txfm_sb, &skip_sse_sb);
+        rd = RDCOST(x->rdmult, x->rddiv,
+                    rmode + rate_mv + rwedge + rate_sum, dist_sum);
+        if (rd < best_interintra_rd_wedge) {
+          best_interintra_rd_wedge = rd;
+          best_wedge_index = wedge_index;
+        }
+      }
+      // Refine the new motion vector for the masked inter predictor.
+      if (have_newmv_in_inter_mode(this_mode)) {
+        // Get the negative of the mask (the inter-weighted region).
+        const uint8_t *mask = vp10_get_soft_mask(
+            best_wedge_index ^ 1, bsize, bh, bw);
+        mbmi->interintra_wedge_index = best_wedge_index;
+        mbmi->interintra_uv_wedge_index = best_wedge_index;
+        do_masked_motion_search(cpi, x, mask, MASK_MASTER_STRIDE, bsize,
+                                mi_row, mi_col, &tmp_mv, &tmp_rate_mv,
+                                0, mv_idx);
+        mbmi->mv[0].as_int = tmp_mv.as_int;
+        vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
+        model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum,
+                        &skip_txfm_sb, &skip_sse_sb);
+        rd = RDCOST(x->rdmult, x->rddiv,
+                    rmode + tmp_rate_mv + rwedge + rate_sum, dist_sum);
+        if (rd < best_interintra_rd_wedge) {
+          best_interintra_rd_wedge = rd;
+        } else {
+          tmp_mv.as_int = cur_mv[0].as_int;
+          tmp_rate_mv = rate_mv;
+        }
+      } else {
+        tmp_mv.as_int = cur_mv[0].as_int;
+        tmp_rate_mv = rate_mv;
+      }
+      if (best_interintra_rd_wedge < best_interintra_rd_nowedge) {
+        mbmi->use_wedge_interintra = 1;
+        mbmi->interintra_wedge_index = best_wedge_index;
+        mbmi->interintra_uv_wedge_index = best_wedge_index;
+        best_interintra_rd = best_interintra_rd_wedge;
+        mbmi->mv[0].as_int = tmp_mv.as_int;
+        *rate2 += tmp_rate_mv - rate_mv;
+        rate_mv = tmp_rate_mv;
+      } else {
+        mbmi->use_wedge_interintra = 0;
+        best_interintra_rd = best_interintra_rd_nowedge;
+        mbmi->mv[0].as_int = cur_mv[0].as_int;
+      }
+    }
 
     pred_exists = 0;
     tmp_rd = best_interintra_rd;
-
     *compmode_interintra_cost =
-      vp10_cost_bit(cm->fc->interintra_prob[bsize], 1);
+        vp10_cost_bit(cm->fc->interintra_prob[bsize], 1);
     *compmode_interintra_cost += intra_mode_cost[mbmi->interintra_mode];
+    if (wedge_bits) {
+      *compmode_interintra_cost += vp10_cost_bit(
+          cm->fc->wedge_interintra_prob[bsize], mbmi->use_wedge_interintra);
+      if (mbmi->use_wedge_interintra) {
+        *compmode_interintra_cost += wedge_bits * 256;
+      }
+    }
   } else if (is_interintra_allowed(mbmi)) {
     *compmode_interintra_cost =
       vp10_cost_bit(cm->fc->interintra_prob[bsize], 0);
   }
+
+#if CONFIG_EXT_INTERP
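+  // If no subpel interpolation is needed, the filter type is not signaled;
+  // normalize it to the default and force the predictor to be rebuilt.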
+  if (!vp10_is_interp_needed(xd) && cm->interp_filter == SWITCHABLE) {
+    mbmi->interp_filter = EIGHTTAP_REGULAR;
+    pred_exists = 0;
+  }
+#endif  // CONFIG_EXT_INTERP
 #endif  // CONFIG_EXT_INTER
 
 #if CONFIG_OBMC
@@ -6811,17 +7240,15 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
     int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
     x->mbmi_ext->mode_context[ref_frame] = 0;
     vp10_find_mv_refs(cm, xd, mi, ref_frame,
-#if CONFIG_REF_MV
                       &mbmi_ext->ref_mv_count[ref_frame],
                       mbmi_ext->ref_mv_stack[ref_frame],
 #if CONFIG_EXT_INTER
                       mbmi_ext->compound_mode_context,
 #endif  // CONFIG_EXT_INTER
-#endif
                       candidates, mi_row, mi_col,
                       NULL, NULL, mbmi_ext->mode_context);
   }
-#endif
+#endif  // CONFIG_REF_MV
 
 #if CONFIG_OBMC
   vp10_build_prediction_by_above_preds(cpi, xd, mi_row, mi_col, dst_buf1,
@@ -6947,6 +7374,7 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
     int compmode_cost = 0;
 #if CONFIG_EXT_INTER
     int compmode_interintra_cost = 0;
+    int compmode_wedge_cost = 0;
 #endif  // CONFIG_EXT_INTER
     int rate2 = 0, rate_y = 0, rate_uv = 0;
     int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
@@ -7335,6 +7763,7 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
                                   single_newmvs,
                                   single_newmvs_rate,
                                   &compmode_interintra_cost,
+                                  &compmode_wedge_cost,
 #else
                                   single_newmv,
 #endif  // CONFIG_EXT_INTER
@@ -7401,6 +7830,7 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
             int dummy_single_newmvs_rate[2][MAX_REF_FRAMES] =
                                           { { 0 }, { 0 } };
             int dummy_compmode_interintra_cost = 0;
+            int dummy_compmode_wedge_cost = 0;
 #else
             int_mv dummy_single_newmv[MAX_REF_FRAMES] = { { 0 } };
 #endif
@@ -7420,6 +7850,7 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
                                            dummy_single_newmvs,
                                            dummy_single_newmvs_rate,
                                            &dummy_compmode_interintra_cost,
+                                           &dummy_compmode_wedge_cost,
 #else
                                            dummy_single_newmv,
 #endif
@@ -7496,6 +7927,8 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
 
 #if CONFIG_EXT_INTER
     rate2 += compmode_interintra_cost;
+    if (cm->reference_mode != SINGLE_REFERENCE && comp_pred)
+      rate2 += compmode_wedge_cost;
 #endif  // CONFIG_EXT_INTER
 
     // Estimate the reference frame signaling cost and add it
@@ -8112,6 +8545,10 @@ void vp10_rd_pick_inter_mode_sub8x8(struct VP10_COMP *cpi,
 #if CONFIG_OBMC
   mbmi->obmc = 0;
 #endif  // CONFIG_OBMC
+#if CONFIG_EXT_INTER
+  mbmi->use_wedge_interinter = 0;
+  mbmi->use_wedge_interintra = 0;
+#endif  // CONFIG_EXT_INTER
 
   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
     filter_cache[i] = INT64_MAX;
@@ -8859,7 +9296,7 @@ void vp10_build_prediction_by_above_preds(VP10_COMP *cpi,
     mi_step = VPXMIN(xd->n8_w,
                      num_8x8_blocks_wide_lookup[above_mbmi->sb_type]);
 
-    if (!is_inter_block(above_mbmi))
+    if (!is_neighbor_overlappable(above_mbmi))
       continue;
 
     for (j = 0; j < MAX_MB_PLANE; ++j) {
@@ -8869,14 +9306,27 @@ void vp10_build_prediction_by_above_preds(VP10_COMP *cpi,
                        0, i, NULL,
                        pd->subsampling_x, pd->subsampling_y);
     }
-    set_ref_ptrs(cm, xd, above_mbmi->ref_frame[0], above_mbmi->ref_frame[1]);
-    for (ref = 0; ref < 1 + has_second_ref(above_mbmi); ++ref) {
-      YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi,
-                                                  above_mbmi->ref_frame[ref]);
-      assert(cfg != NULL);
-      vp10_setup_pre_planes(xd, ref, cfg, mi_row, mi_col + i,
-                            &xd->block_refs[ref]->sf);
-    }
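+    // Set up the neighbor's reference planes directly from cm->frame_refs,
+    // checking that the reference has valid scaling dimensions.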
+    for (ref = 0; ref < 1 + has_second_ref(above_mbmi); ++ref) {
+      MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];
+      RefBuffer *ref_buf = &cm->frame_refs[frame - LAST_FRAME];
+
+      xd->block_refs[ref] = ref_buf;
+      if (!vp10_is_valid_scale(&ref_buf->sf))
+        vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM,
+                           "Reference frame has invalid dimensions");
+      vp10_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col + i,
+                            &ref_buf->sf);
+    }
 
     xd->mb_to_left_edge   = -(((mi_col + i) * MI_SIZE) * 8);
     mi_x = (mi_col + i) << MI_SIZE_LOG2;
@@ -8905,11 +9355,19 @@ void vp10_build_prediction_by_above_preds(VP10_COMP *cpi,
 
             build_inter_predictors(xd, j, mi_col_offset, mi_row_offset,
                                    y * 2 + x, bw, bh,
-                                   4 * x, 0, pw, bh, mi_x, mi_y);
+                                   4 * x, 0, pw, bh,
+#if CONFIG_SUPERTX && CONFIG_EXT_INTER
+                                   0, 0,
+#endif  // CONFIG_SUPERTX && CONFIG_EXT_INTER
+                                   mi_x, mi_y);
           }
       } else {
-        build_inter_predictors(xd, j, mi_col_offset, mi_row_offset, 0,
-                               bw, bh, 0, 0, bw, bh, mi_x, mi_y);
+        build_inter_predictors(xd, j, mi_col_offset, mi_row_offset,
+                               0, bw, bh, 0, 0, bw, bh,
+#if CONFIG_SUPERTX && CONFIG_EXT_INTER
+                               0, 0,
+#endif  // CONFIG_SUPERTX && CONFIG_EXT_INTER
+                               mi_x, mi_y);
       }
     }
   }
@@ -8937,11 +9395,12 @@ void vp10_build_prediction_by_left_preds(VP10_COMP *cpi,
     MODE_INFO *left_mi = xd->mi[mi_col_offset +
                                 mi_row_offset * xd->mi_stride];
     MB_MODE_INFO *left_mbmi = &left_mi->mbmi;
+    const int is_compound = has_second_ref(left_mbmi);
 
     mi_step = VPXMIN(xd->n8_h,
                      num_8x8_blocks_high_lookup[left_mbmi->sb_type]);
 
-    if (!is_inter_block(left_mbmi))
+    if (!is_neighbor_overlappable(left_mbmi))
       continue;
 
     for (j = 0; j < MAX_MB_PLANE; ++j) {
@@ -8951,6 +9410,7 @@ void vp10_build_prediction_by_left_preds(VP10_COMP *cpi,
                        i, 0, NULL,
                        pd->subsampling_x, pd->subsampling_y);
     }
-    set_ref_ptrs(cm, xd, left_mbmi->ref_frame[0], left_mbmi->ref_frame[1]);
-    for (ref = 0; ref < 1 + has_second_ref(left_mbmi); ++ref) {
-      YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi,
@@ -8959,6 +9419,18 @@ void vp10_build_prediction_by_left_preds(VP10_COMP *cpi,
-      vp10_setup_pre_planes(xd, ref, cfg, mi_row + i, mi_col,
-                            &xd->block_refs[ref]->sf);
-    }
+    for (ref = 0; ref < 1 + is_compound; ++ref) {
+      MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];
+      RefBuffer *ref_buf = &cm->frame_refs[frame - LAST_FRAME];
+
+      xd->block_refs[ref] = ref_buf;
+      if (!vp10_is_valid_scale(&ref_buf->sf))
+        vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM,
+                           "Reference frame has invalid dimensions");
+      vp10_setup_pre_planes(xd, ref, ref_buf->buf, mi_row + i, mi_col,
+                            &ref_buf->sf);
+    }
 
     xd->mb_to_top_edge    = -(((mi_row + i) * MI_SIZE) * 8);
     mi_x = mi_col << MI_SIZE_LOG2;
@@ -8987,11 +9459,19 @@ void vp10_build_prediction_by_left_preds(VP10_COMP *cpi,
 
             build_inter_predictors(xd, j, mi_col_offset, mi_row_offset,
                                    y * 2 + x, bw, bh,
-                                   0, 4 * y, bw, ph, mi_x, mi_y);
+                                   0, 4 * y, bw, ph,
+#if CONFIG_SUPERTX && CONFIG_EXT_INTER
+                                   0, 0,
+#endif  // CONFIG_SUPERTX && CONFIG_EXT_INTER
+                                   mi_x, mi_y);
           }
       } else {
         build_inter_predictors(xd, j, mi_col_offset, mi_row_offset, 0,
-                               bw, bh, 0, 0, bw, bh, mi_x, mi_y);
+                               bw, bh, 0, 0, bw, bh,
+#if CONFIG_SUPERTX && CONFIG_EXT_INTER
+                               0, 0,
+#endif  // CONFIG_SUPERTX && CONFIG_EXT_INTER
+                               mi_x, mi_y);
       }
     }
   }
diff --git a/vpx_dsp/variance.h b/vpx_dsp/variance.h
index cd0fd98785b0bccbcf875251bb77c2a93fa6fd1d..161d6474d0de6d71f26c2b1076f066f3823ecae0 100644
--- a/vpx_dsp/variance.h
+++ b/vpx_dsp/variance.h
@@ -74,7 +74,32 @@ typedef struct variance_vtable {
 } vp8_variance_fn_ptr_t;
 #endif  // CONFIG_VP8
 
-#if CONFIG_VP9 || CONFIG_VP10
+#if CONFIG_VP10 && CONFIG_EXT_INTER
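+// Masked (wedge) SAD/variance function types: msk_ptr points to per-pixel
+// mask weights applied when comparing src_ptr against ref_ptr.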
+typedef unsigned int (*vpx_masked_sad_fn_t)(const uint8_t *src_ptr,
+                                            int source_stride,
+                                            const uint8_t *ref_ptr,
+                                            int ref_stride,
+                                            const uint8_t *msk_ptr,
+                                            int msk_stride);
+typedef unsigned int (*vpx_masked_variance_fn_t)(const uint8_t *src_ptr,
+                                                 int source_stride,
+                                                 const uint8_t *ref_ptr,
+                                                 int ref_stride,
+                                                 const uint8_t *msk_ptr,
+                                                 int msk_stride,
+                                                 unsigned int *sse);
+typedef unsigned int (*vpx_masked_subpixvariance_fn_t)(const uint8_t *src_ptr,
+                                                       int source_stride,
+                                                       int xoffset,
+                                                       int yoffset,
+                                                       const uint8_t *ref_ptr,
+                                                       int ref_stride,
+                                                       const uint8_t *msk_ptr,
+                                                       int msk_stride,
+                                                       unsigned int *sse);
+#endif  // CONFIG_VP10 && CONFIG_EXT_INTER
+
+#if CONFIG_VP9
 typedef struct vp9_variance_vtable {
   vpx_sad_fn_t               sdf;
   vpx_sad_avg_fn_t           sdaf;
@@ -85,7 +110,25 @@ typedef struct vp9_variance_vtable {
   vpx_sad_multi_fn_t         sdx8f;
   vpx_sad_multi_d_fn_t       sdx4df;
 } vp9_variance_fn_ptr_t;
-#endif  // CONFIG_VP9 || CONFIG_VP10
+#endif  // CONFIG_VP9
+
+#if CONFIG_VP10
+typedef struct vp10_variance_vtable {
+  vpx_sad_fn_t                   sdf;
+  vpx_sad_avg_fn_t               sdaf;
+  vpx_variance_fn_t              vf;
+  vpx_subpixvariance_fn_t        svf;
+  vpx_subp_avg_variance_fn_t     svaf;
+  vpx_sad_multi_fn_t             sdx3f;
+  vpx_sad_multi_fn_t             sdx8f;
+  vpx_sad_multi_d_fn_t           sdx4df;
+#if CONFIG_EXT_INTER
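+  // Masked (wedge) counterparts of the SAD/variance functions above.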
+  vpx_masked_sad_fn_t            msdf;
+  vpx_masked_variance_fn_t       mvf;
+  vpx_masked_subpixvariance_fn_t msvf;
+#endif  // CONFIG_EXT_INTER
+} vp10_variance_fn_ptr_t;
+#endif  // CONFIG_VP10
 
 #ifdef __cplusplus
 }  // extern "C"