From 6bfa0aa88e11bfc476610b98e131a952ab0bfdf0 Mon Sep 17 00:00:00 2001
From: fbossen <frank@bossentech.com>
Date: Tue, 23 Oct 2018 17:01:19 -0400
Subject: [PATCH] Add support for compound mode (#663)

* Enable coding of compound mode

* Modify inter prediction function to support compound mode

* Call motion estimation function early and store results for use by compound mode

* Add compound as an RDO candidate

* Increase frame coding structure pyramid size

* Make ME search range dependent on frame distance
---
 src/api.rs         |   8 +-
 src/context.rs     | 606 +++++++++++++++++++++++++++++++++------------
 src/encoder.rs     | 286 +++++++++++++--------
 src/entropymode.rs |   4 +-
 src/me.rs          |   5 +-
 src/partition.rs   | 251 +++++++++++--------
 src/predict.rs     |   8 +
 src/rdo.rs         | 129 ++++++----
 8 files changed, 892 insertions(+), 405 deletions(-)

diff --git a/src/api.rs b/src/api.rs
index a05dca24..d75f785d 100644
--- a/src/api.rs
+++ b/src/api.rs
@@ -179,7 +179,7 @@ impl Context {
     let reorder = false;
     let multiref = reorder || self.fi.config.speed <= 2;
 
-    let pyramid_depth = if reorder { 1 } else { 0 };
+    let pyramid_depth = if reorder { 2 } else { 0 };
     let group_src_len = 1 << pyramid_depth;
     let group_len = group_src_len + if reorder { pyramid_depth } else { 0 };
     let segment_len = 1 + (key_frame_interval - 1 + group_src_len - 1) / group_src_len * group_len;
@@ -264,7 +264,13 @@ impl Context {
         } as u8) & 7;
       }
 
+      self.fi.reference_mode = if multiref && reorder && idx_in_group != 0 {
+        ReferenceMode::SELECT
+      } else {
+        ReferenceMode::SINGLE
+      };
       self.fi.number = segment_idx * key_frame_interval + self.fi.order_hint as u64;
+      self.fi.me_range_scale = (group_src_len >> lvl) as u8;
     }
 
     true
diff --git a/src/context.rs b/src/context.rs
index ec1739a7..6cbc5792 100644
--- a/src/context.rs
+++ b/src/context.rs
@@ -30,6 +30,7 @@ use entropymode::*;
 use token_cdfs::*;
 use encoder::FrameInvariants;
 use scan_order::*;
+use encoder::ReferenceMode;
 
 use self::REF_CONTEXTS;
 use self::SINGLE_REFS;
@@ -692,8 +693,13 @@ pub struct CDFContext {
   intra_inter_cdfs: [[u16; 3]; INTRA_INTER_CONTEXTS],
   angle_delta_cdf: [[u16; 2 * MAX_ANGLE_DELTA + 1 + 1]; DIRECTIONAL_MODES],
   filter_intra_cdfs: [[u16; 3]; BlockSize::BLOCK_SIZES_ALL],
+  comp_mode_cdf: [[u16; 3]; COMP_INTER_CONTEXTS],
+  comp_ref_type_cdf: [[u16; 3]; COMP_REF_TYPE_CONTEXTS],
+  comp_ref_cdf: [[[u16; 3]; FWD_REFS - 1]; REF_CONTEXTS],
+  comp_bwd_ref_cdf: [[[u16; 3]; BWD_REFS - 1]; REF_CONTEXTS],
   single_ref_cdfs: [[[u16; 2 + 1]; SINGLE_REFS - 1]; REF_CONTEXTS],
   drl_cdfs: [[u16; 2 + 1]; DRL_MODE_CONTEXTS],
+  compound_mode_cdf: [[u16; INTER_COMPOUND_MODES + 1]; INTER_MODE_CONTEXTS],
   nmv_context: NMVContext,
   deblock_delta_multi_cdf: [[u16; DELTA_LF_PROBS + 1 + 1]; FRAME_LF_COUNT],
   deblock_delta_cdf: [u16; DELTA_LF_PROBS + 1 + 1],
@@ -744,8 +750,13 @@ impl CDFContext {
       intra_inter_cdfs: default_intra_inter_cdf,
       angle_delta_cdf: default_angle_delta_cdf,
       filter_intra_cdfs: default_filter_intra_cdfs,
+      comp_mode_cdf: default_comp_mode_cdf,
+      comp_ref_type_cdf: default_comp_ref_type_cdf,
+      comp_ref_cdf: default_comp_ref_cdf,
+      comp_bwd_ref_cdf: default_comp_bwdref_cdf,
       single_ref_cdfs: default_single_ref_cdf,
       drl_cdfs: default_drl_cdf,
+      compound_mode_cdf: default_compound_mode_cdf,
       nmv_context: default_nmv_context,
       deblock_delta_multi_cdf: default_delta_lf_multi_cdf,
       deblock_delta_cdf: default_delta_lf_cdf,
@@ -814,8 +825,13 @@ impl CDFContext {
     reset_2d!(self.intra_inter_cdfs);
     reset_2d!(self.angle_delta_cdf);
     reset_2d!(self.filter_intra_cdfs);
+    reset_2d!(self.comp_mode_cdf);
+    reset_2d!(self.comp_ref_type_cdf);
+    reset_3d!(self.comp_ref_cdf);
+    reset_3d!(self.comp_bwd_ref_cdf);
     reset_3d!(self.single_ref_cdfs);
     reset_2d!(self.drl_cdfs);
+    reset_2d!(self.compound_mode_cdf);
     reset_2d!(self.deblock_delta_multi_cdf);
     reset_1d!(self.deblock_delta_cdf);
 
@@ -891,6 +907,22 @@ impl CDFContext {
       self.filter_intra_cdfs.first().unwrap().as_ptr() as usize;
     let filter_intra_cdfs_end =
       filter_intra_cdfs_start + size_of_val(&self.filter_intra_cdfs);
+    let comp_mode_cdf_start =
+      self.comp_mode_cdf.first().unwrap().as_ptr() as usize;
+    let comp_mode_cdf_end =
+      comp_mode_cdf_start + size_of_val(&self.comp_mode_cdf);
+    let comp_ref_type_cdf_start =
+      self.comp_ref_type_cdf.first().unwrap().as_ptr() as usize;
+    let comp_ref_type_cdf_end =
+      comp_ref_type_cdf_start + size_of_val(&self.comp_ref_type_cdf);
+    let comp_ref_cdf_start =
+      self.comp_ref_cdf.first().unwrap().as_ptr() as usize;
+    let comp_ref_cdf_end =
+      comp_ref_cdf_start + size_of_val(&self.comp_ref_cdf);
+    let comp_bwd_ref_cdf_start =
+      self.comp_bwd_ref_cdf.first().unwrap().as_ptr() as usize;
+    let comp_bwd_ref_cdf_end =
+      comp_bwd_ref_cdf_start + size_of_val(&self.comp_bwd_ref_cdf);
     let deblock_delta_multi_cdf_start =
       self.deblock_delta_multi_cdf.first().unwrap().as_ptr() as usize;
     let deblock_delta_multi_cdf_end =
@@ -965,6 +997,10 @@ impl CDFContext {
       ("intra_inter_cdfs", intra_inter_cdfs_start, intra_inter_cdfs_end),
       ("angle_delta_cdf", angle_delta_cdf_start, angle_delta_cdf_end),
       ("filter_intra_cdfs", filter_intra_cdfs_start, filter_intra_cdfs_end),
+      ("comp_mode_cdf", comp_mode_cdf_start, comp_mode_cdf_end),
+      ("comp_ref_type_cdf", comp_ref_type_cdf_start, comp_ref_type_cdf_end),
+      ("comp_ref_cdf", comp_ref_cdf_start, comp_ref_cdf_end),
+      ("comp_bwd_ref_cdf", comp_bwd_ref_cdf_start, comp_bwd_ref_cdf_end),
       ("deblock_delta_multi_cdf", deblock_delta_multi_cdf_start, deblock_delta_multi_cdf_end),
       ("deblock_delta_cdf", deblock_delta_cdf_start, deblock_delta_cdf_end),
       ("txb_skip_cdf", txb_skip_cdf_start, txb_skip_cdf_end),
@@ -1158,7 +1194,7 @@ impl Block {
     self.mode >= PredictionMode::NEARESTMV
   }
   pub fn has_second_ref(&self) -> bool {
-    self.ref_frames[1] > INTRA_FRAME
+    self.ref_frames[1] > INTRA_FRAME && self.ref_frames[1] != NONE_FRAME
   }
 }
 
@@ -1426,24 +1462,24 @@ impl BlockContext {
     self.for_each(bo, bsize, |block| block.skip = skip);
   }
 
-  pub fn set_ref_frame(&mut self, bo: &BlockOffset, bsize: BlockSize, r: usize) {
+  pub fn set_ref_frames(&mut self, bo: &BlockOffset, bsize: BlockSize, r: &[usize; 2]) {
     let bw = bsize.width_mi();
     let bh = bsize.height_mi();
 
     for y in 0..bh {
       for x in 0..bw {
-        self.blocks[bo.y + y as usize][bo.x + x as usize].ref_frames[0] = r;
+        self.blocks[bo.y + y as usize][bo.x + x as usize].ref_frames = *r;
       }
     }
   }
 
-  pub fn set_motion_vector(&mut self, bo: &BlockOffset, bsize: BlockSize, mv: MotionVector) {
+  pub fn set_motion_vectors(&mut self, bo: &BlockOffset, bsize: BlockSize, mvs: &[MotionVector; 2]) {
     let bw = bsize.width_mi();
     let bh = bsize.height_mi();
 
     for y in 0..bh {
       for x in 0..bw {
-        self.blocks[bo.y + y as usize][bo.x + x as usize].mv[0] = mv;
+        self.blocks[bo.y + y as usize][bo.x + x as usize].mv = *mvs;
       }
     }
   }
@@ -1981,9 +2017,9 @@ impl ContextWriter {
     false
   }
 
-  fn find_matching_mv_and_update_weight(&self, blk: &Block, mv_stack: &mut Vec<CandidateMV>, weight: u32) -> bool {
+  fn find_matching_mv_and_update_weight(&self, mv: &MotionVector, mv_stack: &mut Vec<CandidateMV>, weight: u32) -> bool {
     for mut mv_cand in mv_stack {
-      if blk.mv[0].row == mv_cand.this_mv.row && blk.mv[0].col == mv_cand.this_mv.col {
+      if mv.row == mv_cand.this_mv.row && mv.col == mv_cand.this_mv.col {
         mv_cand.weight += weight;
         return true;
       }
@@ -1991,66 +2027,140 @@ impl ContextWriter {
     false
   }
 
-  fn add_ref_mv_candidate(&self, ref_frame: usize, blk: &Block, mv_stack: &mut Vec<CandidateMV>,
-                          weight: u32, newmv_count: &mut usize) -> bool {
-    if !blk.is_inter() { /* For intrabc */
-      return false;
+  fn find_matching_comp_mv_and_update_weight(&self, mvs: &[MotionVector; 2], mv_stack: &mut Vec<CandidateMV>, weight: u32) -> bool {
+    for mut mv_cand in mv_stack {
+      if mvs[0].row == mv_cand.this_mv.row && mvs[0].col == mv_cand.this_mv.col &&
+        mvs[1].row == mv_cand.comp_mv.row && mvs[1].col == mv_cand.comp_mv.col {
+        mv_cand.weight += weight;
+        return true;
+      }
     }
+    false
+  }
 
-    if blk.ref_frames[0] == ref_frame {
-      let found_match = self.find_matching_mv_and_update_weight(blk, mv_stack, weight);
+  fn add_ref_mv_candidate(&self, ref_frames: &[usize; 2], blk: &Block, mv_stack: &mut Vec<CandidateMV>,
+                          weight: u32, newmv_count: &mut usize, is_compound: bool) -> bool {
+    if !blk.is_inter() { /* For intrabc */
+      false
+    } else if is_compound {
+      if blk.ref_frames[0] == ref_frames[0] && blk.ref_frames[1] == ref_frames[1] {
+        let found_match = self.find_matching_comp_mv_and_update_weight(&blk.mv, mv_stack, weight);
 
-      if !found_match && mv_stack.len() < MAX_REF_MV_STACK_SIZE {
-        let mv_cand = CandidateMV {
-          this_mv: blk.mv[0],
-          comp_mv: blk.mv[1],
-          weight: weight
-        };
+        if !found_match && mv_stack.len() < MAX_REF_MV_STACK_SIZE {
+          let mv_cand = CandidateMV {
+            this_mv: blk.mv[0],
+            comp_mv: blk.mv[1],
+            weight: weight
+          };
 
-        mv_stack.push(mv_cand);
-      }
+          mv_stack.push(mv_cand);
+        }
 
-      if blk.mode == PredictionMode::NEWMV {
-        *newmv_count += 1;
-      }
+        if blk.mode == PredictionMode::NEW_NEWMV ||
+          blk.mode == PredictionMode::NEAREST_NEWMV ||
+          blk.mode == PredictionMode::NEW_NEARESTMV ||
+          blk.mode == PredictionMode::NEAR_NEWMV ||
+          blk.mode == PredictionMode::NEW_NEARMV {
+          *newmv_count += 1;
+        }
 
-      true
+        true
+      } else {
+        false
+      }
     } else {
-      false
+      let mut found = false;
+      for i in 0..2 {
+        if blk.ref_frames[i] == ref_frames[0] {
+          let found_match = self.find_matching_mv_and_update_weight(&blk.mv[i], mv_stack, weight);
+
+          if !found_match && mv_stack.len() < MAX_REF_MV_STACK_SIZE {
+            let mv_cand = CandidateMV {
+              this_mv: blk.mv[i],
+              comp_mv: MotionVector { row: 0, col: 0 },
+              weight: weight
+            };
+
+            mv_stack.push(mv_cand);
+          }
+
+          if blk.mode == PredictionMode::NEW_NEWMV ||
+            blk.mode == PredictionMode::NEAREST_NEWMV ||
+            blk.mode == PredictionMode::NEW_NEARESTMV ||
+            blk.mode == PredictionMode::NEAR_NEWMV ||
+            blk.mode == PredictionMode::NEW_NEARMV ||
+            blk.mode == PredictionMode::NEWMV {
+            *newmv_count += 1;
+          }
+
+          found = true;
+        }
+      }
+      found
     }
   }
 
   fn add_extra_mv_candidate(
     &self,
     blk: &Block,
-    ref_frame: usize,
+    ref_frames: &[usize; 2],
     mv_stack: &mut Vec<CandidateMV>,
-    fi: &FrameInvariants
+    fi: &FrameInvariants,
+    is_compound: bool,
+    ref_id_count: &mut [usize; 2],
+    ref_id_mvs: &mut [[MotionVector; 2]; 2],
+    ref_diff_count: &mut [usize; 2],
+    ref_diff_mvs: &mut [[MotionVector; 2]; 2],
   ) {
-    for cand_list in 0..2 {
-      if blk.ref_frames[cand_list] > INTRA_FRAME {
-        let mut mv = blk.mv[0];
-        if fi.ref_frame_sign_bias[blk.ref_frames[cand_list] - LAST_FRAME] !=
-        fi.ref_frame_sign_bias[ref_frame - LAST_FRAME] {
-          mv.row = -mv.row;
-          mv.col = -mv.col;
+    if is_compound {
+      for cand_list in 0..2 {
+        let cand_ref = blk.ref_frames[cand_list];
+        if cand_ref > INTRA_FRAME && cand_ref != NONE_FRAME {
+          for list in 0..2 {
+            let mut cand_mv = blk.mv[cand_list];
+            if cand_ref == ref_frames[list] && ref_id_count[list] < 2 {
+              ref_id_mvs[list][ref_id_count[list]] = cand_mv;
+              ref_id_count[list] = ref_id_count[list] + 1;
+            } else if ref_diff_count[list] < 2 {
+              if fi.ref_frame_sign_bias[cand_ref - LAST_FRAME] !=
+                fi.ref_frame_sign_bias[ref_frames[list] - LAST_FRAME] {
+                cand_mv.row = -cand_mv.row;
+                cand_mv.col = -cand_mv.col;
+              }
+              ref_diff_mvs[list][ref_diff_count[list]] = cand_mv;
+              ref_diff_count[list] = ref_diff_count[list] + 1;
+            }
+          }
         }
+      }
+    } else {
+      for cand_list in 0..2 {
+        let cand_ref = blk.ref_frames[cand_list];
+        if cand_ref > INTRA_FRAME && cand_ref != NONE_FRAME {
+          let mut mv = blk.mv[cand_list];
+          if fi.ref_frame_sign_bias[cand_ref - LAST_FRAME] !=
+            fi.ref_frame_sign_bias[ref_frames[0] - LAST_FRAME] {
+            mv.row = -mv.row;
+            mv.col = -mv.col;
+          }
 
-        if !self.find_matching_mv(&mv, mv_stack) {
-          let mv_cand = CandidateMV {
-            this_mv: mv,
-            comp_mv: mv,
-            weight: 2
-          };
-          mv_stack.push(mv_cand);
+          if !self.find_matching_mv(&mv, mv_stack) {
+            let mv_cand = CandidateMV {
+              this_mv: mv,
+              comp_mv: MotionVector { row: 0, col: 0 },
+              weight: 2
+            };
+            mv_stack.push(mv_cand);
+          }
         }
       }
     }
   }
 
   fn scan_row_mbmi(&mut self, bo: &BlockOffset, row_offset: isize, max_row_offs: isize,
-                   processed_rows: &mut isize, ref_frame: usize,
-                   mv_stack: &mut Vec<CandidateMV>, newmv_count: &mut usize, bsize: BlockSize) -> bool {
+                   processed_rows: &mut isize, ref_frames: &[usize; 2],
+                   mv_stack: &mut Vec<CandidateMV>, newmv_count: &mut usize, bsize: BlockSize,
+                   is_compound: bool) -> bool {
     let bc = &self.bc;
     let target_n4_w = bsize.width_mi();
 
@@ -2091,7 +2201,7 @@ impl ContextWriter {
         *processed_rows = (inc as isize) - row_offset - 1;
       }
 
-      if self.add_ref_mv_candidate(ref_frame, cand, mv_stack, len as u32 * weight, newmv_count) {
+      if self.add_ref_mv_candidate(ref_frames, cand, mv_stack, len as u32 * weight, newmv_count, is_compound) {
         found_match = true;
       }
 
@@ -2102,8 +2212,9 @@ impl ContextWriter {
   }
 
   fn scan_col_mbmi(&mut self, bo: &BlockOffset, col_offset: isize, max_col_offs: isize,
-                   processed_cols: &mut isize, ref_frame: usize,
-                   mv_stack: &mut Vec<CandidateMV>, newmv_count: &mut usize, bsize: BlockSize) -> bool {
+                   processed_cols: &mut isize, ref_frames: &[usize; 2],
+                   mv_stack: &mut Vec<CandidateMV>, newmv_count: &mut usize, bsize: BlockSize,
+                   is_compound: bool) -> bool {
     let bc = &self.bc;
 
     let target_n4_h = bsize.height_mi();
@@ -2144,7 +2255,7 @@ impl ContextWriter {
         *processed_cols = (inc as isize) - col_offset - 1;
       }
 
-      if self.add_ref_mv_candidate(ref_frame, cand, mv_stack, len as u32 * weight, newmv_count) {
+      if self.add_ref_mv_candidate(ref_frames, cand, mv_stack, len as u32 * weight, newmv_count, is_compound) {
         found_match = true;
       }
 
@@ -2154,15 +2265,16 @@ impl ContextWriter {
     found_match
   }
 
-  fn scan_blk_mbmi(&mut self, bo: &BlockOffset, ref_frame: usize,
-                   mv_stack: &mut Vec<CandidateMV>, newmv_count: &mut usize) -> bool {
+  fn scan_blk_mbmi(&mut self, bo: &BlockOffset, ref_frames: &[usize; 2],
+                   mv_stack: &mut Vec<CandidateMV>, newmv_count: &mut usize,
+                   is_compound: bool) -> bool {
     if bo.x >= self.bc.cols || bo.y >= self.bc.rows {
       return false;
     }
 
     let weight = 2 * BLOCK_8X8.width_mi() as u32;
     /* Always assume its within a tile, probably wrong */
-    self.add_ref_mv_candidate(ref_frame, self.bc.at(bo), mv_stack, weight, newmv_count)
+    self.add_ref_mv_candidate(ref_frames, self.bc.at(bo), mv_stack, weight, newmv_count, is_compound)
   }
 
   fn add_offset(&mut self, mv_stack: &mut Vec<CandidateMV>) {
@@ -2171,8 +2283,8 @@ impl ContextWriter {
     }
   }
 
-  fn setup_mvref_list(&mut self, bo: &BlockOffset, ref_frame: usize, mv_stack: &mut Vec<CandidateMV>,
-                      bsize: BlockSize, is_sec_rect: bool, fi: &FrameInvariants) -> usize {
+  fn setup_mvref_list(&mut self, bo: &BlockOffset, ref_frames: &[usize; 2], mv_stack: &mut Vec<CandidateMV>,
+                      bsize: BlockSize, is_sec_rect: bool, fi: &FrameInvariants, is_compound: bool) -> usize {
     let (_rf, _rf_num) = self.get_mvref_ref_frames(INTRA_FRAME);
 
     let target_n4_h = bsize.height_mi();
@@ -2218,18 +2330,18 @@ impl ContextWriter {
     let mut newmv_count: usize = 0;
 
     if max_row_offs.abs() >= 1 {
-      let found_match = self.scan_row_mbmi(bo, -1, max_row_offs, &mut processed_rows, ref_frame, mv_stack,
-                                           &mut newmv_count, bsize);
+      let found_match = self.scan_row_mbmi(bo, -1, max_row_offs, &mut processed_rows, ref_frames, mv_stack,
+                                           &mut newmv_count, bsize, is_compound);
       row_match |= found_match;
     }
     if max_col_offs.abs() >= 1 {
-      let found_match = self.scan_col_mbmi(bo, -1, max_col_offs, &mut processed_cols, ref_frame, mv_stack,
-                                           &mut newmv_count, bsize);
+      let found_match = self.scan_col_mbmi(bo, -1, max_col_offs, &mut processed_cols, ref_frames, mv_stack,
+                                           &mut newmv_count, bsize, is_compound);
       col_match |= found_match;
     }
     if self.has_tr(bo, bsize, is_sec_rect) {
-      let found_match = self.scan_blk_mbmi(&bo.with_offset(target_n4_w as isize, -1), ref_frame, mv_stack,
-                                           &mut newmv_count);
+      let found_match = self.scan_blk_mbmi(&bo.with_offset(target_n4_w as isize, -1), ref_frames, mv_stack,
+                                           &mut newmv_count, is_compound);
       row_match |= found_match;
     }
 
@@ -2240,7 +2352,9 @@ impl ContextWriter {
     /* Scan the second outer area. */
     let mut far_newmv_count: usize = 0; // won't be used
 
-    let found_match = self.scan_blk_mbmi(&bo.with_offset(-1, -1), ref_frame, mv_stack, &mut far_newmv_count);
+    let found_match = self.scan_blk_mbmi(
+      &bo.with_offset(-1, -1), ref_frames, mv_stack, &mut far_newmv_count, is_compound
+    );
     row_match |= found_match;
 
     for idx in 2..MVREF_ROW_COLS+1 {
@@ -2248,14 +2362,14 @@ impl ContextWriter {
       let col_offset = -2 * idx as isize + 1 + col_adj as isize;
 
       if row_offset.abs() <= max_row_offs.abs() && row_offset.abs() > processed_rows {
-        let found_match = self.scan_row_mbmi(bo, row_offset, max_row_offs, &mut processed_rows, ref_frame, mv_stack,
-                                             &mut far_newmv_count, bsize);
+        let found_match = self.scan_row_mbmi(bo, row_offset, max_row_offs, &mut processed_rows, ref_frames, mv_stack,
+                                             &mut far_newmv_count, bsize, is_compound);
         row_match |= found_match;
       }
 
       if col_offset.abs() <= max_col_offs.abs() && col_offset.abs() > processed_cols {
-        let found_match = self.scan_col_mbmi(bo, col_offset, max_col_offs, &mut processed_cols, ref_frame, mv_stack,
-                                             &mut far_newmv_count, bsize);
+        let found_match = self.scan_col_mbmi(bo, col_offset, max_col_offs, &mut processed_cols, ref_frames, mv_stack,
+                                             &mut far_newmv_count, bsize, is_compound);
         col_match |= found_match;
       }
     }
@@ -2264,13 +2378,13 @@ impl ContextWriter {
 
     assert!(total_match >= nearest_match);
 
+    // mode_context contains both newmv_context and refmv_context, where newmv_context
+    // lies in the REFMV_OFFSET least significant bits
     let mode_context = match nearest_match {
-                         0 =>  cmp::min(total_match, 1) + (total_match << REFMV_OFFSET) ,
-                         1 =>  3 - cmp::min(newmv_count, 1) + ((2 + total_match) << REFMV_OFFSET) ,
-                         _ =>  5 - cmp::min(newmv_count, 1) + (5 << REFMV_OFFSET)
-                       };
-
-    // println!("{} {} {} {} {}", bo.x, bo.y, nearest_match, total_match, mode_context);
+      0 =>  cmp::min(total_match, 1) + (total_match << REFMV_OFFSET),
+      1 =>  3 - cmp::min(newmv_count, 1) + ((2 + total_match) << REFMV_OFFSET),
+      _ =>  5 - cmp::min(newmv_count, 1) + (5 << REFMV_OFFSET)
+    };
 
     /* TODO: Find nearest match and assign nearest and near mvs */
 
@@ -2286,6 +2400,11 @@ impl ContextWriter {
 
       let passes = if up_avail { 0 } else { 1 } .. if left_avail { 2 } else { 1 };
 
+      let mut ref_id_count = [0 as usize; 2];
+      let mut ref_diff_count = [0 as usize; 2];
+      let mut ref_id_mvs = [[MotionVector { row: 0, col: 0 }; 2]; 2];
+      let mut ref_diff_mvs = [[MotionVector { row: 0, col: 0 }; 2]; 2];
+
       for pass in passes {
         let mut idx = 0;
         while idx < num4x4 && mv_stack.len() < 2 {
@@ -2296,7 +2415,10 @@ impl ContextWriter {
           };
 
           let blk = &self.bc.at(&rbo);
-          self.add_extra_mv_candidate(blk, ref_frame, mv_stack, fi);
+          self.add_extra_mv_candidate(
+            blk, ref_frames, mv_stack, fi, is_compound,
+            &mut ref_id_count, &mut ref_id_mvs, &mut ref_diff_count, &mut ref_diff_mvs
+          );
 
           idx += if pass == 0 {
             blk.n4_w
@@ -2305,6 +2427,55 @@ impl ContextWriter {
           };
         }
       }
+
+      if is_compound {
+        let mut combined_mvs = [[MotionVector { row: 0, col: 0}; 2]; 2];
+
+        for list in 0..2 {
+          let mut comp_count = 0;
+          for idx in 0..ref_id_count[list] {
+            combined_mvs[comp_count][list] = ref_id_mvs[list][idx];
+            comp_count = comp_count + 1;
+          }
+          for idx in 0..ref_diff_count[list] {
+            if comp_count < 2 {
+              combined_mvs[comp_count][list] = ref_diff_mvs[list][idx];
+              comp_count = comp_count + 1;
+            }
+          }
+        }
+
+        if mv_stack.len() == 1 {
+          let mv_cand = if combined_mvs[0][0].row == mv_stack[0].this_mv.row &&
+            combined_mvs[0][0].col == mv_stack[0].this_mv.col &&
+            combined_mvs[0][1].row == mv_stack[0].comp_mv.row &&
+            combined_mvs[0][1].col == mv_stack[0].comp_mv.col {
+            CandidateMV {
+              this_mv: combined_mvs[1][0],
+              comp_mv: combined_mvs[1][1],
+              weight: 2
+            }
+          } else {
+            CandidateMV {
+              this_mv: combined_mvs[0][0],
+              comp_mv: combined_mvs[0][1],
+              weight: 2
+            }
+          };
+          mv_stack.push(mv_cand);
+        } else {
+          for idx in 0..2 {
+            let mv_cand = CandidateMV {
+              this_mv: combined_mvs[idx][0],
+              comp_mv: combined_mvs[idx][1],
+              weight: 2
+            };
+            mv_stack.push(mv_cand);
+          }
+        }
+
+        assert!(mv_stack.len() == 2);
+      }
     }
 
     /* TODO: Handle single reference frame extension */
@@ -2321,33 +2492,36 @@ impl ContextWriter {
       let mvy_max = (self.bc.rows - bo.y - blk_h / MI_SIZE) as isize * (8 * MI_SIZE) as isize + border_h;
       mv.this_mv.row = (mv.this_mv.row as isize).max(mvy_min).min(mvy_max) as i16;
       mv.this_mv.col = (mv.this_mv.col as isize).max(mvx_min).min(mvx_max) as i16;
+      mv.comp_mv.row = (mv.comp_mv.row as isize).max(mvy_min).min(mvy_max) as i16;
+      mv.comp_mv.col = (mv.comp_mv.col as isize).max(mvx_min).min(mvx_max) as i16;
     }
 
     mode_context
   }
 
-  pub fn find_mvrefs(&mut self, bo: &BlockOffset, ref_frame: usize,
+  pub fn find_mvrefs(&mut self, bo: &BlockOffset, ref_frames: &[usize; 2],
                      mv_stack: &mut Vec<CandidateMV>, bsize: BlockSize, is_sec_rect: bool,
-                     fi: &FrameInvariants) -> usize {
-    if ref_frame < REF_FRAMES {
-      if ref_frame != INTRA_FRAME {
+                     fi: &FrameInvariants, is_compound: bool) -> usize {
+    assert!(ref_frames[0] != NONE_FRAME);
+    if ref_frames[0] < REF_FRAMES {
+      if ref_frames[0] != INTRA_FRAME {
         /* TODO: convert global mv to an mv here */
       } else {
         /* TODO: set the global mv ref to invalid here */
       }
     }
 
-    if ref_frame != INTRA_FRAME {
+    if ref_frames[0] != INTRA_FRAME {
       /* TODO: Set zeromv ref to the converted global motion vector */
     } else {
       /* TODO: Set the zeromv ref to 0 */
     }
 
-    if ref_frame <= INTRA_FRAME {
+    if ref_frames[0] <= INTRA_FRAME {
       return 0;
     }
 
-    let mode_context = self.setup_mvref_list(bo, ref_frame, mv_stack, bsize, is_sec_rect, fi);
+    let mode_context = self.setup_mvref_list(bo, ref_frames, mv_stack, bsize, is_sec_rect, fi, is_compound);
     mode_context
   }
 
@@ -2373,6 +2547,16 @@ impl ContextWriter {
       self.bc.at_mut(bo).neighbors_ref_counts = ref_counts;
   }
 
+  fn ref_count_ctx(counts0: usize, counts1: usize) -> usize {
+    if counts0 < counts1 {
+      0
+    } else if counts0 == counts1 {
+      1
+    } else {
+      2
+    }
+  }
+
   fn get_ref_frame_ctx_b0(&mut self, bo: &BlockOffset) -> usize {
     let ref_counts = self.bc.at(bo).neighbors_ref_counts;
 
@@ -2382,137 +2566,255 @@ impl ContextWriter {
     let bwd_cnt = ref_counts[BWDREF_FRAME] + ref_counts[ALTREF2_FRAME] +
                   ref_counts[ALTREF_FRAME];
 
-    if fwd_cnt == bwd_cnt {
-      return 1;
-    } else if fwd_cnt < bwd_cnt {
-      return 0;
-    } else {
-      return 2;
-    }
+    ContextWriter::ref_count_ctx(fwd_cnt, bwd_cnt)
   }
 
   fn get_pred_ctx_brfarf2_or_arf(&mut self, bo: &BlockOffset) -> usize {
     let ref_counts = self.bc.at(bo).neighbors_ref_counts;
 
-    let brfarf2_count = ref_counts[BWDREF_FRAME] +
-                        ref_counts[ALTREF2_FRAME];
-
+    let brfarf2_count = ref_counts[BWDREF_FRAME] + ref_counts[ALTREF2_FRAME];
     let arf_count = ref_counts[ALTREF_FRAME];
 
-    if brfarf2_count == arf_count {
-      return 1;
-    } else if brfarf2_count < arf_count {
-      return 0;
-    } else {
-      return 2;
-    }
+    ContextWriter::ref_count_ctx(brfarf2_count, arf_count)
   }
 
   fn get_pred_ctx_ll2_or_l3gld(&mut self, bo: &BlockOffset) -> usize {
     let ref_counts = self.bc.at(bo).neighbors_ref_counts;
 
-    let l_l2_count = ref_counts[LAST_FRAME] +
-                        ref_counts[LAST2_FRAME];
-
-    let l3_gold_count = ref_counts[LAST3_FRAME] +
-                        ref_counts[GOLDEN_FRAME];
+    let l_l2_count = ref_counts[LAST_FRAME] + ref_counts[LAST2_FRAME];
+    let l3_gold_count = ref_counts[LAST3_FRAME] + ref_counts[GOLDEN_FRAME];
 
-    if l_l2_count == l3_gold_count {
-      return 1;
-    } else if l_l2_count < l3_gold_count {
-      return 0;
-    } else {
-      return 2;
-    }
+    ContextWriter::ref_count_ctx(l_l2_count, l3_gold_count)
   }
 
   fn get_pred_ctx_last_or_last2(&mut self, bo: &BlockOffset) -> usize {
     let ref_counts = self.bc.at(bo).neighbors_ref_counts;
 
     let l_count = ref_counts[LAST_FRAME];
-
     let l2_count = ref_counts[LAST2_FRAME];
 
-    if l_count == l2_count {
-      return 1;
-    } else if l_count < l2_count {
-      return 0;
-    } else {
-      return 2;
-    }
+    ContextWriter::ref_count_ctx(l_count, l2_count)
   }
 
   fn get_pred_ctx_last3_or_gold(&mut self, bo: &BlockOffset) -> usize {
     let ref_counts = self.bc.at(bo).neighbors_ref_counts;
 
     let l3_count = ref_counts[LAST3_FRAME];
-
     let gold_count = ref_counts[GOLDEN_FRAME];
 
-    if l3_count == gold_count {
-      return 1;
-    } else if l3_count < gold_count {
-      return 0;
-    } else {
-      return 2;
-    }
+    ContextWriter::ref_count_ctx(l3_count, gold_count)
   }
 
   fn get_pred_ctx_brf_or_arf2(&mut self, bo: &BlockOffset) -> usize {
     let ref_counts = self.bc.at(bo).neighbors_ref_counts;
 
     let brf_count = ref_counts[BWDREF_FRAME];
-
     let arf2_count = ref_counts[ALTREF2_FRAME];
 
-    if brf_count == arf2_count {
-      return 1;
-    } else if brf_count < arf2_count {
-      return 0;
+    ContextWriter::ref_count_ctx(brf_count, arf2_count)
+  }
+
+  fn get_comp_mode_ctx(&self, bo: &BlockOffset) -> usize {
+    fn check_backward(ref_frame: usize) -> bool {
+      ref_frame >= BWDREF_FRAME && ref_frame <= ALTREF_FRAME
+    }
+    let avail_left = bo.x > 0;
+    let avail_up = bo.y > 0;
+    let bo_left = bo.with_offset(-1, 0);
+    let bo_up = bo.with_offset(0, -1);
+    let above0 = if avail_up { self.bc.at(&bo_up).ref_frames[0] } else { INTRA_FRAME };
+    let above1 = if avail_up { self.bc.at(&bo_up).ref_frames[1] } else { NONE_FRAME };
+    let left0 = if avail_left { self.bc.at(&bo_left).ref_frames[0] } else { INTRA_FRAME };
+    let left1 = if avail_left { self.bc.at(&bo_left).ref_frames[1] } else { NONE_FRAME };
+    let left_single = left1 == NONE_FRAME;
+    let above_single = above1 == NONE_FRAME;
+    let left_intra = left0 == INTRA_FRAME;
+    let above_intra = above0 == INTRA_FRAME;
+    let left_backward = check_backward(left0);
+    let above_backward = check_backward(above0);
+
+    if avail_left && avail_up {
+      if above_single && left_single {
+        (above_backward ^ left_backward) as usize
+      } else if above_single {
+        2 + (above_backward || above_intra) as usize
+      } else if left_single {
+        2 + (left_backward || left_intra) as usize
+      } else {
+        4
+      }
+    } else if avail_up {
+      if above_single {
+        above_backward as usize
+      } else {
+        3
+      }
+    } else if avail_left {
+      if left_single {
+        left_backward as usize
+      } else {
+        3
+      }
+    } else {
+      1
+    }
+  }
+
+  fn get_comp_ref_type_ctx(&self, bo: &BlockOffset) -> usize { // context for comp_ref_type, derived from the above/left blocks
+    fn is_samedir_ref_pair(ref0: usize, ref1: usize) -> bool { // true when both refs are on the same side of BWDREF_FRAME
+      (ref0 >= BWDREF_FRAME && ref0 != NONE_FRAME) == (ref1 >= BWDREF_FRAME && ref1 != NONE_FRAME)
+    }
+
+    let avail_left = bo.x > 0;
+    let avail_up = bo.y > 0;
+    let bo_left = bo.with_offset(-1, 0);
+    let bo_up = bo.with_offset(0, -1);
+    let above0 = if avail_up { self.bc.at(&bo_up).ref_frames[0] } else { INTRA_FRAME }; // a missing neighbour counts as intra
+    let above1 = if avail_up { self.bc.at(&bo_up).ref_frames[1] } else { NONE_FRAME }; // ... with no second reference
+    let left0 = if avail_left { self.bc.at(&bo_left).ref_frames[0] } else { INTRA_FRAME };
+    let left1 = if avail_left { self.bc.at(&bo_left).ref_frames[1] } else { NONE_FRAME };
+    let left_single = left1 == NONE_FRAME;
+    let above_single = above1 == NONE_FRAME;
+    let left_intra = left0 == INTRA_FRAME;
+    let above_intra = above0 == INTRA_FRAME;
+    let above_comp_inter = avail_up && !above_intra && !above_single; // above block is inter with two references
+    let left_comp_inter = avail_left && !left_intra && !left_single; // left block is inter with two references
+    let above_uni_comp = above_comp_inter && is_samedir_ref_pair(above0, above1); // both of its refs on the same side
+    let left_uni_comp = left_comp_inter && is_samedir_ref_pair(left0, left1);
+
+    if avail_up && !above_intra && avail_left && !left_intra { // both neighbours available and inter coded
+      let samedir = is_samedir_ref_pair(above0, left0);
+
+      if !above_comp_inter && !left_comp_inter { // single / single
+        1 + 2 * samedir as usize
+      } else if !above_comp_inter { // above single, left compound
+        if !left_uni_comp { 1 } else { 3 + samedir as usize }
+      } else if !left_comp_inter { // above compound, left single
+        if !above_uni_comp { 1 } else { 3 + samedir as usize }
+      } else { // compound / compound
+        if !above_uni_comp && !left_uni_comp {
+          0
+        } else if !above_uni_comp || !left_uni_comp {
+          2
+        } else {
+          3 + ((above0 == BWDREF_FRAME) == (left0 == BWDREF_FRAME)) as usize
+        }
+      }
+    } else if avail_up && avail_left { // both available, at least one of them intra
+      if above_comp_inter {
+        1 + 2 * above_uni_comp as usize
+      } else if left_comp_inter {
+        1 + 2 * left_uni_comp as usize
+      } else {
+        2
+      }
+    } else if above_comp_inter { // at most one neighbour available from here on
+      4 * above_uni_comp as usize
+    } else if left_comp_inter {
+      4 * left_uni_comp as usize
     } else {
-      return 2;
+      2
     }
   }
 
-  pub fn write_ref_frames(&mut self, w: &mut dyn Writer, bo: &BlockOffset) {
+  pub fn write_ref_frames(&mut self, w: &mut dyn Writer, fi: &FrameInvariants, bo: &BlockOffset) { // codes the reference frame(s) of the block at `bo`
     let rf = self.bc.at(bo).ref_frames;
+    let sz = self.bc.at(bo).n4_w.min(self.bc.at(bo).n4_h); // smaller of the two block dimensions (n4 units)
 
     /* TODO: Handle multiple references */
+    let comp_mode = self.bc.at(bo).has_second_ref(); // whether the block uses two references
 
-    let b0_ctx = self.get_ref_frame_ctx_b0(bo);
-    let b0 = rf[0] <= ALTREF_FRAME && rf[0] >= BWDREF_FRAME;
-
-    symbol_with_update!(self, w, b0 as u32, &mut self.fc.single_ref_cdfs[b0_ctx][0]);
-    if b0 {
-      let b1_ctx = self.get_pred_ctx_brfarf2_or_arf(bo);
-      let b1 = rf[0] == ALTREF_FRAME;
+    if fi.reference_mode != ReferenceMode::SINGLE && sz >= 2 { // the compound flag is only coded in this case
+      let ctx = self.get_comp_mode_ctx(bo);
+      symbol_with_update!(self, w, comp_mode as u32, &mut self.fc.comp_mode_cdf[ctx]);
+    } else {
+      assert!(!comp_mode); // no flag is coded, so the block must be single-reference
+    }
 
-      symbol_with_update!(self, w, b1 as u32, &mut self.fc.single_ref_cdfs[b1_ctx][1]);
-      if !b1 {
-        let b5_ctx = self.get_pred_ctx_brf_or_arf2(bo);
-        let b5 = rf[0] == ALTREF2_FRAME;
+    if comp_mode {
+      let comp_ref_type = 1 as u32; // bidir
+      let ctx = self.get_comp_ref_type_ctx(bo);
+      symbol_with_update!(self, w, comp_ref_type, &mut self.fc.comp_ref_type_cdf[ctx]);
 
-        symbol_with_update!(self, w, b5 as u32, &mut self.fc.single_ref_cdfs[b5_ctx][5]);
+      if comp_ref_type == 0 {
+        unimplemented!(); // unreachable for now: comp_ref_type is fixed to 1 above
+      } else {
+        let compref = rf[0] == GOLDEN_FRAME || rf[0] == LAST3_FRAME; // first reference: true for LAST3/GOLDEN
+        let ctx = self.get_pred_ctx_ll2_or_l3gld(bo);
+        symbol_with_update!(self, w, compref as u32, &mut self.fc.comp_ref_cdf[ctx][0]);
+        if !compref {
+          let compref_p1 = rf[0] == LAST2_FRAME;
+          let ctx = self.get_pred_ctx_last_or_last2(bo);
+          symbol_with_update!(self, w, compref_p1 as u32, &mut self.fc.comp_ref_cdf[ctx][1]);
+        } else {
+          let compref_p2 = rf[0] == GOLDEN_FRAME;
+          let ctx = self.get_pred_ctx_last3_or_gold(bo);
+          symbol_with_update!(self, w, compref_p2 as u32, &mut self.fc.comp_ref_cdf[ctx][2]);
+        }
+        let comp_bwdref = rf[1] == ALTREF_FRAME; // second reference: first symbol tells whether it is ALTREF
+        let ctx = self.get_pred_ctx_brfarf2_or_arf(bo);
+        symbol_with_update!(self, w, comp_bwdref as u32, &mut self.fc.comp_bwd_ref_cdf[ctx][0]);
+        if !comp_bwdref {
+          let comp_bwdref_p1 = rf[1] == ALTREF2_FRAME;
+          let ctx = self.get_pred_ctx_brf_or_arf2(bo);
+          symbol_with_update!(self, w, comp_bwdref_p1 as u32, &mut self.fc.comp_bwd_ref_cdf[ctx][1]);
+        }
       }
     } else {
-      let b2_ctx = self.get_pred_ctx_ll2_or_l3gld(bo);
-      let b2 = rf[0] == LAST3_FRAME || rf[0] == GOLDEN_FRAME;
+      let b0_ctx = self.get_ref_frame_ctx_b0(bo); // single-reference path: only rf[0] is coded
+      let b0 = rf[0] <= ALTREF_FRAME && rf[0] >= BWDREF_FRAME; // true when rf[0] lies in BWDREF_FRAME..=ALTREF_FRAME
+
+      symbol_with_update!(self, w, b0 as u32, &mut self.fc.single_ref_cdfs[b0_ctx][0]);
+      if b0 {
+        let b1_ctx = self.get_pred_ctx_brfarf2_or_arf(bo);
+        let b1 = rf[0] == ALTREF_FRAME;
 
-      symbol_with_update!(self, w, b2 as u32, &mut self.fc.single_ref_cdfs[b2_ctx][2]);
-      if !b2 {
-        let b3_ctx = self.get_pred_ctx_last_or_last2(bo);
-        let b3 = rf[0] != LAST_FRAME;
+        symbol_with_update!(self, w, b1 as u32, &mut self.fc.single_ref_cdfs[b1_ctx][1]);
+        if !b1 {
+          let b5_ctx = self.get_pred_ctx_brf_or_arf2(bo);
+          let b5 = rf[0] == ALTREF2_FRAME;
 
-        symbol_with_update!(self, w, b3 as u32, &mut self.fc.single_ref_cdfs[b3_ctx][3]);
+          symbol_with_update!(self, w, b5 as u32, &mut self.fc.single_ref_cdfs[b5_ctx][5]);
+        }
       } else {
-        let b4_ctx = self.get_pred_ctx_last3_or_gold(bo);
-        let b4 = rf[0] != LAST3_FRAME;
+        let b2_ctx = self.get_pred_ctx_ll2_or_l3gld(bo);
+        let b2 = rf[0] == LAST3_FRAME || rf[0] == GOLDEN_FRAME;
+
+        symbol_with_update!(self, w, b2 as u32, &mut self.fc.single_ref_cdfs[b2_ctx][2]);
+        if !b2 {
+          let b3_ctx = self.get_pred_ctx_last_or_last2(bo);
+          let b3 = rf[0] != LAST_FRAME;
 
-        symbol_with_update!(self, w, b4 as u32, &mut self.fc.single_ref_cdfs[b4_ctx][4]);
+          symbol_with_update!(self, w, b3 as u32, &mut self.fc.single_ref_cdfs[b3_ctx][3]);
+        } else {
+          let b4_ctx = self.get_pred_ctx_last3_or_gold(bo);
+          let b4 = rf[0] != LAST3_FRAME;
+
+          symbol_with_update!(self, w, b4 as u32, &mut self.fc.single_ref_cdfs[b4_ctx][4]);
+        }
       }
     }
   }
 
+  /// Codes the compound inter prediction `mode`, using the context bits packed in `ctx`.
+  pub fn write_compound_mode(&mut self, w: &mut dyn Writer, mode: PredictionMode, ctx: usize) {
+    let new_bits = ctx & NEWMV_CTX_MASK;
+    let ref_bits = (ctx >> REFMV_OFFSET) & REFMV_CTX_MASK;
+
+    // Fold the (refmv, newmv) context pair into a single index into compound_mode_cdf.
+    let cdf_ctx = match ref_bits {
+      0 | 1 => new_bits.min(1),
+      2 | 3 => (new_bits + 1).min(4),
+      _ => (new_bits.max(1) + 3).min(7),
+    };
+
+    // The coded symbol is the mode's offset from NEAREST_NEARESTMV.
+    let base = PredictionMode::NEAREST_NEARESTMV;
+    assert!(mode >= base);
+    let symbol = mode as u32 - base as u32;
+    symbol_with_update!(self, w, symbol, &mut self.fc.compound_mode_cdf[cdf_ctx]);
+  }
+
   pub fn write_inter_mode(&mut self, w: &mut dyn Writer, mode: PredictionMode, ctx: usize) {
     let newmv_ctx = ctx & NEWMV_CTX_MASK;
     symbol_with_update!(self, w, (mode != PredictionMode::NEWMV) as u32, &mut self.fc.newmv_cdf[newmv_ctx]);
diff --git a/src/encoder.rs b/src/encoder.rs
index ac098bae..64b16c19 100644
--- a/src/encoder.rs
+++ b/src/encoder.rs
@@ -218,7 +218,7 @@ impl Sequence {
             frame_id_length: 0,
             delta_frame_id_length: 0,
             use_128x128_superblock: false,
-            order_hint_bits_minus_1: 0,
+            order_hint_bits_minus_1: 3,
             force_screen_content_tools: 0,
             force_integer_mv: 2,
             still_picture: false,
@@ -252,6 +252,59 @@ impl Sequence {
         let m = 1 << self.order_hint_bits_minus_1;
         (diff & (m - 1)) - (diff & m)
     }
+
+    /// Returns whether skip mode may be used for the frame described by `fi`.
+    /// Requires a non-intra-only frame, `reference_select` and order hints, plus either a
+    /// forward/backward reference pair or a second forward reference preceding the chosen one.
+    pub fn get_skip_mode_allowed(&self, fi: &FrameInvariants, reference_select: bool) -> bool {
+      if fi.intra_only || !reference_select || !self.enable_order_hint {
+        false
+      } else {
+        let mut forward_idx: isize = -1;
+        let mut backward_idx: isize = -1;
+        let mut forward_hint = 0;
+        let mut backward_hint = 0;
+        for i in 0..INTER_REFS_PER_FRAME {
+          if let Some(ref rec) = fi.rec_buffer.frames[fi.ref_frames[i] as usize] {
+            let ref_hint = rec.order_hint;
+            if self.get_relative_dist(ref_hint, fi.order_hint) < 0 {
+              // Forward reference: keep the candidate whose hint is ahead of the current pick.
+              if forward_idx < 0 || self.get_relative_dist(ref_hint, forward_hint) > 0 {
+                forward_idx = i as isize;
+                forward_hint = ref_hint;
+              }
+            } else if self.get_relative_dist(ref_hint, fi.order_hint) > 0 {
+              // Backward reference: keep the nearest one, hence `< 0` (as in the AV1 spec).
+              if backward_idx < 0 || self.get_relative_dist(ref_hint, backward_hint) < 0 {
+                backward_idx = i as isize;
+                backward_hint = ref_hint;
+              }
+            }
+          }
+        }
+        if forward_idx < 0 {
+          false
+        } else if backward_idx >= 0 {
+          // set skip_mode_frame
+          true
+        } else {
+          let mut second_forward_idx: isize = -1;
+          let mut second_forward_hint = 0;
+          for i in 0..INTER_REFS_PER_FRAME {
+            if let Some(ref rec) = fi.rec_buffer.frames[fi.ref_frames[i] as usize] {
+              let ref_hint = rec.order_hint;
+              if self.get_relative_dist(ref_hint, forward_hint) < 0
+                && (second_forward_idx < 0 || self.get_relative_dist(ref_hint, second_forward_hint) > 0) {
+                second_forward_idx = i as isize;
+                second_forward_hint = ref_hint;
+              }
+            }
+          }
+          // set skip_mode_frame
+          second_forward_idx >= 0
+        }
+      }
+    }
 }
 
 use std::sync::Arc;
@@ -368,6 +421,7 @@ pub struct FrameInvariants {
     pub ref_frame_sign_bias: [bool; INTER_REFS_PER_FRAME],
     pub rec_buffer: ReferenceFramesSet,
     pub base_q_idx: u8,
+    pub me_range_scale: u8,
 }
 
 impl FrameInvariants {
@@ -436,6 +490,7 @@ impl FrameInvariants {
             ref_frame_sign_bias: [false; INTER_REFS_PER_FRAME],
             rec_buffer: ReferenceFramesSet::new(),
             base_q_idx: config.quantizer as u8,
+            me_range_scale: 1,
         }
     }
 
@@ -939,10 +994,8 @@ impl<W: io::Write> UncompressedHeader for BitWriter<W, BigEndian> {
         self.write_bit(reference_select)?;
       }
 
-      let skip_mode_allowed =
-        !(fi.intra_only  || !reference_select || !seq.enable_order_hint);
+      let skip_mode_allowed = seq.get_skip_mode_allowed(fi, reference_select);
       if skip_mode_allowed {
-        unimplemented!();
         self.write_bit(false)?; // skip_mode_present
       }
 
@@ -1268,7 +1321,7 @@ pub fn encode_tx_block(
 }
 
 pub fn motion_compensate(fi: &FrameInvariants, fs: &mut FrameState, cw: &mut ContextWriter,
-                         luma_mode: PredictionMode, ref_frame: usize, mv: MotionVector,
+                         luma_mode: PredictionMode, ref_frames: &[usize; 2], mvs: &[MotionVector; 2],
                          bsize: BlockSize, bo: &BlockOffset, bit_depth: usize,
                          luma_only: bool) {
   if luma_mode.is_intra() { return; }
@@ -1293,27 +1346,27 @@ pub fn motion_compensate(fi: &FrameInvariants, fs: &mut FrameState, cw: &mut Con
 
       if some_use_intra {
         luma_mode.predict_inter(fi, p, &po, &mut rec.mut_slice(&po), plane_bsize.width(),
-        plane_bsize.height(), ref_frame, &mv, bit_depth);
+          plane_bsize.height(), ref_frames, &mvs, bit_depth);
       } else {
         assert!(xdec == 1 && ydec == 1);
         // TODO: these are only valid for 4:2:0
-        let mv0 = &cw.bc.at(&bo.with_offset(-1,-1)).mv[0];
-        let rf0 = cw.bc.at(&bo.with_offset(-1,-1)).ref_frames[0];
-        let mv1 = &cw.bc.at(&bo.with_offset(0,-1)).mv[0];
-        let rf1 = cw.bc.at(&bo.with_offset(0,-1)).ref_frames[0];
+        let mv0 = &cw.bc.at(&bo.with_offset(-1,-1)).mv;
+        let rf0 = &cw.bc.at(&bo.with_offset(-1,-1)).ref_frames;
+        let mv1 = &cw.bc.at(&bo.with_offset(0,-1)).mv;
+        let rf1 = &cw.bc.at(&bo.with_offset(0,-1)).ref_frames;
         let po1 = PlaneOffset { x: po.x+2, y: po.y };
-        let mv2 = &cw.bc.at(&bo.with_offset(-1,0)).mv[0];
-        let rf2 = cw.bc.at(&bo.with_offset(-1,0)).ref_frames[0];
+        let mv2 = &cw.bc.at(&bo.with_offset(-1,0)).mv;
+        let rf2 = &cw.bc.at(&bo.with_offset(-1,0)).ref_frames;
         let po2 = PlaneOffset { x: po.x, y: po.y+2 };
         let po3 = PlaneOffset { x: po.x+2, y: po.y+2 };
         luma_mode.predict_inter(fi, p, &po, &mut rec.mut_slice(&po), 2, 2, rf0, mv0, bit_depth);
         luma_mode.predict_inter(fi, p, &po1, &mut rec.mut_slice(&po1), 2, 2, rf1, mv1, bit_depth);
         luma_mode.predict_inter(fi, p, &po2, &mut rec.mut_slice(&po2), 2, 2, rf2, mv2, bit_depth);
-        luma_mode.predict_inter(fi, p, &po3, &mut rec.mut_slice(&po3), 2, 2, ref_frame, &mv, bit_depth);
+        luma_mode.predict_inter(fi, p, &po3, &mut rec.mut_slice(&po3), 2, 2, ref_frames, mvs, bit_depth);
       }
     } else {
       luma_mode.predict_inter(fi, p, &po, &mut rec.mut_slice(&po), plane_bsize.width(),
-      plane_bsize.height(), ref_frame, &mv, bit_depth);
+        plane_bsize.height(), ref_frames, &mvs, bit_depth);
     }
   }
 }
@@ -1332,7 +1385,7 @@ pub fn encode_block_a(seq: &Sequence,
 pub fn encode_block_b(seq: &Sequence, fi: &FrameInvariants, fs: &mut FrameState,
                  cw: &mut ContextWriter, w: &mut dyn Writer,
                  luma_mode: PredictionMode, chroma_mode: PredictionMode,
-                 ref_frame: usize, mv: MotionVector,
+                 ref_frames: &[usize; 2], mvs: &[MotionVector; 2],
                  bsize: BlockSize, bo: &BlockOffset, skip: bool, bit_depth: usize,
                  cfl: CFLParams, tx_size: TxSize, tx_type: TxType,
                  mode_context: usize, mv_stack: &Vec<CandidateMV>) {
@@ -1349,8 +1402,8 @@ pub fn encode_block_b(seq: &Sequence, fi: &FrameInvariants, fs: &mut FrameState,
     }
     cw.bc.set_block_size(bo, bsize);
     cw.bc.set_mode(bo, bsize, luma_mode);
-    cw.bc.set_ref_frame(bo, bsize, ref_frame);
-    cw.bc.set_motion_vector(bo, bsize, mv);
+    cw.bc.set_ref_frames(bo, bsize, ref_frames);
+    cw.bc.set_motion_vectors(bo, bsize, mvs);
 
     //write_q_deltas();
     if cw.bc.code_deltas && fs.deblock.block_deltas_enabled && (bsize < sb_size || !skip) {
@@ -1362,43 +1415,57 @@ pub fn encode_block_b(seq: &Sequence, fi: &FrameInvariants, fs: &mut FrameState,
         cw.write_is_inter(w, bo, is_inter);
         if is_inter {
             cw.fill_neighbours_ref_counts(bo);
-            cw.write_ref_frames(w, bo);
+            cw.write_ref_frames(w, fi, bo);
 
-            //let mode_context = if bo.x == 0 && bo.y == 0 { 0 } else if bo.x ==0 || bo.y == 0 { 51 } else { 85 };
             // NOTE: Until rav1e supports other inter modes than GLOBALMV
-            cw.write_inter_mode(w, luma_mode, mode_context);
+            if luma_mode >= PredictionMode::NEAREST_NEARESTMV {
+                cw.write_compound_mode(w, luma_mode, mode_context);
+            } else {
+                cw.write_inter_mode(w, luma_mode, mode_context);
+            }
+
+            let ref_mv_idx = 0;
+            let num_mv_found = mv_stack.len();
 
             if luma_mode == PredictionMode::NEWMV || luma_mode == PredictionMode::NEW_NEWMV {
-              let ref_mv_idx = 0;
-              let num_mv_found = mv_stack.len();
+              if luma_mode == PredictionMode::NEW_NEWMV { assert!(num_mv_found >= 2); }
               for idx in 0..2 {
                 if num_mv_found > idx + 1 {
                   let drl_mode = ref_mv_idx > idx;
                   let ctx: usize = (mv_stack[idx].weight < REF_CAT_LEVEL) as usize
                     + (mv_stack[idx + 1].weight < REF_CAT_LEVEL) as usize;
-
                   cw.write_drl_mode(w, drl_mode, ctx);
                   if !drl_mode { break; }
                 }
               }
+            }
 
-              let ref_mv = if num_mv_found > 0 {
-                mv_stack[ref_mv_idx].this_mv
-              } else {
-                MotionVector{ row: 0, col: 0 }
-              };
+            let ref_mvs = if num_mv_found > 0 {
+              [mv_stack[ref_mv_idx].this_mv, mv_stack[ref_mv_idx].comp_mv]
+            } else {
+              [MotionVector{ row: 0, col: 0 }; 2]
+            };
 
-              let mv_precision = if fi.force_integer_mv != 0 {
-                MvSubpelPrecision::MV_SUBPEL_NONE
-              } else if fi.allow_high_precision_mv {
-                MvSubpelPrecision::MV_SUBPEL_HIGH_PRECISION
-              } else {
-                MvSubpelPrecision::MV_SUBPEL_LOW_PRECISION
-              };
-              cw.write_mv(w, &mv, &ref_mv, mv_precision);
-            } else if luma_mode >= PredictionMode::NEAR0MV && luma_mode <= PredictionMode::NEAR2MV {
+            let mv_precision = if fi.force_integer_mv != 0 {
+              MvSubpelPrecision::MV_SUBPEL_NONE
+            } else if fi.allow_high_precision_mv {
+              MvSubpelPrecision::MV_SUBPEL_HIGH_PRECISION
+            } else {
+              MvSubpelPrecision::MV_SUBPEL_LOW_PRECISION
+            };
+
+            if luma_mode == PredictionMode::NEWMV ||
+                luma_mode == PredictionMode::NEW_NEWMV ||
+                luma_mode == PredictionMode::NEW_NEARESTMV {
+              cw.write_mv(w, &mvs[0], &ref_mvs[0], mv_precision);
+            }
+            if luma_mode == PredictionMode::NEW_NEWMV ||
+                luma_mode == PredictionMode::NEAREST_NEWMV {
+              cw.write_mv(w, &mvs[1], &ref_mvs[1], mv_precision);
+            }
+
+            if luma_mode >= PredictionMode::NEAR0MV && luma_mode <= PredictionMode::NEAR2MV {
               let ref_mv_idx = luma_mode as usize - PredictionMode::NEAR0MV as usize + 1;
-              let num_mv_found = mv_stack.len();
               if luma_mode != PredictionMode::NEAR0MV { assert!(num_mv_found > ref_mv_idx); }
 
               for idx in 1..3 {
@@ -1412,19 +1479,19 @@ pub fn encode_block_b(seq: &Sequence, fi: &FrameInvariants, fs: &mut FrameState,
                 }
               }
               if mv_stack.len() > 1 {
-                assert!(mv_stack[ref_mv_idx].this_mv.row == mv.row);
-                assert!(mv_stack[ref_mv_idx].this_mv.col == mv.col);
+                assert!(mv_stack[ref_mv_idx].this_mv.row == mvs[0].row);
+                assert!(mv_stack[ref_mv_idx].this_mv.col == mvs[0].col);
               } else {
-                assert!(0 == mv.row);
-                assert!(0 == mv.col);
+                assert!(0 == mvs[0].row);
+                assert!(0 == mvs[0].col);
               }
             } else if luma_mode == PredictionMode::NEARESTMV {
               if mv_stack.len() > 0 {
-                assert!(mv_stack[0].this_mv.row == mv.row);
-                assert!(mv_stack[0].this_mv.col == mv.col);
+                assert!(mv_stack[0].this_mv.row == mvs[0].row);
+                assert!(mv_stack[0].this_mv.col == mvs[0].col);
               } else {
-                assert!(0 == mv.row);
-                assert!(0 == mv.col);
+                assert!(0 == mvs[0].row);
+                assert!(0 == mvs[0].col);
               }
             }
         } else {
@@ -1454,7 +1521,7 @@ pub fn encode_block_b(seq: &Sequence, fi: &FrameInvariants, fs: &mut FrameState,
         }
     }
 
-    motion_compensate(fi, fs, cw, luma_mode, ref_frame, mv, bsize, bo, bit_depth, false);
+    motion_compensate(fi, fs, cw, luma_mode, ref_frames, mvs, bsize, bo, bit_depth, false);
 
     if is_inter {
       write_tx_tree(fi, fs, cw, w, luma_mode, bo, bsize, tx_size, tx_type, skip, bit_depth, false);
@@ -1670,8 +1737,8 @@ fn encode_partition_bottomup(seq: &Sequence, fi: &FrameInvariants, fs: &mut Fram
         pred_mode_luma: PredictionMode::DC_PRED,
         pred_mode_chroma: PredictionMode::DC_PRED,
         pred_cfl_params: CFLParams::new(),
-        ref_frame: INTRA_FRAME,
-        mv: MotionVector { row: 0, col: 0},
+        ref_frames: [INTRA_FRAME, NONE_FRAME],
+        mvs: [MotionVector { row: 0, col: 0}; 2],
         skip: false,
         tx_size: TxSize::TX_4X4,
         tx_type: TxType::DCT_DCT,
@@ -1699,27 +1766,29 @@ fn encode_partition_bottomup(seq: &Sequence, fi: &FrameInvariants, fs: &mut Fram
         let mode_decision = rdo_mode_decision(seq, fi, fs, cw, bsize, bo, pmv).part_modes[0].clone();
         let (mode_luma, mode_chroma) = (mode_decision.pred_mode_luma, mode_decision.pred_mode_chroma);
         let cfl = mode_decision.pred_cfl_params;
-        let ref_frame = mode_decision.ref_frame;
-        let mv = mode_decision.mv;
-        let skip = mode_decision.skip;
-        let mut cdef_coded = cw.bc.cdef_coded;
-        let (tx_size, tx_type) = (mode_decision.tx_size, mode_decision.tx_type);
-
-        debug_assert!((tx_size, tx_type) ==
-            rdo_tx_size_type(seq, fi, fs, cw, bsize, bo, mode_luma, ref_frame, mv, skip));
-        cw.bc.set_tx_size(bo, tx_size);
-
-        rd_cost = mode_decision.rd_cost + cost;
-
-        let mut mv_stack = Vec::new();
-        let mode_context = cw.find_mvrefs(bo, ref_frame, &mut mv_stack, bsize, false, fi);
-
-        cdef_coded = encode_block_a(seq, cw, if cdef_coded  {w_post_cdef} else {w_pre_cdef},
-                                   bsize, bo, skip);
-        encode_block_b(seq, fi, fs, cw, if cdef_coded  {w_post_cdef} else {w_pre_cdef},
-                       mode_luma, mode_chroma, ref_frame, mv, bsize, bo, skip, seq.bit_depth, cfl,
-                       tx_size, tx_type, mode_context, &mv_stack);
-
+        {
+          let ref_frames = &mode_decision.ref_frames;
+          let mvs = &mode_decision.mvs;
+          let skip = mode_decision.skip;
+          let mut cdef_coded = cw.bc.cdef_coded;
+          let (tx_size, tx_type) = (mode_decision.tx_size, mode_decision.tx_type);
+
+          debug_assert!((tx_size, tx_type) ==
+              rdo_tx_size_type(seq, fi, fs, cw, bsize, bo, mode_luma, ref_frames, mvs, skip));
+          cw.bc.set_tx_size(bo, tx_size);
+
+          rd_cost = mode_decision.rd_cost + cost;
+
+          let mut mv_stack = Vec::new();
+          let is_compound = ref_frames[1] != NONE_FRAME;
+          let mode_context = cw.find_mvrefs(bo, ref_frames, &mut mv_stack, bsize, false, fi, is_compound);
+
+          cdef_coded = encode_block_a(seq, cw, if cdef_coded  {w_post_cdef} else {w_pre_cdef},
+                                     bsize, bo, skip);
+          encode_block_b(seq, fi, fs, cw, if cdef_coded  {w_post_cdef} else {w_pre_cdef},
+                         mode_luma, mode_chroma, ref_frames, mvs, bsize, bo, skip, seq.bit_depth, cfl,
+                         tx_size, tx_type, mode_context, &mv_stack);
+        }
         best_decision = mode_decision;
     }
 
@@ -1759,7 +1828,7 @@ fn encode_partition_bottomup(seq: &Sequence, fi: &FrameInvariants, fs: &mut Fram
                     w_post_cdef,
                     subsize,
                     offset,
-                    &best_decision.mv
+                    &best_decision.mvs[0]
                 )
             }).sum::<f64>();
 
@@ -1781,23 +1850,24 @@ fn encode_partition_bottomup(seq: &Sequence, fi: &FrameInvariants, fs: &mut Fram
             // FIXME: redundant block re-encode
             let (mode_luma, mode_chroma) = (best_decision.pred_mode_luma, best_decision.pred_mode_chroma);
             let cfl = best_decision.pred_cfl_params;
-            let ref_frame = best_decision.ref_frame;
-            let mv = best_decision.mv;
+            let ref_frames = &best_decision.ref_frames;
+            let mvs = &best_decision.mvs;
             let skip = best_decision.skip;
             let mut cdef_coded = cw.bc.cdef_coded;
             let (tx_size, tx_type) = (best_decision.tx_size, best_decision.tx_type);
 
             debug_assert!((tx_size, tx_type) ==
-                rdo_tx_size_type(seq, fi, fs, cw, bsize, bo, mode_luma, ref_frame, mv, skip));
+                rdo_tx_size_type(seq, fi, fs, cw, bsize, bo, mode_luma, ref_frames, mvs, skip));
             cw.bc.set_tx_size(bo, tx_size);
 
             let mut mv_stack = Vec::new();
-            let mode_context = cw.find_mvrefs(bo, ref_frame, &mut mv_stack, bsize, false, fi);
+            let is_compound = ref_frames[1] != NONE_FRAME;
+            let mode_context = cw.find_mvrefs(bo, ref_frames, &mut mv_stack, bsize, false, fi, is_compound);
 
             cdef_coded = encode_block_a(seq, cw, if cdef_coded {w_post_cdef} else {w_pre_cdef},
                                        bsize, bo, skip);
             encode_block_b(seq, fi, fs, cw, if cdef_coded {w_post_cdef} else {w_pre_cdef},
-                          mode_luma, mode_chroma, ref_frame, mv, bsize, bo, skip, seq.bit_depth, cfl,
+                          mode_luma, mode_chroma, ref_frames, mvs, bsize, bo, skip, seq.bit_depth, cfl,
                           tx_size, tx_type, mode_context, &mv_stack);
         }
     }
@@ -1874,42 +1944,64 @@ fn encode_partition_topdown(seq: &Sequence, fi: &FrameInvariants, fs: &mut Frame
 
             let cfl = part_decision.pred_cfl_params;
             let skip = part_decision.skip;
-            let ref_frame = part_decision.ref_frame;
-            let mv = part_decision.mv;
+            let ref_frames = &part_decision.ref_frames;
+            let mvs = &part_decision.mvs;
             let mut cdef_coded = cw.bc.cdef_coded;
 
             // NOTE: Cannot avoid calling rdo_tx_size_type() here again,
             // because, with top-down partition RDO, the neighnoring contexts
             // of current partition can change, i.e. neighboring partitions can split down more.
             let (tx_size, tx_type) =
-                rdo_tx_size_type(seq, fi, fs, cw, bsize, bo, mode_luma, ref_frame, mv, skip);
+                rdo_tx_size_type(seq, fi, fs, cw, bsize, bo, mode_luma, ref_frames, mvs, skip);
 
             let mut mv_stack = Vec::new();
-            let mode_context = cw.find_mvrefs(bo, ref_frame, &mut mv_stack, bsize, false, fi);
-
-            if !mode_luma.is_intra() && mode_luma != PredictionMode::GLOBALMV {
-              mode_luma = PredictionMode::NEWMV;
-              for (c, m) in mv_stack.iter().take(4)
-                .zip([PredictionMode::NEARESTMV, PredictionMode::NEAR0MV,
-                      PredictionMode::NEAR1MV, PredictionMode::NEAR2MV].iter()) {
-                if c.this_mv.row == mv.row && c.this_mv.col == mv.col {
-                  mode_luma = *m;
+            let is_compound = ref_frames[1] != NONE_FRAME;
+            let mode_context = cw.find_mvrefs(bo, ref_frames, &mut mv_stack, bsize, false, fi, is_compound);
+
+            // TODO proper remap when is_compound is true
+            if !mode_luma.is_intra() {
+                if is_compound && mode_luma != PredictionMode::GLOBAL_GLOBALMV {
+                    let match0 = mv_stack[0].this_mv.row == mvs[0].row && mv_stack[0].this_mv.col == mvs[0].col;
+                    let match1 = mv_stack[0].comp_mv.row == mvs[1].row && mv_stack[0].comp_mv.col == mvs[1].col;
+
+                    mode_luma = if match0 && match1 {
+                        PredictionMode::NEAREST_NEARESTMV
+                    } else if match0 {
+                        PredictionMode::NEAREST_NEWMV
+                    } else if match1 {
+                        PredictionMode::NEW_NEARESTMV
+                    } else {
+                        PredictionMode::NEW_NEWMV
+                    };
+                    if mode_luma != PredictionMode::NEAREST_NEARESTMV && mvs[0].row == 0 && mvs[0].col == 0 &&
+                        mvs[1].row == 0 && mvs[1].col == 0 {
+                        mode_luma = PredictionMode::GLOBAL_GLOBALMV;
+                    }
+                    mode_chroma = mode_luma;
+                } else if !is_compound && mode_luma != PredictionMode::GLOBALMV {
+                    mode_luma = PredictionMode::NEWMV;
+                    for (c, m) in mv_stack.iter().take(4)
+                    .zip([PredictionMode::NEARESTMV, PredictionMode::NEAR0MV,
+                            PredictionMode::NEAR1MV, PredictionMode::NEAR2MV].iter()) {
+                        if c.this_mv.row == mvs[0].row && c.this_mv.col == mvs[0].col {
+                            mode_luma = *m;
+                        }
+                    }
+                    if mode_luma == PredictionMode::NEWMV && mvs[0].row == 0 && mvs[0].col == 0 {
+                        mode_luma =
+                            if mv_stack.len() == 0 { PredictionMode::NEARESTMV }
+                            else if mv_stack.len() == 1 { PredictionMode::NEAR0MV }
+                            else { PredictionMode::GLOBALMV };
+                    }
+                    mode_chroma = mode_luma;
                 }
-              }
-              if mode_luma == PredictionMode::NEWMV && mv.row == 0 && mv.col == 0 {
-                mode_luma =
-                  if mv_stack.len() == 0 { PredictionMode::NEARESTMV }
-                  else if mv_stack.len() == 1 { PredictionMode::NEAR0MV }
-                  else { PredictionMode::GLOBALMV };
-              }
-              mode_chroma = mode_luma;
             }
 
             // FIXME: every final block that has gone through the RDO decision process is encoded twice
             cdef_coded = encode_block_a(seq, cw, if cdef_coded  {w_post_cdef} else {w_pre_cdef},
                          bsize, bo, skip);
             encode_block_b(seq, fi, fs, cw, if cdef_coded  {w_post_cdef} else {w_pre_cdef},
-                          mode_luma, mode_chroma, ref_frame, mv, bsize, bo, skip, seq.bit_depth, cfl,
+                          mode_luma, mode_chroma, ref_frames, mvs, bsize, bo, skip, seq.bit_depth, cfl,
                           tx_size, tx_type, mode_context, &mv_stack);
         },
         PartitionType::PARTITION_SPLIT => {
diff --git a/src/entropymode.rs b/src/entropymode.rs
index 020c3feb..d4698e9d 100644
--- a/src/entropymode.rs
+++ b/src/entropymode.rs
@@ -899,7 +899,7 @@ pub static default_refmv_cdf: [[u16; cdf_size!(2)]; REFMV_MODE_CONTEXTS] = [
 pub static default_drl_cdf: [[u16; cdf_size!(2)]; DRL_MODE_CONTEXTS] =
   [cdf!(13104), cdf!(24560), cdf!(18945)];
 
-pub static default_inter_compound_mode_cdf: [[u16;
+pub static default_compound_mode_cdf: [[u16;
   cdf_size!(INTER_COMPOUND_MODES)];
   INTER_MODE_CONTEXTS] = [
   cdf!(7760, 13823, 15808, 17641, 19156, 20666, 26891),
@@ -1125,7 +1125,7 @@ pub static default_obmc_cdf: [[u16; cdf_size!(2)];
 pub static default_intra_inter_cdf: [[u16; cdf_size!(2)];
   INTRA_INTER_CONTEXTS] = [cdf!(806), cdf!(16662), cdf!(20186), cdf!(26538)];
 
-pub static default_comp_inter_cdf: [[u16; cdf_size!(2)]; COMP_INTER_CONTEXTS] =
+pub static default_comp_mode_cdf: [[u16; cdf_size!(2)]; COMP_INTER_CONTEXTS] =
   [cdf!(26828), cdf!(24035), cdf!(12031), cdf!(10640), cdf!(2901)];
 
 pub static default_comp_ref_type_cdf: [[u16; cdf_size!(2)];
diff --git a/src/me.rs b/src/me.rs
index c95b01e3..f04d1c5a 100644
--- a/src/me.rs
+++ b/src/me.rs
@@ -45,7 +45,7 @@ pub fn motion_estimation(
         x: (bo.x as isize) << BLOCK_TO_PLANE_SHIFT,
         y: (bo.y as isize) << BLOCK_TO_PLANE_SHIFT
       };
-      let range = 32 as isize;
+      let range = 32 * fi.me_range_scale as isize;
       let blk_w = bsize.width();
       let blk_h = bsize.height();
       let border_w = 128 + blk_w as isize * 8;
@@ -113,7 +113,8 @@ pub fn motion_estimation(
                 &mut tmp_plane.mut_slice(&PlaneOffset { x: 0, y: 0 });
 
               mode.predict_inter(
-                fi, 0, &po, tmp_slice, blk_w, blk_h, ref_frame, &cand_mv, 8,
+                fi, 0, &po, tmp_slice, blk_w, blk_h, &[ref_frame, NONE_FRAME],
+                &[cand_mv, MotionVector{ row: 0, col: 0 }], 8,
               );
             }
 
diff --git a/src/partition.rs b/src/partition.rs
index c4db29f4..3338c4d8 100644
--- a/src/partition.rs
+++ b/src/partition.rs
@@ -965,134 +965,175 @@ impl PredictionMode {
   pub fn predict_inter<'a>(
     self, fi: &FrameInvariants, p: usize, po: &PlaneOffset,
     dst: &'a mut PlaneMutSlice<'a>, width: usize, height: usize,
-    ref_frame: usize, mv: &MotionVector, bit_depth: usize
+    ref_frames: &[usize; 2], mvs: &[MotionVector; 2], bit_depth: usize
   ) {
     assert!(!self.is_intra());
 
-    match fi.rec_buffer.frames[fi.ref_frames[ref_frame - LAST_FRAME] as usize] {
-      Some(ref rec) => {
-        let rec_cfg = &rec.frame.planes[p].cfg;
-        let shift_row = 3 + rec_cfg.ydec;
-        let shift_col = 3 + rec_cfg.xdec;
-        let row_offset = mv.row as i32 >> shift_row;
-        let col_offset = mv.col as i32 >> shift_col;
-        let row_frac =
-          (mv.row as i32 - (row_offset << shift_row)) << (4 - shift_row);
-        let col_frac =
-          (mv.col as i32 - (col_offset << shift_col)) << (4 - shift_col);
-        let ref_stride = rec_cfg.stride;
-
-        let stride = dst.plane.cfg.stride;
-        let slice = dst.as_mut_slice();
-
-        let max_sample_val = ((1 << bit_depth) - 1) as i32;
-        let y_filter_idx = if height <= 4 { 4 } else { 0 };
-        let x_filter_idx = if width <= 4 { 4 } else { 0 };
-        let shifts = {
-          let shift_offset = if bit_depth == 12 { 2 } else { 0 };
-          (3 + shift_offset, 11 - shift_offset)
-        };
-        let round_shift =
-          |n, shift| -> i32 { (n + (1 << (shift - 1))) >> shift };
-
-        match (col_frac, row_frac) {
-          (0, 0) => {
-            let qo = PlaneOffset {
-              x: po.x + col_offset as isize,
-              y: po.y + row_offset as isize
-            };
-            let ps = rec.frame.planes[p].slice(&qo);
-            let s = ps.as_slice_clamped();
-            for r in 0..height {
-              for c in 0..width {
-                let output_index = r * stride + c;
-                slice[output_index] = s[r * ref_stride + c];
-              }
-            }
-          }
-          (0, _) => {
-            let qo = PlaneOffset {
-              x: po.x + col_offset as isize,
-              y: po.y + row_offset as isize - 3
-            };
-            let ps = rec.frame.planes[p].slice(&qo);
-            let s = ps.as_slice_clamped();
-            for r in 0..height {
-              for c in 0..width {
-                let mut sum: i32 = 0;
-                for k in 0..8 {
-                  sum += s[(r + k) * ref_stride + c] as i32
-                    * SUBPEL_FILTERS[y_filter_idx][row_frac as usize][k];
-                }
-                let output_index = r * stride + c;
-                let val = round_shift(sum, 7).max(0).min(max_sample_val);
-                slice[output_index] = val as u16;
-              }
-            }
-          }
-          (_, 0) => {
-            let qo = PlaneOffset {
-              x: po.x + col_offset as isize - 3,
-              y: po.y + row_offset as isize
-            };
-            let ps = rec.frame.planes[p].slice(&qo);
-            let s = ps.as_slice_clamped();
-            for r in 0..height {
-              for c in 0..width {
-                let mut sum: i32 = 0;
-                for k in 0..8 {
-                  sum += s[r * ref_stride + (c + k)] as i32
-                    * SUBPEL_FILTERS[x_filter_idx][col_frac as usize][k];
+    let is_compound = ref_frames[1] > INTRA_FRAME && ref_frames[1] != NONE_FRAME;
+
+    let stride = dst.plane.cfg.stride;
+    let slice = dst.as_mut_slice();
+
+    for i in 0..(1 + is_compound as usize) {
+      match fi.rec_buffer.frames[fi.ref_frames[ref_frames[i] - LAST_FRAME] as usize] {
+        Some(ref rec) => {
+          let rec_cfg = &rec.frame.planes[p].cfg;
+          let shift_row = 3 + rec_cfg.ydec;
+          let shift_col = 3 + rec_cfg.xdec;
+          let row_offset = mvs[i].row as i32 >> shift_row;
+          let col_offset = mvs[i].col as i32 >> shift_col;
+          let row_frac =
+            (mvs[i].row as i32 - (row_offset << shift_row)) << (4 - shift_row);
+          let col_frac =
+            (mvs[i].col as i32 - (col_offset << shift_col)) << (4 - shift_col);
+          let ref_stride = rec_cfg.stride;
+
+          let max_sample_val = ((1 << bit_depth) - 1) as i32;
+          let y_filter_idx = if height <= 4 { 4 } else { 0 };
+          let x_filter_idx = if width <= 4 { 4 } else { 0 };
+          let shifts = {
+            let shift_offset = if bit_depth == 12 { 2 } else { 0 };
+            let inter_round0 = 3 + shift_offset;
+            let inter_round1 = if is_compound { 7 } else { 11 } - shift_offset;
+            (inter_round0, inter_round1, 14 - inter_round0 - inter_round1)
+          };
+          let round_shift =
+            |n, shift| -> i32 { (n + (1 << (shift - 1))) >> shift };
+
+          match (col_frac, row_frac) {
+            (0, 0) => {
+              let qo = PlaneOffset {
+                x: po.x + col_offset as isize,
+                y: po.y + row_offset as isize
+              };
+              let ps = rec.frame.planes[p].slice(&qo);
+              let s = ps.as_slice_clamped();
+              for r in 0..height {
+                for c in 0..width {
+                  let output_index = r * stride + c;
+                  let mut val = s[r * ref_stride + c] as i32;
+                  if is_compound {
+                    val = val << shifts.2;
+                    if i == 1 {
+                      val = val + slice[output_index] as i32 - 32768;
+                      val = round_shift(val, shifts.2 + 1);
+                      val = val.max(0).min(max_sample_val);
+                    } else {
+                      val = val + 32768;
+                    }
+                  }
+                  slice[output_index] = val as u16;
                 }
-                let output_index = r * stride + c;
-                let val =
-                  round_shift(round_shift(sum, shifts.0), shifts.1 - 7)
-                    .max(0)
-                    .min(max_sample_val);
-                slice[output_index] = val as u16;
               }
             }
-          }
-          (_, _) => {
-            let mut intermediate = [0 as i16; 8 * (128 + 7)];
-
-            let qo = PlaneOffset {
-              x: po.x + col_offset as isize - 3,
-              y: po.y + row_offset as isize - 3
-            };
-            let ps = rec.frame.planes[p].slice(&qo);
-            let s = ps.as_slice_clamped();
-            for cg in (0..width).step_by(8) {
-              for r in 0..height + 7 {
-                for c in cg..(cg + 8).min(width) {
+            (0, _) => {
+              let qo = PlaneOffset {
+                x: po.x + col_offset as isize,
+                y: po.y + row_offset as isize - 3
+              };
+              let ps = rec.frame.planes[p].slice(&qo);
+              let s = ps.as_slice_clamped();
+              for r in 0..height {
+                for c in 0..width {
                   let mut sum: i32 = 0;
                   for k in 0..8 {
-                    sum += s[r * ref_stride + (c + k)] as i32 * SUBPEL_FILTERS
-                      [x_filter_idx][col_frac as usize][k];
+                    sum += s[(r + k) * ref_stride + c] as i32
+                      * SUBPEL_FILTERS[y_filter_idx][row_frac as usize][k];
+                  }
+                  let output_index = r * stride + c;
+                  let mut val = round_shift(sum, shifts.0 + shifts.1 - 7);
+                  if is_compound && i == 1 {
+                    val = val + slice[output_index] as i32 - 32768;
+                    val = round_shift(val, shifts.2 + 1);
+                    val = val.max(0).min(max_sample_val);
+                  } else if !is_compound {
+                    val = val.max(0).min(max_sample_val);
+                  } else {
+                    val = val + 32768;
                   }
-                  let val = round_shift(sum, shifts.0);
-                  intermediate[8 * r + (c - cg)] = val as i16;
+                  slice[output_index] = val as u16;
                 }
               }
-
+            }
+            (_, 0) => {
+              let qo = PlaneOffset {
+                x: po.x + col_offset as isize - 3,
+                y: po.y + row_offset as isize
+              };
+              let ps = rec.frame.planes[p].slice(&qo);
+              let s = ps.as_slice_clamped();
               for r in 0..height {
-                for c in cg..(cg + 8).min(width) {
+                for c in 0..width {
                   let mut sum: i32 = 0;
                   for k in 0..8 {
-                    sum += intermediate[8 * (r + k) + c - cg] as i32
-                      * SUBPEL_FILTERS[y_filter_idx][row_frac as usize][k];
+                    sum += s[r * ref_stride + (c + k)] as i32
+                      * SUBPEL_FILTERS[x_filter_idx][col_frac as usize][k];
                   }
                   let output_index = r * stride + c;
-                  let val =
-                    round_shift(sum, shifts.1).max(0).min(max_sample_val);
+                  let mut val = round_shift(round_shift(sum, shifts.0) << 7, shifts.1);
+                  if is_compound && i == 1 {
+                    val = val + slice[output_index] as i32 - 32768;
+                    val = round_shift(val, shifts.2 + 1);
+                    val = val.max(0).min(max_sample_val);
+                  } else if !is_compound {
+                    val = val.max(0).min(max_sample_val);
+                  } else {
+                    val = val + 32768;
+                  }
                   slice[output_index] = val as u16;
                 }
               }
             }
+            (_, _) => {
+              let mut intermediate = [0 as i16; 8 * (128 + 7)];
+
+              let qo = PlaneOffset {
+                x: po.x + col_offset as isize - 3,
+                y: po.y + row_offset as isize - 3
+              };
+              let ps = rec.frame.planes[p].slice(&qo);
+              let s = ps.as_slice_clamped();
+              for cg in (0..width).step_by(8) {
+                for r in 0..height + 7 {
+                  for c in cg..(cg + 8).min(width) {
+                    let mut sum: i32 = 0;
+                    for k in 0..8 {
+                      sum += s[r * ref_stride + (c + k)] as i32 * SUBPEL_FILTERS
+                        [x_filter_idx][col_frac as usize][k];
+                    }
+                    let val = round_shift(sum, shifts.0);
+                    intermediate[8 * r + (c - cg)] = val as i16;
+                  }
+                }
+
+                for r in 0..height {
+                  for c in cg..(cg + 8).min(width) {
+                    let mut sum: i32 = 0;
+                    for k in 0..8 {
+                      sum += intermediate[8 * (r + k) + c - cg] as i32
+                        * SUBPEL_FILTERS[y_filter_idx][row_frac as usize][k];
+                    }
+                    let output_index = r * stride + c;
+                    let mut val = round_shift(sum, shifts.1);
+                    if is_compound && i == 1 {
+                      val = val + slice[output_index] as i32 - 32768;
+                      val = round_shift(val, shifts.2 + 1);
+                      val = val.max(0).min(max_sample_val);
+                    } else if !is_compound {
+                      val = val.max(0).min(max_sample_val);
+                    } else {
+                      val = val + 32768;
+                    }
+
+                    slice[output_index] = val as u16;
+                  }
+                }
+              }
+            }
           }
         }
+        None => ()
       }
-      None => ()
     }
   }
 }
diff --git a/src/predict.rs b/src/predict.rs
index 5c7c9d08..f9971e49 100644
--- a/src/predict.rs
+++ b/src/predict.rs
@@ -50,6 +50,14 @@ pub static RAV1E_INTER_MODES_MINIMAL: &'static [PredictionMode] = &[
   PredictionMode::NEWMV
 ];
 
+pub static RAV1E_INTER_COMPOUND_MODES: &'static [PredictionMode] = &[
+  PredictionMode::GLOBAL_GLOBALMV,
+  PredictionMode::NEAREST_NEARESTMV,
+  PredictionMode::NEW_NEWMV,
+  PredictionMode::NEAREST_NEWMV,
+  PredictionMode::NEW_NEARESTMV
+];
+
 // Weights are quadratic from '1' to '1 / block_size', scaled by 2^sm_weight_log2_scale.
 const sm_weight_log2_scale: u8 = 8;
 
diff --git a/src/rdo.rs b/src/rdo.rs
index 46ea29d5..677f2d65 100644
--- a/src/rdo.rs
+++ b/src/rdo.rs
@@ -23,7 +23,7 @@ use motion_compensate;
 use partition::*;
 use plane::*;
 use cdef::*;
-use predict::{RAV1E_INTRA_MODES, RAV1E_INTRA_MODES_MINIMAL, RAV1E_INTER_MODES_MINIMAL};
+use predict::{RAV1E_INTRA_MODES, RAV1E_INTRA_MODES_MINIMAL, RAV1E_INTER_MODES_MINIMAL, RAV1E_INTER_COMPOUND_MODES};
 use quantize::dc_q;
 use std;
 use std::f64;
@@ -37,6 +37,7 @@ use FrameState;
 use FrameType;
 use Tune;
 use Sequence;
+use encoder::ReferenceMode;
 
 #[derive(Clone)]
 pub struct RDOOutput {
@@ -52,8 +53,8 @@ pub struct RDOPartitionOutput {
   pub pred_mode_luma: PredictionMode,
   pub pred_mode_chroma: PredictionMode,
   pub pred_cfl_params: CFLParams,
-  pub ref_frame: usize,
-  pub mv: MotionVector,
+  pub ref_frames: [usize; 2],
+  pub mvs: [MotionVector; 2],
   pub skip: bool,
   pub tx_size: TxSize,
   pub tx_type: TxType,
@@ -212,7 +213,7 @@ fn compute_rd_cost(
 pub fn rdo_tx_size_type(
   seq: &Sequence, fi: &FrameInvariants, fs: &mut FrameState,
   cw: &mut ContextWriter, bsize: BlockSize, bo: &BlockOffset,
-  luma_mode: PredictionMode, ref_frame: usize, mv: MotionVector, skip: bool
+  luma_mode: PredictionMode, ref_frames: &[usize; 2], mvs: &[MotionVector; 2], skip: bool
 ) -> (TxSize, TxType) {
   // these rules follow TX_MODE_LARGEST
   let tx_size = match bsize {
@@ -235,8 +236,8 @@ pub fn rdo_tx_size_type(
         fs,
         cw,
         luma_mode,
-        ref_frame,
-        mv,
+        ref_frames,
+        mvs,
         bsize,
         bo,
         tx_size,
@@ -256,8 +257,8 @@ struct EncodingSettings {
   cfl_params: CFLParams,
   skip: bool,
   rd: f64,
-  ref_frame: usize,
-  mv: MotionVector,
+  ref_frames: [usize; 2],
+  mvs: [MotionVector; 2],
   tx_size: TxSize,
   tx_type: TxType
 }
@@ -270,8 +271,8 @@ impl Default for EncodingSettings {
       cfl_params: CFLParams::new(),
       skip: false,
       rd: std::f64::MAX,
-      ref_frame: INTRA_FRAME,
-      mv: MotionVector { row: 0, col: 0 },
+      ref_frames: [INTRA_FRAME, NONE_FRAME],
+      mvs: [MotionVector { row: 0, col: 0 }; 2],
       tx_size: TxSize::TX_4X4,
       tx_type: TxType::DCT_DCT
     }
@@ -304,50 +305,81 @@ pub fn rdo_mode_decision(
     RAV1E_INTRA_MODES_MINIMAL
   };
 
-  let mut ref_frame_set = Vec::new();
+  let mut ref_frames_set = Vec::new();
   let mut ref_slot_set = Vec::new();
+  let mut mvs_from_me = Vec::new();
+  let mut fwdref = None;
+  let mut bwdref = None;
 
   if fi.frame_type == FrameType::INTER {
     for i in LAST_FRAME..NONE_FRAME {
       // Don't search LAST3 since it's used only for probs
       if i == LAST3_FRAME { continue; }
       if !ref_slot_set.contains(&fi.ref_frames[i - LAST_FRAME]) {
-        ref_frame_set.push(i);
+        if fwdref == None && i < BWDREF_FRAME {
+          fwdref = Some(ref_frames_set.len());
+        }
+        if bwdref == None && i >= BWDREF_FRAME {
+          bwdref = Some(ref_frames_set.len());
+        }
+        ref_frames_set.push([i, NONE_FRAME]);
         ref_slot_set.push(fi.ref_frames[i - LAST_FRAME]);
+        mvs_from_me.push([motion_estimation(fi, fs, bsize, bo, i, pmv), MotionVector { row: 0, col: 0 }]);
       }
     }
-    assert!(ref_frame_set.len() != 0);
+    assert!(ref_frames_set.len() != 0);
   }
 
   let mut mode_set: Vec<(PredictionMode, usize)> = Vec::new();
   let mut mv_stacks = Vec::new();
   let mut mode_contexts = Vec::new();
 
-  for (i, &ref_frame) in ref_frame_set.iter().enumerate() {
-    let mut mvs: Vec<CandidateMV> = Vec::new();
-    mode_contexts.push(cw.find_mvrefs(bo, ref_frame, &mut mvs, bsize, false, fi));
+  for (i, &ref_frames) in ref_frames_set.iter().enumerate() {
+    let mut mv_stack: Vec<CandidateMV> = Vec::new();
+    mode_contexts.push(cw.find_mvrefs(bo, &ref_frames, &mut mv_stack, bsize, false, fi, false));
 
     if fi.frame_type == FrameType::INTER {
       for &x in RAV1E_INTER_MODES_MINIMAL {
         mode_set.push((x, i));
       }
       if fi.config.speed <= 2 {
-        if mvs.len() >= 3 {
+        if mv_stack.len() >= 3 {
           mode_set.push((PredictionMode::NEAR1MV, i));
         }
-        if mvs.len() >= 4 {
+        if mv_stack.len() >= 4 {
           mode_set.push((PredictionMode::NEAR2MV, i));
         }
       }
     }
-    mv_stacks.push(mvs);
+    mv_stacks.push(mv_stack);
+  }
+
+  let sz = bsize.width_mi().min(bsize.height_mi());
+
+  if fi.frame_type == FrameType::INTER && fi.reference_mode != ReferenceMode::SINGLE && sz >= 2 {
+    // Adding compound candidate
+    if let Some(r0) = fwdref {
+      if let Some(r1) = bwdref {
+        let ref_frames = [ref_frames_set[r0][0], ref_frames_set[r1][0]];
+        ref_frames_set.push(ref_frames);
+        let mv0 = mvs_from_me[r0][0];
+        let mv1 = mvs_from_me[r1][0];
+        mvs_from_me.push([mv0, mv1]);
+        let mut mv_stack: Vec<CandidateMV> = Vec::new();
+        mode_contexts.push(cw.find_mvrefs(bo, &ref_frames, &mut mv_stack, bsize, false, fi, true));
+        for &x in RAV1E_INTER_COMPOUND_MODES {
+          mode_set.push((x, ref_frames_set.len() - 1));
+        }
+        mv_stacks.push(mv_stack);
+      }
+    }
   }
 
   let luma_rdo = |luma_mode: PredictionMode, fs: &mut FrameState, cw: &mut ContextWriter, best: &mut EncodingSettings,
-    mv: MotionVector, ref_frame: usize, mode_set_chroma: &[PredictionMode], luma_mode_is_intra: bool,
+    mvs: &[MotionVector; 2], ref_frames: &[usize; 2], mode_set_chroma: &[PredictionMode], luma_mode_is_intra: bool,
     mode_context: usize, mv_stack: &Vec<CandidateMV>| {
     let (tx_size, mut tx_type) = rdo_tx_size_type(
-      seq, fi, fs, cw, bsize, bo, luma_mode, ref_frame, mv, false,
+      seq, fi, fs, cw, bsize, bo, luma_mode, ref_frames, mvs, false,
     );
 
     // Find the best chroma prediction mode for the current luma prediction mode
@@ -367,8 +399,8 @@ pub fn rdo_mode_decision(
           wr,
           luma_mode,
           chroma_mode,
-          ref_frame,
-          mv,
+          ref_frames,
+          mvs,
           bsize,
           bo,
           skip,
@@ -394,11 +426,12 @@ pub fn rdo_mode_decision(
         );
 
         if rd < best.rd {
+        //if rd < best.rd || luma_mode == PredictionMode::NEW_NEWMV {
           best.rd = rd;
           best.mode_luma = luma_mode;
           best.mode_chroma = chroma_mode;
-          best.ref_frame = ref_frame;
-          best.mv = mv;
+          best.ref_frames = *ref_frames;
+          best.mvs = *mvs;
           best.skip = skip;
           best.tx_size = tx_size;
           best.tx_type = tx_type;
@@ -420,36 +453,40 @@ pub fn rdo_mode_decision(
   }
 
   mode_set.iter().for_each(|&(luma_mode, i)| {
-    let mv = match luma_mode {
-      PredictionMode::NEWMV => motion_estimation(fi, fs, bsize, bo, ref_frame_set[i], pmv),
-      PredictionMode::NEARESTMV => if mv_stacks[i].len() > 0 {
-        mv_stacks[i][0].this_mv
+    let mvs = match luma_mode {
+      PredictionMode::NEWMV | PredictionMode::NEW_NEWMV => mvs_from_me[i],
+      PredictionMode::NEARESTMV | PredictionMode::NEAREST_NEARESTMV => if mv_stacks[i].len() > 0 {
+        [mv_stacks[i][0].this_mv, mv_stacks[i][0].comp_mv]
       } else {
-        MotionVector { row: 0, col: 0 }
+        [MotionVector { row: 0, col: 0 }; 2]
       },
       PredictionMode::NEAR0MV => if mv_stacks[i].len() > 1 {
-        mv_stacks[i][1].this_mv
+        [mv_stacks[i][1].this_mv, mv_stacks[i][1].comp_mv]
       } else {
-        MotionVector { row: 0, col: 0 }
+        [MotionVector { row: 0, col: 0 }; 2]
       },
       PredictionMode::NEAR1MV | PredictionMode::NEAR2MV =>
-          mv_stacks[i][luma_mode as usize - PredictionMode::NEAR0MV as usize + 1].this_mv,
-      _ => MotionVector { row: 0, col: 0 }
+          [mv_stacks[i][luma_mode as usize - PredictionMode::NEAR0MV as usize + 1].this_mv,
+          mv_stacks[i][luma_mode as usize - PredictionMode::NEAR0MV as usize + 1].comp_mv],
+      PredictionMode::NEAREST_NEWMV => [mv_stacks[i][0].this_mv, mvs_from_me[i][1]],
+      PredictionMode::NEW_NEARESTMV => [mvs_from_me[i][0], mv_stacks[i][0].comp_mv],
+      _ => [MotionVector { row: 0, col: 0 }; 2]
     };
     let mode_set_chroma = vec![luma_mode];
 
-    luma_rdo(luma_mode, fs, cw, &mut best, mv, ref_frame_set[i], &mode_set_chroma, false,
+    luma_rdo(luma_mode, fs, cw, &mut best, &mvs, &ref_frames_set[i], &mode_set_chroma, false,
              mode_contexts[i], &mv_stacks[i]);
   });
 
   if !best.skip {
     intra_mode_set.iter().for_each(|&luma_mode| {
-      let mv = MotionVector { row: 0, col: 0 };
+      let mvs = &[MotionVector { row: 0, col: 0 }; 2];
+      let ref_frames = &[INTRA_FRAME, NONE_FRAME];
       let mut mode_set_chroma = vec![luma_mode];
       if is_chroma_block && luma_mode != PredictionMode::DC_PRED {
         mode_set_chroma.push(PredictionMode::DC_PRED);
       }
-      luma_rdo(luma_mode, fs, cw, &mut best, mv, INTRA_FRAME, &mode_set_chroma, true,
+      luma_rdo(luma_mode, fs, cw, &mut best, mvs, ref_frames, &mode_set_chroma, true,
                0, &Vec::new());
     });
   }
@@ -488,8 +525,8 @@ pub fn rdo_mode_decision(
         wr,
         best.mode_luma,
         chroma_mode,
-        best.ref_frame,
-        best.mv,
+        &best.ref_frames,
+        &best.mvs,
         bsize,
         bo,
         best.skip,
@@ -525,8 +562,8 @@ pub fn rdo_mode_decision(
   }
 
   cw.bc.set_mode(bo, bsize, best.mode_luma);
-  cw.bc.set_ref_frame(bo, bsize, best.ref_frame);
-  cw.bc.set_motion_vector(bo, bsize, best.mv);
+  cw.bc.set_ref_frames(bo, bsize, &best.ref_frames);
+  cw.bc.set_motion_vectors(bo, bsize, &best.mvs);
 
   assert!(best.rd >= 0_f64);
 
@@ -538,8 +575,8 @@ pub fn rdo_mode_decision(
       pred_mode_luma: best.mode_luma,
       pred_mode_chroma: best.mode_chroma,
       pred_cfl_params: best.cfl_params,
-      ref_frame: best.ref_frame,
-      mv: best.mv,
+      ref_frames: best.ref_frames,
+      mvs: best.mvs,
       rd_cost: best.rd,
       skip: best.skip,
       tx_size: best.tx_size,
@@ -594,7 +631,7 @@ pub fn rdo_cfl_alpha(
 // RDO-based transform type decision
 pub fn rdo_tx_type_decision(
   fi: &FrameInvariants, fs: &mut FrameState, cw: &mut ContextWriter,
-  mode: PredictionMode, ref_frame: usize, mv: MotionVector, bsize: BlockSize, bo: &BlockOffset, tx_size: TxSize,
+  mode: PredictionMode, ref_frames: &[usize; 2], mvs: &[MotionVector; 2], bsize: BlockSize, bo: &BlockOffset, tx_size: TxSize,
   tx_set: TxSet, bit_depth: usize
 ) -> TxType {
   let mut best_type = TxType::DCT_DCT;
@@ -617,7 +654,7 @@ pub fn rdo_tx_type_decision(
       continue;
     }
 
-    motion_compensate(fi, fs, cw, mode, ref_frame, mv, bsize, bo, bit_depth, true);
+    motion_compensate(fi, fs, cw, mode, ref_frames, mvs, bsize, bo, bit_depth, true);
 
     let mut wr: &mut dyn Writer = &mut WriterCounter::new();
     let tell = wr.tell_frac();
@@ -702,7 +739,7 @@ pub fn rdo_partition_decision(
         if subsize == BlockSize::BLOCK_INVALID {
           continue;
         }
-        pmv = best_pred_modes[0].mv;
+        pmv = best_pred_modes[0].mvs[0];
 
         assert!(best_pred_modes.len() <= 4);
         let bs = bsize.width_mi();
-- 
GitLab