From e6e2d886d30a9e9a173fb2d384c637855e8b22f9 Mon Sep 17 00:00:00 2001
From: Yunqing Wang <yunqingwang@google.com>
Date: Thu, 10 Mar 2016 11:07:50 -0800
Subject: [PATCH] Add high-precision sub-pixel search as a speed feature

Using the up-sampled reference frames in sub-pixel motion search is
enabled as a speed feature for good-quality mode speed 0 and speed 1.

Change-Id: Ieb454bf8c646ddb99e87bd64c8e74dbd78d84a50
---
 configure                      |   1 -
 vp10/encoder/encoder.c         | 198 ++++++++++++------------
 vp10/encoder/encoder.h         |   6 -
 vp10/encoder/mbgraph.c         |   4 -
 vp10/encoder/mcomp.c           |  42 +-----
 vp10/encoder/mcomp.h           |   4 -
 vp10/encoder/rdopt.c           | 268 ++++++++++++++++-----------------
 vp10/encoder/rdopt.h           |   6 +-
 vp10/encoder/speed_features.c  |   7 +
 vp10/encoder/speed_features.h  |   3 +
 vp10/encoder/temporal_filter.c |   4 -
 vpx_dsp/variance.c             |   2 -
 vpx_dsp/vpx_dsp_rtcd_defs.pl   |  10 +-
 vpx_dsp/x86/variance_sse2.c    |   2 -
 14 files changed, 256 insertions(+), 301 deletions(-)

diff --git a/configure b/configure
index e7eb1521f1..97366e4787 100755
--- a/configure
+++ b/configure
@@ -284,7 +284,6 @@ EXPERIMENT_LIST="
     ext_partition
     ext_tile
     obmc
-    affine_motion
 "
 CONFIG_LIST="
     dependency_tracking
diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c
index 34dd8d54b1..fc65e723e9 100644
--- a/vp10/encoder/encoder.c
+++ b/vp10/encoder/encoder.c
@@ -358,9 +358,7 @@ void vp10_initialize_enc(void) {
 
 static void dealloc_compressor_data(VP10_COMP *cpi) {
   VP10_COMMON *const cm = &cpi->common;
-#if CONFIG_REF_MV
   int i;
-#endif
 
   vpx_free(cpi->mbmi_ext_base);
   cpi->mbmi_ext_base = NULL;
@@ -413,14 +411,9 @@ static void dealloc_compressor_data(VP10_COMP *cpi) {
   vpx_free(cpi->active_map.map);
   cpi->active_map.map = NULL;
 
-#if CONFIG_AFFINE_MOTION
-  {
-    // Free up-sampled reference buffers.
-    int i;
-    for (i = 0; i < MAX_REF_FRAMES; i++)
-      vpx_free_frame_buffer(&cpi->upsampled_ref_bufs[i].buf);
-  }
-#endif
+  // Free up-sampled reference buffers.
+  for (i = 0; i < MAX_REF_FRAMES; i++)
+    vpx_free_frame_buffer(&cpi->upsampled_ref_bufs[i].buf);
 
   vp10_free_ref_frame_buffers(cm->buffer_pool);
 #if CONFIG_VP9_POSTPROC
@@ -756,26 +749,6 @@ static void alloc_util_frame_buffers(VP10_COMP *cpi) {
                                NULL, NULL, NULL))
     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                        "Failed to allocate scaled last source buffer");
-
-#if CONFIG_AFFINE_MOTION
-  {
-    // Allocate up-sampled reference buffers.
-    int i;
-
-    for (i = 0; i < MAX_REF_FRAMES; i++)
-      if (vpx_realloc_frame_buffer(&cpi->upsampled_ref_bufs[i].buf,
-                                   (cm->width << 3), (cm->height << 3),
-                                   cm->subsampling_x, cm->subsampling_y,
-#if CONFIG_VP9_HIGHBITDEPTH
-                                   cm->use_highbitdepth,
-#endif
-                                   (VP9_ENC_BORDER_IN_PIXELS << 3),
-                                   cm->byte_alignment,
-                                   NULL, NULL, NULL))
-        vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
-            "Failed to allocate up-sampled reference frame buffer");
-  }
-#endif
 }
 
 
@@ -2069,6 +2042,14 @@ static void cal_nmvsadcosts_hp(int *mvsadcost[2]) {
   } while (++i <= MV_MAX);
 }
 
+static INLINE void init_upsampled_ref_frame_bufs(VP10_COMP *cpi) {
+  int i;
+
+  for (i = 0; i < MAX_REF_FRAMES; ++i) {
+    cpi->upsampled_ref_bufs[i].ref_count = 0;
+    cpi->upsampled_ref_idx[i] = INVALID_IDX;
+  }
+}
 
 VP10_COMP *vp10_create_compressor(VP10EncoderConfig *oxcf,
                                 BufferPool *const pool) {
@@ -2267,6 +2248,8 @@ VP10_COMP *vp10_create_compressor(VP10EncoderConfig *oxcf,
     vp10_init_second_pass(cpi);
   }
 
+  init_upsampled_ref_frame_bufs(cpi);
+
   vp10_set_speed_features_framesize_independent(cpi);
   vp10_set_speed_features_framesize_dependent(cpi);
 
@@ -2929,7 +2912,6 @@ static int recode_loop_test(VP10_COMP *cpi,
   return force_recode;
 }
 
-#if CONFIG_AFFINE_MOTION
 static INLINE int get_free_upsampled_ref_buf(EncRefCntBuffer *ubufs) {
   int i;
 
@@ -2941,50 +2923,59 @@ static INLINE int get_free_upsampled_ref_buf(EncRefCntBuffer *ubufs) {
   return INVALID_IDX;
 }
 
-// Up-sample reference frames.
-static INLINE int upsample_ref_frame(RefCntBuffer *bufs,
-#if CONFIG_VP9_HIGHBITDEPTH
-                                     EncRefCntBuffer *ubufs, int new_idx,
-                                     int bit_depth) {
-#else
-                                     EncRefCntBuffer *ubufs, int new_idx) {
-#endif
+// Up-sample 1 reference frame.
+static INLINE int upsample_ref_frame(VP10_COMP *cpi,
+                                     const YV12_BUFFER_CONFIG *const ref) {
+  VP10_COMMON * const cm = &cpi->common;
+  EncRefCntBuffer *ubufs = cpi->upsampled_ref_bufs;
   int new_uidx = get_free_upsampled_ref_buf(ubufs);
 
   if (new_uidx == INVALID_IDX) {
     return INVALID_IDX;
   } else {
-    const YV12_BUFFER_CONFIG *const ref = &bufs[new_idx].buf;
     YV12_BUFFER_CONFIG *upsampled_ref = &ubufs[new_uidx].buf;
 
+    // Can allocate buffer for Y plane only.
+    if (upsampled_ref->buffer_alloc_sz < (ref->buffer_alloc_sz << 6))
+      if (vpx_realloc_frame_buffer(upsampled_ref,
+                                   (cm->width << 3), (cm->height << 3),
+                                   cm->subsampling_x, cm->subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+                                   cm->use_highbitdepth,
+#endif
+                                   (VP9_ENC_BORDER_IN_PIXELS << 3),
+                                   cm->byte_alignment,
+                                   NULL, NULL, NULL))
+        vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+                           "Failed to allocate up-sampled frame buffer");
+
     // Currently, only Y plane is up-sampled, U, V are not used.
 #if CONFIG_VP9_HIGHBITDEPTH
-    scale_and_extend_frame(ref, upsampled_ref, 1, bit_depth);
+    scale_and_extend_frame(ref, upsampled_ref, 1, (int)cm->bit_depth);
 #else
     scale_and_extend_frame(ref, upsampled_ref, 1);
 #endif
     return new_uidx;
   }
 }
-#endif
 
 void vp10_update_reference_frames(VP10_COMP *cpi) {
   VP10_COMMON * const cm = &cpi->common;
   BufferPool *const pool = cm->buffer_pool;
+  const int use_upsampled_ref = cpi->sf.use_upsampled_references;
+  int new_uidx = 0;
+
 #if CONFIG_EXT_REFS
   int ref_frame;
 #endif  // CONFIG_EXT_REFS
 
-#if CONFIG_AFFINE_MOTION
-  // Always up-sample the current encoded frame.
-#if CONFIG_VP9_HIGHBITDEPTH
-  int new_uidx = upsample_ref_frame(pool->frame_bufs, cpi->upsampled_ref_bufs,
-                                    cm->new_fb_idx, (int)cm->bit_depth);
-#else
-  int new_uidx = upsample_ref_frame(pool->frame_bufs, cpi->upsampled_ref_bufs,
-                                    cm->new_fb_idx);
-#endif
-#endif
+  if (use_upsampled_ref) {
+    // Up-sample the current encoded frame.
+    RefCntBuffer *bufs = pool->frame_bufs;
+    const YV12_BUFFER_CONFIG *const ref = &bufs[cm->new_fb_idx].buf;
+
+    new_uidx = upsample_ref_frame(cpi, ref);
+  }
 
   // At this point the new frame has been encoded.
   // If any buffer copy / swapping is signaled it should be done here.
@@ -2994,12 +2985,12 @@ void vp10_update_reference_frames(VP10_COMP *cpi) {
     ref_cnt_fb(pool->frame_bufs,
                &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx);
 
-#if CONFIG_AFFINE_MOTION
-    uref_cnt_fb(cpi->upsampled_ref_bufs,
-                &cpi->upsampled_ref_idx[cpi->gld_fb_idx], new_uidx);
-    uref_cnt_fb(cpi->upsampled_ref_bufs,
-                &cpi->upsampled_ref_idx[cpi->alt_fb_idx], new_uidx);
-#endif
+    if (use_upsampled_ref) {
+      uref_cnt_fb(cpi->upsampled_ref_bufs,
+                  &cpi->upsampled_ref_idx[cpi->gld_fb_idx], new_uidx);
+      uref_cnt_fb(cpi->upsampled_ref_bufs,
+                  &cpi->upsampled_ref_idx[cpi->alt_fb_idx], new_uidx);
+    }
   } else if (vp10_preserve_existing_gf(cpi)) {
     // We have decided to preserve the previously existing golden frame as our
     // new ARF frame. However, in the short term in function
@@ -3013,10 +3004,10 @@ void vp10_update_reference_frames(VP10_COMP *cpi) {
 
     ref_cnt_fb(pool->frame_bufs,
                &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx);
-#if CONFIG_AFFINE_MOTION
-    uref_cnt_fb(cpi->upsampled_ref_bufs,
-                &cpi->upsampled_ref_idx[cpi->alt_fb_idx], new_uidx);
-#endif
+    if (use_upsampled_ref)
+      uref_cnt_fb(cpi->upsampled_ref_bufs,
+                  &cpi->upsampled_ref_idx[cpi->alt_fb_idx], new_uidx);
+
     tmp = cpi->alt_fb_idx;
     cpi->alt_fb_idx = cpi->gld_fb_idx;
     cpi->gld_fb_idx = tmp;
@@ -3030,10 +3021,10 @@ void vp10_update_reference_frames(VP10_COMP *cpi) {
 
       ref_cnt_fb(pool->frame_bufs,
                  &cm->ref_frame_map[arf_idx], cm->new_fb_idx);
-#if CONFIG_AFFINE_MOTION
-      uref_cnt_fb(cpi->upsampled_ref_bufs,
-                  &cpi->upsampled_ref_idx[cpi->alt_fb_idx], new_uidx);
-#endif
+      if (use_upsampled_ref)
+        uref_cnt_fb(cpi->upsampled_ref_bufs,
+                    &cpi->upsampled_ref_idx[cpi->alt_fb_idx], new_uidx);
+
       memcpy(cpi->interp_filter_selected[ALTREF_FRAME],
              cpi->interp_filter_selected[0],
              sizeof(cpi->interp_filter_selected[0]));
@@ -3042,10 +3033,10 @@ void vp10_update_reference_frames(VP10_COMP *cpi) {
     if (cpi->refresh_golden_frame) {
       ref_cnt_fb(pool->frame_bufs,
                  &cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx);
-#if CONFIG_AFFINE_MOTION
-      uref_cnt_fb(cpi->upsampled_ref_bufs,
-                  &cpi->upsampled_ref_idx[cpi->gld_fb_idx], new_uidx);
-#endif
+      if (use_upsampled_ref)
+        uref_cnt_fb(cpi->upsampled_ref_bufs,
+                    &cpi->upsampled_ref_idx[cpi->gld_fb_idx], new_uidx);
+
       if (!cpi->rc.is_src_frame_alt_ref)
         memcpy(cpi->interp_filter_selected[GOLDEN_FRAME],
                cpi->interp_filter_selected[0],
@@ -3080,10 +3071,10 @@ void vp10_update_reference_frames(VP10_COMP *cpi) {
   if (cpi->refresh_last_frame) {
     ref_cnt_fb(pool->frame_bufs,
                &cm->ref_frame_map[cpi->lst_fb_idx], cm->new_fb_idx);
-#if CONFIG_AFFINE_MOTION
-    uref_cnt_fb(cpi->upsampled_ref_bufs,
-                &cpi->upsampled_ref_idx[cpi->lst_fb_idx], new_uidx);
-#endif
+    if (use_upsampled_ref)
+      uref_cnt_fb(cpi->upsampled_ref_bufs,
+                  &cpi->upsampled_ref_idx[cpi->lst_fb_idx], new_uidx);
+
     if (!cpi->rc.is_src_frame_alt_ref) {
       memcpy(cpi->interp_filter_selected[LAST_FRAME],
              cpi->interp_filter_selected[0],
@@ -3249,8 +3240,9 @@ void vp10_scale_references(VP10_COMP *cpi) {
         }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
-#if CONFIG_AFFINE_MOTION
-        {
+        if (cpi->sf.use_upsampled_references && (force_scaling ||
+            new_fb_ptr->buf.y_crop_width != cm->width ||
+            new_fb_ptr->buf.y_crop_height != cm->height)) {
           const int map_idx = get_ref_frame_map_idx(cpi, ref_frame);
           EncRefCntBuffer *ubuf =
               &cpi->upsampled_ref_bufs[cpi->upsampled_ref_idx[map_idx]];
@@ -3267,15 +3259,12 @@ void vp10_scale_references(VP10_COMP *cpi) {
             vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                                "Failed to allocate up-sampled frame buffer");
 #if CONFIG_VP9_HIGHBITDEPTH
-          scale_and_extend_frame(&new_fb_ptr->buf, &ubuf->buf, MAX_MB_PLANE,
+          scale_and_extend_frame(&new_fb_ptr->buf, &ubuf->buf, 1,
                                  (int)cm->bit_depth);
 #else
-          scale_and_extend_frame(&new_fb_ptr->buf, &ubuf->buf, MAX_MB_PLANE);
+          scale_and_extend_frame(&new_fb_ptr->buf, &ubuf->buf, 1);
 #endif
-          cpi->scaled_ref_idx[ref_frame - LAST_FRAME] = new_fb;
-          alloc_frame_mvs(cm, new_fb);
         }
-#endif
       } else {
         const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
         RefCntBuffer *const buf = &pool->frame_bufs[buf_idx];
@@ -3610,9 +3599,28 @@ static void set_frame_size(VP10_COMP *cpi) {
   set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME);
 }
 
+static void reset_use_upsampled_references(VP10_COMP *cpi) {
+  MV_REFERENCE_FRAME ref_frame;
+
+  // Reset the up-sampled reference buffer structure.
+  init_upsampled_ref_frame_bufs(cpi);
+
+  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
+    const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi,
+                                                               ref_frame);
+    int new_uidx = upsample_ref_frame(cpi, ref);
+
+    // Update the up-sampled reference index.
+    cpi->upsampled_ref_idx[get_ref_frame_map_idx(cpi, ref_frame)] =
+        new_uidx;
+    cpi->upsampled_ref_bufs[new_uidx].ref_count++;
+  }
+}
+
 static void encode_without_recode_loop(VP10_COMP *cpi) {
   VP10_COMMON *const cm = &cpi->common;
   int q = 0, bottom_index = 0, top_index = 0;  // Dummy variables.
+  const int use_upsampled_ref = cpi->sf.use_upsampled_references;
 
   vpx_clear_system_state();
 
@@ -3647,6 +3655,12 @@ static void encode_without_recode_loop(VP10_COMP *cpi) {
   set_size_independent_vars(cpi);
   set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
 
+  // cpi->sf.use_upsampled_references can differ from frame to frame. Whenever
+  // it changes from 0 to 1, the reference frames for the current frame must
+  // be up-sampled before encoding.
+  if (!use_upsampled_ref && cpi->sf.use_upsampled_references)
+    reset_use_upsampled_references(cpi);
+
   vp10_set_quantizer(cm, q);
   vp10_set_variance_partition_thresholds(cpi, q);
 
@@ -3694,9 +3708,16 @@ static void encode_with_recode_loop(VP10_COMP *cpi,
   int frame_over_shoot_limit;
   int frame_under_shoot_limit;
   int q = 0, q_low = 0, q_high = 0;
+  const int use_upsampled_ref = cpi->sf.use_upsampled_references;
 
   set_size_independent_vars(cpi);
 
+  // cpi->sf.use_upsampled_references can differ from frame to frame. Whenever
+  // it changes from 0 to 1, the reference frames for the current frame must
+  // be up-sampled before encoding.
+  if (!use_upsampled_ref && cpi->sf.use_upsampled_references)
+    reset_use_upsampled_references(cpi);
+
   do {
     vpx_clear_system_state();
 
@@ -4355,17 +4376,6 @@ static void init_ref_frame_bufs(VP10_COMMON *cm) {
   }
 }
 
-#if CONFIG_AFFINE_MOTION
-static INLINE void init_upsampled_ref_frame_bufs(VP10_COMP *cpi) {
-  int i;
-
-  for (i = 0; i < MAX_REF_FRAMES; ++i) {
-    cpi->upsampled_ref_bufs[i].ref_count = 0;
-    cpi->upsampled_ref_idx[i] = INVALID_IDX;
-  }
-}
-#endif
-
 static void check_initial_width(VP10_COMP *cpi,
 #if CONFIG_VP9_HIGHBITDEPTH
                                 int use_highbitdepth,
@@ -4388,9 +4398,7 @@ static void check_initial_width(VP10_COMP *cpi,
     alloc_raw_frame_buffers(cpi);
     init_ref_frame_bufs(cm);
     alloc_util_frame_buffers(cpi);
-#if CONFIG_AFFINE_MOTION
-    init_upsampled_ref_frame_bufs(cpi);
-#endif
+
     init_motion_estimation(cpi);  // TODO(agrange) This can be removed.
 
     cpi->initial_width = cm->width;
diff --git a/vp10/encoder/encoder.h b/vp10/encoder/encoder.h
index b2c242c8c1..afe3292d37 100644
--- a/vp10/encoder/encoder.h
+++ b/vp10/encoder/encoder.h
@@ -286,12 +286,10 @@ typedef struct IMAGE_STAT {
   double worst;
 } ImageStat;
 
-#if CONFIG_AFFINE_MOTION
 typedef struct {
   int ref_count;
   YV12_BUFFER_CONFIG buf;
 } EncRefCntBuffer;
-#endif
 
 typedef struct VP10_COMP {
   QUANTS quants;
@@ -311,11 +309,9 @@ typedef struct VP10_COMP {
   YV12_BUFFER_CONFIG *unscaled_last_source;
   YV12_BUFFER_CONFIG scaled_last_source;
 
-#if CONFIG_AFFINE_MOTION
   // Up-sampled reference buffers
   EncRefCntBuffer upsampled_ref_bufs[MAX_REF_FRAMES];
   int upsampled_ref_idx[MAX_REF_FRAMES];
-#endif
 
   TileDataEnc *tile_data;
   int allocated_tiles;  // Keep track of memory allocated for tiles.
@@ -702,7 +698,6 @@ void vp10_new_framerate(VP10_COMP *cpi, double framerate);
 
 #define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl))
 
-#if CONFIG_AFFINE_MOTION
 // Update up-sampled reference frame index.
 static INLINE void uref_cnt_fb(EncRefCntBuffer *ubufs, int *uidx,
                                int new_uidx) {
@@ -714,7 +709,6 @@ static INLINE void uref_cnt_fb(EncRefCntBuffer *ubufs, int *uidx,
   *uidx = new_uidx;
   ubufs[new_uidx].ref_count++;
 }
-#endif
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/vp10/encoder/mbgraph.c b/vp10/encoder/mbgraph.c
index 32ff0faf67..5e66ce59a8 100644
--- a/vp10/encoder/mbgraph.c
+++ b/vp10/encoder/mbgraph.c
@@ -64,11 +64,7 @@ static unsigned int do_16x16_motion_iteration(VP10_COMP *cpi,
         &v_fn_ptr, 0, mv_sf->subpel_iters_per_step,
         cond_cost_list(cpi, cost_list),
         NULL, NULL,
-#if CONFIG_AFFINE_MOTION
         &distortion, &sse, NULL, 0, 0, 0);
-#else
-        &distortion, &sse, NULL, 0, 0);
-#endif
   }
 
 #if CONFIG_EXT_INTER
diff --git a/vp10/encoder/mcomp.c b/vp10/encoder/mcomp.c
index 2c9397640d..1f147d7edd 100644
--- a/vp10/encoder/mcomp.c
+++ b/vp10/encoder/mcomp.c
@@ -210,7 +210,6 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
 
 #define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c)
 
-#if CONFIG_AFFINE_MOTION
 static INLINE const uint8_t *upre(const uint8_t *buf, int stride,
                                   int r, int c) {
   return &buf[(r) * stride + (c)];
@@ -232,7 +231,6 @@ static INLINE const uint8_t *upre(const uint8_t *buf, int stride,
   } else {                                                             \
     v = INT_MAX;                                                       \
   }
-#endif
 
 #define FIRST_LEVEL_CHECKS                              \
   {                                                     \
@@ -438,11 +436,7 @@ int vp10_find_best_sub_pixel_tree_pruned_evenmore(
     int *distortion,
     unsigned int *sse1,
     const uint8_t *second_pred,
-#if CONFIG_AFFINE_MOTION
     int w, int h, int use_upsampled_ref) {
-#else
-    int w, int h) {
-#endif
   SETUP_SUBPEL_SEARCH;
   besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
                                z, src_stride, y, y_stride, second_pred,
@@ -455,9 +449,7 @@ int vp10_find_best_sub_pixel_tree_pruned_evenmore(
   (void) allow_hp;
   (void) forced_stop;
   (void) hstep;
-#if CONFIG_AFFINE_MOTION
   (void) use_upsampled_ref;
-#endif
 
   if (cost_list &&
       cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
@@ -524,16 +516,10 @@ int vp10_find_best_sub_pixel_tree_pruned_more(const MACROBLOCK *x,
                                              int *distortion,
                                              unsigned int *sse1,
                                              const uint8_t *second_pred,
-#if CONFIG_AFFINE_MOTION
                                              int w, int h,
                                              int use_upsampled_ref) {
-#else
-                                             int w, int h) {
-#endif
   SETUP_SUBPEL_SEARCH;
-#if CONFIG_AFFINE_MOTION
   (void) use_upsampled_ref;
-#endif
 
   besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
                                z, src_stride, y, y_stride, second_pred,
@@ -607,15 +593,9 @@ int vp10_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x,
                                         int *distortion,
                                         unsigned int *sse1,
                                         const uint8_t *second_pred,
-#if CONFIG_AFFINE_MOTION
                                         int w, int h, int use_upsampled_ref) {
-#else
-                                        int w, int h) {
-#endif
   SETUP_SUBPEL_SEARCH;
-#if CONFIG_AFFINE_MOTION
   (void) use_upsampled_ref;
-#endif
 
   besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
                                z, src_stride, y, y_stride, second_pred,
@@ -705,9 +685,8 @@ static const MV search_step_table[12] = {
     {0, -1}, {0, 1}, {-1, 0}, {1, 0}
 };
 
-
-#if CONFIG_AFFINE_MOTION
 #if CONFIG_VP9_HIGHBITDEPTH
+// TODO(yunqing): Optimize the following 2 functions.
 static void highbd_comp_avg_upsampled_pred(uint16_t *comp_pred,
                                            const uint8_t *pred8,
                                            int width, int height,
@@ -798,7 +777,6 @@ static unsigned int upsampled_setup_center_error(
   besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
   return besterr;
 }
-#endif
 
 int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x,
                                  MV *bestmv, const MV *ref_mv,
@@ -812,11 +790,7 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x,
                                  int *distortion,
                                  unsigned int *sse1,
                                  const uint8_t *second_pred,
-#if CONFIG_AFFINE_MOTION
                                  int w, int h, int use_upsampled_ref) {
-#else
-                                 int w, int h) {
-#endif
   const uint8_t *const z = x->plane[0].src.buf;
   const uint8_t *const src_address = z;
   const int src_stride = x->plane[0].src.stride;
@@ -852,7 +826,6 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x,
   bestmv->row *= 8;
   bestmv->col *= 8;
 
-#if CONFIG_AFFINE_MOTION
   // use_upsampled_ref can be 0 or 1
   if (use_upsampled_ref)
     besterr = upsampled_setup_center_error(xd, bestmv, ref_mv, error_per_bit,
@@ -860,7 +833,6 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x,
                                            second_pred, w, h, (offset << 3),
                                            mvjcost, mvcost, sse1, distortion);
   else
-#endif
     besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
                                  z, src_stride, y, y_stride, second_pred,
                                  w, h, offset, mvjcost, mvcost,
@@ -876,7 +848,6 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x,
       if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
         MV this_mv = {tr, tc};
 
-#if CONFIG_AFFINE_MOTION
         if (use_upsampled_ref) {
           const uint8_t *const pre_address = y + tr * y_stride + tc;
 
@@ -884,7 +855,6 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x,
                                          pre_address, y_stride, second_pred,
                                          w, h, &sse);
         } else {
-#endif
           const uint8_t *const pre_address = y + (tr >> 3) * y_stride +
               (tc >> 3);
           if (second_pred == NULL)
@@ -893,9 +863,7 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x,
           else
             thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
                                 src_address, src_stride, &sse, second_pred);
-#if CONFIG_AFFINE_MOTION
         }
-#endif
 
         cost_array[idx] = thismse +
             mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
@@ -920,7 +888,6 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x,
     if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
       MV this_mv = {tr, tc};
 
-#if CONFIG_AFFINE_MOTION
       if (use_upsampled_ref) {
         const uint8_t *const pre_address = y + tr * y_stride + tc;
 
@@ -928,7 +895,6 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x,
                                        pre_address, y_stride, second_pred,
                                        w, h, &sse);
       } else {
-#endif
         const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
 
         if (second_pred == NULL)
@@ -937,9 +903,7 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x,
         else
           thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
                               src_address, src_stride, &sse, second_pred);
-#if CONFIG_AFFINE_MOTION
       }
-#endif
 
       cost_array[4] = thismse +
           mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
@@ -963,15 +927,11 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x,
     }
 
     if (iters_per_step > 1 && best_idx != -1) {
-#if CONFIG_AFFINE_MOTION
       if (use_upsampled_ref) {
         SECOND_LEVEL_CHECKS_BEST(1);
       } else {
-#endif
         SECOND_LEVEL_CHECKS_BEST(0);
-#if CONFIG_AFFINE_MOTION
       }
-#endif
     }
 
     tr = br;
diff --git a/vp10/encoder/mcomp.h b/vp10/encoder/mcomp.h
index a430c76c2d..f99cd8b17e 100644
--- a/vp10/encoder/mcomp.h
+++ b/vp10/encoder/mcomp.h
@@ -116,11 +116,7 @@ typedef int (fractional_mv_step_fp) (
     int *mvjcost, int *mvcost[2],
     int *distortion, unsigned int *sse1,
     const uint8_t *second_pred,
-#if CONFIG_AFFINE_MOTION
     int w, int h, int use_upsampled_ref);
-#else
-    int w, int h);
-#endif
 
 extern fractional_mv_step_fp vp10_find_best_sub_pixel_tree;
 extern fractional_mv_step_fp vp10_find_best_sub_pixel_tree_pruned;
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index bb62e32edf..665ba7dc48 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -4664,52 +4664,52 @@ static void joint_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
     if (bestsme < INT_MAX) {
       int dis; /* TODO: use dis in distortion calculation later. */
       unsigned int sse;
-#if CONFIG_AFFINE_MOTION
-      // Use up-sampled reference frames.
-      struct macroblockd_plane *const pd = &xd->plane[0];
-      struct buf_2d backup_pred = pd->pre[0];
-      const YV12_BUFFER_CONFIG *upsampled_ref =
-          get_upsampled_ref(cpi, refs[id]);
-
-      // Set pred for Y plane
-      setup_pred_plane(&pd->pre[0], upsampled_ref->y_buffer,
-                       upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3),
-                       NULL, pd->subsampling_x, pd->subsampling_y);
-
-      // If bsize < BLOCK_8X8, adjust pred pointer for this block
-      if (bsize < BLOCK_8X8)
-        pd->pre[0].buf =
-            &pd->pre[0].buf[(vp10_raster_block_offset(BLOCK_8X8, block,
-            pd->pre[0].stride)) << 3];
-
-      bestsme = cpi->find_fractional_mv_step(
-          x, &tmp_mv,
-          &ref_mv[id].as_mv,
-          cpi->common.allow_high_precision_mv,
-          x->errorperbit,
-          &cpi->fn_ptr[bsize],
-          0, cpi->sf.mv.subpel_iters_per_step,
-          NULL,
-          x->nmvjointcost, x->mvcost,
-          &dis, &sse, second_pred,
-          pw, ph, 1);
-
-      // Restore the reference frames.
-      pd->pre[0] = backup_pred;
-#else
-      (void) block;
-      bestsme = cpi->find_fractional_mv_step(
-          x, &tmp_mv,
-          &ref_mv[id].as_mv,
-          cpi->common.allow_high_precision_mv,
-          x->errorperbit,
-          &cpi->fn_ptr[bsize],
-          0, cpi->sf.mv.subpel_iters_per_step,
-          NULL,
-          x->nmvjointcost, x->mvcost,
-          &dis, &sse, second_pred,
-          pw, ph);
-#endif
+      if (cpi->sf.use_upsampled_references) {
+        // Use up-sampled reference frames.
+        struct macroblockd_plane *const pd = &xd->plane[0];
+        struct buf_2d backup_pred = pd->pre[0];
+        const YV12_BUFFER_CONFIG *upsampled_ref =
+            get_upsampled_ref(cpi, refs[id]);
+
+        // Set pred for Y plane
+        setup_pred_plane(&pd->pre[0], upsampled_ref->y_buffer,
+                         upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3),
+                         NULL, pd->subsampling_x, pd->subsampling_y);
+
+        // If bsize < BLOCK_8X8, adjust pred pointer for this block
+        if (bsize < BLOCK_8X8)
+          pd->pre[0].buf =
+              &pd->pre[0].buf[(vp10_raster_block_offset(BLOCK_8X8, block,
+              pd->pre[0].stride)) << 3];
+
+        bestsme = cpi->find_fractional_mv_step(
+            x, &tmp_mv,
+            &ref_mv[id].as_mv,
+            cpi->common.allow_high_precision_mv,
+            x->errorperbit,
+            &cpi->fn_ptr[bsize],
+            0, cpi->sf.mv.subpel_iters_per_step,
+            NULL,
+            x->nmvjointcost, x->mvcost,
+            &dis, &sse, second_pred,
+            pw, ph, 1);
+
+        // Restore the reference frames.
+        pd->pre[0] = backup_pred;
+      } else {
+        (void) block;
+        bestsme = cpi->find_fractional_mv_step(
+            x, &tmp_mv,
+            &ref_mv[id].as_mv,
+            cpi->common.allow_high_precision_mv,
+            x->errorperbit,
+            &cpi->fn_ptr[bsize],
+            0, cpi->sf.mv.subpel_iters_per_step,
+            NULL,
+            x->nmvjointcost, x->mvcost,
+            &dis, &sse, second_pred,
+            pw, ph, 0);
+        }
     }
 
     // Restore the pointer to the first (possibly scaled) prediction buffer.
@@ -4990,57 +4990,57 @@ static int64_t rd_pick_best_sub8x8_mode(VP10_COMP *cpi, MACROBLOCK *x,
 
           if (bestsme < INT_MAX) {
             int distortion;
-#if CONFIG_AFFINE_MOTION
-            const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
-            const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
-            // Use up-sampled reference frames.
-            struct macroblockd_plane *const pd = &xd->plane[0];
-            struct buf_2d backup_pred = pd->pre[0];
-            const YV12_BUFFER_CONFIG *upsampled_ref =
-                get_upsampled_ref(cpi, mbmi->ref_frame[0]);
-
-            // Set pred for Y plane
-            setup_pred_plane(&pd->pre[0], upsampled_ref->y_buffer,
-                             upsampled_ref->y_stride,
-                             (mi_row << 3), (mi_col << 3),
-                             NULL, pd->subsampling_x, pd->subsampling_y);
-
-            // adjust pred pointer for this block
-            pd->pre[0].buf =
-                &pd->pre[0].buf[(vp10_raster_block_offset(BLOCK_8X8, i,
-                pd->pre[0].stride)) << 3];
-
-            cpi->find_fractional_mv_step(
-                x,
-                new_mv,
-                &bsi->ref_mv[0]->as_mv,
-                cm->allow_high_precision_mv,
-                x->errorperbit, &cpi->fn_ptr[bsize],
-                cpi->sf.mv.subpel_force_stop,
-                cpi->sf.mv.subpel_iters_per_step,
-                cond_cost_list(cpi, cost_list),
-                x->nmvjointcost, x->mvcost,
-                &distortion,
-                &x->pred_sse[mbmi->ref_frame[0]],
-                NULL, pw, ph, 1);
-
-            // Restore the reference frames.
-            pd->pre[0] = backup_pred;
-#else
-            cpi->find_fractional_mv_step(
-                x,
-                new_mv,
-                &bsi->ref_mv[0]->as_mv,
-                cm->allow_high_precision_mv,
-                x->errorperbit, &cpi->fn_ptr[bsize],
-                cpi->sf.mv.subpel_force_stop,
-                cpi->sf.mv.subpel_iters_per_step,
-                cond_cost_list(cpi, cost_list),
-                x->nmvjointcost, x->mvcost,
-                &distortion,
-                &x->pred_sse[mbmi->ref_frame[0]],
-                NULL, 0, 0);
-#endif
+            if (cpi->sf.use_upsampled_references) {
+              const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
+              const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
+              // Use up-sampled reference frames.
+              struct macroblockd_plane *const pd = &xd->plane[0];
+              struct buf_2d backup_pred = pd->pre[0];
+              const YV12_BUFFER_CONFIG *upsampled_ref =
+                  get_upsampled_ref(cpi, mbmi->ref_frame[0]);
+
+              // Set pred for Y plane
+              setup_pred_plane(&pd->pre[0], upsampled_ref->y_buffer,
+                               upsampled_ref->y_stride,
+                               (mi_row << 3), (mi_col << 3),
+                               NULL, pd->subsampling_x, pd->subsampling_y);
+
+              // adjust pred pointer for this block
+              pd->pre[0].buf =
+                  &pd->pre[0].buf[(vp10_raster_block_offset(BLOCK_8X8, i,
+                  pd->pre[0].stride)) << 3];
+
+              cpi->find_fractional_mv_step(
+                  x,
+                  new_mv,
+                  &bsi->ref_mv[0]->as_mv,
+                  cm->allow_high_precision_mv,
+                  x->errorperbit, &cpi->fn_ptr[bsize],
+                  cpi->sf.mv.subpel_force_stop,
+                  cpi->sf.mv.subpel_iters_per_step,
+                  cond_cost_list(cpi, cost_list),
+                  x->nmvjointcost, x->mvcost,
+                  &distortion,
+                  &x->pred_sse[mbmi->ref_frame[0]],
+                  NULL, pw, ph, 1);
+
+              // Restore the reference frames.
+              pd->pre[0] = backup_pred;
+            } else {
+              cpi->find_fractional_mv_step(
+                  x,
+                  new_mv,
+                  &bsi->ref_mv[0]->as_mv,
+                  cm->allow_high_precision_mv,
+                  x->errorperbit, &cpi->fn_ptr[bsize],
+                  cpi->sf.mv.subpel_force_stop,
+                  cpi->sf.mv.subpel_iters_per_step,
+                  cond_cost_list(cpi, cost_list),
+                  x->nmvjointcost, x->mvcost,
+                  &distortion,
+                  &x->pred_sse[mbmi->ref_frame[0]],
+                  NULL, 0, 0, 0);
+            }
 
             // save motion search result for use in compound prediction
 #if CONFIG_EXT_INTER
@@ -5637,43 +5637,43 @@ static void single_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
 
   if (bestsme < INT_MAX) {
     int dis;  /* TODO: use dis in distortion calculation later. */
-#if CONFIG_AFFINE_MOTION
-    const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
-    const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
-    // Use up-sampled reference frames.
-    struct macroblockd_plane *const pd = &xd->plane[0];
-    struct buf_2d backup_pred = pd->pre[ref_idx];
-    const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref);
-
-    // Set pred for Y plane
-    setup_pred_plane(&pd->pre[ref_idx], upsampled_ref->y_buffer,
-                     upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3),
-                     NULL, pd->subsampling_x, pd->subsampling_y);
-
-    bestsme = cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
-                                           cm->allow_high_precision_mv,
-                                           x->errorperbit,
-                                           &cpi->fn_ptr[bsize],
-                                           cpi->sf.mv.subpel_force_stop,
-                                           cpi->sf.mv.subpel_iters_per_step,
-                                           cond_cost_list(cpi, cost_list),
-                                           x->nmvjointcost, x->mvcost,
-                                           &dis, &x->pred_sse[ref], NULL,
-                                           pw, ph, 1);
-
-    // Restore the reference frames.
-    pd->pre[ref_idx] = backup_pred;
-#else
-    cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
-                                 cm->allow_high_precision_mv,
-                                 x->errorperbit,
-                                 &cpi->fn_ptr[bsize],
-                                 cpi->sf.mv.subpel_force_stop,
-                                 cpi->sf.mv.subpel_iters_per_step,
-                                 cond_cost_list(cpi, cost_list),
-                                 x->nmvjointcost, x->mvcost,
-                                 &dis, &x->pred_sse[ref], NULL, 0, 0);
-#endif
+    if (cpi->sf.use_upsampled_references) {
+      const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
+      const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
+      // Use up-sampled reference frames.
+      struct macroblockd_plane *const pd = &xd->plane[0];
+      struct buf_2d backup_pred = pd->pre[ref_idx];
+      const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref);
+
+      // Set pred for Y plane
+      setup_pred_plane(&pd->pre[ref_idx], upsampled_ref->y_buffer,
+                       upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3),
+                       NULL, pd->subsampling_x, pd->subsampling_y);
+
+      bestsme = cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
+                                             cm->allow_high_precision_mv,
+                                             x->errorperbit,
+                                             &cpi->fn_ptr[bsize],
+                                             cpi->sf.mv.subpel_force_stop,
+                                             cpi->sf.mv.subpel_iters_per_step,
+                                             cond_cost_list(cpi, cost_list),
+                                             x->nmvjointcost, x->mvcost,
+                                             &dis, &x->pred_sse[ref], NULL,
+                                             pw, ph, 1);
+
+      // Restore the reference frames.
+      pd->pre[ref_idx] = backup_pred;
+    } else {
+      cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
+                                   cm->allow_high_precision_mv,
+                                   x->errorperbit,
+                                   &cpi->fn_ptr[bsize],
+                                   cpi->sf.mv.subpel_force_stop,
+                                   cpi->sf.mv.subpel_iters_per_step,
+                                   cond_cost_list(cpi, cost_list),
+                                   x->nmvjointcost, x->mvcost,
+                                   &dis, &x->pred_sse[ref], NULL, 0, 0, 0);
+    }
   }
   *rate_mv = vp10_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
                              x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
diff --git a/vp10/encoder/rdopt.h b/vp10/encoder/rdopt.h
index f4d9b95537..174ad4dadb 100644
--- a/vp10/encoder/rdopt.h
+++ b/vp10/encoder/rdopt.h
@@ -103,13 +103,16 @@ void vp10_build_prediction_by_left_preds(VP10_COMP *cpi,
                                          int tmp_stride[MAX_MB_PLANE]);
 #endif  // CONFIG_OBMC
 
-#if CONFIG_AFFINE_MOTION
 static INLINE const YV12_BUFFER_CONFIG *get_upsampled_ref(VP10_COMP *cpi,
                                                           const int ref) {
   // Use up-sampled reference frames.
   int ref_idx = 0;
   if (ref == LAST_FRAME)
+#if CONFIG_EXT_REFS
+    ref_idx = cpi->lst_fb_idxes[ref - LAST_FRAME];
+#else
     ref_idx = cpi->lst_fb_idx;
+#endif
   else if (ref == GOLDEN_FRAME)
     ref_idx = cpi->gld_fb_idx;
   else if (ref == ALTREF_FRAME)
@@ -117,7 +120,6 @@ static INLINE const YV12_BUFFER_CONFIG *get_upsampled_ref(VP10_COMP *cpi,
 
   return &cpi->upsampled_ref_bufs[cpi->upsampled_ref_idx[ref_idx]].buf;
 }
-#endif
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/vp10/encoder/speed_features.c b/vp10/encoder/speed_features.c
index ec8acdae29..169ae2cfc4 100644
--- a/vp10/encoder/speed_features.c
+++ b/vp10/encoder/speed_features.c
@@ -177,6 +177,7 @@ static void set_good_speed_feature(VP10_COMP *cpi, VP10_COMMON *cm,
     sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
     sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
     sf->allow_partition_search_skip = 1;
+    sf->use_upsampled_references = 0;
 #if CONFIG_EXT_TX
     sf->tx_type_search = PRUNE_TWO;
 #endif
@@ -279,6 +280,7 @@ static void set_rt_speed_feature(VP10_COMP *cpi, SPEED_FEATURES *sf,
   sf->use_fast_coef_costing = 1;
   sf->allow_exhaustive_searches = 0;
   sf->exhaustive_searches_thresh = INT_MAX;
+  sf->use_upsampled_references = 0;
 
   // Use transform domain distortion computation
   // Note var-tx expt always uses pixel domain distortion.
@@ -495,6 +497,11 @@ void vp10_set_speed_features_framesize_independent(VP10_COMP *cpi) {
   sf->disable_filter_search_var_thresh = 0;
   sf->adaptive_interp_filter_search = 0;
   sf->allow_partition_search_skip = 0;
+#if CONFIG_EXT_REFS
+  sf->use_upsampled_references = 0;
+#else
+  sf->use_upsampled_references = 1;
+#endif
 
   for (i = 0; i < TX_SIZES; i++) {
     sf->intra_y_mode_mask[i] = INTRA_ALL;
diff --git a/vp10/encoder/speed_features.h b/vp10/encoder/speed_features.h
index fbb69882c4..02ee204a97 100644
--- a/vp10/encoder/speed_features.h
+++ b/vp10/encoder/speed_features.h
@@ -479,6 +479,9 @@ typedef struct SPEED_FEATURES {
   // Fast approximation of vp10_model_rd_from_var_lapndz
   int simple_model_rd_from_var;
 
+  // Do sub-pixel search in up-sampled reference frames
+  int use_upsampled_references;
+
   // Whether to compute distortion in the image domain (slower but
   // more accurate), or in the transform domain (faster but less acurate).
   int use_transform_domain_distortion;
diff --git a/vp10/encoder/temporal_filter.c b/vp10/encoder/temporal_filter.c
index 3e1246a807..b3cf8999b4 100644
--- a/vp10/encoder/temporal_filter.c
+++ b/vp10/encoder/temporal_filter.c
@@ -320,11 +320,7 @@ static int temporal_filter_find_matching_mb_c(VP10_COMP *cpi,
                                          0, mv_sf->subpel_iters_per_step,
                                          cond_cost_list(cpi, cost_list),
                                          NULL, NULL,
-#if CONFIG_AFFINE_MOTION
                                          &distortion, &sse, NULL, 0, 0, 0);
-#else
-                                         &distortion, &sse, NULL, 0, 0);
-#endif
 
   // Restore input state
   x->plane[0].src = src;
diff --git a/vpx_dsp/variance.c b/vpx_dsp/variance.c
index 169769a1ad..ee1e3054a3 100644
--- a/vpx_dsp/variance.c
+++ b/vpx_dsp/variance.c
@@ -267,7 +267,6 @@ void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred,
   }
 }
 
-#if CONFIG_AFFINE_MOTION
 // Get pred block from up-sampled reference.
 void vpx_upsampled_pred_c(uint8_t *comp_pred,
                           int width, int height,
@@ -300,7 +299,6 @@ void vpx_comp_avg_upsampled_pred_c(uint8_t *comp_pred, const uint8_t *pred,
       ref += stride;
     }
 }
-#endif
 
 #if CONFIG_VP9_HIGHBITDEPTH
 static void highbd_variance64(const uint8_t *a8, int  a_stride,
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 583d9fa895..e5c002a70f 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -1233,12 +1233,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
 #
 # ...
 #
-if (vpx_config("CONFIG_AFFINE_MOTION") eq "yes") {
-  add_proto qw/void vpx_upsampled_pred/, "uint8_t *comp_pred, int width, int height, const uint8_t *ref, int ref_stride";
-    specialize qw/vpx_upsampled_pred sse2/;
-  add_proto qw/void vpx_comp_avg_upsampled_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
-    specialize qw/vpx_comp_avg_upsampled_pred sse2/;
-}
+add_proto qw/void vpx_upsampled_pred/, "uint8_t *comp_pred, int width, int height, const uint8_t *ref, int ref_stride";
+specialize qw/vpx_upsampled_pred sse2/;
+add_proto qw/void vpx_comp_avg_upsampled_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
+specialize qw/vpx_comp_avg_upsampled_pred sse2/;
 
 #
 # ...
diff --git a/vpx_dsp/x86/variance_sse2.c b/vpx_dsp/x86/variance_sse2.c
index 7943c843c4..63fc1e6741 100644
--- a/vpx_dsp/x86/variance_sse2.c
+++ b/vpx_dsp/x86/variance_sse2.c
@@ -476,7 +476,6 @@ FNS(ssse3, ssse3);
 #undef FN
 #endif  // CONFIG_USE_X86INC
 
-#if CONFIG_AFFINE_MOTION
 void vpx_upsampled_pred_sse2(uint8_t *comp_pred,
                              int width, int height,
                              const uint8_t *ref,  int ref_stride) {
@@ -703,4 +702,3 @@ void vpx_comp_avg_upsampled_pred_sse2(uint8_t *comp_pred, const uint8_t *pred,
       }
     }
 }
-#endif
-- 
GitLab