vp9_pickmode.c 56.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11
/*
 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
12 13 14
#include <limits.h>
#include <math.h>
#include <stdio.h>
15

16 17 18 19
#include "./vp9_rtcd.h"

#include "vpx_mem/vpx_mem.h"

20
#include "vp9/common/vp9_blockd.h"
21 22
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_mvref_common.h"
23
#include "vp9/common/vp9_pred_common.h"
24 25
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
26

27
#include "vp9/encoder/vp9_cost.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
28
#include "vp9/encoder/vp9_encoder.h"
29
#include "vp9/encoder/vp9_pickmode.h"
30
#include "vp9/encoder/vp9_ratectrl.h"
31
#include "vp9/encoder/vp9_rd.h"
32

33 34 35 36 37 38
typedef struct {
  uint8_t *data;
  int stride;
  int in_use;
} PRED_BUFFER;

39
static int mv_refs_rt(const VP9_COMMON *cm, const MACROBLOCKD *xd,
40 41 42 43
                      const TileInfo *const tile,
                      MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
                      int_mv *mv_ref_list,
                      int mi_row, int mi_col) {
44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
  const int *ref_sign_bias = cm->ref_frame_sign_bias;
  int i, refmv_count = 0;

  const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type];

  int different_ref_found = 0;
  int context_counter = 0;
  int const_motion = 0;

  // Blank the reference vector list
  vpx_memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES);

  // The nearest 2 blocks are treated differently
  // if the size < 8x8 we get the mv from the bmi substructure,
  // and we also need to keep a mode count.
  for (i = 0; i < 2; ++i) {
    const POSITION *const mv_ref = &mv_ref_search[i];
    if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
      const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row *
hkuang's avatar
hkuang committed
63
                                                   xd->mi_stride].src_mi;
64 65 66 67 68 69
      const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
      // Keep counts for entropy encoding.
      context_counter += mode_2_counter[candidate->mode];
      different_ref_found = 1;

      if (candidate->ref_frame[0] == ref_frame)
70 71
        ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, -1),
                        refmv_count, mv_ref_list, Done);
72 73 74 75 76 77 78 79 80 81 82 83
    }
  }

  const_motion = 1;

  // Check the rest of the neighbors in much the same way
  // as before except we don't need to keep track of sub blocks or
  // mode counts.
  for (; i < MVREF_NEIGHBOURS && !refmv_count; ++i) {
    const POSITION *const mv_ref = &mv_ref_search[i];
    if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
      const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row *
hkuang's avatar
hkuang committed
84
                                                    xd->mi_stride].src_mi->mbmi;
85 86 87
      different_ref_found = 1;

      if (candidate->ref_frame[0] == ref_frame)
88
        ADD_MV_REF_LIST(candidate->mv[0], refmv_count, mv_ref_list, Done);
89 90 91 92 93 94 95 96 97 98 99
    }
  }

  // Since we couldn't find 2 mvs from the same reference frame
  // go back through the neighbors and find motion vectors from
  // different reference frames.
  if (different_ref_found && !refmv_count) {
    for (i = 0; i < MVREF_NEIGHBOURS; ++i) {
      const POSITION *mv_ref = &mv_ref_search[i];
      if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
        const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row
hkuang's avatar
hkuang committed
100
                                              * xd->mi_stride].src_mi->mbmi;
101 102

        // If the candidate is INTRA we don't want to consider its mv.
103 104
        IF_DIFF_REF_FRAME_ADD_MV(candidate, ref_frame, ref_sign_bias,
                                 refmv_count, mv_ref_list, Done);
105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
      }
    }
  }

 Done:

  mi->mbmi.mode_context[ref_frame] = counter_to_context[context_counter];

  // Clamp vectors
  for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i)
    clamp_mv_ref(&mv_ref_list[i].as_mv, xd);

  return const_motion;
}

120 121 122 123
static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                  BLOCK_SIZE bsize, int mi_row, int mi_col,
                                  int_mv *tmp_mv, int *rate_mv,
                                  int64_t best_rd_sofar) {
124
  MACROBLOCKD *xd = &x->e_mbd;
hkuang's avatar
hkuang committed
125
  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
126
  struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
127 128
  const int step_param = cpi->sf.mv.fullpel_search_step_param;
  const int sadpb = x->sadperbit16;
129
  MV mvp_full;
130
  const int ref = mbmi->ref_frame[0];
131
  const MV ref_mv = mbmi->ref_mvs[ref][0].as_mv;
132 133 134 135 136 137 138
  int dis;
  int rate_mode;
  const int tmp_col_min = x->mv_col_min;
  const int tmp_col_max = x->mv_col_max;
  const int tmp_row_min = x->mv_row_min;
  const int tmp_row_max = x->mv_row_max;
  int rv = 0;
139
  int cost_list[5];
140 141
  const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi,
                                                                        ref);
142 143 144 145 146 147 148
  if (scaled_ref_frame) {
    int i;
    // Swap out the reference frame for a version that's been scaled to
    // match the resolution of the current frame, allowing the existing
    // motion search code to be used without additional modifications.
    for (i = 0; i < MAX_MB_PLANE; i++)
      backup_yv12[i] = xd->plane[i].pre[0];
149
    vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
150
  }
151
  vp9_set_mv_search_range(x, &ref_mv);
152

Yaowu Xu's avatar
Yaowu Xu committed
153 154 155 156
  assert(x->mv_best_ref_index[ref] <= 2);
  if (x->mv_best_ref_index[ref] < 2)
    mvp_full = mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_mv;
  else
Dmitry Kovalev's avatar
Dmitry Kovalev committed
157
    mvp_full = x->pred_mv[ref];
158 159 160 161

  mvp_full.col >>= 3;
  mvp_full.row >>= 3;

162
  vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,
163
                        cond_cost_list(cpi, cost_list),
164
                        &ref_mv, &tmp_mv->as_mv, INT_MAX, 0);
165

166 167 168 169 170
  x->mv_col_min = tmp_col_min;
  x->mv_col_max = tmp_col_max;
  x->mv_row_min = tmp_row_min;
  x->mv_row_max = tmp_row_max;

171 172 173
  // calculate the bit cost on motion vector
  mvp_full.row = tmp_mv->as_mv.row * 8;
  mvp_full.col = tmp_mv->as_mv.col * 8;
174

175 176
  *rate_mv = vp9_mv_bit_cost(&mvp_full, &ref_mv,
                             x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
177

178 179 180 181 182 183 184 185 186 187 188 189
  rate_mode = cpi->inter_mode_cost[mbmi->mode_context[ref]]
                                  [INTER_OFFSET(NEWMV)];
  rv = !(RDCOST(x->rdmult, x->rddiv, (*rate_mv + rate_mode), 0) >
         best_rd_sofar);

  if (rv) {
    cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
                                 cpi->common.allow_high_precision_mv,
                                 x->errorperbit,
                                 &cpi->fn_ptr[bsize],
                                 cpi->sf.mv.subpel_force_stop,
                                 cpi->sf.mv.subpel_iters_per_step,
190
                                 cond_cost_list(cpi, cost_list),
191
                                 x->nmvjointcost, x->mvcost,
192
                                 &dis, &x->pred_sse[ref], NULL, 0, 0);
193 194
    *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
                               x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
195 196 197 198 199 200 201
  }

  if (scaled_ref_frame) {
    int i;
    for (i = 0; i < MAX_MB_PLANE; i++)
      xd->plane[i].pre[0] = backup_yv12[i];
  }
202
  return rv;
203 204
}

205 206
static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
                              MACROBLOCK *x, MACROBLOCKD *xd,
207 208
                              int *out_rate_sum, int64_t *out_dist_sum,
                              unsigned int *var_y, unsigned int *sse_y) {
209 210 211 212
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  unsigned int sse;
213 214
  int rate;
  int64_t dist;
215 216
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
217 218
  const int64_t dc_thr = p->quant_thred[0] >> 6;
  const int64_t ac_thr = p->quant_thred[1] >> 6;
219 220
  const uint32_t dc_quant = pd->dequant[0];
  const uint32_t ac_quant = pd->dequant[1];
221 222
  unsigned int var = cpi->fn_ptr[bsize].vf(p->src.buf, p->src.stride,
                                           pd->dst.buf, pd->dst.stride, &sse);
223 224
  int skip_dc = 0;

225 226 227
  *var_y = var;
  *sse_y = sse;

228 229
  if (cpi->common.tx_mode == TX_MODE_SELECT) {
    if (sse > (var << 2))
hkuang's avatar
hkuang committed
230
      xd->mi[0].src_mi->mbmi.tx_size =
231 232
          MIN(max_txsize_lookup[bsize],
              tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
233
    else
hkuang's avatar
hkuang committed
234
      xd->mi[0].src_mi->mbmi.tx_size = TX_8X8;
235

236 237
    if (cpi->sf.partition_search_type == VAR_BASED_PARTITION) {
      if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
238
          cyclic_refresh_segment_id_boosted(xd->mi[0].src_mi->mbmi.segment_id))
239 240 241 242
        xd->mi[0].src_mi->mbmi.tx_size = TX_8X8;
      else if (xd->mi[0].src_mi->mbmi.tx_size > TX_16X16)
        xd->mi[0].src_mi->mbmi.tx_size = TX_16X16;
    }
243
  } else {
hkuang's avatar
hkuang committed
244
    xd->mi[0].src_mi->mbmi.tx_size =
245 246
        MIN(max_txsize_lookup[bsize],
            tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
247 248
  }

249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265
  // Evaluate if the partition block is a skippable block in Y plane.
  {
    const BLOCK_SIZE unit_size =
        txsize_to_bsize[xd->mi[0].src_mi->mbmi.tx_size];
    const unsigned int num_blk_log2 =
        (b_width_log2_lookup[bsize] - b_width_log2_lookup[unit_size]) +
        (b_height_log2_lookup[bsize] - b_height_log2_lookup[unit_size]);
    const unsigned int sse_tx = sse >> num_blk_log2;
    const unsigned int var_tx = var >> num_blk_log2;

    x->skip_txfm[0] = 0;
    // Check if all ac coefficients can be quantized to zero.
    if (var_tx < ac_thr || var == 0) {
      x->skip_txfm[0] = 2;
      // Check if dc coefficient can be quantized to zero.
      if (sse_tx - var_tx < dc_thr || sse == var)
        x->skip_txfm[0] = 1;
266 267 268
    } else {
      if (sse_tx - var_tx < dc_thr || sse == var)
        skip_dc = 1;
269 270 271 272 273 274 275 276 277
    }
  }

  if (x->skip_txfm[0] == 1) {
    *out_rate_sum = 0;
    *out_dist_sum = sse << 4;
    return;
  }

278
  if (!skip_dc) {
279
#if CONFIG_VP9_HIGHBITDEPTH
280 281 282 283 284 285 286 287
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
                                   dc_quant >> (xd->bd - 5), &rate, &dist);
    } else {
      vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
                                   dc_quant >> 3, &rate, &dist);
    }
#else
288
    vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
289 290
                                 dc_quant >> 3, &rate, &dist);
#endif  // CONFIG_VP9_HIGHBITDEPTH
291
  }
292

293 294 295 296 297 298 299
  if (!skip_dc) {
    *out_rate_sum = rate >> 1;
    *out_dist_sum = dist << 3;
  } else {
    *out_rate_sum = 0;
    *out_dist_sum = (sse - var) << 4;
  }
300

301 302
#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
303
    vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize],
304
                                 ac_quant >> (xd->bd - 5), &rate, &dist);
305
  } else {
306
    vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize],
307
                                 ac_quant >> 3, &rate, &dist);
308 309
  }
#else
310
  vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize],
311
                               ac_quant >> 3, &rate, &dist);
312 313
#endif  // CONFIG_VP9_HIGHBITDEPTH

314 315
  *out_rate_sum += rate;
  *out_dist_sum += dist << 4;
316 317
}

318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416
static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist,
                      int *skippable, int64_t *sse, int plane,
                      BLOCK_SIZE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *xd = &x->e_mbd;
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const struct macroblock_plane *const p = &x->plane[plane];
  const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
  const int step = 1 << (tx_size << 1);
  const int block_step = (1 << tx_size);
  int block = 0, r, c;
  int shift = tx_size == TX_32X32 ? 0 : 2;
  const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? 0 :
      xd->mb_to_right_edge >> (5 + pd->subsampling_x));
  const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 0 :
      xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));

#if CONFIG_VP9_HIGHBITDEPTH
  unsigned int var_y, sse_y;
  model_rd_for_sb_y(cpi, bsize, x, xd, rate, dist, &var_y, &sse_y);
  *sse = INT_MAX;
  *skippable = 0;
  return;
#else
  (void)cpi;
#endif

  vp9_subtract_plane(x, bsize, plane);

  *skippable = 1;
  *rate = 0;
  *dist = 0;
  *sse = 0;

  // Keep track of the row and column of the blocks we use so that we know
  // if we are in the unrestricted motion border.
  for (r = 0; r < max_blocks_high; r += block_step) {
    for (c = 0; c < num_4x4_w; c += block_step) {
      if (c < max_blocks_wide) {
        const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
        tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
        tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
        tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
        uint16_t *const eob = &p->eobs[block];
        const int diff_stride = 4 * num_4x4_blocks_wide_lookup[bsize];
        int i, j;
        const int16_t *src_diff;
        int64_t this_sse;
        txfrm_block_to_raster_xy(bsize, tx_size, block, &i, &j);
        src_diff = &p->src_diff[4 * (j * diff_stride + i)];

        switch (tx_size) {
          case TX_32X32:
            vp9_fdct32x32_rd(src_diff, coeff, diff_stride);
            vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin,
                                  p->round_fp, p->quant_fp, p->quant_shift,
                                  qcoeff, dqcoeff, pd->dequant, eob,
                                  scan_order->scan, scan_order->iscan);
            break;
          case TX_16X16:
            vp9_fdct16x16(src_diff, coeff, diff_stride);
            vp9_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
                            p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
                            pd->dequant, eob,
                            scan_order->scan, scan_order->iscan);
            break;
          case TX_8X8:
            vp9_hadamard_8x8(src_diff, diff_stride, (int16_t *)coeff);
            vp9_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp,
                            p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
                            pd->dequant, eob,
                            scan_order->scan, scan_order->iscan);
            break;
          case TX_4X4:
            x->fwd_txm4x4(src_diff, coeff, diff_stride);
            vp9_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
                            p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
                            pd->dequant, eob,
                            scan_order->scan, scan_order->iscan);
            break;
          default:
            assert(0);
            break;
        }

        *dist += vp9_block_error(coeff, dqcoeff, step << 4, &this_sse) >> shift;
        *rate += (int)vp9_satd((const int16_t *)qcoeff, step << 4);

        *sse += (this_sse >> shift);
        *skippable &= (*eob == 0);
      }
      block += step;
    }
  }

  *rate <<= 8;
  *rate *= 6;
}

417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481
static void model_rd_for_sb_uv(VP9_COMP *cpi, BLOCK_SIZE bsize,
                               MACROBLOCK *x, MACROBLOCKD *xd,
                               int *out_rate_sum, int64_t *out_dist_sum,
                               unsigned int *var_y, unsigned int *sse_y) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  unsigned int sse;
  int rate;
  int64_t dist;
  int i;

  *out_rate_sum = 0;
  *out_dist_sum = 0;

  for (i = 1; i <= 2; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
    const uint32_t dc_quant = pd->dequant[0];
    const uint32_t ac_quant = pd->dequant[1];
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
    unsigned int var;

    if (!x->color_sensitivity[i - 1])
      continue;

    var = cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
                             pd->dst.buf, pd->dst.stride, &sse);
    *var_y += var;
    *sse_y += sse;

  #if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bs],
                                   dc_quant >> (xd->bd - 5), &rate, &dist);
    } else {
      vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bs],
                                   dc_quant >> 3, &rate, &dist);
    }
  #else
    vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bs],
                                 dc_quant >> 3, &rate, &dist);
  #endif  // CONFIG_VP9_HIGHBITDEPTH

    *out_rate_sum += rate >> 1;
    *out_dist_sum += dist << 3;

  #if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bs],
                                   ac_quant >> (xd->bd - 5), &rate, &dist);
    } else {
      vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bs],
                                   ac_quant >> 3, &rate, &dist);
    }
  #else
    vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bs],
                                 ac_quant >> 3, &rate, &dist);
  #endif  // CONFIG_VP9_HIGHBITDEPTH

    *out_rate_sum += rate;
    *out_dist_sum += dist << 4;
  }
}

482 483 484 485 486 487 488 489 490 491 492 493 494
static int get_pred_buffer(PRED_BUFFER *p, int len) {
  int i;

  for (i = 0; i < len; i++) {
    if (!p[i].in_use) {
      p[i].in_use = 1;
      return i;
    }
  }
  return -1;
}

static void free_pred_buffer(PRED_BUFFER *p) {
495 496
  if (p != NULL)
    p->in_use = 0;
497 498
}

499 500 501 502 503 504 505 506
static void encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x,
                                 BLOCK_SIZE bsize, int mi_row, int mi_col,
                                 MV_REFERENCE_FRAME ref_frame,
                                 PREDICTION_MODE this_mode,
                                 unsigned int var_y, unsigned int sse_y,
                                 struct buf_2d yv12_mb[][MAX_MB_PLANE],
                                 int *rate, int64_t *dist) {
  MACROBLOCKD *xd = &x->e_mbd;
hkuang's avatar
hkuang committed
507
  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
508 509 510 511 512 513 514 515 516 517 518 519 520 521 522

  const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
  unsigned int var = var_y, sse = sse_y;
  // Skipping threshold for ac.
  unsigned int thresh_ac;
  // Skipping threshold for dc.
  unsigned int thresh_dc;
  if (x->encode_breakout > 0) {
    // Set a maximum for threshold to avoid big PSNR loss in low bit rate
    // case. Use extreme low threshold for static frames to limit
    // skipping.
    const unsigned int max_thresh = 36000;
    // The encode_breakout input
    const unsigned int min_thresh =
        MIN(((unsigned int)x->encode_breakout << 4), max_thresh);
523
#if CONFIG_VP9_HIGHBITDEPTH
524
    const int shift = (xd->bd << 1) - 16;
525
#endif
526 527

    // Calculate threshold according to dequant value.
528
    thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) >> 3;
529 530 531 532 533
#if CONFIG_VP9_HIGHBITDEPTH
    if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && shift > 0) {
      thresh_ac = ROUND_POWER_OF_TWO(thresh_ac, shift);
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
534 535 536 537
    thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);

    // Adjust ac threshold according to partition size.
    thresh_ac >>=
538
        8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
539 540

    thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
541 542 543 544 545
#if CONFIG_VP9_HIGHBITDEPTH
    if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && shift > 0) {
      thresh_dc = ROUND_POWER_OF_TWO(thresh_dc, shift);
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569
  } else {
    thresh_ac = 0;
    thresh_dc = 0;
  }

  // Y skipping condition checking for ac and dc.
  if (var <= thresh_ac && (sse - var) <= thresh_dc) {
    unsigned int sse_u, sse_v;
    unsigned int var_u, var_v;

    // Skip UV prediction unless breakout is zero (lossless) to save
    // computation with low impact on the result
    if (x->encode_breakout == 0) {
      xd->plane[1].pre[0] = yv12_mb[ref_frame][1];
      xd->plane[2].pre[0] = yv12_mb[ref_frame][2];
      vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, bsize);
    }

    var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,
                                    x->plane[1].src.stride,
                                    xd->plane[1].dst.buf,
                                    xd->plane[1].dst.stride, &sse_u);

    // U skipping condition checking
570
    if (((var_u << 2) <= thresh_ac) && (sse_u - var_u <= thresh_dc)) {
571 572 573 574 575 576
      var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
                                      x->plane[2].src.stride,
                                      xd->plane[2].dst.buf,
                                      xd->plane[2].dst.stride, &sse_v);

      // V skipping condition checking
577
      if (((var_v << 2) <= thresh_ac) && (sse_v - var_v <= thresh_dc)) {
578 579 580 581
        x->skip = 1;

        // The cost of skip bit needs to be added.
        *rate = cpi->inter_mode_cost[mbmi->mode_context[ref_frame]]
582
                                    [INTER_OFFSET(this_mode)];
583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598

        // More on this part of rate
        // rate += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);

        // Scaling factor for SSE from spatial domain to frequency
        // domain is 16. Adjust distortion accordingly.
        // TODO(yunqingwang): In this function, only y-plane dist is
        // calculated.
        *dist = (sse << 4);  // + ((sse_u + sse_v) << 4);

        // *disable_skip = 1;
      }
    }
  }
}

599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622
struct estimate_block_intra_args {
  VP9_COMP *cpi;
  MACROBLOCK *x;
  PREDICTION_MODE mode;
  int rate;
  int64_t dist;
};

static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
                                 TX_SIZE tx_size, void *arg) {
  struct estimate_block_intra_args* const args = arg;
  VP9_COMP *const cpi = args->cpi;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const BLOCK_SIZE bsize_tx = txsize_to_bsize[tx_size];
  uint8_t *const src_buf_base = p->src.buf;
  uint8_t *const dst_buf_base = pd->dst.buf;
  const int src_stride = p->src.stride;
  const int dst_stride = pd->dst.stride;
  int i, j;
  int rate;
  int64_t dist;
623 624 625
  int64_t this_sse;
  int is_skippable;

626 627 628 629 630 631 632 633
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
  assert(plane == 0);
  (void) plane;

  p->src.buf = &src_buf_base[4 * (j * src_stride + i)];
  pd->dst.buf = &dst_buf_base[4 * (j * dst_stride + i)];
  // Use source buffer as an approximation for the fully reconstructed buffer.
  vp9_predict_intra_block(xd, block >> (2 * tx_size),
634
                          b_width_log2_lookup[plane_bsize],
635
                          tx_size, args->mode,
636 637
                          x->skip_encode ? p->src.buf : pd->dst.buf,
                          x->skip_encode ? src_stride : dst_stride,
638 639
                          pd->dst.buf, dst_stride,
                          i, j, 0);
640 641 642 643 644 645 646 647 648 649

  // TODO(jingning): This needs further refactoring.
  block_yrd(cpi, x, &rate, &dist, &is_skippable, &this_sse, 0,
            bsize_tx, tx_size);
  x->skip_txfm[0] = is_skippable;
  if (is_skippable)
    rate = vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 1);
  else
    rate += vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 0);

650 651 652 653 654 655
  p->src.buf = src_buf_base;
  pd->dst.buf = dst_buf_base;
  args->rate += rate;
  args->dist += dist;
}

656
static const THR_MODES mode_idx[MAX_REF_FRAMES - 1][4] = {
657
  {THR_DC, THR_H_PRED, THR_V_PRED, THR_TM},
658 659 660 661
  {THR_NEARESTMV, THR_NEARMV, THR_ZEROMV, THR_NEWMV},
  {THR_NEARESTG, THR_NEARG, THR_ZEROG, THR_NEWG},
};

662 663 664 665
static const PREDICTION_MODE intra_mode_list[] = {
  DC_PRED, V_PRED, H_PRED, TM_PRED
};

666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716
void vp9_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost,
                         BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
  RD_COST this_rdc, best_rdc;
  PREDICTION_MODE this_mode;
  struct estimate_block_intra_args args = { cpi, x, DC_PRED, 0, 0 };
  const TX_SIZE intra_tx_size =
      MIN(max_txsize_lookup[bsize],
          tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
  MODE_INFO *const mic = xd->mi[0].src_mi;
  int *bmode_costs;
  const MODE_INFO *above_mi = xd->mi[-xd->mi_stride].src_mi;
  const MODE_INFO *left_mi = xd->left_available ? xd->mi[-1].src_mi : NULL;
  const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
  const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
  bmode_costs = cpi->y_mode_costs[A][L];

  (void) ctx;
  vp9_rd_cost_reset(&best_rdc);
  vp9_rd_cost_reset(&this_rdc);

  mbmi->ref_frame[0] = INTRA_FRAME;
  mbmi->mv[0].as_int = INVALID_MV;
  mbmi->uv_mode = DC_PRED;
  vpx_memset(x->skip_txfm, 0, sizeof(x->skip_txfm));

  // Change the limit of this loop to add other intra prediction
  // mode tests.
  for (this_mode = DC_PRED; this_mode <= H_PRED; ++this_mode) {
    args.mode = this_mode;
    args.rate = 0;
    args.dist = 0;
    mbmi->tx_size = intra_tx_size;
    vp9_foreach_transformed_block_in_plane(xd, bsize, 0,
                                           estimate_block_intra, &args);
    this_rdc.rate = args.rate;
    this_rdc.dist = args.dist;
    this_rdc.rate += bmode_costs[this_mode];
    this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
                             this_rdc.rate, this_rdc.dist);

    if (this_rdc.rdcost < best_rdc.rdcost) {
      best_rdc = this_rdc;
      mbmi->mode = this_mode;
    }
  }

  *rd_cost = best_rdc;
}

717 718 719 720 721 722 723 724 725
typedef struct {
  MV_REFERENCE_FRAME ref_frame;
  PREDICTION_MODE pred_mode;
} REF_MODE;

#define RT_INTER_MODES 8
static const REF_MODE ref_mode_set[RT_INTER_MODES] = {
    {LAST_FRAME, ZEROMV},
    {LAST_FRAME, NEARESTMV},
726
    {GOLDEN_FRAME, ZEROMV},
727 728 729 730 731 732 733
    {LAST_FRAME, NEARMV},
    {LAST_FRAME, NEWMV},
    {GOLDEN_FRAME, NEARESTMV},
    {GOLDEN_FRAME, NEARMV},
    {GOLDEN_FRAME, NEWMV}
};

734 735
// TODO(jingning) placeholder for inter-frame non-RD mode decision.
// this needs various further optimizations. to be continued..
736
void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
737
                         TileDataEnc *tile_data,
738 739
                         int mi_row, int mi_col, RD_COST *rd_cost,
                         BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
740
  VP9_COMMON *const cm = &cpi->common;
741
  TileInfo *const tile_info = &tile_data->tile_info;
742
  MACROBLOCKD *const xd = &x->e_mbd;
hkuang's avatar
hkuang committed
743
  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
744
  struct macroblockd_plane *const pd = &xd->plane[0];
745
  PREDICTION_MODE best_mode = ZEROMV;
746
  MV_REFERENCE_FRAME ref_frame, best_ref_frame = LAST_FRAME;
747
  MV_REFERENCE_FRAME usable_ref_frame;
748
  TX_SIZE best_tx_size = TX_SIZES;
749
  INTERP_FILTER best_pred_filter = EIGHTTAP;
750 751 752 753
  int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
  struct buf_2d yv12_mb[4][MAX_MB_PLANE];
  static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
                                    VP9_ALT_FLAG };
754
  RD_COST this_rdc, best_rdc;
755
  uint8_t skip_txfm = 0, best_mode_skip_txfm = 0;
756 757 758
  // var_y and sse_y are saved to be used in skipping checking
  unsigned int var_y = UINT_MAX;
  unsigned int sse_y = UINT_MAX;
759 760 761
  // Reduce the intra cost penalty for small blocks (<=16x16).
  const int reduction_fac =
      (cpi->sf.partition_search_type == VAR_BASED_PARTITION &&
762
       bsize <= BLOCK_16X16) ? 2 : 0;
763
  const int intra_cost_penalty = vp9_get_intra_cost_penalty(
764
      cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth) >> reduction_fac;
765 766
  const int64_t inter_mode_thresh = RDCOST(x->rdmult, x->rddiv,
                                           intra_cost_penalty, 0);
767
  const int *const rd_threshes = cpi->rd.threshes[mbmi->segment_id][bsize];
768
  const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
Yaowu Xu's avatar
Yaowu Xu committed
769
  INTERP_FILTER filter_ref;
770
  const int bsl = mi_width_log2_lookup[bsize];
771
  const int pred_filter_search = cm->interp_filter == SWITCHABLE ?
772 773
      (((mi_row + mi_col) >> bsl) +
       get_chessboard_index(cm->current_video_frame)) & 0x1 : 0;
774
  int const_motion[MAX_REF_FRAMES] = { 0 };
775 776
  const int bh = num_4x4_blocks_high_lookup[bsize] << 2;
  const int bw = num_4x4_blocks_wide_lookup[bsize] << 2;
777 778
  // For speed 6, the result of interp filter is reused later in actual encoding
  // process.
779 780 781
  // tmp[3] points to dst buffer, and the other 3 point to allocated buffers.
  PRED_BUFFER tmp[4];
  DECLARE_ALIGNED_ARRAY(16, uint8_t, pred_buf, 3 * 64 * 64);
782 783 784
#if CONFIG_VP9_HIGHBITDEPTH
  DECLARE_ALIGNED_ARRAY(16, uint16_t, pred_buf_16, 3 * 64 * 64);
#endif
785 786 787
  struct buf_2d orig_dst = pd->dst;
  PRED_BUFFER *best_pred = NULL;
  PRED_BUFFER *this_mode_pred = NULL;
788
  const int pixels_in_block = bh * bw;
789
  int reuse_inter_pred = cpi->sf.reuse_inter_pred_sby && ctx->pred_pixel_ready;
790
  int ref_frame_skip_mask = 0;
791
  int idx;
792
  int best_pred_sad = INT_MAX;
793 794 795 796 797 798 799 800 801 802 803 804 805 806
  int ref_frame_cost[MAX_REF_FRAMES];
  vp9_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd);
  vp9_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
  vp9_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);

  ref_frame_cost[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);
  ref_frame_cost[LAST_FRAME] = ref_frame_cost[GOLDEN_FRAME] =
      ref_frame_cost[ALTREF_FRAME] = vp9_cost_bit(intra_inter_p, 1);

  ref_frame_cost[LAST_FRAME]   += vp9_cost_bit(ref_single_p1, 0);
  ref_frame_cost[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1);
  ref_frame_cost[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1);
  ref_frame_cost[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0);
  ref_frame_cost[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1);
807

808
  if (reuse_inter_pred) {
809
    int i;
810
    for (i = 0; i < 3; i++) {
811 812 813 814 815 816 817 818
#if CONFIG_VP9_HIGHBITDEPTH
      if (cm->use_highbitdepth)
        tmp[i].data = CONVERT_TO_BYTEPTR(&pred_buf_16[pixels_in_block * i]);
      else
        tmp[i].data = &pred_buf[pixels_in_block * i];
#else
      tmp[i].data = &pred_buf[pixels_in_block * i];
#endif  // CONFIG_VP9_HIGHBITDEPTH
819 820 821 822 823 824 825 826
      tmp[i].stride = bw;
      tmp[i].in_use = 0;
    }
    tmp[3].data = pd->dst.buf;
    tmp[3].stride = pd->dst.stride;
    tmp[3].in_use = 0;
  }

827 828
  x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
  x->skip = 0;
829

Yaowu Xu's avatar
Yaowu Xu committed
830 831 832 833 834 835 836
  if (xd->up_available)
    filter_ref = xd->mi[-xd->mi_stride].src_mi->mbmi.interp_filter;
  else if (xd->left_available)
    filter_ref = xd->mi[-1].src_mi->mbmi.interp_filter;
  else
    filter_ref = cm->interp_filter;

837
  // initialize mode decisions
838 839
  vp9_rd_cost_reset(&best_rdc);
  vp9_rd_cost_reset(rd_cost);
840 841 842 843
  mbmi->sb_type = bsize;
  mbmi->ref_frame[0] = NONE;
  mbmi->ref_frame[1] = NONE;
  mbmi->tx_size = MIN(max_txsize_lookup[bsize],
844
                      tx_mode_to_biggest_tx_size[cm->tx_mode]);
845

846 847 848
#if CONFIG_VP9_TEMPORAL_DENOISING
  vp9_denoiser_reset_frame_stats(ctx);
#endif
849

850
  if (cpi->rc.frames_since_golden == 0) {
851
    cpi->ref_frame_flags &= (~VP9_GOLD_FLAG);
852 853 854 855
    usable_ref_frame = LAST_FRAME;
  } else {
    usable_ref_frame = GOLDEN_FRAME;
  }
856

857
  for (ref_frame = LAST_FRAME; ref_frame <= usable_ref_frame; ++ref_frame) {
858 859
    const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);

860
    x->pred_mv_sad[ref_frame] = INT_MAX;
861 862 863
    frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
    frame_mv[ZEROMV][ref_frame].as_int = 0;

864
    if ((cpi->ref_frame_flags & flag_list[ref_frame]) && (yv12 != NULL)) {
865 866
      int_mv *const candidates = mbmi->ref_mvs[ref_frame];
      const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
867

868 869 870
      vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col,
                           sf, sf);

871
      if (cm->use_prev_frame_mvs)
872
        vp9_find_mv_refs(cm, xd, tile_info, xd->mi[0].src_mi, ref_frame,
873
                         candidates, mi_row, mi_col, NULL, NULL);
874
      else
875 876
        const_motion[ref_frame] = mv_refs_rt(cm, xd, tile_info,
                                             xd->mi[0].src_mi,
877 878 879 880 881 882 883 884 885 886
                                             ref_frame, candidates,
                                             mi_row, mi_col);

      vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
                            &frame_mv[NEARESTMV][ref_frame],
                            &frame_mv[NEARMV][ref_frame]);

      if (!vp9_is_scaled(sf) && bsize >= BLOCK_8X8)
        vp9_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride,
                    ref_frame, bsize);
887
    } else {
888
      ref_frame_skip_mask |= (1 << ref_frame);
889
    }
890 891
  }

892 893 894 895 896 897
  for (idx = 0; idx < RT_INTER_MODES; ++idx) {
    int rate_mv = 0;
    int mode_rd_thresh;
    int mode_index;
    int i;
    PREDICTION_MODE this_mode = ref_mode_set[idx].pred_mode;
898 899 900
    int64_t this_sse;
    int is_skippable;

901 902
    if (!(cpi->sf.inter_mode_mask[bsize] & (1 << this_mode)))
      continue;
903 904

    ref_frame = ref_mode_set[idx].ref_frame;
905 906
    if (!(cpi->ref_frame_flags & flag_list[ref_frame]))
      continue;
907 908
    if (const_motion[ref_frame] && this_mode == NEARMV)
      continue;
909

910
    i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME;
911 912 913 914 915
    if (cpi->ref_frame_flags & flag_list[i])
      if (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[i] << 1))
        ref_frame_skip_mask |= (1 << ref_frame);
    if (ref_frame_skip_mask & (1 << ref_frame))
      continue;
916 917

    // Select prediction reference frames.
918 919
    for (i = 0; i < MAX_MB_PLANE; i++)
      xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
920

921
    mbmi->ref_frame[0] = ref_frame;
922
    set_ref_ptrs(cm, xd, ref_frame, NONE);
923

924
    mode_index = mode_idx[ref_frame][INTER_OFFSET(this_mode)];
925 926 927 928 929
    mode_rd_thresh = best_mode_skip_txfm ?
            rd_threshes[mode_index] << 1 : rd_threshes[mode_index];
    if (rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh,
                            rd_thresh_freq_fact[mode_index]))
      continue;
Jim Bankoski's avatar
Jim Bankoski committed
930

931 932 933 934
    if (this_mode == NEWMV) {
      if (ref_frame > LAST_FRAME) {
        int tmp_sad;
        int dis, cost_list[5];
935

936
        if (bsize < BLOCK_16X16)
937
          continue;
938

939
        tmp_sad = vp9_int_pro_motion_estimation(cpi, x, bsize);
940

941
        if (tmp_sad > x->pred_mv_sad[LAST_FRAME])
Jim Bankoski's avatar
Jim Bankoski committed
942
          continue;
943 944
        if (tmp_sad + (num_pels_log2_lookup[bsize] << 4) > best_pred_sad)
          continue;
Jim Bankoski's avatar
Jim Bankoski committed
945

946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964
        frame_mv[NEWMV][ref_frame].as_int = mbmi->mv[0].as_int;
        rate_mv = vp9_mv_bit_cost(&frame_mv[NEWMV][ref_frame].as_mv,
          &mbmi->ref_mvs[ref_frame][0].as_mv,
          x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
        frame_mv[NEWMV][ref_frame].as_mv.row >>= 3;
        frame_mv[NEWMV][ref_frame].as_mv.col >>= 3;

        cpi->find_fractional_mv_step(x, &frame_mv[NEWMV][ref_frame].as_mv,
          &mbmi->ref_mvs[ref_frame][0].as_mv,
          cpi->common.allow_high_precision_mv,
          x->errorperbit,
          &cpi->fn_ptr[bsize],
          cpi->sf.mv.subpel_force_stop,
          cpi->sf.mv.subpel_iters_per_step,
          cond_cost_list(cpi, cost_list),
          x->nmvjointcost, x->mvcost, &dis,
          &x->pred_sse[ref_frame], NULL, 0, 0);
      } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
        &frame_mv[NEWMV][ref_frame], &rate_mv, best_rdc.rdcost)) {
965
        continue;
966 967
      }
    }
968

969 970 971 972 973 974 975 976 977
    if (this_mode == NEWMV && ref_frame == LAST_FRAME &&
        frame_mv[NEWMV][LAST_FRAME].as_int != INVALID_MV) {
      const int pre_stride = xd->plane[0].pre[0].stride;
      const uint8_t * const pre_buf = xd->plane[0].pre[0].buf +
          (frame_mv[NEWMV][LAST_FRAME].as_mv.row >> 3) * pre_stride +
          (frame_mv[NEWMV][LAST_FRAME].as_mv.col >> 3);
      best_pred_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf,
                                   x->plane[0].src.stride,
                                   pre_buf, pre_stride);
978
      x->pred_mv_sad[LAST_FRAME] = best_pred_sad;
979 980
    }

981 982 983
    if (this_mode != NEARESTMV &&
        frame_mv[this_mode][ref_frame].as_int ==
            frame_mv[NEARESTMV][ref_frame].as_int)
984
      continue;
985

986 987 988 989 990 991 992 993 994 995 996 997 998
    mbmi->mode = this_mode;
    mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int;

    // Search for the best prediction filter type, when the resulting
    // motion vector is at sub-pixel accuracy level for luma component, i.e.,
    // the last three bits are all zeros.
    if (reuse_inter_pred) {
      if (!this_mode_pred) {
        this_mode_pred = &tmp[3];
      } else {
        this_mode_pred = &tmp[get_pred_buffer(tmp, 3)];
        pd->dst.buf = this_mode_pred->data;
        pd->dst.stride = bw;
999
      }
1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032
    }

    if ((this_mode == NEWMV || filter_ref == SWITCHABLE) && pred_filter_search
        && (ref_frame == LAST_FRAME)
        && (((mbmi->mv[0].as_mv.row | mbmi->mv[0].as_mv.col) & 0x07) != 0)) {
      int pf_rate[3];
      int64_t pf_dist[3];
      unsigned int pf_var[3];
      unsigned int pf_sse[3];
      TX_SIZE pf_tx_size[3];
      int64_t best_cost = INT64_MAX;
      INTERP_FILTER best_filter = SWITCHABLE, filter;
      PRED_BUFFER *current_pred = this_mode_pred;

      for (filter = EIGHTTAP; filter <= EIGHTTAP_SHARP; ++filter) {
        int64_t cost;
        mbmi->interp_filter = filter;
        vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
        model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[filter], &pf_dist[filter],
                          &pf_var[filter], &pf_sse[filter]);
        pf_rate[filter] += vp9_get_switchable_rate(cpi, xd);
        cost = RDCOST(x->rdmult, x->rddiv, pf_rate[filter], pf_dist[filter]);
        pf_tx_size[filter] = mbmi->tx_size;
        if (cost < best_cost) {
          best_filter = filter;
          best_cost = cost;
          skip_txfm = x->skip_txfm[0];

          if (reuse_inter_pred) {
            if (this_mode_pred != current_pred) {
              free_pred_buffer(this_mode_pred);
              this_mode_pred = current_pred;
            }
1033

1034 1035 1036 1037
            if (filter < EIGHTTAP_SHARP) {
              current_pred = &tmp[get_pred_buffer(tmp, 3)];
              pd->dst.buf = current_pred->data;
              pd->dst.stride = bw;
1038
            }
1039
          }
1040
        }
1041
      }
1042

1043 1044
      if (reuse_inter_pred && this_mode_pred != current_pred)
        free_pred_buffer(current_pred);
1045

1046
      mbmi->interp_filter = best_filter;
1047 1048 1049 1050 1051
      mbmi->tx_size = pf_tx_size[best_filter];
      this_rdc.rate = pf_rate[best_filter];
      this_rdc.dist = pf_dist[best_filter];
      var_y = pf_var[best_filter];
      sse_y = pf_sse[best_filter];
1052 1053 1054 1055 1056 1057 1058 1059 1060 1061
      x->skip_txfm[0] = skip_txfm;
    } else {
      mbmi->interp_filter = (filter_ref == SWITCHABLE) ? EIGHTTAP : filter_ref;
      vp9_build_inter_predictors_sby(xd, mi_row,