/*
 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>

#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"

#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"

#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_scan.h"

#include "vp9/encoder/vp9_cost.h"
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_pickmode.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_rd.h"

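// Scratch buffer for an inter prediction, managed as a small pool via
// get_pred_buffer()/free_pred_buffer() so predictions can be kept and
// reused across mode trials.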
typedef struct {
  uint8_t *data;
  int stride;
  int in_use;
} PRED_BUFFER;

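// Trimmed-down motion vector reference search for the real-time (non-RD)
// path: scans the spatial neighbors for candidate mvs of ref_frame, jumping
// to Done as soon as the candidate list fills, and returns a flag that the
// caller uses as a constant-motion hint.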
static int mv_refs_rt(const VP9_COMMON *cm, const MACROBLOCK *x,
                      const MACROBLOCKD *xd,
                      const TileInfo *const tile,
                      MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
                      int_mv *mv_ref_list,
                      int mi_row, int mi_col) {
  const int *ref_sign_bias = cm->ref_frame_sign_bias;
  int i, refmv_count = 0;

  const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type];

  int different_ref_found = 0;
  int context_counter = 0;
  int const_motion = 0;

  // Blank the reference vector list
  memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES);

  // The nearest 2 blocks are treated differently:
  // if the size < 8x8 we get the mv from the bmi substructure,
  // and we also need to keep a mode count.
  for (i = 0; i < 2; ++i) {
    const POSITION *const mv_ref = &mv_ref_search[i];
    if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
      const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row *
                                                   xd->mi_stride];
      const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
      // Keep counts for entropy encoding.
      context_counter += mode_2_counter[candidate->mode];
      different_ref_found = 1;

      if (candidate->ref_frame[0] == ref_frame)
        ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, -1),
                        refmv_count, mv_ref_list, Done);
    }
  }

  const_motion = 1;

  // Check the rest of the neighbors in much the same way
  // as before except we don't need to keep track of sub blocks or
  // mode counts.
  for (; i < MVREF_NEIGHBOURS && !refmv_count; ++i) {
    const POSITION *const mv_ref = &mv_ref_search[i];
    if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
      const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row *
                                                    xd->mi_stride]->mbmi;
      different_ref_found = 1;

      if (candidate->ref_frame[0] == ref_frame)
        ADD_MV_REF_LIST(candidate->mv[0], refmv_count, mv_ref_list, Done);
    }
  }

  // Since we couldn't find 2 mvs from the same reference frame,
  // go back through the neighbors and find motion vectors from
  // different reference frames.
  if (different_ref_found && !refmv_count) {
    for (i = 0; i < MVREF_NEIGHBOURS; ++i) {
      const POSITION *mv_ref = &mv_ref_search[i];
      if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
        const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row
                                              * xd->mi_stride]->mbmi;

        // If the candidate is INTRA we don't want to consider its mv.
        IF_DIFF_REF_FRAME_ADD_MV(candidate, ref_frame, ref_sign_bias,
                                 refmv_count, mv_ref_list, Done);
      }
    }
  }

 Done:

  x->mbmi_ext->mode_context[ref_frame] = counter_to_context[context_counter];

  // Clamp vectors
  for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i)
    clamp_mv_ref(&mv_ref_list[i].as_mv, xd);

  return const_motion;
}

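// Full-pel motion search plus optional sub-pel refinement for the current
// block. Returns 0 (skipping the refinement) when even the full-pel result
// cannot beat best_rd_sofar, letting the caller drop NEWMV early.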
static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                  BLOCK_SIZE bsize, int mi_row, int mi_col,
                                  int_mv *tmp_mv, int *rate_mv,
                                  int64_t best_rd_sofar) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
  const int step_param = cpi->sf.mv.fullpel_search_step_param;
  const int sadpb = x->sadperbit16;
  MV mvp_full;
  const int ref = mbmi->ref_frame[0];
  const MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
  int dis;
  int rate_mode;
  const int tmp_col_min = x->mv_col_min;
  const int tmp_col_max = x->mv_col_max;
  const int tmp_row_min = x->mv_row_min;
  const int tmp_row_max = x->mv_row_max;
  int rv = 0;
  int cost_list[5];
  const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi,
                                                                        ref);
  if (scaled_ref_frame) {
    int i;
    // Swap out the reference frame for a version that's been scaled to
    // match the resolution of the current frame, allowing the existing
    // motion search code to be used without additional modifications.
    for (i = 0; i < MAX_MB_PLANE; i++)
      backup_yv12[i] = xd->plane[i].pre[0];
    vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
  }
  vp9_set_mv_search_range(x, &ref_mv);

  assert(x->mv_best_ref_index[ref] <= 2);
  if (x->mv_best_ref_index[ref] < 2)
    mvp_full = x->mbmi_ext->ref_mvs[ref][x->mv_best_ref_index[ref]].as_mv;
  else
    mvp_full = x->pred_mv[ref];

  mvp_full.col >>= 3;
  mvp_full.row >>= 3;

  vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,
                        cond_cost_list(cpi, cost_list),
                        &ref_mv, &tmp_mv->as_mv, INT_MAX, 0);

  x->mv_col_min = tmp_col_min;
  x->mv_col_max = tmp_col_max;
  x->mv_row_min = tmp_row_min;
  x->mv_row_max = tmp_row_max;

  // Calculate the bit cost of the motion vector.
  mvp_full.row = tmp_mv->as_mv.row * 8;
  mvp_full.col = tmp_mv->as_mv.col * 8;

  *rate_mv = vp9_mv_bit_cost(&mvp_full, &ref_mv,
                             x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);

  rate_mode = cpi->inter_mode_cost[x->mbmi_ext->mode_context[ref]]
                                  [INTER_OFFSET(NEWMV)];
  rv = !(RDCOST(x->rdmult, x->rddiv, (*rate_mv + rate_mode), 0) >
         best_rd_sofar);

  if (rv) {
    cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
                                 cpi->common.allow_high_precision_mv,
                                 x->errorperbit,
                                 &cpi->fn_ptr[bsize],
                                 cpi->sf.mv.subpel_force_stop,
                                 cpi->sf.mv.subpel_iters_per_step,
                                 cond_cost_list(cpi, cost_list),
                                 x->nmvjointcost, x->mvcost,
                                 &dis, &x->pred_sse[ref], NULL, 0, 0);
    *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
                               x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
  }

  if (scaled_ref_frame) {
    int i;
    for (i = 0; i < MAX_MB_PLANE; i++)
      xd->plane[i].pre[0] = backup_yv12[i];
  }
  return rv;
}

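// Accumulates SSE and sum over the w x h area in steps of block_size (8x8
// here), saving per-8x8 SSE/sum/variance for the later transform-skip
// tests. The >> 6 normalization assumes 64 pixels per sub-block.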
static void block_variance(const uint8_t *src, int src_stride,
                           const uint8_t *ref, int ref_stride,
                           int w, int h, unsigned int *sse, int *sum,
                           int block_size, unsigned int *sse8x8,
                           int *sum8x8, unsigned int *var8x8) {
  int i, j, k = 0;

  *sse = 0;
  *sum = 0;

  for (i = 0; i < h; i += block_size) {
    for (j = 0; j < w; j += block_size) {
      vpx_get8x8var(src + src_stride * i + j, src_stride,
                    ref + ref_stride * i + j, ref_stride,
                    &sse8x8[k], &sum8x8[k]);
      *sse += sse8x8[k];
      *sum += sum8x8[k];
      var8x8[k] = sse8x8[k] - (((unsigned int)sum8x8[k] * sum8x8[k]) >> 6);
      k++;
    }
  }
}

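// Merges the per-block SSE/sum statistics above in 2x2 groups to produce
// the statistics for blocks of the next transform size up.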
static void calculate_variance(int bw, int bh, TX_SIZE tx_size,
                               unsigned int *sse_i, int *sum_i,
                               unsigned int *var_o, unsigned int *sse_o,
                               int *sum_o) {
  const BLOCK_SIZE unit_size = txsize_to_bsize[tx_size];
  const int nw = 1 << (bw - b_width_log2_lookup[unit_size]);
  const int nh = 1 << (bh - b_height_log2_lookup[unit_size]);
  int i, j, k = 0;

  for (i = 0; i < nh; i += 2) {
    for (j = 0; j < nw; j += 2) {
      sse_o[k] = sse_i[i * nw + j] + sse_i[i * nw + j + 1] +
          sse_i[(i + 1) * nw + j] + sse_i[(i + 1) * nw + j + 1];
      sum_o[k] = sum_i[i * nw + j] + sum_i[i * nw + j + 1] +
          sum_i[(i + 1) * nw + j] + sum_i[(i + 1) * nw + j + 1];
      var_o[k] = sse_o[k] - (((unsigned int)sum_o[k] * sum_o[k]) >>
          (b_width_log2_lookup[unit_size] +
              b_height_log2_lookup[unit_size] + 6));
      k++;
    }
  }
}

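// Variance-based rate/distortion model for the Y plane of large partitions:
// chooses the transform size, runs per-transform-block skip tests, and can
// signal early termination when both Y and UV planes are skippable.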
static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize,
                                    MACROBLOCK *x, MACROBLOCKD *xd,
                                    int *out_rate_sum, int64_t *out_dist_sum,
                                    unsigned int *var_y, unsigned int *sse_y,
                                    int mi_row, int mi_col, int *early_term) {
  // Note: our transform coefficients are 8 times those of an orthogonal
  // transform, so the quantizer step is also scaled by 8. To get the
  // effective quantizer we need to divide by 8 before sending to the
  // modeling function.
  unsigned int sse;
  int rate;
  int64_t dist;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const uint32_t dc_quant = pd->dequant[0];
  const uint32_t ac_quant = pd->dequant[1];
  const int64_t dc_thr = dc_quant * dc_quant >> 6;
  const int64_t ac_thr = ac_quant * ac_quant >> 6;
  unsigned int var;
  int sum;
  int skip_dc = 0;

  const int bw = b_width_log2_lookup[bsize];
  const int bh = b_height_log2_lookup[bsize];
  const int num8x8 = 1 << (bw + bh - 2);
  unsigned int sse8x8[64] = {0};
  int sum8x8[64] = {0};
  unsigned int var8x8[64] = {0};
  TX_SIZE tx_size;
  int i, k;

  // Calculate variance for whole partition, and also save 8x8 blocks' variance
  // to be used in following transform skipping test.
  block_variance(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
                 4 << bw, 4 << bh, &sse, &sum, 8, sse8x8, sum8x8, var8x8);
  var = sse - (((int64_t)sum * sum) >> (bw + bh + 4));

  *var_y = var;
  *sse_y = sse;

  if (cpi->common.tx_mode == TX_MODE_SELECT) {
    if (sse > (var << 2))
      tx_size = VPXMIN(max_txsize_lookup[bsize],
                       tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
    else
      tx_size = TX_8X8;

    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
        cyclic_refresh_segment_id_boosted(xd->mi[0]->mbmi.segment_id))
      tx_size = TX_8X8;
    else if (tx_size > TX_16X16)
      tx_size = TX_16X16;
  } else {
    tx_size = VPXMIN(max_txsize_lookup[bsize],
                     tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
  }

  assert(tx_size >= TX_8X8);
  xd->mi[0]->mbmi.tx_size = tx_size;

  // Evaluate if the partition block is a skippable block in Y plane.
  {
    unsigned int sse16x16[16] = {0};
    int sum16x16[16] = {0};
    unsigned int var16x16[16] = {0};
    const int num16x16 = num8x8 >> 2;

    unsigned int sse32x32[4] = {0};
    int sum32x32[4] = {0};
    unsigned int var32x32[4] = {0};
    const int num32x32 = num8x8 >> 4;

    int ac_test = 1;
    int dc_test = 1;
    const int num = (tx_size == TX_8X8) ? num8x8 :
        ((tx_size == TX_16X16) ? num16x16 : num32x32);
    const unsigned int *sse_tx = (tx_size == TX_8X8) ? sse8x8 :
        ((tx_size == TX_16X16) ? sse16x16 : sse32x32);
    const unsigned int *var_tx = (tx_size == TX_8X8) ? var8x8 :
        ((tx_size == TX_16X16) ? var16x16 : var32x32);

    // Calculate variance if tx_size > TX_8X8
    if (tx_size >= TX_16X16)
      calculate_variance(bw, bh, TX_8X8, sse8x8, sum8x8, var16x16, sse16x16,
                         sum16x16);
    if (tx_size == TX_32X32)
      calculate_variance(bw, bh, TX_16X16, sse16x16, sum16x16, var32x32,
                         sse32x32, sum32x32);

    // Skipping test
    x->skip_txfm[0] = SKIP_TXFM_NONE;
    for (k = 0; k < num; k++)
      // Check if all ac coefficients can be quantized to zero.
      if (!(var_tx[k] < ac_thr || var == 0)) {
        ac_test = 0;
        break;
      }

    for (k = 0; k < num; k++)
      // Check if dc coefficient can be quantized to zero.
      if (!(sse_tx[k] - var_tx[k] < dc_thr || sse == var)) {
        dc_test = 0;
        break;
      }

    if (ac_test) {
      x->skip_txfm[0] = SKIP_TXFM_AC_ONLY;

      if (dc_test)
        x->skip_txfm[0] = SKIP_TXFM_AC_DC;
    } else if (dc_test) {
      skip_dc = 1;
    }
  }

  if (x->skip_txfm[0] == SKIP_TXFM_AC_DC) {
    int skip_uv[2] = {0};
    unsigned int var_uv[2];
    unsigned int sse_uv[2];

    *out_rate_sum = 0;
    *out_dist_sum = sse << 4;

    // Transform skipping test in UV planes.
    for (i = 1; i <= 2; i++) {
      struct macroblock_plane *const p = &x->plane[i];
      struct macroblockd_plane *const pd = &xd->plane[i];
      const TX_SIZE uv_tx_size = get_uv_tx_size(&xd->mi[0]->mbmi, pd);
      const BLOCK_SIZE unit_size = txsize_to_bsize[uv_tx_size];
      const BLOCK_SIZE uv_bsize = get_plane_block_size(bsize, pd);
      const int uv_bw = b_width_log2_lookup[uv_bsize];
      const int uv_bh = b_height_log2_lookup[uv_bsize];
      const int sf = (uv_bw - b_width_log2_lookup[unit_size]) +
          (uv_bh - b_height_log2_lookup[unit_size]);
      const uint32_t uv_dc_thr = pd->dequant[0] * pd->dequant[0] >> (6 - sf);
      const uint32_t uv_ac_thr = pd->dequant[1] * pd->dequant[1] >> (6 - sf);
      int j = i - 1;

      vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, i);
      var_uv[j] = cpi->fn_ptr[uv_bsize].vf(p->src.buf, p->src.stride,
          pd->dst.buf, pd->dst.stride, &sse_uv[j]);

      if ((var_uv[j] < uv_ac_thr || var_uv[j] == 0) &&
          (sse_uv[j] - var_uv[j] < uv_dc_thr || sse_uv[j] == var_uv[j]))
        skip_uv[j] = 1;
      else
        break;
    }

    // If the transforms in the YUV planes are skippable, the mode search
    // checks fewer inter modes and doesn't check intra modes.
    if (skip_uv[0] & skip_uv[1]) {
      *early_term = 1;
    }

    return;
  }

  if (!skip_dc) {
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
                                   dc_quant >> (xd->bd - 5), &rate, &dist);
    } else {
      vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
                                   dc_quant >> 3, &rate, &dist);
    }
#else
    vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
                                 dc_quant >> 3, &rate, &dist);
#endif  // CONFIG_VP9_HIGHBITDEPTH
  }

  if (!skip_dc) {
    *out_rate_sum = rate >> 1;
    *out_dist_sum = dist << 3;
  } else {
    *out_rate_sum = 0;
    *out_dist_sum = (sse - var) << 4;
  }

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize],
                                 ac_quant >> (xd->bd - 5), &rate, &dist);
  } else {
    vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize],
                                 ac_quant >> 3, &rate, &dist);
  }
#else
  vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize],
                               ac_quant >> 3, &rate, &dist);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  *out_rate_sum += rate;
  *out_dist_sum += dist << 4;
}

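// Lighter-weight variant of the model above: uses whole-partition variance
// only, with no per-transform-block statistics or early termination.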
static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
                              MACROBLOCK *x, MACROBLOCKD *xd,
                              int *out_rate_sum, int64_t *out_dist_sum,
                              unsigned int *var_y, unsigned int *sse_y) {
  // Note: our transform coefficients are 8 times those of an orthogonal
  // transform, so the quantizer step is also scaled by 8. To get the
  // effective quantizer we need to divide by 8 before sending to the
  // modeling function.
  unsigned int sse;
  int rate;
  int64_t dist;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const int64_t dc_thr = p->quant_thred[0] >> 6;
  const int64_t ac_thr = p->quant_thred[1] >> 6;
  const uint32_t dc_quant = pd->dequant[0];
  const uint32_t ac_quant = pd->dequant[1];
  unsigned int var = cpi->fn_ptr[bsize].vf(p->src.buf, p->src.stride,
                                           pd->dst.buf, pd->dst.stride, &sse);
  int skip_dc = 0;

  *var_y = var;
  *sse_y = sse;

  if (cpi->common.tx_mode == TX_MODE_SELECT) {
    if (sse > (var << 2))
      xd->mi[0]->mbmi.tx_size =
          VPXMIN(max_txsize_lookup[bsize],
                 tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
    else
      xd->mi[0]->mbmi.tx_size = TX_8X8;

    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
        cyclic_refresh_segment_id_boosted(xd->mi[0]->mbmi.segment_id))
      xd->mi[0]->mbmi.tx_size = TX_8X8;
    else if (xd->mi[0]->mbmi.tx_size > TX_16X16)
      xd->mi[0]->mbmi.tx_size = TX_16X16;
  } else {
    xd->mi[0]->mbmi.tx_size =
        VPXMIN(max_txsize_lookup[bsize],
               tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
  }

  // Evaluate if the partition block is a skippable block in Y plane.
  {
    const BLOCK_SIZE unit_size =
        txsize_to_bsize[xd->mi[0]->mbmi.tx_size];
    const unsigned int num_blk_log2 =
        (b_width_log2_lookup[bsize] - b_width_log2_lookup[unit_size]) +
        (b_height_log2_lookup[bsize] - b_height_log2_lookup[unit_size]);
    const unsigned int sse_tx = sse >> num_blk_log2;
    const unsigned int var_tx = var >> num_blk_log2;

    x->skip_txfm[0] = SKIP_TXFM_NONE;
    // Check if all ac coefficients can be quantized to zero.
    if (var_tx < ac_thr || var == 0) {
      x->skip_txfm[0] = SKIP_TXFM_AC_ONLY;
      // Check if dc coefficient can be quantized to zero.
      if (sse_tx - var_tx < dc_thr || sse == var)
        x->skip_txfm[0] = SKIP_TXFM_AC_DC;
    } else {
      if (sse_tx - var_tx < dc_thr || sse == var)
        skip_dc = 1;
    }
  }

  if (x->skip_txfm[0] == SKIP_TXFM_AC_DC) {
    *out_rate_sum = 0;
    *out_dist_sum = sse << 4;
    return;
  }

  if (!skip_dc) {
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
                                   dc_quant >> (xd->bd - 5), &rate, &dist);
    } else {
      vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
                                   dc_quant >> 3, &rate, &dist);
    }
#else
    vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
                                 dc_quant >> 3, &rate, &dist);
#endif  // CONFIG_VP9_HIGHBITDEPTH
  }

  if (!skip_dc) {
    *out_rate_sum = rate >> 1;
    *out_dist_sum = dist << 3;
  } else {
    *out_rate_sum = 0;
    *out_dist_sum = (sse - var) << 4;
  }

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize],
                                 ac_quant >> (xd->bd - 5), &rate, &dist);
  } else {
    vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize],
                                 ac_quant >> 3, &rate, &dist);
  }
#else
  vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize],
                               ac_quant >> 3, &rate, &dist);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  *out_rate_sum += rate;
  *out_dist_sum += dist << 4;
}

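// Rate/distortion estimate for the Y plane using an actual transform and
// quantization pass with a SATD-based rate proxy. In high-bitdepth builds
// this falls back to the variance model of model_rd_for_sb_y().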
#if CONFIG_VP9_HIGHBITDEPTH
static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist,
                      int *skippable, int64_t *sse, int plane,
                      BLOCK_SIZE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned int var_y, sse_y;
  (void)plane;
  (void)tx_size;
  model_rd_for_sb_y(cpi, bsize, x, xd, rate, dist, &var_y, &sse_y);
  *sse = INT_MAX;
  *skippable = 0;
  return;
}
#else
static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist,
                      int *skippable, int64_t *sse, int plane,
                      BLOCK_SIZE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *xd = &x->e_mbd;
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const struct macroblock_plane *const p = &x->plane[plane];
  const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
  const int step = 1 << (tx_size << 1);
  const int block_step = (1 << tx_size);
  int block = 0, r, c;
  int shift = tx_size == TX_32X32 ? 0 : 2;
  const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? 0 :
      xd->mb_to_right_edge >> (5 + pd->subsampling_x));
  const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 0 :
      xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
  int eob_cost = 0;

  (void)cpi;
  vp9_subtract_plane(x, bsize, plane);
  *skippable = 1;
  // Keep track of the row and column of the blocks we use so that we know
  // if we are in the unrestricted motion border.
  for (r = 0; r < max_blocks_high; r += block_step) {
    for (c = 0; c < num_4x4_w; c += block_step) {
      if (c < max_blocks_wide) {
        const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
        tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
        tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
        tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
        uint16_t *const eob = &p->eobs[block];
        const int diff_stride = 4 * num_4x4_blocks_wide_lookup[bsize];
        const int16_t *src_diff;
        src_diff = &p->src_diff[(r * diff_stride + c) << 2];

        switch (tx_size) {
          case TX_32X32:
            vpx_fdct32x32_rd(src_diff, coeff, diff_stride);
            vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin,
                                  p->round_fp, p->quant_fp, p->quant_shift,
                                  qcoeff, dqcoeff, pd->dequant, eob,
                                  scan_order->scan, scan_order->iscan);
            break;
          case TX_16X16:
            vp9_hadamard_16x16(src_diff, diff_stride, (int16_t *)coeff);
            vp9_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
                            p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
                            pd->dequant, eob,
                            scan_order->scan, scan_order->iscan);
            break;
          case TX_8X8:
            vp9_hadamard_8x8(src_diff, diff_stride, (int16_t *)coeff);
            vp9_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp,
                            p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
                            pd->dequant, eob,
                            scan_order->scan, scan_order->iscan);
            break;
          case TX_4X4:
            x->fwd_txm4x4(src_diff, coeff, diff_stride);
            vp9_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
                            p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
                            pd->dequant, eob,
                            scan_order->scan, scan_order->iscan);
            break;
          default:
            assert(0);
            break;
        }
        *skippable &= (*eob == 0);
        eob_cost += 1;
      }
      block += step;
    }
  }

  if (*skippable && *sse < INT64_MAX) {
    *rate = 0;
    *dist = (*sse << 6) >> shift;
    *sse = *dist;
    return;
  }

  block = 0;
  *rate = 0;
  *dist = 0;
  if (*sse < INT64_MAX)
    *sse = (*sse << 6) >> shift;
  for (r = 0; r < max_blocks_high; r += block_step) {
    for (c = 0; c < num_4x4_w; c += block_step) {
      if (c < max_blocks_wide) {
        tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
        tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
        tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
        uint16_t *const eob = &p->eobs[block];

        if (*eob == 1)
          *rate += (int)abs(qcoeff[0]);
        else if (*eob > 1)
          *rate += (int)vp9_satd((const int16_t *)qcoeff, step << 4);

        *dist += vp9_block_error_fp(coeff, dqcoeff, step << 4) >> shift;
      }
      block += step;
    }
  }

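  // Scale the SATD-based rate proxy toward the bit-cost domain used by the
  // rd loop; the shifts below are empirical scaling factors rather than
  // exact bit counts.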
  if (*skippable == 0) {
    *rate <<= 10;
    *rate += (eob_cost << 8);
  }
}
#endif

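// Variance-based rate/distortion model for the chroma planes; a plane is
// evaluated only when flagged in x->color_sensitivity.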
static void model_rd_for_sb_uv(VP9_COMP *cpi, BLOCK_SIZE bsize,
                               MACROBLOCK *x, MACROBLOCKD *xd,
                               int *out_rate_sum, int64_t *out_dist_sum,
                               unsigned int *var_y, unsigned int *sse_y) {
  // Note: our transform coefficients are 8 times those of an orthogonal
  // transform, so the quantizer step is also scaled by 8. To get the
  // effective quantizer we need to divide by 8 before sending to the
  // modeling function.
  unsigned int sse;
  int rate;
  int64_t dist;
  int i;

  *out_rate_sum = 0;
  *out_dist_sum = 0;

  for (i = 1; i <= 2; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
    const uint32_t dc_quant = pd->dequant[0];
    const uint32_t ac_quant = pd->dequant[1];
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
    unsigned int var;

    if (!x->color_sensitivity[i - 1])
      continue;

    var = cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
                             pd->dst.buf, pd->dst.stride, &sse);
    *var_y += var;
    *sse_y += sse;

#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bs],
                                   dc_quant >> (xd->bd - 5), &rate, &dist);
    } else {
      vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bs],
                                   dc_quant >> 3, &rate, &dist);
    }
#else
    vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bs],
                                 dc_quant >> 3, &rate, &dist);
#endif  // CONFIG_VP9_HIGHBITDEPTH

    *out_rate_sum += rate >> 1;
    *out_dist_sum += dist << 3;

#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bs],
                                   ac_quant >> (xd->bd - 5), &rate, &dist);
    } else {
      vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bs],
                                   ac_quant >> 3, &rate, &dist);
    }
#else
    vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bs],
                                 ac_quant >> 3, &rate, &dist);
#endif  // CONFIG_VP9_HIGHBITDEPTH

    *out_rate_sum += rate;
    *out_dist_sum += dist << 4;
  }
}

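// Claims the first unused buffer in the prediction-buffer pool, or returns
// -1 when all of them are in use.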
static int get_pred_buffer(PRED_BUFFER *p, int len) {
  int i;

  for (i = 0; i < len; i++) {
    if (!p[i].in_use) {
      p[i].in_use = 1;
      return i;
    }
  }
  return -1;
}

static void free_pred_buffer(PRED_BUFFER *p) {
  if (p != NULL)
    p->in_use = 0;
}

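// Encode breakout: when the Y (and then U and V) residual variance falls
// below thresholds derived from the dequant values, mark the block as skip
// and charge only the mode cost.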
static void encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x,
                                 BLOCK_SIZE bsize, int mi_row, int mi_col,
                                 MV_REFERENCE_FRAME ref_frame,
                                 PREDICTION_MODE this_mode,
                                 unsigned int var_y, unsigned int sse_y,
                                 struct buf_2d yv12_mb[][MAX_MB_PLANE],
                                 int *rate, int64_t *dist) {
  MACROBLOCKD *xd = &x->e_mbd;

  const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
  unsigned int var = var_y, sse = sse_y;
  // Skipping threshold for ac.
  unsigned int thresh_ac;
  // Skipping threshold for dc.
  unsigned int thresh_dc;
  if (x->encode_breakout > 0) {
    // Set a maximum for the threshold to avoid big PSNR loss in low bit-rate
    // cases. Use an extremely low threshold for static frames to limit
    // skipping.
    const unsigned int max_thresh = 36000;
    // The encode_breakout input
    const unsigned int min_thresh =
795
        VPXMIN(((unsigned int)x->encode_breakout << 4), max_thresh);
#if CONFIG_VP9_HIGHBITDEPTH
    const int shift = (xd->bd << 1) - 16;
#endif

    // Calculate threshold according to dequant value.
    thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) >> 3;
#if CONFIG_VP9_HIGHBITDEPTH
    if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && shift > 0) {
      thresh_ac = ROUND_POWER_OF_TWO(thresh_ac, shift);
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
    thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);

    // Adjust ac threshold according to partition size.
    thresh_ac >>=
        8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);

    thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
#if CONFIG_VP9_HIGHBITDEPTH
    if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && shift > 0) {
      thresh_dc = ROUND_POWER_OF_TWO(thresh_dc, shift);
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  } else {
    thresh_ac = 0;
    thresh_dc = 0;
  }

  // Y skipping condition checking for ac and dc.
  if (var <= thresh_ac && (sse - var) <= thresh_dc) {
    unsigned int sse_u, sse_v;
    unsigned int var_u, var_v;

    // Skip UV prediction unless breakout is zero (lossless) to save
    // computation with low impact on the result
    if (x->encode_breakout == 0) {
      xd->plane[1].pre[0] = yv12_mb[ref_frame][1];
      xd->plane[2].pre[0] = yv12_mb[ref_frame][2];
      vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, bsize);
    }

    var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,
                                    x->plane[1].src.stride,
                                    xd->plane[1].dst.buf,
                                    xd->plane[1].dst.stride, &sse_u);

    // U skipping condition checking
    if (((var_u << 2) <= thresh_ac) && (sse_u - var_u <= thresh_dc)) {
      var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
                                      x->plane[2].src.stride,
                                      xd->plane[2].dst.buf,
                                      xd->plane[2].dst.stride, &sse_v);

      // V skipping condition checking
      if (((var_v << 2) <= thresh_ac) && (sse_v - var_v <= thresh_dc)) {
        x->skip = 1;

        // The cost of skip bit needs to be added.
        *rate = cpi->inter_mode_cost[x->mbmi_ext->mode_context[ref_frame]]
                                    [INTER_OFFSET(this_mode)];

        // More on this part of rate
        // rate += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);

        // Scaling factor for SSE from spatial domain to frequency
        // domain is 16. Adjust distortion accordingly.
        // TODO(yunqingwang): In this function, only y-plane dist is
        // calculated.
        *dist = (sse << 4);  // + ((sse_u + sse_v) << 4);

        // *disable_skip = 1;
      }
    }
  }
}

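// Accumulator handed to the per-transform-block intra estimation callback
// below.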
struct estimate_block_intra_args {
  VP9_COMP *cpi;
  MACROBLOCK *x;
  PREDICTION_MODE mode;
  int rate;
  int64_t dist;
};

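// Per-transform-block callback: predicts the block (using the source as an
// approximation of the reconstruction when skip_encode is on) and adds the
// block_yrd() rate/distortion estimate to the accumulator.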
static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
                                 TX_SIZE tx_size, void *arg) {
  struct estimate_block_intra_args* const args = arg;
  VP9_COMP *const cpi = args->cpi;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const BLOCK_SIZE bsize_tx = txsize_to_bsize[tx_size];
  uint8_t *const src_buf_base = p->src.buf;
  uint8_t *const dst_buf_base = pd->dst.buf;
  const int src_stride = p->src.stride;
  const int dst_stride = pd->dst.stride;
  int i, j;
  int rate;
  int64_t dist;
  int64_t this_sse = INT64_MAX;
  int is_skippable;

  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
  assert(plane == 0);
  (void) plane;

  p->src.buf = &src_buf_base[4 * (j * src_stride + i)];
  pd->dst.buf = &dst_buf_base[4 * (j * dst_stride + i)];
  // Use source buffer as an approximation for the fully reconstructed buffer.
  vp9_predict_intra_block(xd, b_width_log2_lookup[plane_bsize],
                          tx_size, args->mode,
                          x->skip_encode ? p->src.buf : pd->dst.buf,
                          x->skip_encode ? src_stride : dst_stride,
                          pd->dst.buf, dst_stride,
                          i, j, 0);

  // TODO(jingning): This needs further refactoring.
  block_yrd(cpi, x, &rate, &dist, &is_skippable, &this_sse, 0,
            bsize_tx, VPXMIN(tx_size, TX_16X16));
  x->skip_txfm[0] = is_skippable;
  rate += vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), is_skippable);

  p->src.buf = src_buf_base;
  pd->dst.buf = dst_buf_base;
  args->rate += rate;
  args->dist += dist;
}

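// Maps a reference frame and a mode offset (see mode_offset() below) to the
// THR_MODES entry used for rd-threshold bookkeeping; row 0 holds the intra
// modes.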
static const THR_MODES mode_idx[MAX_REF_FRAMES - 1][4] = {
  {THR_DC, THR_V_PRED, THR_H_PRED, THR_TM},
  {THR_NEARESTMV, THR_NEARMV, THR_ZEROMV, THR_NEWMV},
  {THR_NEARESTG, THR_NEARG, THR_ZEROG, THR_NEWG},
};

static const PREDICTION_MODE intra_mode_list[] = {
  DC_PRED, V_PRED, H_PRED, TM_PRED
};

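// Column index into mode_idx[] for a given prediction mode.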
static int mode_offset(const PREDICTION_MODE mode) {
  if (mode >= NEARESTMV) {
    return INTER_OFFSET(mode);
  } else {
    switch (mode) {
      case DC_PRED:
        return 0;
      case V_PRED:
        return 1;
      case H_PRED:
        return 2;
      case TM_PRED:
        return 3;
      default:
        return -1;
    }
  }
}

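// Adaptive rd-threshold update: the frequency factor decays when the given
// mode was the winner and otherwise grows, up to a capped maximum.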
static INLINE void update_thresh_freq_fact(VP9_COMP *cpi,
                                           TileDataEnc *tile_data,
                                           BLOCK_SIZE bsize,
                                           MV_REFERENCE_FRAME ref_frame,
                                           THR_MODES best_mode_idx,
                                           PREDICTION_MODE mode) {
  THR_MODES thr_mode_idx = mode_idx[ref_frame][mode_offset(mode)];
  int *freq_fact = &tile_data->thresh_freq_fact[bsize][thr_mode_idx];
  if (thr_mode_idx == best_mode_idx)
    *freq_fact -= (*freq_fact >> 4);
  else
    *freq_fact = VPXMIN(*freq_fact + RD_THRESH_INC,
                        cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT);
}

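// Non-RD intra mode selection: estimates model rate/distortion for DC_PRED
// through H_PRED and keeps the cheapest mode.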
void vp9_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost,
                         BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  RD_COST this_rdc, best_rdc;
  PREDICTION_MODE this_mode;
  struct estimate_block_intra_args args = { cpi, x, DC_PRED, 0, 0 };
  const TX_SIZE intra_tx_size =
      VPXMIN(max_txsize_lookup[bsize],
             tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
  MODE_INFO *const mic = xd->mi[0];
  int *bmode_costs;
  const MODE_INFO *above_mi = xd->mi[-xd->mi_stride];
  const MODE_INFO *left_mi = xd->left_available ? xd->mi[-1] : NULL;
  const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
  const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
  bmode_costs = cpi->y_mode_costs[A][L];

  (void) ctx;
  vp9_rd_cost_reset(&best_rdc);
  vp9_rd_cost_reset(&this_rdc);

  mbmi->ref_frame[0] = INTRA_FRAME;
  mbmi->mv[0].as_int = INVALID_MV;
  mbmi->uv_mode = DC_PRED;
  memset(x->skip_txfm, 0, sizeof(x->skip_txfm));

  // Change the limit of this loop to add other intra prediction
  // mode tests.
  for (this_mode = DC_PRED; this_mode <= H_PRED; ++this_mode) {
    args.mode = this_mode;
    args.rate = 0;
    args.dist = 0;
    mbmi->tx_size = intra_tx_size;
    vp9_foreach_transformed_block_in_plane(xd, bsize, 0,
                                           estimate_block_intra, &args);
    this_rdc.rate = args.rate;
    this_rdc.dist = args.dist;
    this_rdc.rate += bmode_costs[this_mode];
    this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
                             this_rdc.rate, this_rdc.dist);

    if (this_rdc.rdcost < best_rdc.rdcost) {
      best_rdc = this_rdc;
      mbmi->mode = this_mode;
    }
  }

  *rd_cost = best_rdc;
}

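// Precomputes the entropy cost of signaling each reference frame: the
// intra/inter bit plus the single-reference tree bits.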
static void init_ref_frame_cost(VP9_COMMON *const cm,
                                MACROBLOCKD *const xd,
                                int ref_frame_cost[MAX_REF_FRAMES]) {
  vpx_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd);
  vpx_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
  vpx_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);

  ref_frame_cost[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);
  ref_frame_cost[LAST_FRAME] = ref_frame_cost[GOLDEN_FRAME] =
    ref_frame_cost[ALTREF_FRAME] = vp9_cost_bit(intra_inter_p, 1);

  ref_frame_cost[LAST_FRAME] += vp9_cost_bit(ref_single_p1, 0);
  ref_frame_cost[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1);
  ref_frame_cost[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1);
  ref_frame_cost[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0);
  ref_frame_cost[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1);
}

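// (reference frame, mode) pairs tried by vp9_pick_inter_mode(), ordered so
// cheaper candidates tend to come first; the SVC variant front-loads the
// ZEROMV checks.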
typedef struct {
  MV_REFERENCE_FRAME ref_frame;
  PREDICTION_MODE pred_mode;
} REF_MODE;

#define RT_INTER_MODES 8
static const REF_MODE ref_mode_set[RT_INTER_MODES] = {
    {LAST_FRAME, ZEROMV},
    {LAST_FRAME, NEARESTMV},
    {GOLDEN_FRAME, ZEROMV},
    {LAST_FRAME, NEARMV},
    {LAST_FRAME, NEWMV},
    {GOLDEN_FRAME, NEARESTMV},
    {GOLDEN_FRAME, NEARMV},
    {GOLDEN_FRAME, NEWMV}
};
static const REF_MODE ref_mode_set_svc[RT_INTER_MODES] = {
    {LAST_FRAME, ZEROMV},
    {GOLDEN_FRAME, ZEROMV},
    {LAST_FRAME, NEARESTMV},
    {LAST_FRAME, NEARMV},
    {GOLDEN_FRAME, NEARESTMV},
    {GOLDEN_FRAME, NEARMV},
    {LAST_FRAME, NEWMV},
    {GOLDEN_FRAME, NEWMV}
};

// TODO(jingning): placeholder for inter-frame non-RD mode decision.
// This needs various further optimizations, to be continued.
void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
                         TileDataEnc *tile_data,
                         int mi_row, int mi_col, RD_COST *rd_cost,
                         BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
  VP9_COMMON *const cm = &cpi->common;
  SPEED_FEATURES *const sf = &cpi->sf;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  struct macroblockd_plane *const pd = &xd->plane[0];
  PREDICTION_MODE best_mode = ZEROMV;
  MV_REFERENCE_FRAME ref_frame, best_ref_frame = LAST_FRAME;
  MV_REFERENCE_FRAME usable_ref_frame;
  TX_SIZE best_tx_size = TX_SIZES;
  INTERP_FILTER best_pred_filter = EIGHTTAP;
  int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
  struct buf_2d yv12_mb[4][MAX_MB_PLANE];
  static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
                                    VP9_ALT_FLAG };
  RD_COST this_rdc, best_rdc;
  uint8_t skip_txfm = SKIP_TXFM_NONE, best_mode_skip_txfm = SKIP_TXFM_NONE;
  // var_y and sse_y are saved to be used in skipping checking
  unsigned int var_y = UINT_MAX;
  unsigned int sse_y = UINT_MAX;
  // Reduce the intra cost penalty for small blocks (<=16x16).
  const int reduction_fac = (bsize <= BLOCK_16X16) ?
      ((bsize <= BLOCK_8X8) ? 4 : 2) : 0;
  const int intra_cost_penalty = vp9_get_intra_cost_penalty(
      cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth) >> reduction_fac;
  const int64_t inter_mode_thresh = RDCOST(x->rdmult, x->rddiv,
                                           intra_cost_penalty, 0);
  const int *const rd_threshes = cpi->rd.threshes[mbmi->segment_id][bsize];
  const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
  INTERP_FILTER filter_ref;
  const int bsl = mi_width_log2_lookup[bsize];
  const int pred_filter_search = cm->interp_filter == SWITCHABLE ?
      (((mi_row + mi_col) >> bsl) +
       get_chessboard_index(cm->current_video_frame)) & 0x1 : 0;
  int const_motion[MAX_REF_FRAMES] = { 0 };
  const int bh = num_4x4_blocks_high_lookup[bsize] << 2;
  const int bw = num_4x4_blocks_wide_lookup[bsize] << 2;
  // For speed 6, the result of the interpolation filter search is reused
  // later in the actual encoding process.
  // tmp[3] points to dst buffer, and the other 3 point to allocated buffers.
  PRED_BUFFER tmp[4];
  DECLARE_ALIGNED(16, uint8_t, pred_buf[3 * 64 * 64]);
#if CONFIG_VP9_HIGHBITDEPTH
  DECLARE_ALIGNED(16, uint16_t, pred_buf_16[3 * 64 * 64]);
#endif
  struct buf_2d orig_dst = pd->dst;
  PRED_BUFFER *best_pred = NULL;
  PRED_BUFFER *this_mode_pred = NULL;
  const int pixels_in_block = bh * bw;
  int reuse_inter_pred = cpi->sf.reuse_inter_pred_sby && ctx->pred_pixel_ready;
  int ref_frame_skip_mask = 0;
  int idx;
  int best_pred_sad = INT_MAX;
  int best_early_term = 0;
  int ref_frame_cost[MAX_REF_FRAMES];

  init_ref_frame_cost(cm, xd, ref_frame_cost);

  if (reuse_inter_pred) {
    int i;
    for (i = 0; i < 3; i++) {
#if CONFIG_VP9_HIGHBITDEPTH
      if (cm->use_highbitdepth)
        tmp[i].data = CONVERT_TO_BYTEPTR(&pred_buf_16[pixels_in_block * i]);
      else
        tmp[i].data = &pred_buf[pixels_in_block * i];
#else
      tmp[i].data = &pred_buf[pixels_in_block * i];
#endif  // CONFIG_VP9_HIGHBITDEPTH
      tmp[i].stride = bw;
      tmp[i].in_use = 0;
    }
    tmp[3].data = pd->dst.buf;
    tmp[3].stride = pd->dst.stride;
    tmp[3].in_use = 0;
  }

  x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
  x->skip = 0;

  if (xd->up_available)
    filter_ref = xd->mi[-xd->mi_stride]->mbmi.interp_filter;
  else if (xd->left_available)