/*
 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>

#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"

#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"

#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_scan.h"

#include "vp9/encoder/vp9_cost.h"
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_pickmode.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_rd.h"
// Bookkeeping record for one prediction buffer slot.
typedef struct {
  uint8_t *data;  // Start of the buffer's sample storage.
  int stride;     // Stride that goes with |data|.
  int in_use;     // Set to 1 by get_pred_buffer(), cleared by
                  // free_pred_buffer().
} PRED_BUFFER;

static int mv_refs_rt(const VP9_COMMON *cm, const MACROBLOCK *x,
                      const MACROBLOCKD *xd,
44 45 46 47
                      const TileInfo *const tile,
                      MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
                      int_mv *mv_ref_list,
                      int mi_row, int mi_col) {
48 49 50 51 52 53 54 55 56 57
  const int *ref_sign_bias = cm->ref_frame_sign_bias;
  int i, refmv_count = 0;

  const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type];

  int different_ref_found = 0;
  int context_counter = 0;
  int const_motion = 0;

  // Blank the reference vector list
James Zern's avatar
James Zern committed
58
  memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES);
59 60 61 62 63 64 65 66

  // The nearest 2 blocks are treated differently
  // if the size < 8x8 we get the mv from the bmi substructure,
  // and we also need to keep a mode count.
  for (i = 0; i < 2; ++i) {
    const POSITION *const mv_ref = &mv_ref_search[i];
    if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
      const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row *
67
                                                   xd->mi_stride];
68 69 70 71 72 73
      const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
      // Keep counts for entropy encoding.
      context_counter += mode_2_counter[candidate->mode];
      different_ref_found = 1;

      if (candidate->ref_frame[0] == ref_frame)
74 75
        ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, -1),
                        refmv_count, mv_ref_list, Done);
76 77 78 79 80 81 82 83 84 85 86 87
    }
  }

  const_motion = 1;

  // Check the rest of the neighbors in much the same way
  // as before except we don't need to keep track of sub blocks or
  // mode counts.
  for (; i < MVREF_NEIGHBOURS && !refmv_count; ++i) {
    const POSITION *const mv_ref = &mv_ref_search[i];
    if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
      const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row *
88
                                                    xd->mi_stride]->mbmi;
89 90 91
      different_ref_found = 1;

      if (candidate->ref_frame[0] == ref_frame)
92
        ADD_MV_REF_LIST(candidate->mv[0], refmv_count, mv_ref_list, Done);
93 94 95 96 97 98 99 100 101 102 103
    }
  }

  // Since we couldn't find 2 mvs from the same reference frame
  // go back through the neighbors and find motion vectors from
  // different reference frames.
  if (different_ref_found && !refmv_count) {
    for (i = 0; i < MVREF_NEIGHBOURS; ++i) {
      const POSITION *mv_ref = &mv_ref_search[i];
      if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
        const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row
104
                                              * xd->mi_stride]->mbmi;
105 106

        // If the candidate is INTRA we don't want to consider its mv.
107 108
        IF_DIFF_REF_FRAME_ADD_MV(candidate, ref_frame, ref_sign_bias,
                                 refmv_count, mv_ref_list, Done);
109 110 111 112 113 114
      }
    }
  }

 Done:

115
  x->mbmi_ext->mode_context[ref_frame] = counter_to_context[context_counter];
116 117 118 119 120 121 122 123

  // Clamp vectors
  for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i)
    clamp_mv_ref(&mv_ref_list[i].as_mv, xd);

  return const_motion;
}

// Runs a full-pixel motion search for the block's first reference frame and,
// when the cost of signalling the result is not already worse than
// |best_rd_sofar|, refines it with the sub-pixel search.
//
//   tmp_mv        - receives the motion vector that was found.
//   rate_mv       - receives the bit cost of that motion vector.
//   best_rd_sofar - RD cost to beat; used as an early-out threshold.
//
// Returns 1 when the sub-pixel refinement was performed, 0 when the search
// was abandoned after the full-pixel stage.
//
// The mv search range of |x| and, for scaled references, the pre[0] plane
// buffers of |xd| are modified temporarily and restored before returning.
static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                  BLOCK_SIZE bsize, int mi_row, int mi_col,
                                  int_mv *tmp_mv, int *rate_mv,
                                  int64_t best_rd_sofar) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
  const int step_param = cpi->sf.mv.fullpel_search_step_param;
  const int sadpb = x->sadperbit16;
  MV mvp_full;
  const int ref = mbmi->ref_frame[0];
  const MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
  int dis;
  int rate_mode;
  // Saved so the search range can be restored after the full-pixel search.
  const int tmp_col_min = x->mv_col_min;
  const int tmp_col_max = x->mv_col_max;
  const int tmp_row_min = x->mv_row_min;
  const int tmp_row_max = x->mv_row_max;
  int rv = 0;
  int cost_list[5];
  const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi,
                                                                        ref);
  if (scaled_ref_frame) {
    int i;
    // Swap out the reference frame for a version that's been scaled to
    // match the resolution of the current frame, allowing the existing
    // motion search code to be used without additional modifications.
    for (i = 0; i < MAX_MB_PLANE; i++)
      backup_yv12[i] = xd->plane[i].pre[0];
    vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
  }
  vp9_set_mv_search_range(x, &ref_mv);

  // Pick the starting point: one of the two reference mvs, or the stored
  // predicted mv when the best index is 2.
  assert(x->mv_best_ref_index[ref] <= 2);
  if (x->mv_best_ref_index[ref] < 2)
    mvp_full = x->mbmi_ext->ref_mvs[ref][x->mv_best_ref_index[ref]].as_mv;
  else
    mvp_full = x->pred_mv[ref];

  // Drop the three fractional bits before the full-pixel search.
  mvp_full.col >>= 3;
  mvp_full.row >>= 3;

  vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,
                        cond_cost_list(cpi, cost_list),
                        &ref_mv, &tmp_mv->as_mv, INT_MAX, 0);

  x->mv_col_min = tmp_col_min;
  x->mv_col_max = tmp_col_max;
  x->mv_row_min = tmp_row_min;
  x->mv_row_max = tmp_row_max;

  // calculate the bit cost on motion vector
  mvp_full.row = tmp_mv->as_mv.row * 8;
  mvp_full.col = tmp_mv->as_mv.col * 8;

  *rate_mv = vp9_mv_bit_cost(&mvp_full, &ref_mv,
                             x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);

  rate_mode = cpi->inter_mode_cost[x->mbmi_ext->mode_context[ref]]
                                  [INTER_OFFSET(NEWMV)];
  // Continue only if the rate term alone does not exceed the best RD so far.
  rv = !(RDCOST(x->rdmult, x->rddiv, (*rate_mv + rate_mode), 0) >
         best_rd_sofar);

  if (rv) {
    cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
                                 cpi->common.allow_high_precision_mv,
                                 x->errorperbit,
                                 &cpi->fn_ptr[bsize],
                                 cpi->sf.mv.subpel_force_stop,
                                 cpi->sf.mv.subpel_iters_per_step,
                                 cond_cost_list(cpi, cost_list),
                                 x->nmvjointcost, x->mvcost,
                                 &dis, &x->pred_sse[ref], NULL, 0, 0);
    *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
                               x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
  }

  if (scaled_ref_frame) {
    int i;
    for (i = 0; i < MAX_MB_PLANE; i++)
      xd->plane[i].pre[0] = backup_yv12[i];
  }
  return rv;
}

// Walks a w x h region in steps of |block_size| and, for every step, calls
// vpx_get8x8var() on the corresponding src/ref position.
//
// Per-step results are written to sse8x8[k], sum8x8[k] and var8x8[k] in
// raster order; *sse and *sum receive the totals over the whole region.
// The variance uses a >> 6 normalisation (division by 64), so callers are
// expected to pass block_size == 8.
static void block_variance(const uint8_t *src, int src_stride,
                           const uint8_t *ref, int ref_stride,
                           int w, int h, unsigned int *sse, int *sum,
                           int block_size, unsigned int *sse8x8,
                           int *sum8x8, unsigned int *var8x8) {
  int i, j, k = 0;

  *sse = 0;
  *sum = 0;

  for (i = 0; i < h; i += block_size) {
    for (j = 0; j < w; j += block_size) {
      vpx_get8x8var(src + src_stride * i + j, src_stride,
                    ref + ref_stride * i + j, ref_stride,
                    &sse8x8[k], &sum8x8[k]);
      *sse += sse8x8[k];
      *sum += sum8x8[k];
      var8x8[k] = sse8x8[k] - (((unsigned int)sum8x8[k] * sum8x8[k]) >> 6);
      k++;
    }
  }
}

// Merges every 2x2 group of per-unit statistics into one larger unit.
//
//   bw, bh   - log2 block width/height, in the same units as
//              b_width_log2_lookup / b_height_log2_lookup.
//   tx_size  - selects the size of the input units (via txsize_to_bsize).
//   sse_i,
//   sum_i    - input statistics in raster order, nw units per row.
//   var_o,
//   sse_o,
//   sum_o    - outputs, one entry per merged 2x2 group, in raster order.
//
// The squared sum is formed in 64 bits: when 16x16 units are merged into
// 32x32 the magnitude of the sum can reach 255 * 1024, whose square does not
// fit in 32 bits.
static void calculate_variance(int bw, int bh, TX_SIZE tx_size,
                               unsigned int *sse_i, int *sum_i,
                               unsigned int *var_o, unsigned int *sse_o,
                               int *sum_o) {
  const BLOCK_SIZE unit_size = txsize_to_bsize[tx_size];
  const int nw = 1 << (bw - b_width_log2_lookup[unit_size]);
  const int nh = 1 << (bh - b_height_log2_lookup[unit_size]);
  // log2 of the number of pixels in one merged (2x2) unit.
  const int pels_log2 = b_width_log2_lookup[unit_size] +
                        b_height_log2_lookup[unit_size] + 6;
  int i, j, k = 0;

  for (i = 0; i < nh; i += 2) {
    for (j = 0; j < nw; j += 2) {
      sse_o[k] = sse_i[i * nw + j] + sse_i[i * nw + j + 1] +
          sse_i[(i + 1) * nw + j] + sse_i[(i + 1) * nw + j + 1];
      sum_o[k] = sum_i[i * nw + j] + sum_i[i * nw + j + 1] +
          sum_i[(i + 1) * nw + j] + sum_i[(i + 1) * nw + j + 1];
      var_o[k] = sse_o[k] -
          (unsigned int)(((int64_t)sum_o[k] * sum_o[k]) >> pels_log2);
      k++;
    }
  }
}

// Models luma rate and distortion for |bsize| from block variance, using
// per-8x8 statistics to decide whether the transform can be skipped.
//
// Side effects:
//   - xd->mi[0]->mbmi.tx_size is set to the chosen transform size.
//   - x->skip_txfm[0] is set to 0 (no skip), 2 (all AC tests passed) or
//     1 (both AC and DC tests passed).
//   - *var_y / *sse_y receive the whole-block luma variance and SSE.
//   - *out_rate_sum / *out_dist_sum receive the modelled rate/distortion.
//   - *early_term is set to 1 when luma is fully skippable and both chroma
//     planes also pass their skip tests; it is never cleared here.
//
// mi_row/mi_col are only used to build the chroma inter predictors for the
// chroma skip test.
static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize,
                                    MACROBLOCK *x, MACROBLOCKD *xd,
                                    int *out_rate_sum, int64_t *out_dist_sum,
                                    unsigned int *var_y, unsigned int *sse_y,
                                    int mi_row, int mi_col, int *early_term) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  unsigned int sse;
  int rate;
  int64_t dist;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const uint32_t dc_quant = pd->dequant[0];
  const uint32_t ac_quant = pd->dequant[1];
  const int64_t dc_thr = dc_quant * dc_quant >> 6;
  const int64_t ac_thr = ac_quant * ac_quant >> 6;
  unsigned int var;
  int sum;
  int skip_dc = 0;

  const int bw = b_width_log2_lookup[bsize];
  const int bh = b_height_log2_lookup[bsize];
  const int num8x8 = 1 << (bw + bh - 2);
  unsigned int sse8x8[64] = {0};
  int sum8x8[64] = {0};
  unsigned int var8x8[64] = {0};
  TX_SIZE tx_size;
  int i, k;

  // Calculate variance for whole partition, and also save 8x8 blocks' variance
  // to be used in following transform skipping test.
  block_variance(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
                 4 << bw, 4 << bh, &sse, &sum, 8, sse8x8, sum8x8, var8x8);
  var = sse - (((int64_t)sum * sum) >> (bw + bh + 4));

  *var_y = var;
  *sse_y = sse;

  if (cpi->common.tx_mode == TX_MODE_SELECT) {
    if (sse > (var << 2))
      tx_size = MIN(max_txsize_lookup[bsize],
                    tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
    else
      tx_size = TX_8X8;

    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
        cyclic_refresh_segment_id_boosted(xd->mi[0]->mbmi.segment_id))
      tx_size = TX_8X8;
    else if (tx_size > TX_16X16)
      tx_size = TX_16X16;
  } else {
    tx_size = MIN(max_txsize_lookup[bsize],
                  tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
  }

  assert(tx_size >= TX_8X8);
  xd->mi[0]->mbmi.tx_size = tx_size;

  // Evaluate if the partition block is a skippable block in Y plane.
  {
    unsigned int sse16x16[16] = {0};
    int sum16x16[16] = {0};
    unsigned int var16x16[16] = {0};
    const int num16x16 = num8x8 >> 2;

    unsigned int sse32x32[4] = {0};
    int sum32x32[4] = {0};
    unsigned int var32x32[4] = {0};
    const int num32x32 = num8x8 >> 4;

    int ac_test = 1;
    int dc_test = 1;
    const int num = (tx_size == TX_8X8) ? num8x8 :
        ((tx_size == TX_16X16) ? num16x16 : num32x32);
    const unsigned int *sse_tx = (tx_size == TX_8X8) ? sse8x8 :
        ((tx_size == TX_16X16) ? sse16x16 : sse32x32);
    const unsigned int *var_tx = (tx_size == TX_8X8) ? var8x8 :
        ((tx_size == TX_16X16) ? var16x16 : var32x32);

    // Calculate variance if tx_size > TX_8X8
    if (tx_size >= TX_16X16)
      calculate_variance(bw, bh, TX_8X8, sse8x8, sum8x8, var16x16, sse16x16,
                         sum16x16);
    if (tx_size == TX_32X32)
      calculate_variance(bw, bh, TX_16X16, sse16x16, sum16x16, var32x32,
                         sse32x32, sum32x32);

    // Skipping test
    x->skip_txfm[0] = 0;
    for (k = 0; k < num; k++)
      // Check if all ac coefficients can be quantized to zero.
      if (!(var_tx[k] < ac_thr || var == 0)) {
        ac_test = 0;
        break;
      }

    for (k = 0; k < num; k++)
      // Check if dc coefficient can be quantized to zero.
      if (!(sse_tx[k] - var_tx[k] < dc_thr || sse == var)) {
        dc_test = 0;
        break;
      }

    if (ac_test) {
      x->skip_txfm[0] = 2;

      if (dc_test)
        x->skip_txfm[0] = 1;
    } else if (dc_test) {
      skip_dc = 1;
    }
  }

  if (x->skip_txfm[0] == 1) {
    int skip_uv[2] = {0};
    unsigned int var_uv[2];
    unsigned int sse_uv[2];

    *out_rate_sum = 0;
    *out_dist_sum = sse << 4;

    // Transform skipping test in UV planes.
    for (i = 1; i <= 2; i++) {
      struct macroblock_plane *const p = &x->plane[i];
      struct macroblockd_plane *const pd = &xd->plane[i];
      const TX_SIZE uv_tx_size = get_uv_tx_size(&xd->mi[0]->mbmi, pd);
      const BLOCK_SIZE unit_size = txsize_to_bsize[uv_tx_size];
      const BLOCK_SIZE uv_bsize = get_plane_block_size(bsize, pd);
      const int uv_bw = b_width_log2_lookup[uv_bsize];
      const int uv_bh = b_height_log2_lookup[uv_bsize];
      const int sf = (uv_bw - b_width_log2_lookup[unit_size]) +
          (uv_bh - b_height_log2_lookup[unit_size]);
      const uint32_t uv_dc_thr = pd->dequant[0] * pd->dequant[0] >> (6 - sf);
      const uint32_t uv_ac_thr = pd->dequant[1] * pd->dequant[1] >> (6 - sf);
      int j = i - 1;

      vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, i);
      var_uv[j] = cpi->fn_ptr[uv_bsize].vf(p->src.buf, p->src.stride,
          pd->dst.buf, pd->dst.stride, &sse_uv[j]);

      if ((var_uv[j] < uv_ac_thr || var_uv[j] == 0) &&
          (sse_uv[j] - var_uv[j] < uv_dc_thr || sse_uv[j] == var_uv[j]))
        skip_uv[j] = 1;
      else
        break;
    }

    // If the transform in YUV planes are skippable, the mode search checks
    // fewer inter modes and doesn't check intra modes.
    if (skip_uv[0] & skip_uv[1]) {
      *early_term = 1;
    }

    return;
  }

  // DC contribution: only modelled when the DC skip test failed.
  if (!skip_dc) {
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
                                   dc_quant >> (xd->bd - 5), &rate, &dist);
    } else {
      vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
                                   dc_quant >> 3, &rate, &dist);
    }
#else
    vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
                                 dc_quant >> 3, &rate, &dist);
#endif  // CONFIG_VP9_HIGHBITDEPTH
  }

  if (!skip_dc) {
    *out_rate_sum = rate >> 1;
    *out_dist_sum = dist << 3;
  } else {
    *out_rate_sum = 0;
    *out_dist_sum = (sse - var) << 4;
  }

  // AC contribution.
#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize],
                                 ac_quant >> (xd->bd - 5), &rate, &dist);
  } else {
    vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize],
                                 ac_quant >> 3, &rate, &dist);
  }
#else
  vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize],
                               ac_quant >> 3, &rate, &dist);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  *out_rate_sum += rate;
  *out_dist_sum += dist << 4;
}

// Models luma rate and distortion for |bsize| from the whole-block variance.
//
// Side effects:
//   - xd->mi[0]->mbmi.tx_size is set to the chosen transform size.
//   - x->skip_txfm[0] is set to 0 (no skip), 2 (AC test passed) or
//     1 (both AC and DC tests passed).
//   - *var_y / *sse_y receive the luma variance and SSE.
//   - *out_rate_sum / *out_dist_sum receive the modelled rate/distortion.
static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
                              MACROBLOCK *x, MACROBLOCKD *xd,
                              int *out_rate_sum, int64_t *out_dist_sum,
                              unsigned int *var_y, unsigned int *sse_y) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  unsigned int sse;
  int rate;
  int64_t dist;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const int64_t dc_thr = p->quant_thred[0] >> 6;
  const int64_t ac_thr = p->quant_thred[1] >> 6;
  const uint32_t dc_quant = pd->dequant[0];
  const uint32_t ac_quant = pd->dequant[1];
  unsigned int var = cpi->fn_ptr[bsize].vf(p->src.buf, p->src.stride,
                                           pd->dst.buf, pd->dst.stride, &sse);
  int skip_dc = 0;

  *var_y = var;
  *sse_y = sse;

  if (cpi->common.tx_mode == TX_MODE_SELECT) {
    if (sse > (var << 2))
      xd->mi[0]->mbmi.tx_size =
          MIN(max_txsize_lookup[bsize],
              tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
    else
      xd->mi[0]->mbmi.tx_size = TX_8X8;

    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
        cyclic_refresh_segment_id_boosted(xd->mi[0]->mbmi.segment_id))
      xd->mi[0]->mbmi.tx_size = TX_8X8;
    else if (xd->mi[0]->mbmi.tx_size > TX_16X16)
      xd->mi[0]->mbmi.tx_size = TX_16X16;
  } else {
    xd->mi[0]->mbmi.tx_size =
        MIN(max_txsize_lookup[bsize],
            tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
  }

  // Evaluate if the partition block is a skippable block in Y plane.
  {
    const BLOCK_SIZE unit_size =
        txsize_to_bsize[xd->mi[0]->mbmi.tx_size];
    const unsigned int num_blk_log2 =
        (b_width_log2_lookup[bsize] - b_width_log2_lookup[unit_size]) +
        (b_height_log2_lookup[bsize] - b_height_log2_lookup[unit_size]);
    // Statistics scaled down by the number of transform blocks.
    const unsigned int sse_tx = sse >> num_blk_log2;
    const unsigned int var_tx = var >> num_blk_log2;

    x->skip_txfm[0] = 0;
    // Check if all ac coefficients can be quantized to zero.
    if (var_tx < ac_thr || var == 0) {
      x->skip_txfm[0] = 2;
      // Check if dc coefficient can be quantized to zero.
      if (sse_tx - var_tx < dc_thr || sse == var)
        x->skip_txfm[0] = 1;
    } else {
      if (sse_tx - var_tx < dc_thr || sse == var)
        skip_dc = 1;
    }
  }

  if (x->skip_txfm[0] == 1) {
    *out_rate_sum = 0;
    *out_dist_sum = sse << 4;
    return;
  }

  // DC contribution: only modelled when the DC skip test failed.
  if (!skip_dc) {
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
                                   dc_quant >> (xd->bd - 5), &rate, &dist);
    } else {
      vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
                                   dc_quant >> 3, &rate, &dist);
    }
#else
    vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
                                 dc_quant >> 3, &rate, &dist);
#endif  // CONFIG_VP9_HIGHBITDEPTH
  }

  if (!skip_dc) {
    *out_rate_sum = rate >> 1;
    *out_dist_sum = dist << 3;
  } else {
    *out_rate_sum = 0;
    *out_dist_sum = (sse - var) << 4;
  }

  // AC contribution.
#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize],
                                 ac_quant >> (xd->bd - 5), &rate, &dist);
  } else {
    vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize],
                                 ac_quant >> 3, &rate, &dist);
  }
#else
  vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize],
                               ac_quant >> 3, &rate, &dist);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  *out_rate_sum += rate;
  *out_dist_sum += dist << 4;
}

#if CONFIG_VP9_HIGHBITDEPTH
static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist,
                      int *skippable, int64_t *sse, int plane,
                      BLOCK_SIZE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned int var_y, sse_y;
  (void)plane;
  (void)tx_size;
  model_rd_for_sb_y(cpi, bsize, x, xd, rate, dist, &var_y, &sse_y);
  *sse = INT_MAX;
  *skippable = 0;
  return;
}
#else
577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592
static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist,
                      int *skippable, int64_t *sse, int plane,
                      BLOCK_SIZE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *xd = &x->e_mbd;
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const struct macroblock_plane *const p = &x->plane[plane];
  const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
  const int step = 1 << (tx_size << 1);
  const int block_step = (1 << tx_size);
  int block = 0, r, c;
  int shift = tx_size == TX_32X32 ? 0 : 2;
  const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? 0 :
      xd->mb_to_right_edge >> (5 + pd->subsampling_x));
  const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 0 :
      xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
593
  int eob_cost = 0;
594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609

  (void)cpi;
  vp9_subtract_plane(x, bsize, plane);
  *skippable = 1;
  // Keep track of the row and column of the blocks we use so that we know
  // if we are in the unrestricted motion border.
  for (r = 0; r < max_blocks_high; r += block_step) {
    for (c = 0; c < num_4x4_w; c += block_step) {
      if (c < max_blocks_wide) {
        const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
        tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
        tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
        tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
        uint16_t *const eob = &p->eobs[block];
        const int diff_stride = 4 * num_4x4_blocks_wide_lookup[bsize];
        const int16_t *src_diff;
610
        src_diff = &p->src_diff[(r * diff_stride + c) << 2];
611 612 613

        switch (tx_size) {
          case TX_32X32:
614
            vpx_fdct32x32_rd(src_diff, coeff, diff_stride);
615 616 617 618 619 620
            vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin,
                                  p->round_fp, p->quant_fp, p->quant_shift,
                                  qcoeff, dqcoeff, pd->dequant, eob,
                                  scan_order->scan, scan_order->iscan);
            break;
          case TX_16X16:
621
            vp9_hadamard_16x16(src_diff, diff_stride, (int16_t *)coeff);
622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644
            vp9_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
                            p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
                            pd->dequant, eob,
                            scan_order->scan, scan_order->iscan);
            break;
          case TX_8X8:
            vp9_hadamard_8x8(src_diff, diff_stride, (int16_t *)coeff);
            vp9_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp,
                            p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
                            pd->dequant, eob,
                            scan_order->scan, scan_order->iscan);
            break;
          case TX_4X4:
            x->fwd_txm4x4(src_diff, coeff, diff_stride);
            vp9_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
                            p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
                            pd->dequant, eob,
                            scan_order->scan, scan_order->iscan);
            break;
          default:
            assert(0);
            break;
        }
645
        *skippable &= (*eob == 0);
646
        eob_cost += 1;
647 648 649 650 651 652
      }
      block += step;
    }
  }

  if (*skippable && *sse < INT64_MAX) {
653
    *rate = 0;
654 655 656 657
    *dist = (*sse << 6) >> shift;
    *sse = *dist;
    return;
  }
658

659 660 661
  block = 0;
  *rate = 0;
  *dist = 0;
662 663
  if (*sse < INT64_MAX)
    *sse = (*sse << 6) >> shift;
664 665 666 667 668 669 670
  for (r = 0; r < max_blocks_high; r += block_step) {
    for (c = 0; c < num_4x4_w; c += block_step) {
      if (c < max_blocks_wide) {
        tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
        tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
        tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
        uint16_t *const eob = &p->eobs[block];
671 672 673 674 675

        if (*eob == 1)
          *rate += (int)abs(qcoeff[0]);
        else if (*eob > 1)
          *rate += (int)vp9_satd((const int16_t *)qcoeff, step << 4);
676

677
        *dist += vp9_block_error_fp(coeff, dqcoeff, step << 4) >> shift;
678 679 680 681 682
      }
      block += step;
    }
  }

683 684 685 686
  if (*skippable == 0) {
    *rate <<= 10;
    *rate += (eob_cost << 8);
  }
687
}
688
#endif
// Models rate and distortion of the two chroma planes (plane indices 1
// and 2). A plane is skipped when its x->color_sensitivity entry is zero.
//
// *out_rate_sum and *out_dist_sum are reset to zero and then accumulated
// over the processed planes. *var_y and *sse_y are NOT reset: each processed
// plane's variance and SSE are added to whatever the caller passed in.
static void model_rd_for_sb_uv(VP9_COMP *cpi, BLOCK_SIZE bsize,
                               MACROBLOCK *x, MACROBLOCKD *xd,
                               int *out_rate_sum, int64_t *out_dist_sum,
                               unsigned int *var_y, unsigned int *sse_y) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  unsigned int plane_sse;
  int model_rate;
  int64_t model_dist;
  int plane;

  *out_rate_sum = 0;
  *out_dist_sum = 0;

  for (plane = 1; plane <= 2; ++plane) {
    struct macroblock_plane *const p = &x->plane[plane];
    struct macroblockd_plane *const pd = &xd->plane[plane];
    const uint32_t dc_quant = pd->dequant[0];
    const uint32_t ac_quant = pd->dequant[1];
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
    unsigned int plane_var;
    // Right shift that turns a dequant value into the effective quantizer.
    int quant_shift = 3;

    if (x->color_sensitivity[plane - 1]) {
      plane_var = cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
                                     pd->dst.buf, pd->dst.stride, &plane_sse);
      *var_y += plane_var;
      *sse_y += plane_sse;

#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
        quant_shift = xd->bd - 5;
#endif  // CONFIG_VP9_HIGHBITDEPTH

      // DC part.
      vp9_model_rd_from_var_lapndz(plane_sse - plane_var,
                                   num_pels_log2_lookup[bs],
                                   dc_quant >> quant_shift,
                                   &model_rate, &model_dist);
      *out_rate_sum += model_rate >> 1;
      *out_dist_sum += model_dist << 3;

      // AC part.
      vp9_model_rd_from_var_lapndz(plane_var, num_pels_log2_lookup[bs],
                                   ac_quant >> quant_shift,
                                   &model_rate, &model_dist);
      *out_rate_sum += model_rate;
      *out_dist_sum += model_dist << 4;
    }
  }
}

// Claims the first entry of p[0..len-1] whose in_use flag is zero.
// Marks that entry as in use and returns its index; returns -1 when every
// entry is already taken (or len <= 0).
static int get_pred_buffer(PRED_BUFFER *p, int len) {
  int idx = 0;

  while (idx < len) {
    PRED_BUFFER *const slot = &p[idx];
    if (slot->in_use == 0) {
      slot->in_use = 1;
      return idx;
    }
    ++idx;
  }

  return -1;
}

// Releases a buffer slot by clearing its in_use flag.
// Passing NULL is allowed and does nothing.
static void free_pred_buffer(PRED_BUFFER *p) {
  if (p != NULL) {
    p->in_use = 0;
  }
}

// Encode-breakout test: decides whether the residual of the current inter
// prediction is small enough for the block to be coded as skipped.
//
// var_y / sse_y are the luma variance and SSE for (ref_frame, this_mode).
// AC and DC thresholds are derived from the luma dequantizer values and
// x->encode_breakout (both thresholds are 0 when encode_breakout is 0).
// When the luma plane and then the U and V planes all pass the checks:
//   - x->skip is set to 1,
//   - *rate receives the inter mode cost of this_mode,
//   - *dist receives the luma SSE scaled by 16.
// If any check fails, x->skip, *rate and *dist are left untouched.
static void encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x,
                                 BLOCK_SIZE bsize, int mi_row, int mi_col,
                                 MV_REFERENCE_FRAME ref_frame,
                                 PREDICTION_MODE this_mode,
                                 unsigned int var_y, unsigned int sse_y,
                                 struct buf_2d yv12_mb[][MAX_MB_PLANE],
                                 int *rate, int64_t *dist) {
  MACROBLOCKD *xd = &x->e_mbd;

  const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
  unsigned int var = var_y, sse = sse_y;
  // Skipping threshold for ac.
  unsigned int thresh_ac;
  // Skipping threshold for dc.
  unsigned int thresh_dc;
  if (x->encode_breakout > 0) {
    // Set a maximum for threshold to avoid big PSNR loss in low bit rate
    // case. Use extreme low threshold for static frames to limit
    // skipping.
    const unsigned int max_thresh = 36000;
    // The encode_breakout input
    const unsigned int min_thresh =
        MIN(((unsigned int)x->encode_breakout << 4), max_thresh);
#if CONFIG_VP9_HIGHBITDEPTH
    // Extra precision carried by squared dequant values above 8-bit depth.
    const int shift = (xd->bd << 1) - 16;
#endif

    // Calculate threshold according to dequant value.
    thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) >> 3;
#if CONFIG_VP9_HIGHBITDEPTH
    if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && shift > 0) {
      thresh_ac = ROUND_POWER_OF_TWO(thresh_ac, shift);
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
    thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);

    // Adjust ac threshold according to partition size.
    thresh_ac >>=
        8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);

    thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
#if CONFIG_VP9_HIGHBITDEPTH
    if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && shift > 0) {
      thresh_dc = ROUND_POWER_OF_TWO(thresh_dc, shift);
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  } else {
    thresh_ac = 0;
    thresh_dc = 0;
  }

  // Y skipping condition checking for ac and dc.
  if (var <= thresh_ac && (sse - var) <= thresh_dc) {
    unsigned int sse_u, sse_v;
    unsigned int var_u, var_v;

    // Skip UV prediction unless breakout is zero (lossless) to save
    // computation with low impact on the result
    if (x->encode_breakout == 0) {
      xd->plane[1].pre[0] = yv12_mb[ref_frame][1];
      xd->plane[2].pre[0] = yv12_mb[ref_frame][2];
      vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, bsize);
    }

    var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,
                                    x->plane[1].src.stride,
                                    xd->plane[1].dst.buf,
                                    xd->plane[1].dst.stride, &sse_u);

    // U skipping condition checking
    if (((var_u << 2) <= thresh_ac) && (sse_u - var_u <= thresh_dc)) {
      var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
                                      x->plane[2].src.stride,
                                      xd->plane[2].dst.buf,
                                      xd->plane[2].dst.stride, &sse_v);

      // V skipping condition checking
      if (((var_v << 2) <= thresh_ac) && (sse_v - var_v <= thresh_dc)) {
        x->skip = 1;

        // The cost of skip bit needs to be added.
        *rate = cpi->inter_mode_cost[x->mbmi_ext->mode_context[ref_frame]]
                                    [INTER_OFFSET(this_mode)];

        // More on this part of rate
        // rate += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);

        // Scaling factor for SSE from spatial domain to frequency
        // domain is 16. Adjust distortion accordingly.
        // TODO(yunqingwang): In this function, only y-plane dist is
        // calculated.
        *dist = (sse << 4);  // + ((sse_u + sse_v) << 4);

        // *disable_skip = 1;
      }
    }
  }
}

871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894
// Argument bundle passed to estimate_block_intra() through its void *arg.
// Field order matters: vp9_pick_intra_mode() fills it with a positional
// initializer.
struct estimate_block_intra_args {
  VP9_COMP *cpi;         // Encoder instance.
  MACROBLOCK *x;         // Macroblock being evaluated.
  PREDICTION_MODE mode;  // Intra mode used for the prediction.
  int rate;              // Running rate total; the callback adds to it.
  int64_t dist;          // Running distortion total; the callback adds to it.
};

// Per-transform-block callback (see vp9_foreach_transformed_block_in_plane)
// that estimates the luma cost of the intra mode given in args->mode.
//
// It builds the intra prediction for one transform block, runs block_yrd()
// on it, adds the skip-flag cost, and accumulates the resulting rate and
// distortion into args->rate / args->dist. Only plane 0 is supported
// (asserted). The plane's src/dst buffer pointers are redirected to the
// current transform block for the duration of the call and restored before
// returning. x->skip_txfm[0] is overwritten with the block's skippable flag.
static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
                                 TX_SIZE tx_size, void *arg) {
  struct estimate_block_intra_args *const args = arg;
  VP9_COMP *const cpi = args->cpi;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const BLOCK_SIZE bsize_tx = txsize_to_bsize[tx_size];
  uint8_t *const src_buf_base = p->src.buf;
  uint8_t *const dst_buf_base = pd->dst.buf;
  const int src_stride = p->src.stride;
  const int dst_stride = pd->dst.stride;
  int i, j;
  int rate;
  int64_t dist;
  int64_t this_sse = INT64_MAX;
  int is_skippable;

  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
  assert(plane == 0);
  (void) plane;

  // (i, j) locate the transform block inside the plane; the factor of 4
  // suggests they are expressed in 4-pixel units (confirm against
  // txfrm_block_to_raster_xy).
  p->src.buf = &src_buf_base[4 * (j * src_stride + i)];
  pd->dst.buf = &dst_buf_base[4 * (j * dst_stride + i)];
  // Use source buffer as an approximation for the fully reconstructed buffer.
  vp9_predict_intra_block(xd, b_width_log2_lookup[plane_bsize],
                          tx_size, args->mode,
                          x->skip_encode ? p->src.buf : pd->dst.buf,
                          x->skip_encode ? src_stride : dst_stride,
                          pd->dst.buf, dst_stride,
                          i, j, 0);

  // TODO(jingning): This needs further refactoring.
  block_yrd(cpi, x, &rate, &dist, &is_skippable, &this_sse, 0,
            bsize_tx, MIN(tx_size, TX_16X16));
  x->skip_txfm[0] = is_skippable;
  rate += vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), is_skippable);

  // Restore the plane buffers and fold this block into the running totals.
  p->src.buf = src_buf_base;
  pd->dst.buf = dst_buf_base;
  args->rate += rate;
  args->dist += dist;
}

924
static const THR_MODES mode_idx[MAX_REF_FRAMES - 1][4] = {
925
  {THR_DC, THR_V_PRED, THR_H_PRED, THR_TM},
926 927 928 929
  {THR_NEARESTMV, THR_NEARMV, THR_ZEROMV, THR_NEWMV},
  {THR_NEARESTG, THR_NEARG, THR_ZEROG, THR_NEWG},
};

930 931 932 933
// Candidate intra prediction modes, listed in the same order in which
// mode_offset() numbers them (0..3).
static const PREDICTION_MODE intra_mode_list[] = {
  DC_PRED,
  V_PRED,
  H_PRED,
  TM_PRED
};

934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967
// Returns the column of `mode` within a mode_idx row.
// Inter modes (mode >= NEARESTMV) map through INTER_OFFSET(); the four
// supported intra modes map to 0..3; any other intra mode yields -1.
static int mode_offset(const PREDICTION_MODE mode) {
  int offset = -1;

  if (mode >= NEARESTMV)
    return INTER_OFFSET(mode);

  if (mode == DC_PRED)
    offset = 0;
  else if (mode == V_PRED)
    offset = 1;
  else if (mode == H_PRED)
    offset = 2;
  else if (mode == TM_PRED)
    offset = 3;

  return offset;
}

// Adapts the per-tile threshold frequency factor of the (ref_frame, mode)
// pair for block size `bsize`.
// If that pair is the one identified by best_mode_idx, its factor is reduced
// by 1/16th of its current value; otherwise it is raised by RD_THRESH_INC,
// capped at sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT.
static INLINE void update_thresh_freq_fact(VP9_COMP *cpi,
                                           TileDataEnc *tile_data,
                                           BLOCK_SIZE bsize,
                                           MV_REFERENCE_FRAME ref_frame,
                                           THR_MODES best_mode_idx,
                                           PREDICTION_MODE mode) {
  const THR_MODES this_idx = mode_idx[ref_frame][mode_offset(mode)];
  int *const fact = &tile_data->thresh_freq_fact[bsize][this_idx];

  if (this_idx != best_mode_idx) {
    const int raised = *fact + RD_THRESH_INC;
    const int ceiling = cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT;
    *fact = MIN(raised, ceiling);
  } else {
    *fact -= (*fact >> 4);
  }
}

968 969 970
// Non-RD intra mode selection for one block.
//
// Tries every luma intra mode from DC_PRED through H_PRED (inclusive),
// estimating each one's rate and distortion with estimate_block_intra()
// over all transform blocks of plane 0, and keeps the mode with the lowest
// RD cost.
//
// Side effects on the block's mode info: ref_frame[0] is set to
// INTRA_FRAME, mv[0] to INVALID_MV, uv_mode to DC_PRED, tx_size to the
// largest transform size allowed by both the block size and the frame's
// tx_mode, and mode to the winning mode. x->skip_txfm is cleared before the
// search. The winning cost is written to *rd_cost. `ctx` is unused.
void vp9_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost,
                         BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  RD_COST this_rdc, best_rdc;
  PREDICTION_MODE this_mode;
  struct estimate_block_intra_args args = { cpi, x, DC_PRED, 0, 0 };
  const TX_SIZE intra_tx_size =
      MIN(max_txsize_lookup[bsize],
          tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
  MODE_INFO *const mic = xd->mi[0];
  int *bmode_costs;
  const MODE_INFO *above_mi = xd->mi[-xd->mi_stride];
  const MODE_INFO *left_mi = xd->left_available ? xd->mi[-1] : NULL;
  // The mode cost table is selected by the above and left neighbours' modes.
  const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
  const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
  bmode_costs = cpi->y_mode_costs[A][L];

  (void) ctx;
  vp9_rd_cost_reset(&best_rdc);
  vp9_rd_cost_reset(&this_rdc);

  mbmi->ref_frame[0] = INTRA_FRAME;
  mbmi->mv[0].as_int = INVALID_MV;
  mbmi->uv_mode = DC_PRED;
  memset(x->skip_txfm, 0, sizeof(x->skip_txfm));

  // Change the limit of this loop to add other intra prediction
  // mode tests.
  for (this_mode = DC_PRED; this_mode <= H_PRED; ++this_mode) {
    // Reset the accumulators; the callback adds into them per transform block.
    args.mode = this_mode;
    args.rate = 0;
    args.dist = 0;
    mbmi->tx_size = intra_tx_size;
    vp9_foreach_transformed_block_in_plane(xd, bsize, 0,
                                           estimate_block_intra, &args);
    this_rdc.rate = args.rate;
    this_rdc.dist = args.dist;
    this_rdc.rate += bmode_costs[this_mode];
    this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
                             this_rdc.rate, this_rdc.dist);

    if (this_rdc.rdcost < best_rdc.rdcost) {
      best_rdc = this_rdc;
      mbmi->mode = this_mode;
    }
  }

  *rd_cost = best_rdc;
}

1019 1020 1021
// Fills ref_frame_cost[] with the bit cost of signalling each reference
// frame for the current block under single-reference prediction.
//
//   INTRA_FRAME : intra/inter bit = 0
//   LAST_FRAME  : intra/inter bit = 1, single_ref_p1 bit = 0
//   GOLDEN_FRAME: intra/inter bit = 1, single_ref_p1 bit = 1, p2 bit = 0
//   ALTREF_FRAME: intra/inter bit = 1, single_ref_p1 bit = 1, p2 bit = 1
//
// The probabilities are taken from the current context via the
// vp9_get_*_prob helpers.
static void init_ref_frame_cost(VP9_COMMON *const cm,
                                MACROBLOCKD *const xd,
                                int ref_frame_cost[MAX_REF_FRAMES]) {
  vpx_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd);
  vpx_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
  vpx_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);

  ref_frame_cost[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);
  // Every inter reference starts from the cost of the "inter" decision.
  ref_frame_cost[LAST_FRAME] = ref_frame_cost[GOLDEN_FRAME] =
    ref_frame_cost[ALTREF_FRAME] = vp9_cost_bit(intra_inter_p, 1);

  ref_frame_cost[LAST_FRAME] += vp9_cost_bit(ref_single_p1, 0);
  ref_frame_cost[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1);
  ref_frame_cost[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1);
  ref_frame_cost[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0);
  ref_frame_cost[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1);
}

1037 1038 1039 1040 1041 1042 1043 1044 1045
// A (reference frame, prediction mode) pair.
typedef struct {
  MV_REFERENCE_FRAME ref_frame;  // Reference frame of the candidate.
  PREDICTION_MODE pred_mode;     // Prediction mode of the candidate.
} REF_MODE;

#define RT_INTER_MODES 8
static const REF_MODE ref_mode_set[RT_INTER_MODES] = {
    {LAST_FRAME, ZEROMV},
    {LAST_FRAME, NEARESTMV},
1046
    {GOLDEN_FRAME, ZEROMV},
1047 1048 1049 1050 1051 1052 1053
    {LAST_FRAME, NEARMV},
    {LAST_FRAME, NEWMV},
    {GOLDEN_FRAME, NEARESTMV},
    {GOLDEN_FRAME, NEARMV},
    {GOLDEN_FRAME, NEWMV}
};

1054 1055
// TODO(jingning): Placeholder for inter-frame non-RD mode decision.
// This needs various further optimizations; to be continued.
1056
void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
1057
                         TileDataEnc *tile_data,
1058 1059
                         int mi_row, int mi_col, RD_COST *rd_cost,
                         BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
1060
  VP9_COMMON *const cm = &cpi->common;
1061
  SPEED_FEATURES *const sf = &cpi->sf;
1062
  TileInfo *const tile_info = &tile_data->tile_info;
1063
  MACROBLOCKD *const xd = &x->e_mbd;
1064
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
1065
  struct macroblockd_plane *const pd = &xd->plane[0];
1066
  PREDICTION_MODE best_mode = ZEROMV;
1067
  MV_REFERENCE_FRAME ref_frame, best_ref_frame = LAST_FRAME;
1068
  MV_REFERENCE_FRAME usable_ref_frame;
1069
  TX_SIZE best_tx_size = TX_SIZES;
1070
  INTERP_FILTER best_pred_filter = EIGHTTAP;
1071 1072 1073 1074
  int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
  struct buf_2d yv12_mb[4][MAX_MB_PLANE];
  static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
                                    VP9_ALT_FLAG };
1075
  RD_COST this_rdc, best_rdc;
1076
  uint8_t skip_txfm = 0, best_mode_skip_txfm = 0;
1077 1078 1079
  // var_y and sse_y are saved to be used in skipping checking
  unsigned int var_y = UINT_MAX;
  unsigned int sse_y = UINT_MAX;
1080
  // Reduce the intra cost penalty for small blocks (<=16x16).
1081 1082
  const int reduction_fac = (bsize <= BLOCK_16X16) ?
      ((bsize <= BLOCK_8X8) ? 4 : 2) : 0;
1083
  const int intra_cost_penalty = vp9_get_intra_cost_penalty(
1084
      cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth) >> reduction_fac;
1085 1086
  const int64_t inter_mode_thresh = RDCOST(x->rdmult, x->rddiv,
                                           intra_cost_penalty, 0);
1087
  const int *const rd_threshes = cpi->rd.threshes[mbmi->segment_id][bsize];
1088
  const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
Yaowu Xu's avatar
Yaowu Xu committed
1089
  INTERP_FILTER filter_ref;
1090
  const int bsl = mi_width_log2_lookup[bsize];
1091
  const int pred_filter_search = cm->interp_filter == SWITCHABLE ?
1092 1093
      (((mi_row + mi_col) >> bsl) +
       get_chessboard_index(cm->current_video_frame)) & 0x1 : 0;
1094
  int const_motion[MAX_REF_FRAMES] = { 0 };
1095 1096
  const int bh = num_4x4_blocks_high_lookup[bsize] << 2;
  const int bw = num_4x4_blocks_wide_lookup[bsize] << 2;
1097 1098
  // For speed 6, the result of interp filter is reused later in actual encoding
  // process.
1099 1100
  // tmp[3] points to dst buffer, and the other 3 point to allocated buffers.
  PRED_BUFFER tmp[4];
1101
  DECLARE_ALIGNED(16, uint8_t, pred_buf[3 * 64 * 64]);
1102
#if CONFIG_VP9_HIGHBITDEPTH
1103
  DECLARE_ALIGNED(16, uint16_t, pred_buf_16[3 * 64 * 64]);
1104
#endif
1105 1106 1107
  struct buf_2d orig_dst = pd->dst;
  PRED_BUFFER *best_pred = NULL;
  PRED_BUFFER *this_mode_pred = NULL;
1108
  const int pixels_in_block = bh * bw;
1109
  int reuse_inter_pred = cpi->sf.reuse_inter_pred_sby && ctx->pred_pixel_ready;
1110
  int ref_frame_skip_mask = 0;
1111
  int idx;
1112
  int best_pred_sad = INT_MAX;
1113
  int best_early_term = 0;
1114 1115
  int ref_frame_cost[MAX_REF_FRAMES];