/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
16

17
#include "vp9/common/vp9_pragmas.h"
18 19 20 21 22
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
23 24 25 26 27
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
28 29 30 31 32 33
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
34
#include "vpx_mem/vpx_mem.h"
35 36 37 38 39
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
40
#include "vp9_rtcd.h"
41
#include "vp9/common/vp9_mvref_common.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
42
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
43

44 45
#define INVALID_MV 0x80008000

46 47 48
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

49 50 51
DECLARE_ALIGNED(16, extern const uint8_t,
                vp9_pt_energy_class[MAX_ENTROPY_TOKENS]);

52
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
53 54
  {ZEROMV,    LAST_FRAME,   NONE},
  {DC_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
55

56 57
  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARMV,    LAST_FRAME,   NONE},
John Koleszar's avatar
John Koleszar committed
58

59 60
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
61

62 63
  {ZEROMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
64

65 66
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
67

68 69 70 71 72 73 74 75
  {V_PRED,    INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
76

77
  {TM_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
78

79 80 81
  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
82

83 84 85
  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},
86

Yaowu Xu's avatar
Yaowu Xu committed
87
  {I4X4_PRED,    INTRA_FRAME,  NONE},
88

John Koleszar's avatar
John Koleszar committed
89 90 91 92
  /* compound prediction modes */
  {ZEROMV,    LAST_FRAME,   GOLDEN_FRAME},
  {NEARESTMV, LAST_FRAME,   GOLDEN_FRAME},
  {NEARMV,    LAST_FRAME,   GOLDEN_FRAME},
93

John Koleszar's avatar
John Koleszar committed
94 95 96
  {ZEROMV,    ALTREF_FRAME, LAST_FRAME},
  {NEARESTMV, ALTREF_FRAME, LAST_FRAME},
  {NEARMV,    ALTREF_FRAME, LAST_FRAME},
97

John Koleszar's avatar
John Koleszar committed
98 99 100
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
101

John Koleszar's avatar
John Koleszar committed
102 103 104
  {NEWMV,     LAST_FRAME,   GOLDEN_FRAME},
  {NEWMV,     ALTREF_FRAME, LAST_FRAME  },
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},
105

John Koleszar's avatar
John Koleszar committed
106 107
  {SPLITMV,   LAST_FRAME,   GOLDEN_FRAME},
  {SPLITMV,   ALTREF_FRAME, LAST_FRAME  },
108
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},
John Koleszar's avatar
John Koleszar committed
109 110
};

111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131
#if CONFIG_BALANCED_COEFTREE
static void fill_token_costs(vp9_coeff_count *c,
                             vp9_coeff_count *cnoskip,
                             vp9_coeff_probs_model *p,
                             TX_SIZE tx_size) {
  int i, j, k, l;
  for (i = 0; i < BLOCK_TYPES; i++)
    for (j = 0; j < REF_TYPES; j++)
      for (k = 0; k < COEF_BANDS; k++)
        for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
          vp9_prob probs[ENTROPY_NODES];
          vp9_model_to_full_probs(p[i][j][k][l], probs);
          vp9_cost_tokens((int *)cnoskip[i][j][k][l], probs,
                          vp9_coef_tree);
          // Replace the eob node prob with a very small value so that the
          // cost approximately equals the cost without the eob node
          probs[1] = 1;
          vp9_cost_tokens((int *)c[i][j][k][l], probs, vp9_coef_tree);
        }
}
#else
132
static void fill_token_costs(vp9_coeff_count *c,
133
                             vp9_coeff_probs_model *p,
134
                             TX_SIZE tx_size) {
135
  int i, j, k, l;
136
  for (i = 0; i < BLOCK_TYPES; i++)
137 138
    for (j = 0; j < REF_TYPES; j++)
      for (k = 0; k < COEF_BANDS; k++)
139 140
        for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
          vp9_prob probs[ENTROPY_NODES];
141
          vp9_model_to_full_probs(p[i][j][k][l], probs);
142 143 144
          vp9_cost_tokens_skip((int *)c[i][j][k][l], probs,
                               vp9_coef_tree);
        }
145
}
146
#endif
147

148 149 150 151
/* 32-entry lookup table; only the first five entries are non-zero, the
 * remaining 27 are zero-initialized implicitly. */
static int rd_iifactor[32] = { 4, 4, 3, 2, 1 };
John Koleszar's avatar
John Koleszar committed
152

153
// 3* dc_qlookup[Q]*dc_qlookup[Q];
154

155
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
156 157 158
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

/* Populates sad_per_bit16lut and sad_per_bit4lut, one entry for each
 * quantizer index in [0, QINDEX_RANGE). */
void vp9_init_me_luts() {
  int qindex = 0;

  // Initialize the sad lut tables using a formulaic calculation for now
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  while (qindex < QINDEX_RANGE) {
    sad_per_bit16lut[qindex] =
        (int)(0.0418 * vp9_convert_qindex_to_q(qindex) + 2.4107);
    sad_per_bit4lut[qindex] =
        (int)(0.063 * vp9_convert_qindex_to_q(qindex) + 2.742);
    ++qindex;
  }
}
John Koleszar's avatar
John Koleszar committed
171

172
/* Returns (11 * q * q) >> 2, where q is vp9_dc_quant(qindex, 0). */
static int compute_rd_mult(int qindex) {
  const int dc_q = vp9_dc_quant(qindex, 0);
  const int scaled_square = 11 * dc_q * dc_q;

  return scaled_square >> 2;
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
177 178 179
/* Copies the SAD-per-bit table entries for qindex into cpi->mb.
 * qindex is used as a direct table index with no range check here. */
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  MACROBLOCK *const mb = &cpi->mb;

  mb->sadperbit16 = sad_per_bit16lut[qindex];
  mb->sadperbit4 = sad_per_bit4lut[qindex];
}

182

Dmitry Kovalev's avatar
Dmitry Kovalev committed
183
/* Sets up the rate-distortion constants and cost tables on cpi for the
 * given quantizer index.
 *
 * In order, this function:
 *   - clamps qindex to [0, MAXQ] and derives cpi->RDMULT from it, with an
 *     extra rd_iifactor-based boost on non-key frames in pass 2;
 *   - derives cpi->mb.errorperbit (never left at 0);
 *   - calls vp9_set_speed_features(), then scales each sf.thresh_mult[]
 *     entry by a quantizer-derived factor into rd_threshes[] and
 *     rd_baseline_thresh[];
 *   - refills the coefficient token costs, partition costs, mode costs and
 *     (on non-key frames) the motion-vector cost tables.
 */
void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
  int q, i;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  qindex = clamp(qindex, 0, MAXQ);

  cpi->RDMULT = compute_rd_mult(qindex);
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    if (cpi->twopass.next_iiratio > 31)
      cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
    else
      cpi->RDMULT +=
          (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
  }
  cpi->mb.errorperbit = cpi->RDMULT >> 6;
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);

  vp9_set_speed_features(cpi);

  q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25);
  q <<= 2;
  if (q < 8)
    q = 8;

  if (cpi->RDMULT > 1000) {
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;

    for (i = 0; i < MAX_MODES; i++) {
      if (cpi->sf.thresh_mult[i] < INT_MAX) {
        // Form the product in 64 bits: thresh_mult * q can exceed INT_MAX
        // even though the final value (after / 100) may still fit.
        const int64_t scaled = (int64_t)cpi->sf.thresh_mult[i] * q / 100;
        cpi->rd_threshes[i] = scaled > INT_MAX ? INT_MAX : (int)scaled;
      } else {
        // INT_MAX is passed through unchanged rather than scaled.
        cpi->rd_threshes[i] = INT_MAX;
      }
      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  } else {
    cpi->RDDIV = 100;

    for (i = 0; i < MAX_MODES; i++) {
      // q >= 8 here, so the division is safe and guards the multiply.
      if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) {
        cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }
      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  }

#if CONFIG_BALANCED_COEFTREE
  fill_token_costs(cpi->mb.token_costs[TX_4X4],
                   cpi->mb.token_costs_noskip[TX_4X4],
                   cpi->common.fc.coef_probs_4x4, TX_4X4);
  fill_token_costs(cpi->mb.token_costs[TX_8X8],
                   cpi->mb.token_costs_noskip[TX_8X8],
                   cpi->common.fc.coef_probs_8x8, TX_8X8);
  fill_token_costs(cpi->mb.token_costs[TX_16X16],
                   cpi->mb.token_costs_noskip[TX_16X16],
                   cpi->common.fc.coef_probs_16x16, TX_16X16);
  fill_token_costs(cpi->mb.token_costs[TX_32X32],
                   cpi->mb.token_costs_noskip[TX_32X32],
                   cpi->common.fc.coef_probs_32x32, TX_32X32);
#else
  fill_token_costs(cpi->mb.token_costs[TX_4X4],
                   cpi->common.fc.coef_probs_4x4, TX_4X4);
  fill_token_costs(cpi->mb.token_costs[TX_8X8],
                   cpi->common.fc.coef_probs_8x8, TX_8X8);
  fill_token_costs(cpi->mb.token_costs[TX_16X16],
                   cpi->common.fc.coef_probs_16x16, TX_16X16);
  fill_token_costs(cpi->mb.token_costs[TX_32X32],
                   cpi->common.fc.coef_probs_32x32, TX_32X32);
#endif

  for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
    vp9_cost_tokens(cpi->mb.partition_cost[i],
                    cpi->common.fc.partition_prob[i],
                    vp9_partition_tree);

  /*rough estimate for costing*/
  cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4;
  vp9_init_mode_costs(cpi);

  if (cpi->common.frame_type != KEY_FRAME) {
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cpi->mb.e_mbd.allow_high_precision_mv ?
        cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cpi->common.fc.nmvc,
        cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
  }
}

280
/* Returns the sum of squared differences between the first block_size
 * entries of coeff and dqcoeff, saturated at INT_MAX.
 *
 * The sum is accumulated in 64 bits: the difference of two int16_t values
 * can reach +/-65535, whose square alone already exceeds INT_MAX, so
 * accumulating in int would overflow. */
int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) {
  int64_t error = 0;
  int i;

  for (i = 0; i < block_size; i++) {
    const int64_t this_diff = (int64_t)coeff[i] - dqcoeff[i];
    error += this_diff * this_diff;
  }

  return error > INT_MAX ? INT_MAX : (int)error;
}

291
static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
292
                              int plane, int block, PLANE_TYPE type,
293 294
                              ENTROPY_CONTEXT *A,
                              ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
295 296
                              TX_SIZE tx_size,
                              int y_blocks) {
297
  MACROBLOCKD *const xd = &mb->e_mbd;
298 299
  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
  int pt;
300
  int c = 0;
301 302
  int cost = 0, pad;
  const int *scan, *nb;
303 304 305
  const int eob = xd->plane[plane].eobs[block];
  const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff,
                                           block, 16);
306
  const int ref = mbmi->ref_frame != INTRA_FRAME;
307
  unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
308
      mb->token_costs[tx_size][type][ref];
309
  ENTROPY_CONTEXT above_ec, left_ec;
310
  TX_TYPE tx_type = DCT_DCT;
311

312
  const int segment_id = xd->mode_info_context->mbmi.segment_id;
313 314 315 316
#if CONFIG_BALANCED_COEFTREE
  unsigned int (*token_costs_noskip)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
      mb->token_costs_noskip[tx_size][type][ref];
#else
317
  vp9_prob coef_probs[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
318 319
#endif

320 321
  int seg_eob, default_eob;
  uint8_t token_cache[1024];
Paul Wilkins's avatar
Paul Wilkins committed
322
  const uint8_t * band_translate;
323 324

  // Check for consistency of tx_size with mode info
325
  assert((!type && !plane) || (type && plane));
326 327 328 329 330 331 332
  if (type == PLANE_TYPE_Y_WITH_DC) {
    assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
  } else {
    TX_SIZE tx_size_uv = get_uv_tx_size(xd);
    assert(tx_size == tx_size_uv);
  }

333
  switch (tx_size) {
334
    case TX_4X4: {
335
      tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
336
          get_tx_type_4x4(xd, block) : DCT_DCT;
337 338
      above_ec = A[0] != 0;
      left_ec = L[0] != 0;
339
#if !CONFIG_BALANCED_COEFTREE
340
      vp9_model_to_full_probs_sb(cm->fc.coef_probs_4x4[type][ref],
341
                                 coef_probs);
342
#endif
343
      seg_eob = 16;
344
      scan = get_scan_4x4(tx_type);
Paul Wilkins's avatar
Paul Wilkins committed
345
      band_translate = vp9_coefband_trans_4x4;
Daniel Kang's avatar
Daniel Kang committed
346
      break;
347
    }
348 349
    case TX_8X8: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
350
      const int sz = 1 + b_width_log2(sb_type);
351
      const int x = block & ((1 << sz) - 1), y = block - x;
352 353
      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT;
354 355
      above_ec = (A[0] + A[1]) != 0;
      left_ec = (L[0] + L[1]) != 0;
356
      scan = get_scan_8x8(tx_type);
357
#if !CONFIG_BALANCED_COEFTREE
358
      vp9_model_to_full_probs_sb(cm->fc.coef_probs_8x8[type][ref],
359
                                 coef_probs);
360
#endif
361
      seg_eob = 64;
Paul Wilkins's avatar
Paul Wilkins committed
362
      band_translate = vp9_coefband_trans_8x8plus;
Daniel Kang's avatar
Daniel Kang committed
363
      break;
364 365 366
    }
    case TX_16X16: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
367
      const int sz = 2 + b_width_log2(sb_type);
368
      const int x = block & ((1 << sz) - 1), y = block - x;
369 370
      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT;
371
      scan = get_scan_16x16(tx_type);
372
#if !CONFIG_BALANCED_COEFTREE
373
      vp9_model_to_full_probs_sb(cm->fc.coef_probs_16x16[type][ref],
374
                                 coef_probs);
375
#endif
376
      seg_eob = 256;
377 378
      above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
Paul Wilkins's avatar
Paul Wilkins committed
379
      band_translate = vp9_coefband_trans_8x8plus;
Daniel Kang's avatar
Daniel Kang committed
380
      break;
381
    }
382
    case TX_32X32:
Paul Wilkins's avatar
Paul Wilkins committed
383
      scan = vp9_default_scan_32x32;
384
#if !CONFIG_BALANCED_COEFTREE
385
      vp9_model_to_full_probs_sb(cm->fc.coef_probs_32x32[type][ref],
386
                                 coef_probs);
387
#endif
388
      seg_eob = 1024;
389 390
      above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;
Paul Wilkins's avatar
Paul Wilkins committed
391
      band_translate = vp9_coefband_trans_8x8plus;
392
      break;
Daniel Kang's avatar
Daniel Kang committed
393
    default:
394
      abort();
Daniel Kang's avatar
Daniel Kang committed
395 396
      break;
  }
John Koleszar's avatar
John Koleszar committed
397
  assert(eob <= seg_eob);
398

399
  pt = combine_entropy_contexts(above_ec, left_ec);
400 401
  nb = vp9_get_coef_neighbors_handle(scan, &pad);
  default_eob = seg_eob;
402

403 404
  if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
    seg_eob = 0;
405

406 407 408 409
  /* sanity check to ensure that we do not have spurious non-zero q values */
  if (eob < seg_eob)
    assert(qcoeff_ptr[scan[eob]] == 0);

410
  {
411
    for (c = 0; c < eob; c++) {
412
      int v = qcoeff_ptr[scan[c]];
413
      int t = vp9_dct_value_tokens_ptr[v].token;
Paul Wilkins's avatar
Paul Wilkins committed
414
      int band = get_coef_band(band_translate, c);
415 416
      if (c)
        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
417

418 419 420 421 422 423
#if CONFIG_BALANCED_COEFTREE
      if (!c || token_cache[scan[c - 1]])  // do not skip eob
        cost += token_costs_noskip[band][pt][t] + vp9_dct_value_cost_ptr[v];
      else
        cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
#else
424
      cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
425
      if (!c || token_cache[scan[c - 1]])
426
        cost += vp9_cost_bit(coef_probs[band][pt][0], 1);
427
#endif
428
      token_cache[scan[c]] = vp9_pt_energy_class[t];
429
    }
430 431 432
    if (c < seg_eob) {
      if (c)
        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
433 434 435 436 437
#if CONFIG_BALANCED_COEFTREE
      cost += mb->token_costs_noskip[tx_size][type][ref]
          [get_coef_band(band_translate, c)]
          [pt][DCT_EOB_TOKEN];
#else
438
      cost += mb->token_costs[tx_size][type][ref]
Paul Wilkins's avatar
Paul Wilkins committed
439
          [get_coef_band(band_translate, c)]
440
          [pt][DCT_EOB_TOKEN];
441
#endif
442
    }
443 444
  }

445 446 447
  // is eob first coefficient;
  for (pt = 0; pt < (1 << tx_size); pt++) {
    A[pt] = L[pt] = c > 0;
448
  }
449

450 451 452
  return cost;
}

453
/* Chooses the transform size for the current block from per-size
 * rate/distortion/skip results and records the RD cost of each transform
 * mode in txfm_cache.
 *
 * r[n][0] holds the rate for size n on entry; r[n][1] is filled here with
 * that rate plus the cost of signalling size n via cm->prob_tx.
 * d[n] / s[n] are the distortion and skip flag for size n.
 * Only sizes TX_4X4..max_txfm_size are examined.
 * On return mbmi->txfm_size, *rate, *distortion and *skip describe the
 * chosen size. */
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                     int (*r)[2], int *rate,
                                     int *d, int *distortion,
                                     int *s, int *skip,
                                     int64_t txfm_cache[NB_TXFM_MODES],
                                     TX_SIZE max_txfm_size) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  vp9_prob skip_prob = vp9_get_pred_prob(cm, xd, PRED_MBSKIP);
  int64_t rd[TX_SIZE_MAX_SB][2];
  int skip_cost_off, skip_cost_on;
  int use_select, wins32, wins16, wins8;
  int n, m;

  assert(skip_prob > 0);
  skip_cost_off = vp9_cost_bit(skip_prob, 0);
  skip_cost_on = vp9_cost_bit(skip_prob, 1);

  for (n = TX_4X4; n <= max_txfm_size; n++) {
    // The largest allowed size needs one signalling bit fewer.
    const int last_bit = n - (n == max_txfm_size);

    r[n][1] = r[n][0];
    for (m = 0; m <= last_bit; m++) {
      r[n][1] += (m == n) ? vp9_cost_zero(cm->prob_tx[m])
                          : vp9_cost_one(cm->prob_tx[m]);
    }

    if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, skip_cost_on, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + skip_cost_off, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + skip_cost_off, d[n]);
    }
  }

  // Each flag is true when that size is permitted and strictly beats every
  // smaller size on the signalled-size RD cost; the size guard comes first
  // so rd[] entries that were never filled are not read.
  use_select = cm->txfm_mode == TX_MODE_SELECT;
  wins32 = max_txfm_size == TX_32X32 &&
           rd[TX_32X32][1] < rd[TX_16X16][1] &&
           rd[TX_32X32][1] < rd[TX_8X8][1] &&
           rd[TX_32X32][1] < rd[TX_4X4][1];
  wins16 = max_txfm_size >= TX_16X16 &&
           rd[TX_16X16][1] < rd[TX_8X8][1] &&
           rd[TX_16X16][1] < rd[TX_4X4][1];
  wins8 = rd[TX_8X8][1] < rd[TX_4X4][1];

  if (max_txfm_size == TX_32X32 &&
      (cm->txfm_mode == ALLOW_32X32 || (use_select && wins32))) {
    mbmi->txfm_size = TX_32X32;
  } else if (max_txfm_size >= TX_16X16 &&
             (cm->txfm_mode == ALLOW_16X16 ||
              cm->txfm_mode == ALLOW_32X32 ||
              (use_select && wins16))) {
    mbmi->txfm_size = TX_16X16;
  } else if (cm->txfm_mode == ALLOW_8X8 ||
             cm->txfm_mode == ALLOW_16X16 ||
             cm->txfm_mode == ALLOW_32X32 ||
             (use_select && wins8)) {
    mbmi->txfm_size = TX_8X8;
  } else {
    mbmi->txfm_size = TX_4X4;
  }

  *distortion = d[mbmi->txfm_size];
  *rate       = r[mbmi->txfm_size][use_select];
  *skip       = s[mbmi->txfm_size];

  txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
  txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
  txfm_cache[ALLOW_16X16] = rd[MIN(max_txfm_size, TX_16X16)][0];
  txfm_cache[ALLOW_32X32] = rd[MIN(max_txfm_size, TX_32X32)][0];

  if (wins32)
    txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  else if (wins16)
    txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
  else
    txfm_cache[TX_MODE_SELECT] = wins8 ? rd[TX_8X8][1] : rd[TX_4X4][1];
}

532 533
/* Returns the sum of squared differences between the first block_size
 * entries of coeff and dqcoeff, right-shifted by shift and saturated at
 * INT_MAX.
 *
 * Both the per-element square and the running sum are held in 64 bits, so
 * neither can overflow for int16_t inputs.  Neither input array is
 * modified. */
static int block_error(const int16_t *coeff, const int16_t *dqcoeff,
                       int block_size, int shift) {
  int64_t error = 0;
  int i;

  for (i = 0; i < block_size; i++) {
    const int64_t this_diff = (int64_t)coeff[i] - dqcoeff[i];
    error += this_diff * this_diff;
  }
  error >>= shift;

  return error > INT_MAX ? INT_MAX : (int)error;
}

546 547
/* Returns block_error() over plane 0 of x for a block of size bsize:
 * 16 << (width_log2 + height_log2) coefficients, with the given shift. */
static int block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
  const int area_log2 = b_width_log2(bsize) + b_height_log2(bsize);
  const int num_coeffs = 16 << area_log2;

  return block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff,
                     num_coeffs, shift);
}
551

552 553 554 555
/* Sums block_error() (unshifted) over planes 1..MAX_MB_PLANE-1 of x, each
 * with its coefficient count reduced by that plane's subsampling, then
 * applies shift to the total and saturates at INT_MAX. */
static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
  const int area_log2 = b_width_log2(bsize) + b_height_log2(bsize);
  int64_t total = 0;
  int plane = 1;

  while (plane < MAX_MB_PLANE) {
    const int ss = x->e_mbd.plane[plane].subsampling_x +
                   x->e_mbd.plane[plane].subsampling_y;

    total += block_error(x->plane[plane].coeff, x->e_mbd.plane[plane].dqcoeff,
                         16 << (area_log2 - ss), 0);
    ++plane;
  }
  total >>= shift;

  if (total > INT_MAX)
    return INT_MAX;
  return (int)total;
}

567 568
/* Returns the summed cost_coeffs() result over every transform block of
 * size tx_size in the given plane of a bsize block.
 *
 * Works on local copies of the plane's above/left entropy contexts, so the
 * contexts stored in xd are not modified by this function. */
static int rdcost_plane(VP9_COMMON *const cm, MACROBLOCK *x,
                        int plane, BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
  const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y;
  const int bw = 1 << bwl, bh = 1 << bhl;
  const int num_blocks = bw * bh;
  const int step = 1 << (tx_size * 2);
  ENTROPY_CONTEXT t_above[16], t_left[16];
  int total = 0;
  int block;

  vpx_memcpy(t_above, xd->plane[plane].above_context,
             sizeof(ENTROPY_CONTEXT) * bw);
  vpx_memcpy(t_left, xd->plane[plane].left_context,
             sizeof(ENTROPY_CONTEXT) * bh);

  for (block = 0; block < num_blocks; block += step) {
    int x_idx, y_idx;

    txfrm_block_to_raster_xy(xd, bsize, plane, block, tx_size * 2,
                             &x_idx, &y_idx);

    total += cost_coeffs(cm, x, plane, block, xd->plane[plane].plane_type,
                         t_above + x_idx, t_left + y_idx,
                         tx_size, num_blocks);
  }

  return total;
}

596 597 598
/* Returns the sum of rdcost_plane() over planes 1..MAX_MB_PLANE-1. */
static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x,
                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  int total = 0;
  int plane = 1;

  while (plane < MAX_MB_PLANE) {
    total += rdcost_plane(cm, x, plane, bsize, tx_size);
    ++plane;
  }

  return total;
}

606 607 608
/* Evaluates plane 0 of the block with one specific transform size.
 *
 * Sets mbmi.txfm_size to tx_size, runs vp9_xform_quant_sby(), and then
 * reports the block error (*distortion; shifted by 2 except for TX_32X32),
 * the coefficient cost (*rate) and vp9_sby_is_skippable() (*skippable). */
static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
                                     int *rate, int *distortion, int *skippable,
                                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const int error_shift = (tx_size == TX_32X32) ? 0 : 2;

  xd->mode_info_context->mbmi.txfm_size = tx_size;
  vp9_xform_quant_sby(cm, x, bsize);

  *distortion = block_error_sby(x, bsize, error_shift);
  *rate       = rdcost_plane(cm, x, 0, bsize, tx_size);
  *skippable  = vp9_sby_is_skippable(xd, bsize);
}

618 619
static void super_block_yrd(VP9_COMP *cpi,
                            MACROBLOCK *x, int *rate, int *distortion,
620
                            int *skip, BLOCK_SIZE_TYPE bs,
621
                            int64_t txfm_cache[NB_TXFM_MODES]) {
622 623
  VP9_COMMON *const cm = &cpi->common;
  int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB];
Ronald S. Bultje's avatar
Ronald S. Bultje committed
624

625
  vp9_subtract_sby(x, bs);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
626

627
  if (bs >= BLOCK_SIZE_SB32X32)
628 629
    super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32],
                             bs, TX_32X32);
630
  if (bs >= BLOCK_SIZE_MB16X16)
631 632 633 634 635 636
    super_block_yrd_for_txfm(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16],
                             bs, TX_16X16);
  super_block_yrd_for_txfm(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], bs,
                           TX_8X8);
  super_block_yrd_for_txfm(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], bs,
                           TX_4X4);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
637 638

  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
639
                           TX_32X32 - (bs < BLOCK_SIZE_SB32X32)
Jingning Han's avatar
Jingning Han committed
640
                           - (bs < BLOCK_SIZE_MB16X16));
Ronald S. Bultje's avatar
Ronald S. Bultje committed
641
}
Ronald S. Bultje's avatar
Ronald S. Bultje committed
642

643
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
Yaowu Xu's avatar
Yaowu Xu committed
644
                                     MB_PREDICTION_MODE *best_mode,
645 646 647
                                     int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
648 649 650
                                     int *bestdistortion,
                                     BLOCK_SIZE_TYPE bsize) {
  MB_PREDICTION_MODE mode;
Deb Mukherjee's avatar
Deb Mukherjee committed
651
  MACROBLOCKD *xd = &x->e_mbd;
652
  int64_t best_rd = INT64_MAX;
John Koleszar's avatar
John Koleszar committed
653 654
  int rate = 0;
  int distortion;
655
  VP9_COMMON *const cm = &cpi->common;
John Koleszar's avatar
John Koleszar committed
656
  const int src_stride = x->plane[0].src.stride;
657 658 659 660 661
  uint8_t *src, *dst;
  int16_t *src_diff, *coeff;

  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];
Deb Mukherjee's avatar
Deb Mukherjee committed
662 663
  TX_TYPE tx_type = DCT_DCT;
  TX_TYPE best_tx_type = DCT_DCT;
664 665 666 667
  int bw = 1 << b_width_log2(bsize);
  int bh = 1 << b_height_log2(bsize);
  int idx, idy, block;
  DECLARE_ALIGNED(16, int16_t, best_dqcoeff[4][16]);
John Koleszar's avatar
John Koleszar committed
668

Jingning Han's avatar
Jingning Han committed
669
  assert(ib < 4);
670

671 672
  vpx_memcpy(ta, a, sizeof(ta));
  vpx_memcpy(tl, l, sizeof(tl));
673
  xd->mode_info_context->mbmi.txfm_size = TX_4X4;
674 675

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
676
    int64_t this_rd;
677
    int ratey = 0;
678

679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699
    if (cm->frame_type == KEY_FRAME)
      rate = bmode_costs[mode];
    else
      rate = x->mbmode_cost[cm->frame_type][mode];
    distortion = 0;

    vpx_memcpy(tempa, ta, sizeof(ta));
    vpx_memcpy(templ, tl, sizeof(tl));

    for (idy = 0; idy < bh; ++idy) {
      for (idx = 0; idx < bw; ++idx) {
        block = ib + idy * 2 + idx;
        xd->mode_info_context->bmi[block].as_mode.first = mode;
        src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
                                        x->plane[0].src.buf, src_stride);
        src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, block,
                                             x->plane[0].src_diff);
        coeff = BLOCK_OFFSET(x->plane[0].coeff, block, 16);
        dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
                                        xd->plane[0].dst.buf,
                                        xd->plane[0].dst.stride);
700 701
        vp9_intra4x4_predict(xd, block, BLOCK_SIZE_SB8X8, mode,
                             dst, xd->plane[0].dst.stride);
702 703 704 705 706 707 708 709 710 711 712 713
        vp9_subtract_block(4, 4, src_diff, 8,
                           src, src_stride,
                           dst, xd->plane[0].dst.stride);

        tx_type = get_tx_type_4x4(xd, block);
        if (tx_type != DCT_DCT) {
          vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
          x->quantize_b_4x4(x, block, tx_type, 16);
        } else {
          x->fwd_txm4x4(src_diff, coeff, 16);
          x->quantize_b_4x4(x, block, tx_type, 16);
        }
John Koleszar's avatar
John Koleszar committed
714

715 716 717 718
        ratey += cost_coeffs(cm, x, 0, block, PLANE_TYPE_Y_WITH_DC,
                             tempa + idx, templ + idy, TX_4X4, 16);
        distortion += vp9_block_error(coeff, BLOCK_OFFSET(xd->plane[0].dqcoeff,
                                                         block, 16), 16) >> 2;
John Koleszar's avatar
John Koleszar committed
719

720 721 722 723 724 725 726 727
        if (best_tx_type != DCT_DCT)
          vp9_short_iht4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
                               dst, xd->plane[0].dst.stride, best_tx_type);
        else
          xd->inv_txm4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
                             dst, xd->plane[0].dst.stride);
      }
    }
Jingning Han's avatar
Jingning Han committed
728

729 730
    rate += ratey;
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
Jingning Han's avatar
Jingning Han committed
731

732 733 734 735 736 737 738
    if (this_rd < best_rd) {
      *bestrate = rate;
      *bestratey = ratey;
      *bestdistortion = distortion;
      best_rd = this_rd;
      *best_mode = mode;
      best_tx_type = tx_type;
739 740 741 742 743 744 745 746 747 748
      vpx_memcpy(a, tempa, sizeof(tempa));
      vpx_memcpy(l, templ, sizeof(templ));
      for (idy = 0; idy < bh; ++idy) {
        for (idx = 0; idx < bw; ++idx) {
          block = ib + idy * 2 + idx;
          vpx_memcpy(best_dqcoeff[idy * 2 + idx],
                     BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
                     sizeof(best_dqcoeff[0]));
        }
      }
John Koleszar's avatar
John Koleszar committed
749
    }
John Koleszar's avatar
John Koleszar committed
750
  }
751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769

  for (idy = 0; idy < bh; ++idy) {
    for (idx = 0; idx < bw; ++idx) {
      block = ib + idy * 2 + idx;
      xd->mode_info_context->bmi[block].as_mode.first = *best_mode;
      dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
                                      xd->plane[0].dst.buf,
                                      xd->plane[0].dst.stride);

      vp9_intra4x4_predict(xd, block, BLOCK_SIZE_SB8X8, *best_mode,
                           dst, xd->plane[0].dst.stride);
      // inverse transform
      if (best_tx_type != DCT_DCT)
        vp9_short_iht4x4_add(best_dqcoeff[idy * 2 + idx], dst,
                             xd->plane[0].dst.stride, best_tx_type);
      else
        xd->inv_txm4x4_add(best_dqcoeff[idy * 2 + idx], dst,
                           xd->plane[0].dst.stride);
    }
Scott LaVarnway's avatar
Scott LaVarnway committed
770
  }
John Koleszar's avatar
John Koleszar committed
771

John Koleszar's avatar
John Koleszar committed
772
  return best_rd;
John Koleszar's avatar
John Koleszar committed
773 774
}

775 776
static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi