/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
16

17
#include "vp9/common/vp9_pragmas.h"
18 19 20 21 22
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
23 24 25 26 27
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
28 29 30 31 32 33
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
34
#include "vpx_mem/vpx_mem.h"
35 36 37 38 39
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
40
#include "vp9_rtcd.h"
41
#include "vp9/common/vp9_mvref_common.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
42
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
43

44 45
#define INVALID_MV 0x80008000

46 47 48
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

49 50 51
DECLARE_ALIGNED(16, extern const uint8_t,
                vp9_pt_energy_class[MAX_ENTROPY_TOKENS]);

Ronald S. Bultje's avatar
Ronald S. Bultje committed
52
#define I4X4_PRED 0x8000
Ronald S. Bultje's avatar
Ronald S. Bultje committed
53
#define SPLITMV 0x10000
Ronald S. Bultje's avatar
Ronald S. Bultje committed
54

55
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
56 57
  {ZEROMV,    LAST_FRAME,   NONE},
  {DC_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
58

59 60
  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARMV,    LAST_FRAME,   NONE},
John Koleszar's avatar
John Koleszar committed
61

62 63
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
64

65 66
  {ZEROMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
67

68 69
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
70

71 72 73 74 75 76 77 78
  {V_PRED,    INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
79

80
  {TM_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
81

82 83 84
  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
85

86 87 88
  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},
89

Ronald S. Bultje's avatar
Ronald S. Bultje committed
90
  {I4X4_PRED, INTRA_FRAME,  NONE},
91

John Koleszar's avatar
John Koleszar committed
92 93 94 95
  /* compound prediction modes */
  {ZEROMV,    LAST_FRAME,   GOLDEN_FRAME},
  {NEARESTMV, LAST_FRAME,   GOLDEN_FRAME},
  {NEARMV,    LAST_FRAME,   GOLDEN_FRAME},
96

John Koleszar's avatar
John Koleszar committed
97 98 99
  {ZEROMV,    ALTREF_FRAME, LAST_FRAME},
  {NEARESTMV, ALTREF_FRAME, LAST_FRAME},
  {NEARMV,    ALTREF_FRAME, LAST_FRAME},
100

John Koleszar's avatar
John Koleszar committed
101 102 103
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
104

John Koleszar's avatar
John Koleszar committed
105 106 107
  {NEWMV,     LAST_FRAME,   GOLDEN_FRAME},
  {NEWMV,     ALTREF_FRAME, LAST_FRAME  },
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},
108

John Koleszar's avatar
John Koleszar committed
109 110
  {SPLITMV,   LAST_FRAME,   GOLDEN_FRAME},
  {SPLITMV,   ALTREF_FRAME, LAST_FRAME  },
111
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},
John Koleszar's avatar
John Koleszar committed
112 113
};

114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134
#if CONFIG_BALANCED_COEFTREE
static void fill_token_costs(vp9_coeff_count *c,
                             vp9_coeff_count *cnoskip,
                             vp9_coeff_probs_model *p,
                             TX_SIZE tx_size) {
  int i, j, k, l;
  for (i = 0; i < BLOCK_TYPES; i++)
    for (j = 0; j < REF_TYPES; j++)
      for (k = 0; k < COEF_BANDS; k++)
        for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
          vp9_prob probs[ENTROPY_NODES];
          vp9_model_to_full_probs(p[i][j][k][l], probs);
          vp9_cost_tokens((int *)cnoskip[i][j][k][l], probs,
                          vp9_coef_tree);
          // Replace the eob node prob with a very small value so that the
          // cost approximately equals the cost without the eob node
          probs[1] = 1;
          vp9_cost_tokens((int *)c[i][j][k][l], probs, vp9_coef_tree);
        }
}
#else
135
static void fill_token_costs(vp9_coeff_count *c,
136
                             vp9_coeff_probs_model *p,
137
                             TX_SIZE tx_size) {
138
  int i, j, k, l;
139
  for (i = 0; i < BLOCK_TYPES; i++)
140 141
    for (j = 0; j < REF_TYPES; j++)
      for (k = 0; k < COEF_BANDS; k++)
142 143
        for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
          vp9_prob probs[ENTROPY_NODES];
144
          vp9_model_to_full_probs(p[i][j][k][l], probs);
145 146 147
          vp9_cost_tokens_skip((int *)c[i][j][k][l], probs,
                               vp9_coef_tree);
        }
148
}
149
#endif
150

151 152 153 154
/* Adjustment factors applied to RDMULT in vp9_initialize_rd_consts():
 * RDMULT += (RDMULT * rd_iifactor[ratio]) >> 4, with ratios above 31 using
 * entry 31. Only the first five entries are non-zero; the remaining 27 are
 * zero through implicit initialization. */
static int rd_iifactor[32] = { 4, 4, 3, 2, 1 };
John Koleszar's avatar
John Koleszar committed
155

156
// 3* dc_qlookup[Q]*dc_qlookup[Q];
157

158
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
159 160 161
/* Lookup tables with one entry per quantizer index. Both are filled by
 * vp9_init_me_luts() and read by vp9_initialize_me_consts(), which copies
 * the selected entries into cpi->mb.sadperbit16 and cpi->mb.sadperbit4. */
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

162
void vp9_init_me_luts() {
John Koleszar's avatar
John Koleszar committed
163 164 165 166 167 168 169
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    sad_per_bit16lut[i] =
170
      (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107);
171
    sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742);
John Koleszar's avatar
John Koleszar committed
172
  }
Paul Wilkins's avatar
Paul Wilkins committed
173
}
John Koleszar's avatar
John Koleszar committed
174

175
/* Returns the base rate-distortion multiplier for a quantizer index:
 * (11 * q * q) >> 2, where q = vp9_dc_quant(qindex, 0). */
static int compute_rd_mult(int qindex) {
  const int q = vp9_dc_quant(qindex, 0);
  return (11 * q * q) >> 2;
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
180 181 182
/* Copies the SAD-per-bit constants for the given quantizer index from the
 * lookup tables into cpi->mb. qindex is used directly as the table index,
 * so it must lie in [0, QINDEX_RANGE). */
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
  cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
}

185

Dmitry Kovalev's avatar
Dmitry Kovalev committed
186
/* Initializes the rate-distortion constants and cost tables held in cpi for
 * the given quantizer index.
 *
 * Steps, in order:
 *  - clamps qindex to [0, MAXQ] and derives cpi->RDMULT from it, with an
 *    extra rd_iifactor[] adjustment on non-key frames in pass 2;
 *  - derives cpi->mb.errorperbit (never left at 0);
 *  - calls vp9_set_speed_features(), then scales cpi->sf.thresh_mult[] by a
 *    quantizer-dependent factor into cpi->rd_threshes[] and
 *    cpi->rd_baseline_thresh[], selecting cpi->RDDIV / rescaling RDMULT
 *    depending on whether RDMULT exceeds 1000;
 *  - fills the coefficient token, partition, mode and (on non-key frames)
 *    motion vector cost tables.
 */
void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
  int q, i;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  qindex = clamp(qindex, 0, MAXQ);

  cpi->RDMULT = compute_rd_mult(qindex);
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    if (cpi->twopass.next_iiratio > 31)
      cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
    else
      cpi->RDMULT +=
          (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
  }
  cpi->mb.errorperbit = cpi->RDMULT >> 6;
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);

  vp9_set_speed_features(cpi);

  q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25);
  q <<= 2;
  if (q < 8)
    q = 8;

  if (cpi->RDMULT > 1000) {
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;

    for (i = 0; i < MAX_MODES; i++) {
      if (cpi->sf.thresh_mult[i] < INT_MAX) {
        // Form the product in 64 bits: thresh_mult[i] is only known to be
        // below INT_MAX here, so thresh_mult[i] * q could overflow int.
        const int64_t thresh = (int64_t)cpi->sf.thresh_mult[i] * q / 100;
        cpi->rd_threshes[i] = thresh > INT_MAX ? INT_MAX : (int)thresh;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }
      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  } else {
    cpi->RDDIV = 100;

    for (i = 0; i < MAX_MODES; i++) {
      // The INT_MAX / q guard keeps the product below within int range.
      if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) {
        cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }
      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  }

#if CONFIG_BALANCED_COEFTREE
  fill_token_costs(cpi->mb.token_costs[TX_4X4],
                   cpi->mb.token_costs_noskip[TX_4X4],
                   cpi->common.fc.coef_probs_4x4, TX_4X4);
  fill_token_costs(cpi->mb.token_costs[TX_8X8],
                   cpi->mb.token_costs_noskip[TX_8X8],
                   cpi->common.fc.coef_probs_8x8, TX_8X8);
  fill_token_costs(cpi->mb.token_costs[TX_16X16],
                   cpi->mb.token_costs_noskip[TX_16X16],
                   cpi->common.fc.coef_probs_16x16, TX_16X16);
  fill_token_costs(cpi->mb.token_costs[TX_32X32],
                   cpi->mb.token_costs_noskip[TX_32X32],
                   cpi->common.fc.coef_probs_32x32, TX_32X32);
#else
  fill_token_costs(cpi->mb.token_costs[TX_4X4],
                   cpi->common.fc.coef_probs_4x4, TX_4X4);
  fill_token_costs(cpi->mb.token_costs[TX_8X8],
                   cpi->common.fc.coef_probs_8x8, TX_8X8);
  fill_token_costs(cpi->mb.token_costs[TX_16X16],
                   cpi->common.fc.coef_probs_16x16, TX_16X16);
  fill_token_costs(cpi->mb.token_costs[TX_32X32],
                   cpi->common.fc.coef_probs_32x32, TX_32X32);
#endif

  for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
    vp9_cost_tokens(cpi->mb.partition_cost[i],
                    cpi->common.fc.partition_prob[i],
                    vp9_partition_tree);

  /*rough estimate for costing*/
  vp9_init_mode_costs(cpi);

  if (cpi->common.frame_type != KEY_FRAME) {
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cpi->mb.e_mbd.allow_high_precision_mv ?
        cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cpi->common.fc.nmvc,
        cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
  }
}

282
/* Returns the sum of squared differences between the first block_size
 * entries of coeff and dqcoeff.
 *
 * The sum is accumulated in 64 bits and saturated to INT_MAX on return: a
 * single int16_t difference can reach 65535, whose square already exceeds
 * INT_MAX, so accumulating in int would overflow. */
int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) {
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    const int this_diff = coeff[i] - dqcoeff[i];
    error += (int64_t)this_diff * this_diff;
  }

  return error > INT_MAX ? INT_MAX : (int)error;
}

293
static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
294
                              int plane, int block, PLANE_TYPE type,
295 296
                              ENTROPY_CONTEXT *A,
                              ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
297 298
                              TX_SIZE tx_size,
                              int y_blocks) {
299
  MACROBLOCKD *const xd = &mb->e_mbd;
300 301
  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
  int pt;
302
  int c = 0;
303 304
  int cost = 0, pad;
  const int *scan, *nb;
305 306 307
  const int eob = xd->plane[plane].eobs[block];
  const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff,
                                           block, 16);
308
  const int ref = mbmi->ref_frame != INTRA_FRAME;
309
  unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
310
      mb->token_costs[tx_size][type][ref];
311
  ENTROPY_CONTEXT above_ec, left_ec;
312
  TX_TYPE tx_type = DCT_DCT;
313

314
  const int segment_id = xd->mode_info_context->mbmi.segment_id;
315 316 317 318
#if CONFIG_BALANCED_COEFTREE
  unsigned int (*token_costs_noskip)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
      mb->token_costs_noskip[tx_size][type][ref];
#else
319
  vp9_prob coef_probs[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
320 321
#endif

322 323
  int seg_eob, default_eob;
  uint8_t token_cache[1024];
Paul Wilkins's avatar
Paul Wilkins committed
324
  const uint8_t * band_translate;
325 326

  // Check for consistency of tx_size with mode info
327
  assert((!type && !plane) || (type && plane));
328 329 330 331 332 333 334
  if (type == PLANE_TYPE_Y_WITH_DC) {
    assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
  } else {
    TX_SIZE tx_size_uv = get_uv_tx_size(xd);
    assert(tx_size == tx_size_uv);
  }

335
  switch (tx_size) {
336
    case TX_4X4: {
337
      tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
338
          get_tx_type_4x4(xd, block) : DCT_DCT;
339 340
      above_ec = A[0] != 0;
      left_ec = L[0] != 0;
341
#if !CONFIG_BALANCED_COEFTREE
342
      vp9_model_to_full_probs_sb(cm->fc.coef_probs_4x4[type][ref],
343
                                 coef_probs);
344
#endif
345
      seg_eob = 16;
346
      scan = get_scan_4x4(tx_type);
Paul Wilkins's avatar
Paul Wilkins committed
347
      band_translate = vp9_coefband_trans_4x4;
Daniel Kang's avatar
Daniel Kang committed
348
      break;
349
    }
350 351
    case TX_8X8: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
352
      const int sz = 1 + b_width_log2(sb_type);
353
      const int x = block & ((1 << sz) - 1), y = block - x;
354 355
      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT;
356 357
      above_ec = (A[0] + A[1]) != 0;
      left_ec = (L[0] + L[1]) != 0;
358
      scan = get_scan_8x8(tx_type);
359
#if !CONFIG_BALANCED_COEFTREE
360
      vp9_model_to_full_probs_sb(cm->fc.coef_probs_8x8[type][ref],
361
                                 coef_probs);
362
#endif
363
      seg_eob = 64;
Paul Wilkins's avatar
Paul Wilkins committed
364
      band_translate = vp9_coefband_trans_8x8plus;
Daniel Kang's avatar
Daniel Kang committed
365
      break;
366 367 368
    }
    case TX_16X16: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
369
      const int sz = 2 + b_width_log2(sb_type);
370
      const int x = block & ((1 << sz) - 1), y = block - x;
371 372
      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT;
373
      scan = get_scan_16x16(tx_type);
374
#if !CONFIG_BALANCED_COEFTREE
375
      vp9_model_to_full_probs_sb(cm->fc.coef_probs_16x16[type][ref],
376
                                 coef_probs);
377
#endif
378
      seg_eob = 256;
379 380
      above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
Paul Wilkins's avatar
Paul Wilkins committed
381
      band_translate = vp9_coefband_trans_8x8plus;
Daniel Kang's avatar
Daniel Kang committed
382
      break;
383
    }
384
    case TX_32X32:
Paul Wilkins's avatar
Paul Wilkins committed
385
      scan = vp9_default_scan_32x32;
386
#if !CONFIG_BALANCED_COEFTREE
387
      vp9_model_to_full_probs_sb(cm->fc.coef_probs_32x32[type][ref],
388
                                 coef_probs);
389
#endif
390
      seg_eob = 1024;
391 392
      above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;
Paul Wilkins's avatar
Paul Wilkins committed
393
      band_translate = vp9_coefband_trans_8x8plus;
394
      break;
Daniel Kang's avatar
Daniel Kang committed
395
    default:
396
      abort();
Daniel Kang's avatar
Daniel Kang committed
397 398
      break;
  }
John Koleszar's avatar
John Koleszar committed
399
  assert(eob <= seg_eob);
400

401
  pt = combine_entropy_contexts(above_ec, left_ec);
402 403
  nb = vp9_get_coef_neighbors_handle(scan, &pad);
  default_eob = seg_eob;
404

405 406
  if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
    seg_eob = 0;
407

408 409 410 411
  /* sanity check to ensure that we do not have spurious non-zero q values */
  if (eob < seg_eob)
    assert(qcoeff_ptr[scan[eob]] == 0);

412
  {
413
    for (c = 0; c < eob; c++) {
414
      int v = qcoeff_ptr[scan[c]];
415
      int t = vp9_dct_value_tokens_ptr[v].token;
Paul Wilkins's avatar
Paul Wilkins committed
416
      int band = get_coef_band(band_translate, c);
417 418
      if (c)
        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
419

420 421 422 423 424 425
#if CONFIG_BALANCED_COEFTREE
      if (!c || token_cache[scan[c - 1]])  // do not skip eob
        cost += token_costs_noskip[band][pt][t] + vp9_dct_value_cost_ptr[v];
      else
        cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
#else
426
      cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
427
      if (!c || token_cache[scan[c - 1]])
428
        cost += vp9_cost_bit(coef_probs[band][pt][0], 1);
429
#endif
430
      token_cache[scan[c]] = vp9_pt_energy_class[t];
431
    }
432 433 434
    if (c < seg_eob) {
      if (c)
        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
435 436 437 438 439
#if CONFIG_BALANCED_COEFTREE
      cost += mb->token_costs_noskip[tx_size][type][ref]
          [get_coef_band(band_translate, c)]
          [pt][DCT_EOB_TOKEN];
#else
440
      cost += mb->token_costs[tx_size][type][ref]
Paul Wilkins's avatar
Paul Wilkins committed
441
          [get_coef_band(band_translate, c)]
442
          [pt][DCT_EOB_TOKEN];
443
#endif
444
    }
445 446
  }

447 448 449
  // is eob first coefficient;
  for (pt = 0; pt < (1 << tx_size); pt++) {
    A[pt] = L[pt] = c > 0;
450
  }
451

452 453 454
  return cost;
}

455
/* Chooses the transform size for the current block from per-size rate,
 * distortion and skip results, and fills the per-transform-mode RD cache.
 *
 *  r             - r[n][0] is read as the rate for transform size n;
 *                  r[n][1] is overwritten with r[n][0] plus the cost of
 *                  coding the size choice using cm->prob_tx[].
 *  d, s          - per-size distortion and skip flag.
 *  rate, distortion, skip
 *                - outputs for the chosen size. *rate uses r[..][1] when
 *                  cm->txfm_mode is TX_MODE_SELECT and r[..][0] otherwise.
 *  txfm_cache    - output RD cost for each transform mode.
 *  max_txfm_size - largest transform size with valid entries in r/d/s;
 *                  entries above it are never read.
 *
 * The chosen size is stored in the block's mbmi->txfm_size.
 */
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                     int (*r)[2], int *rate,
                                     int *d, int *distortion,
                                     int *s, int *skip,
                                     int64_t txfm_cache[NB_TXFM_MODES],
                                     TX_SIZE max_txfm_size) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  vp9_prob skip_prob = vp9_get_pred_prob(cm, xd, PRED_MBSKIP);
  int64_t rd[TX_SIZE_MAX_SB][2];
  int n, m;
  int s0, s1;

  // Rate including the transform-size signalling: one "one" cost per size
  // stepped past, and a closing "zero" cost unless n is the largest size.
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    r[n][1] = r[n][0];
    for (m = 0; m <= n - (n == max_txfm_size); m++) {
      if (m == n)
        r[n][1] += vp9_cost_zero(cm->prob_tx[m]);
      else
        r[n][1] += vp9_cost_one(cm->prob_tx[m]);
    }
  }

  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);

  // rd[n][0] / rd[n][1]: RD cost without / with size signalling. A skipped
  // block pays only the skip-flag cost.
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }
  }

  // The max_txfm_size tests come first in each condition so that rd[]
  // entries above max_txfm_size (left unset) are not read.
  if (max_txfm_size == TX_32X32 &&
      (cm->txfm_mode == ALLOW_32X32 ||
       (cm->txfm_mode == TX_MODE_SELECT &&
        rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
        rd[TX_32X32][1] < rd[TX_4X4][1]))) {
    mbmi->txfm_size = TX_32X32;
  } else if (max_txfm_size >= TX_16X16 &&
             (cm->txfm_mode == ALLOW_16X16 ||
              cm->txfm_mode == ALLOW_32X32 ||
              (cm->txfm_mode == TX_MODE_SELECT &&
               rd[TX_16X16][1] < rd[TX_8X8][1] &&
               rd[TX_16X16][1] < rd[TX_4X4][1]))) {
    mbmi->txfm_size = TX_16X16;
  } else if (cm->txfm_mode == ALLOW_8X8 ||
             cm->txfm_mode == ALLOW_16X16 ||
             cm->txfm_mode == ALLOW_32X32 ||
           (cm->txfm_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) {
    mbmi->txfm_size = TX_8X8;
  } else {
    mbmi->txfm_size = TX_4X4;
  }

  *distortion = d[mbmi->txfm_size];
  *rate       = r[mbmi->txfm_size][cm->txfm_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->txfm_size];

  txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
  txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
  txfm_cache[ALLOW_16X16] = rd[MIN(max_txfm_size, TX_16X16)][0];
  txfm_cache[ALLOW_32X32] = rd[MIN(max_txfm_size, TX_32X32)][0];
  if (max_txfm_size == TX_32X32 &&
      rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1])
    txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  else if (max_txfm_size >= TX_16X16 &&
           rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])
    txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
  else
    txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
                                 rd[TX_4X4][1] : rd[TX_8X8][1];
}

534 535
/* Returns the sum of squared differences between the first block_size
 * entries of coeff and dqcoeff, right-shifted by shift and saturated to
 * INT_MAX. The sum is accumulated in 64 bits. */
static int block_error(int16_t *coeff, int16_t *dqcoeff,
                       int block_size, int shift) {
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    const int this_diff = coeff[i] - dqcoeff[i];
    // |this_diff| <= 65535, so the square needs more than 31 bits.
    error += (int64_t)this_diff * this_diff;
  }
  error >>= shift;

  return error > INT_MAX ? INT_MAX : (int)error;
}

548 549
/* Returns block_error() between x->plane[0].coeff and the plane-0
 * dequantized coefficients over 16 << (bwl + bhl) entries, where bwl / bhl
 * are b_width_log2(bsize) / b_height_log2(bsize). shift is passed through
 * to block_error(). */
static int block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
  const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
  return block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff,
                     16 << (bwl + bhl), shift);
}
553

554 555 556 557
/* Returns the combined block_error() of planes 1..MAX_MB_PLANE-1.
 *
 * Each plane's entry count is 16 << (bwl + bhl - subsampling), where
 * subsampling is the sum of that plane's subsampling_x and subsampling_y.
 * Per-plane errors are computed unshifted, summed in 64 bits, then
 * right-shifted by shift and saturated to INT_MAX. */
static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
  const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
  int64_t sum = 0;
  int plane;

  for (plane = 1; plane < MAX_MB_PLANE; plane++) {
    const int subsampling = x->e_mbd.plane[plane].subsampling_x +
                            x->e_mbd.plane[plane].subsampling_y;
    sum += block_error(x->plane[plane].coeff, x->e_mbd.plane[plane].dqcoeff,
                       16 << (bwl + bhl - subsampling), 0);
  }
  sum >>= shift;
  return sum > INT_MAX ? INT_MAX : (int)sum;
}

569 570
/* Returns the summed cost_coeffs() of every transform block of size tx_size
 * in one plane of a block of size bsize.
 *
 * The plane's above/left entropy contexts are copied into local arrays
 * first, so the context updates made by cost_coeffs() do not touch
 * xd->plane[plane]. */
static int rdcost_plane(VP9_COMMON *const cm, MACROBLOCK *x,
                        int plane, BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
  const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y;
  const int bw = 1 << bwl, bh = 1 << bhl;
  ENTROPY_CONTEXT t_above[16], t_left[16];
  int block, cost;

  vpx_memcpy(&t_above, xd->plane[plane].above_context,
             sizeof(ENTROPY_CONTEXT) * bw);
  vpx_memcpy(&t_left,  xd->plane[plane].left_context,
             sizeof(ENTROPY_CONTEXT) * bh);

  cost = 0;
  // Step by the number of block units covered by one transform block.
  for (block = 0; block < bw * bh; block += 1 << (tx_size * 2)) {
    int x_idx, y_idx;

    txfrm_block_to_raster_xy(xd, bsize, plane, block, tx_size * 2,
                             &x_idx, &y_idx);

    cost += cost_coeffs(cm, x, plane, block, xd->plane[plane].plane_type,
                        t_above + x_idx, t_left + y_idx,
                        tx_size, bw * bh);
  }

  return cost;
}

598 599 600
/* Returns the sum of rdcost_plane() over planes 1..MAX_MB_PLANE-1 for the
 * given block size and transform size. */
static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x,
                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  int cost = 0, plane;

  for (plane = 1; plane < MAX_MB_PLANE; plane++) {
    cost += rdcost_plane(cm, x, plane, bsize, tx_size);
  }
  return cost;
}

608 609 610
/* Computes rate, distortion and skippability of plane 0 for one transform
 * size.
 *
 * Sets the block's mbmi.txfm_size to tx_size, runs vp9_xform_quant_sby(),
 * then fills:
 *   *distortion - block_error_sby() with shift 0 for TX_32X32, 2 otherwise;
 *   *rate       - rdcost_plane() for plane 0;
 *   *skippable  - vp9_sby_is_skippable(). */
static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
                                     int *rate, int *distortion, int *skippable,
                                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  xd->mode_info_context->mbmi.txfm_size = tx_size;
  vp9_xform_quant_sby(cm, x, bsize);

  *distortion = block_error_sby(x, bsize, tx_size == TX_32X32 ? 0 : 2);
  *rate       = rdcost_plane(cm, x, 0, bsize, tx_size);
  *skippable  = vp9_sby_is_skippable(xd, bsize);
}

620 621
static void super_block_yrd(VP9_COMP *cpi,
                            MACROBLOCK *x, int *rate, int *distortion,
622
                            int *skip, BLOCK_SIZE_TYPE bs,
623
                            int64_t txfm_cache[NB_TXFM_MODES]) {
624 625
  VP9_COMMON *const cm = &cpi->common;
  int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB];
Jim Bankoski's avatar
Jim Bankoski committed
626 627
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
628

629
  vp9_subtract_sby(x, bs);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
630

Jim Bankoski's avatar
Jim Bankoski committed
631 632 633 634 635 636 637 638 639 640 641 642 643 644
  if (cpi->speed > 4) {
    if (bs >= BLOCK_SIZE_SB32X32) {
      mbmi->txfm_size = TX_32X32;
    } else if (bs >= BLOCK_SIZE_MB16X16) {
      mbmi->txfm_size = TX_16X16;
    } else if (bs >= BLOCK_SIZE_SB8X8) {
      mbmi->txfm_size = TX_8X8;
    } else {
      mbmi->txfm_size = TX_4X4;
    }
    super_block_yrd_for_txfm(cm, x, rate, distortion, skip, bs,
                             mbmi->txfm_size);
    return;
  }
645
  if (bs >= BLOCK_SIZE_SB32X32)
646 647
    super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32],
                             bs, TX_32X32);
648
  if (bs >= BLOCK_SIZE_MB16X16)
649 650 651 652 653 654
    super_block_yrd_for_txfm(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16],
                             bs, TX_16X16);
  super_block_yrd_for_txfm(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], bs,
                           TX_8X8);
  super_block_yrd_for_txfm(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], bs,
                           TX_4X4);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
655 656

  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
657
                           TX_32X32 - (bs < BLOCK_SIZE_SB32X32)
Jingning Han's avatar
Jingning Han committed
658
                           - (bs < BLOCK_SIZE_MB16X16));
Ronald S. Bultje's avatar
Ronald S. Bultje committed
659
}
Ronald S. Bultje's avatar
Ronald S. Bultje committed
660

661
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
Yaowu Xu's avatar
Yaowu Xu committed
662
                                     MB_PREDICTION_MODE *best_mode,
663 664 665
                                     int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
666 667 668
                                     int *bestdistortion,
                                     BLOCK_SIZE_TYPE bsize) {
  MB_PREDICTION_MODE mode;
Deb Mukherjee's avatar
Deb Mukherjee committed
669
  MACROBLOCKD *xd = &x->e_mbd;
670
  int64_t best_rd = INT64_MAX;
John Koleszar's avatar
John Koleszar committed
671 672
  int rate = 0;
  int distortion;
673
  VP9_COMMON *const cm = &cpi->common;
John Koleszar's avatar
John Koleszar committed
674
  const int src_stride = x->plane[0].src.stride;
675 676 677 678 679
  uint8_t *src, *dst;
  int16_t *src_diff, *coeff;

  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];
Deb Mukherjee's avatar
Deb Mukherjee committed
680 681
  TX_TYPE tx_type = DCT_DCT;
  TX_TYPE best_tx_type = DCT_DCT;
682 683 684 685
  int bw = 1 << b_width_log2(bsize);
  int bh = 1 << b_height_log2(bsize);
  int idx, idy, block;
  DECLARE_ALIGNED(16, int16_t, best_dqcoeff[4][16]);
John Koleszar's avatar
John Koleszar committed
686

Jingning Han's avatar
Jingning Han committed
687
  assert(ib < 4);
688

689 690
  vpx_memcpy(ta, a, sizeof(ta));
  vpx_memcpy(tl, l, sizeof(tl));
691
  xd->mode_info_context->mbmi.txfm_size = TX_4X4;
692 693

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
694
    int64_t this_rd;
695
    int ratey = 0;
696

697
    rate = bmode_costs[mode];
698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714
    distortion = 0;

    vpx_memcpy(tempa, ta, sizeof(ta));
    vpx_memcpy(templ, tl, sizeof(tl));

    for (idy = 0; idy < bh; ++idy) {
      for (idx = 0; idx < bw; ++idx) {
        block = ib + idy * 2 + idx;
        xd->mode_info_context->bmi[block].as_mode.first = mode;
        src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
                                        x->plane[0].src.buf, src_stride);
        src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, block,
                                             x->plane[0].src_diff);
        coeff = BLOCK_OFFSET(x->plane[0].coeff, block, 16);
        dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
                                        xd->plane[0].dst.buf,
                                        xd->plane[0].dst.stride);
715 716
        vp9_intra4x4_predict(xd, block, BLOCK_SIZE_SB8X8, mode,
                             dst, xd->plane[0].dst.stride);
717 718 719 720 721 722 723 724 725 726 727 728
        vp9_subtract_block(4, 4, src_diff, 8,
                           src, src_stride,
                           dst, xd->plane[0].dst.stride);

        tx_type = get_tx_type_4x4(xd, block);
        if (tx_type != DCT_DCT) {
          vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
          x->quantize_b_4x4(x, block, tx_type, 16);
        } else {
          x->fwd_txm4x4(src_diff, coeff, 16);
          x->quantize_b_4x4(x, block, tx_type, 16);
        }
John Koleszar's avatar
John Koleszar committed
729

730 731 732 733
        ratey += cost_coeffs(cm, x, 0, block, PLANE_TYPE_Y_WITH_DC,
                             tempa + idx, templ + idy, TX_4X4, 16);
        distortion += vp9_block_error(coeff, BLOCK_OFFSET(xd->plane[0].dqcoeff,
                                                         block, 16), 16) >> 2;
John Koleszar's avatar
John Koleszar committed
734

735 736 737 738 739 740 741 742
        if (best_tx_type != DCT_DCT)
          vp9_short_iht4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
                               dst, xd->plane[0].dst.stride, best_tx_type);
        else
          xd->inv_txm4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
                             dst, xd->plane[0].dst.stride);
      }
    }
Jingning Han's avatar
Jingning Han committed
743

744 745
    rate += ratey;
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
Jingning Han's avatar
Jingning Han committed
746

747 748 749 750 751 752 753
    if (this_rd < best_rd) {
      *bestrate = rate;
      *bestratey = ratey;
      *bestdistortion = distortion;
      best_rd = this_rd;
      *best_mode = mode;
      best_tx_type = tx_type;
754 755 756 757 758 759 760 761 762 763
      vpx_memcpy(a, tempa, sizeof(tempa));
      vpx_memcpy(l, templ, sizeof(templ));
      for (idy = 0; idy < bh; ++idy) {
        for (idx = 0; idx < bw; ++idx) {
          block = ib + idy * 2 + idx;
          vpx_memcpy(best_dqcoeff[idy * 2 + idx],
                     BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
                     sizeof(best_dqcoeff[0]));
        }
      }
John Koleszar's avatar
John Koleszar committed
764
    }
John Koleszar's avatar
John Koleszar committed
765
  }
766 767 768 769 770 771 772 773 774 775 776 777 778 779