vp9_rdopt.c 204 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
16

17
#include "vp9/common/vp9_pragmas.h"
18 19 20 21 22
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
23 24 25 26 27
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
28 29 30 31 32 33
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
34
#include "vpx_mem/vpx_mem.h"
35 36 37 38 39
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
40
#include "vp9_rtcd.h"
41
#include "vp9/common/vp9_mvref_common.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
42
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
43

44 45
#define INVALID_MV 0x80008000

46 47 48
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

John Koleszar's avatar
John Koleszar committed
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
/* Table of 17 threshold values.  Judging by its name it is presumably
 * indexed by the encoder's auto-speed level (TODO: confirm against users). */
static const int auto_speed_thresh[17] = {
  1000, 200, 150, 130, 150, 125, 120, 115,
  115,  115, 115, 115, 115, 115, 115, 115,
  105
};

69
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
70 71
  {ZEROMV,    LAST_FRAME,   NONE},
  {DC_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
72

73 74
  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARMV,    LAST_FRAME,   NONE},
John Koleszar's avatar
John Koleszar committed
75

76 77
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
78

79 80
  {ZEROMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
81

82 83
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
84

85 86 87 88 89 90 91 92
  {V_PRED,    INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
93

94
  {TM_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
95

96 97 98
  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
99

100 101 102
  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},
103

Yaowu Xu's avatar
Yaowu Xu committed
104
  {I4X4_PRED,    INTRA_FRAME,  NONE},
105
#if !CONFIG_SB8X8
106
  {I8X8_PRED, INTRA_FRAME,  NONE},
107
#endif
108

John Koleszar's avatar
John Koleszar committed
109 110 111 112
  /* compound prediction modes */
  {ZEROMV,    LAST_FRAME,   GOLDEN_FRAME},
  {NEARESTMV, LAST_FRAME,   GOLDEN_FRAME},
  {NEARMV,    LAST_FRAME,   GOLDEN_FRAME},
113

John Koleszar's avatar
John Koleszar committed
114 115 116
  {ZEROMV,    ALTREF_FRAME, LAST_FRAME},
  {NEARESTMV, ALTREF_FRAME, LAST_FRAME},
  {NEARMV,    ALTREF_FRAME, LAST_FRAME},
117

John Koleszar's avatar
John Koleszar committed
118 119 120
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
121

John Koleszar's avatar
John Koleszar committed
122 123 124
  {NEWMV,     LAST_FRAME,   GOLDEN_FRAME},
  {NEWMV,     ALTREF_FRAME, LAST_FRAME  },
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},
125

John Koleszar's avatar
John Koleszar committed
126 127
  {SPLITMV,   LAST_FRAME,   GOLDEN_FRAME},
  {SPLITMV,   ALTREF_FRAME, LAST_FRAME  },
128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},

#if CONFIG_COMP_INTERINTRA_PRED
  /* compound inter-intra prediction */
  {ZEROMV,    LAST_FRAME,   INTRA_FRAME},
  {NEARESTMV, LAST_FRAME,   INTRA_FRAME},
  {NEARMV,    LAST_FRAME,   INTRA_FRAME},
  {NEWMV,     LAST_FRAME,   INTRA_FRAME},

  {ZEROMV,    GOLDEN_FRAME,   INTRA_FRAME},
  {NEARESTMV, GOLDEN_FRAME,   INTRA_FRAME},
  {NEARMV,    GOLDEN_FRAME,   INTRA_FRAME},
  {NEWMV,     GOLDEN_FRAME,   INTRA_FRAME},

  {ZEROMV,    ALTREF_FRAME,   INTRA_FRAME},
  {NEARESTMV, ALTREF_FRAME,   INTRA_FRAME},
  {NEARMV,    ALTREF_FRAME,   INTRA_FRAME},
  {NEWMV,     ALTREF_FRAME,   INTRA_FRAME},
#endif
John Koleszar's avatar
John Koleszar committed
147 148
};

149 150
static void fill_token_costs(vp9_coeff_count *c,
                             vp9_coeff_probs *p,
151
                             TX_SIZE tx_size) {
152
  int i, j, k, l;
John Koleszar's avatar
John Koleszar committed
153

154
  for (i = 0; i < BLOCK_TYPES; i++)
155 156
    for (j = 0; j < REF_TYPES; j++)
      for (k = 0; k < COEF_BANDS; k++)
157 158
        for (l = 0; l < PREV_COEF_CONTEXTS; l++)
          vp9_cost_tokens_skip((int *)c[i][j][k][l], p[i][j][k][l],
159 160 161
                               vp9_coef_tree);
}

162 163 164 165
/* Read-only adjustment factors in sixteenths: an entry v adds
 * (RDMULT * v) >> 4 to RDMULT.  Only the first five entries are non-zero. */
static const int rd_iifactor[32] = {
  4, 4, 3, 2, 1, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
};
John Koleszar's avatar
John Koleszar committed
166

167
// 3* dc_qlookup[Q]*dc_qlookup[Q];
168

169
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
170 171 172
/* Lookup tables with one entry per quantizer index (QINDEX_RANGE entries).
 * They are filled by vp9_init_me_luts() and copied into
 * cpi->mb.sadperbit16 / cpi->mb.sadperbit4 by vp9_initialize_me_consts(). */
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

173
void vp9_init_me_luts() {
John Koleszar's avatar
John Koleszar committed
174 175 176 177 178 179 180
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    sad_per_bit16lut[i] =
181
      (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107);
182
    sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742);
John Koleszar's avatar
John Koleszar committed
183
  }
Paul Wilkins's avatar
Paul Wilkins committed
184
}
John Koleszar's avatar
John Koleszar committed
185

186
/* Returns (11 * q * q) >> 2, where q is vp9_dc_quant(qindex, 0). */
static int compute_rd_mult(int qindex) {
  const int dc_q = vp9_dc_quant(qindex, 0);
  const int scaled_sq = 11 * dc_q * dc_q;

  return scaled_sq >> 2;
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
191 192 193
/* Copies the SAD-per-bit values for `qindex` from the lookup tables into
 * cpi->mb.  `qindex` is used directly as an array index and is not range
 * checked here. */
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  const int per_bit_16 = sad_per_bit16lut[qindex];
  const int per_bit_4 = sad_per_bit4lut[qindex];

  cpi->mb.sadperbit16 = per_bit_16;
  cpi->mb.sadperbit4 = per_bit_4;
}

196

Dmitry Kovalev's avatar
Dmitry Kovalev committed
197
/* Initializes the rate-distortion constants of `cpi` for quantizer index
 * `qindex` (clamped to [0, MAXQ]):
 *   - RDMULT / RDDIV and mb.errorperbit,
 *   - per-mode RD thresholds (rd_threshes / rd_baseline_thresh),
 *   - coefficient token costs for all four transform sizes,
 *   - partition costs and mode costs,
 *   - on non-key frames, the motion vector cost tables.
 * Also calls vp9_set_speed_features(cpi) before computing the thresholds. */
void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
  int q, i;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  qindex = clamp(qindex, 0, MAXQ);

  cpi->RDMULT = compute_rd_mult(qindex);
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    // rd_iifactor has 32 entries; ratios above 31 use the last one.
    if (cpi->twopass.next_iiratio > 31)
      cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
    else
      cpi->RDMULT +=
          (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
  }
  cpi->mb.errorperbit = cpi->RDMULT >> 6;
  // Never let errorperbit be zero.
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);

  vp9_set_speed_features(cpi);

  q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25);
  q <<= 2;
  if (q < 8)
    q = 8;

  if (cpi->RDMULT > 1000) {
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;

    for (i = 0; i < MAX_MODES; i++) {
      if (cpi->sf.thresh_mult[i] < INT_MAX) {
        // Multiply in 64 bits: thresh_mult * q can exceed INT_MAX even
        // though the final value after /100 may still fit.  Saturate if
        // it does not.
        const int64_t thresh = (int64_t)cpi->sf.thresh_mult[i] * q / 100;
        cpi->rd_threshes[i] = thresh > INT_MAX ? INT_MAX : (int)thresh;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }
      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  } else {
    cpi->RDDIV = 100;

    for (i = 0; i < MAX_MODES; i++) {
      // q >= 8 here, so INT_MAX / q is well defined and guards the product.
      if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) {
        cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }
      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  }

  fill_token_costs(cpi->mb.token_costs[TX_4X4],
                   cpi->common.fc.coef_probs_4x4, TX_4X4);
  fill_token_costs(cpi->mb.token_costs[TX_8X8],
                   cpi->common.fc.coef_probs_8x8, TX_8X8);
  fill_token_costs(cpi->mb.token_costs[TX_16X16],
                   cpi->common.fc.coef_probs_16x16, TX_16X16);
  fill_token_costs(cpi->mb.token_costs[TX_32X32],
                   cpi->common.fc.coef_probs_32x32, TX_32X32);

  for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
    vp9_cost_tokens(cpi->mb.partition_cost[i],
                    cpi->common.fc.partition_prob[i],
                    vp9_partition_tree);

  /*rough estimate for costing*/
  cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4;
  vp9_init_mode_costs(cpi);

  if (cpi->common.frame_type != KEY_FRAME) {
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cpi->mb.e_mbd.allow_high_precision_mv ?
        cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cpi->common.fc.nmvc,
        cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
  }
}

279
/* Returns the sum of squared differences between the first `block_size`
 * entries of `coeff` and `dqcoeff`.
 *
 * The sum is accumulated in 64 bits and saturated to INT_MAX, so large
 * differences no longer overflow a signed int (undefined behavior).  For
 * inputs whose sum fits in an int the result is unchanged. */
int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) {
  int64_t error = 0;
  int i;

  for (i = 0; i < block_size; i++) {
    const int this_diff = coeff[i] - dqcoeff[i];
    // Widen before squaring: |this_diff| can reach 65535.
    error += (int64_t)this_diff * this_diff;
  }

  return error > INT_MAX ? INT_MAX : (int)error;
}

290
static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
291
                              int ib, PLANE_TYPE type,
292 293
                              ENTROPY_CONTEXT *A,
                              ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
294 295
                              TX_SIZE tx_size,
                              int y_blocks) {
296
  MACROBLOCKD *const xd = &mb->e_mbd;
297 298
  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
  int pt;
299
  int c = 0;
300 301
  int cost = 0, pad;
  const int *scan, *nb;
John Koleszar's avatar
John Koleszar committed
302 303
  const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, ib);
  const int eob = xd->plane[pb_idx.plane].eobs[pb_idx.block];
304 305
  const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff,
                                           pb_idx.block, 16);
306
  const int ref = mbmi->ref_frame != INTRA_FRAME;
307
  unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
308
      mb->token_costs[tx_size][type][ref];
309
  ENTROPY_CONTEXT above_ec, left_ec;
310
  TX_TYPE tx_type = DCT_DCT;
311

312 313 314 315 316 317 318 319
#if CONFIG_CODE_ZEROGROUP
  int last_nz_pos[3] = {-1, -1, -1};  // Encoder only
  int is_eoo_list[3] = {0, 0, 0};
  int is_eoo_negative[3] = {0, 0, 0};
  int is_last_zero[3] = {0, 0, 0};
  int o, rc, skip_coef_val;
  vp9_zpc_probs *zpc_probs;
  uint8_t token_cache_full[1024];
320
#endif
321
  const int segment_id = xd->mode_info_context->mbmi.segment_id;
322 323 324 325
  vp9_prob (*coef_probs)[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS]
                        [ENTROPY_NODES];
  int seg_eob, default_eob;
  uint8_t token_cache[1024];
326

327 328 329 330
#if CONFIG_CODE_ZEROGROUP
  vpx_memset(token_cache, UNKNOWN_TOKEN, sizeof(token_cache));
#endif

331
  // Check for consistency of tx_size with mode info
John Koleszar's avatar
John Koleszar committed
332
  assert((!type && !pb_idx.plane) || (type && pb_idx.plane));
333 334 335 336 337 338 339
  if (type == PLANE_TYPE_Y_WITH_DC) {
    assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
  } else {
    TX_SIZE tx_size_uv = get_uv_tx_size(xd);
    assert(tx_size == tx_size_uv);
  }

340
  switch (tx_size) {
341
    case TX_4X4: {
342 343
      tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_4x4(xd, ib) : DCT_DCT;
344 345
      above_ec = A[0] != 0;
      left_ec = L[0] != 0;
346
      coef_probs = cm->fc.coef_probs_4x4;
347
      seg_eob = 16;
348
      scan = get_scan_4x4(tx_type);
349 350 351
#if CONFIG_CODE_ZEROGROUP
      zpc_probs = &cm->fc.zpc_probs_4x4;
#endif
Daniel Kang's avatar
Daniel Kang committed
352
      break;
353
    }
354 355
    case TX_8X8: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
356
      const int sz = 1 + b_width_log2(sb_type);
357
      const int x = ib & ((1 << sz) - 1), y = ib - x;
358 359
      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT;
360 361
      above_ec = (A[0] + A[1]) != 0;
      left_ec = (L[0] + L[1]) != 0;
362
      scan = get_scan_8x8(tx_type);
363
      coef_probs = cm->fc.coef_probs_8x8;
364
      seg_eob = 64;
365 366 367
#if CONFIG_CODE_ZEROGROUP
      zpc_probs = &cm->fc.zpc_probs_8x8;
#endif
Daniel Kang's avatar
Daniel Kang committed
368
      break;
369 370 371
    }
    case TX_16X16: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
372
      const int sz = 2 + b_width_log2(sb_type);
373
      const int x = ib & ((1 << sz) - 1), y = ib - x;
374 375
      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT;
376
      scan = get_scan_16x16(tx_type);
377
      coef_probs = cm->fc.coef_probs_16x16;
378
      seg_eob = 256;
379 380
      above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
381 382 383
#if CONFIG_CODE_ZEROGROUP
      zpc_probs = &cm->fc.zpc_probs_16x16;
#endif
Daniel Kang's avatar
Daniel Kang committed
384
      break;
385
    }
386 387
    case TX_32X32:
      scan = vp9_default_zig_zag1d_32x32;
388
      coef_probs = cm->fc.coef_probs_32x32;
389
      seg_eob = 1024;
390 391 392
      above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;

393 394 395
#if CONFIG_CODE_ZEROGROUP
      zpc_probs = &cm->fc.zpc_probs_32x32;
#endif
396
      break;
Daniel Kang's avatar
Daniel Kang committed
397
    default:
398
      abort();
Daniel Kang's avatar
Daniel Kang committed
399 400
      break;
  }
John Koleszar's avatar
John Koleszar committed
401
  assert(eob <= seg_eob);
402

403
  pt = combine_entropy_contexts(above_ec, left_ec);
404 405
  nb = vp9_get_coef_neighbors_handle(scan, &pad);
  default_eob = seg_eob;
406

407 408
  if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
    seg_eob = 0;
409

410 411 412 413
  /* sanity check to ensure that we do not have spurious non-zero q values */
  if (eob < seg_eob)
    assert(qcoeff_ptr[scan[eob]] == 0);

414 415 416 417 418 419 420 421 422 423
#if CONFIG_CODE_ZEROGROUP
  vpx_memset(token_cache_full, ZERO_TOKEN, sizeof(token_cache_full));
  for (c = 0; c < eob; ++c) {
    rc = scan[c];
    token_cache_full[rc] = vp9_dct_value_tokens_ptr[qcoeff_ptr[rc]].token;
    o = vp9_get_orientation(rc, tx_size);
    if (qcoeff_ptr[rc] != 0)
      last_nz_pos[o] = c;
  }
#endif
424
  {
425
    for (c = 0; c < eob; c++) {
426
      int v = qcoeff_ptr[scan[c]];
427
      int t = vp9_dct_value_tokens_ptr[v].token;
428 429 430 431 432 433 434 435 436 437 438 439 440 441 442
      int band = get_coef_band(scan, tx_size, c);
      if (c)
        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
#if CONFIG_CODE_ZEROGROUP
      rc = scan[c];
      o = vp9_get_orientation(rc, tx_size);
      skip_coef_val = (token_cache[rc] == ZERO_TOKEN || is_eoo_list[o]);
      if (!skip_coef_val) {
        cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
      } else {
        assert(v == 0);
      }
#else
      cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
#endif
443 444
      if (!c || token_cache[scan[c - 1]])
        cost += vp9_cost_bit(coef_probs[type][ref][band][pt][0], 1);
445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500
      token_cache[scan[c]] = t;
#if CONFIG_CODE_ZEROGROUP
      if (t == ZERO_TOKEN && !skip_coef_val) {
        int eoo = 0, use_eoo;
#if USE_ZPC_EOORIENT == 1
        use_eoo = vp9_use_eoo(c, seg_eob, scan, tx_size,
                              is_last_zero, is_eoo_list);
#else
        use_eoo = 0;
#endif
        if (use_eoo) {
          eoo = vp9_is_eoo(c, eob, scan, tx_size, qcoeff_ptr, last_nz_pos);
          if (eoo && is_eoo_negative[o]) eoo = 0;
          if (eoo) {
            int c_;
            int savings = 0;
            int zsaved = 0;
            savings = vp9_cost_bit((*zpc_probs)[ref]
                                   [coef_to_zpc_band(band)]
                                   [coef_to_zpc_ptok(pt)][0], 1) -
                      vp9_cost_bit((*zpc_probs)[ref]
                                   [coef_to_zpc_band(band)]
                                   [coef_to_zpc_ptok(pt)][0], 0);
            for (c_ = c + 1; c_ < eob; ++c_) {
              if (o == vp9_get_orientation(scan[c_], tx_size)) {
                int pt_ = vp9_get_coef_context(scan, nb, pad,
                                               token_cache_full, c_,
                                               default_eob);
                int band_ = get_coef_band(scan, tx_size, c_);
                assert(token_cache_full[scan[c_]] == ZERO_TOKEN);
                if (!c_ || token_cache_full[scan[c_ - 1]])
                  savings += vp9_cost_bit(
                      coef_probs[type][ref][band_][pt_][0], 1);
                savings += vp9_cost_bit(
                    coef_probs[type][ref][band_][pt_][1], 0);
                zsaved++;
              }
            }
            if (savings < 0) {
            // if (zsaved < ZPC_ZEROSSAVED_EOO) {
              eoo = 0;
              is_eoo_negative[o] = 1;
            }
          }
        }
        if (use_eoo) {
          cost += vp9_cost_bit((*zpc_probs)[ref]
                                           [coef_to_zpc_band(band)]
                                           [coef_to_zpc_ptok(pt)][0], !eoo);
          if (eoo) {
            assert(is_eoo_list[o] == 0);
            is_eoo_list[o] = 1;
          }
        }
      }
      is_last_zero[o] = (t == ZERO_TOKEN);
501
#endif
502
    }
503 504 505 506 507 508 509
    if (c < seg_eob) {
      if (c)
        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
      cost += mb->token_costs[tx_size][type][ref]
          [get_coef_band(scan, tx_size, c)]
          [pt][DCT_EOB_TOKEN];
    }
510 511
  }

512 513 514
  // is eob first coefficient;
  for (pt = 0; pt < (1 << tx_size); pt++) {
    A[pt] = L[pt] = c > 0;
515
  }
516

517 518 519
  return cost;
}

520
/* Chooses the transform size for the current block from per-size
 * rate / distortion / skip results and reports the chosen figures.
 *
 *   r[n][0]        - in: token rate for transform size n.
 *   r[n][1]        - out: r[n][0] plus the cost of signalling size n.
 *   d[n], s[n]     - in: distortion and skip flag for size n.
 *   rate, distortion, skip - out: values for the chosen size.
 *   txfm_cache     - out: RD cost for each transform mode.
 *   max_txfm_size  - largest size for which r/d/s hold valid data.
 *
 * The chosen size is also stored in the block's mbmi.txfm_size. */
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                     int (*r)[2], int *rate,
                                     int *d, int *distortion,
                                     int *s, int *skip,
                                     int64_t txfm_cache[NB_TXFM_MODES],
                                     TX_SIZE max_txfm_size) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  vp9_prob skip_prob = vp9_get_pred_prob(cm, xd, PRED_MBSKIP);
  int64_t rd[TX_SIZE_MAX_SB][2];
  int tx, bit;
  int cost_noskip, cost_skip;
  int tx_select, wins_32x32, wins_16x16;

  // Rate including the transform-size signalling bits.  The largest
  // allowed size needs no terminating "zero" bit.
  for (tx = TX_4X4; tx <= max_txfm_size; tx++) {
    const int last_bit = tx - (tx == max_txfm_size);

    r[tx][1] = r[tx][0];
    for (bit = 0; bit <= last_bit; bit++) {
      if (bit == tx)
        r[tx][1] += vp9_cost_zero(cm->prob_tx[bit]);
      else
        r[tx][1] += vp9_cost_one(cm->prob_tx[bit]);
    }
  }

  assert(skip_prob > 0);
  cost_noskip = vp9_cost_bit(skip_prob, 0);
  cost_skip = vp9_cost_bit(skip_prob, 1);

  // RD cost per size, without ([0]) and with ([1]) the signalling bits.
  for (tx = TX_4X4; tx <= max_txfm_size; tx++) {
    if (s[tx]) {
      rd[tx][0] = rd[tx][1] = RDCOST(x->rdmult, x->rddiv, cost_skip, d[tx]);
    } else {
      rd[tx][0] = RDCOST(x->rdmult, x->rddiv, r[tx][0] + cost_noskip, d[tx]);
      rd[tx][1] = RDCOST(x->rdmult, x->rddiv, r[tx][1] + cost_noskip, d[tx]);
    }
  }

  // "Strictly best" tests.  Each is guarded by max_txfm_size so that only
  // entries filled in above are ever read.
  tx_select = (cm->txfm_mode == TX_MODE_SELECT);
  wins_32x32 = max_txfm_size == TX_32X32 &&
               rd[TX_32X32][1] < rd[TX_16X16][1] &&
               rd[TX_32X32][1] < rd[TX_8X8][1] &&
               rd[TX_32X32][1] < rd[TX_4X4][1];
  wins_16x16 = max_txfm_size >= TX_16X16 &&
               rd[TX_16X16][1] < rd[TX_8X8][1] &&
               rd[TX_16X16][1] < rd[TX_4X4][1];

  if (max_txfm_size == TX_32X32 &&
      (cm->txfm_mode == ALLOW_32X32 || (tx_select && wins_32x32))) {
    mbmi->txfm_size = TX_32X32;
  } else if (max_txfm_size >= TX_16X16 &&
             (cm->txfm_mode == ALLOW_16X16 ||
              cm->txfm_mode == ALLOW_32X32 ||
              (tx_select && wins_16x16))) {
    mbmi->txfm_size = TX_16X16;
  } else if (cm->txfm_mode == ALLOW_8X8 ||
             cm->txfm_mode == ALLOW_16X16 ||
             cm->txfm_mode == ALLOW_32X32 ||
             (tx_select && rd[TX_8X8][1] < rd[TX_4X4][1])) {
    mbmi->txfm_size = TX_8X8;
  } else {
    mbmi->txfm_size = TX_4X4;
  }

  *distortion = d[mbmi->txfm_size];
  *rate       = r[mbmi->txfm_size][tx_select];
  *skip       = s[mbmi->txfm_size];

  txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
  txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
  txfm_cache[ALLOW_16X16] = rd[MIN(max_txfm_size, TX_16X16)][0];
  txfm_cache[ALLOW_32X32] = rd[MIN(max_txfm_size, TX_32X32)][0];
  if (wins_32x32) {
    txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  } else if (wins_16x16) {
    txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
  } else {
    txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
                                 rd[TX_4X4][1] : rd[TX_8X8][1];
  }
}

599 600
/* Returns the sum of squared differences between the first `block_size`
 * entries of `coeff` and `dqcoeff`, shifted right by `shift` and saturated
 * to INT_MAX.
 *
 * Each square is computed in 64 bits: the difference of two int16_t values
 * can reach 65535 in magnitude, whose square does not fit in an int. */
static int block_error(int16_t *coeff, int16_t *dqcoeff,
                       int block_size, int shift) {
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    const int this_diff = coeff[i] - dqcoeff[i];
    error += (int64_t)this_diff * this_diff;
  }
  error >>= shift;

  return error > INT_MAX ? INT_MAX : (int)error;
}

613 614 615 616
/* Returns block_error() over the first `block_size` coefficients of
 * plane 0, comparing x->plane[0].coeff with x->e_mbd.plane[0].dqcoeff. */
static int block_error_sby(MACROBLOCK *x, int block_size, int shift) {
  int16_t *const src_coeff = x->plane[0].coeff;
  int16_t *const deq_coeff = x->e_mbd.plane[0].dqcoeff;

  return block_error(src_coeff, deq_coeff, block_size, shift);
}
617

618 619 620 621
/* Returns the summed block_error() of planes 1..MAX_MB_PLANE-1 for a block
 * of size `bsize`, shifted right by `shift` and saturated to INT_MAX.
 * The coefficient count of each plane is reduced according to that plane's
 * subsampling_x + subsampling_y. */
static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
  const int width_log2 = b_width_log2(bsize);
  const int height_log2 = b_height_log2(bsize);
  int64_t total = 0;
  int p;

  for (p = 1; p < MAX_MB_PLANE; p++) {
    const int ss = x->e_mbd.plane[p].subsampling_x +
                   x->e_mbd.plane[p].subsampling_y;
    const int num_coeffs = 16 << (width_log2 + height_log2 - ss);

    total += block_error(x->plane[p].coeff, x->e_mbd.plane[p].dqcoeff,
                         num_coeffs, 0);
  }
  total >>= shift;

  if (total > INT_MAX)
    return INT_MAX;
  return (int)total;
}

633 634
/* Returns the summed cost_coeffs() of all 4x4 luma transform blocks in a
 * block of size `bsize`.  Works on local copies of the plane-0 above/left
 * entropy contexts, so the real contexts are left untouched. */
static int rdcost_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x,
                          BLOCK_SIZE_TYPE bsize) {
  const int cols = 1 << b_width_log2(bsize);
  const int rows = 1 << b_height_log2(bsize);
  const int num_blocks = cols * rows;
  MACROBLOCKD *const xd = &x->e_mbd;
  ENTROPY_CONTEXT t_above[16], t_left[16];
  int total = 0;
  int row, col;

  vpx_memcpy(t_above, xd->plane[0].above_context,
             sizeof(ENTROPY_CONTEXT) * cols);
  vpx_memcpy(t_left, xd->plane[0].left_context,
             sizeof(ENTROPY_CONTEXT) * rows);

  // Raster order: the block index is row * cols + col.
  for (row = 0; row < rows; row++) {
    for (col = 0; col < cols; col++) {
      total += cost_coeffs(cm, x, row * cols + col, PLANE_TYPE_Y_WITH_DC,
                           t_above + col, t_left + row,
                           TX_4X4, num_blocks);
    }
  }

  return total;
}

/* Evaluates the luma plane with the 4x4 transform: sets mbmi.txfm_size to
 * TX_4X4, runs the forward transform and quantizer, then reports
 * distortion (block error >> 2), token rate and skippability. */
static void super_block_yrd_4x4(VP9_COMMON *const cm, MACROBLOCK *x,
                                int *rate, int *distortion, int *skippable,
                                BLOCK_SIZE_TYPE bsize) {
  const int width_log2 = b_width_log2(bsize);
  const int height_log2 = b_height_log2(bsize);
  const int num_coeffs = 16 << (width_log2 + height_log2);
  MACROBLOCKD *const xd = &x->e_mbd;

  xd->mode_info_context->mbmi.txfm_size = TX_4X4;
  vp9_transform_sby_4x4(x, bsize);
  vp9_quantize_sby_4x4(x, bsize);

  *distortion = block_error_sby(x, num_coeffs, 2);
  *rate = rdcost_sby_4x4(cm, x, bsize);
  *skippable = vp9_sby_is_skippable(xd, bsize);
}

671 672
/* Returns the summed cost_coeffs() of all 8x8 luma transform blocks in a
 * block of size `bsize`.  Each 8x8 block covers two context entries in
 * each direction and four 4x4 block indices.  Works on local copies of the
 * plane-0 above/left entropy contexts. */
static int rdcost_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x,
                          BLOCK_SIZE_TYPE bsize) {
  const int cols = 1 << (b_width_log2(bsize) - 1);
  const int rows = 1 << (b_height_log2(bsize) - 1);
  MACROBLOCKD *const xd = &x->e_mbd;
  ENTROPY_CONTEXT t_above[16], t_left[16];
  int total = 0;
  int row, col;

  vpx_memcpy(t_above, xd->plane[0].above_context,
             sizeof(ENTROPY_CONTEXT) * 2 * cols);
  vpx_memcpy(t_left, xd->plane[0].left_context,
             sizeof(ENTROPY_CONTEXT) * 2 * rows);

  // Raster order over 8x8 blocks: the block number is row * cols + col.
  for (row = 0; row < rows; row++) {
    for (col = 0; col < cols; col++) {
      const int blk = row * cols + col;

      total += cost_coeffs(cm, x, blk * 4, PLANE_TYPE_Y_WITH_DC,
                           t_above + col * 2, t_left + row * 2,
                           TX_8X8, 4 * cols * rows);
    }
  }

  return total;
}

/* Evaluates the luma plane with the 8x8 transform: sets mbmi.txfm_size to
 * TX_8X8, runs the forward transform and quantizer, then reports
 * distortion (block error >> 2), token rate and skippability. */
static void super_block_yrd_8x8(VP9_COMMON *const cm, MACROBLOCK *x,
                                int *rate, int *distortion, int *skippable,
                                BLOCK_SIZE_TYPE bsize) {
  const int width_log2 = b_width_log2(bsize);
  const int height_log2 = b_height_log2(bsize);
  const int num_coeffs = 16 << (height_log2 + width_log2);
  MACROBLOCKD *const xd = &x->e_mbd;

  xd->mode_info_context->mbmi.txfm_size = TX_8X8;
  vp9_transform_sby_8x8(x, bsize);
  vp9_quantize_sby_8x8(x, bsize);

  *distortion = block_error_sby(x, num_coeffs, 2);
  *rate = rdcost_sby_8x8(cm, x, bsize);
  *skippable = vp9_sby_is_skippable(xd, bsize);
}

709 710
/* Returns the summed cost_coeffs() of all 16x16 luma transform blocks in a
 * block of size `bsize`.  Each 16x16 block covers four context entries in
 * each direction and sixteen 4x4 block indices.  Works on local copies of
 * the plane-0 above/left entropy contexts. */
static int rdcost_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
                            BLOCK_SIZE_TYPE bsize) {
  const int cols = 1 << (b_width_log2(bsize) - 2);
  const int rows = 1 << (b_height_log2(bsize) - 2);
  MACROBLOCKD *const xd = &x->e_mbd;
  ENTROPY_CONTEXT t_above[16], t_left[16];
  int total = 0;
  int row, col;

  vpx_memcpy(t_above, xd->plane[0].above_context,
             sizeof(ENTROPY_CONTEXT) * 4 * cols);
  vpx_memcpy(t_left, xd->plane[0].left_context,
             sizeof(ENTROPY_CONTEXT) * 4 * rows);

  // Raster order over 16x16 blocks: the block number is row * cols + col.
  for (row = 0; row < rows; row++) {
    for (col = 0; col < cols; col++) {
      const int blk = row * cols + col;

      total += cost_coeffs(cm, x, blk * 16, PLANE_TYPE_Y_WITH_DC,
                           t_above + col * 4, t_left + row * 4,
                           TX_16X16, cols * rows * 16);
    }
  }

  return total;
}

/* Evaluates the luma plane with the 16x16 transform: sets mbmi.txfm_size
 * to TX_16X16, runs the forward transform and quantizer, then reports
 * distortion (block error >> 2), token rate and skippability. */
static void super_block_yrd_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
                                  int *rate, int *distortion, int *skippable,
                                  BLOCK_SIZE_TYPE bsize) {
  const int width_log2 = b_width_log2(bsize);
  const int height_log2 = b_height_log2(bsize);
  const int num_coeffs = 16 << (width_log2 + height_log2);
  MACROBLOCKD *const xd = &x->e_mbd;

  xd->mode_info_context->mbmi.txfm_size = TX_16X16;
  vp9_transform_sby_16x16(x, bsize);
  vp9_quantize_sby_16x16(x, bsize);

  *distortion = block_error_sby(x, num_coeffs, 2);
  *rate = rdcost_sby_16x16(cm, x, bsize);
  *skippable = vp9_sby_is_skippable(xd, bsize);
}

747 748
/* Returns the summed cost_coeffs() of all 32x32 luma transform blocks in a
 * block of size `bsize`.  Each 32x32 block covers eight context entries in
 * each direction and sixty-four 4x4 block indices.  Works on local copies
 * of the plane-0 above/left entropy contexts. */
static int rdcost_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
                            BLOCK_SIZE_TYPE bsize) {
  const int cols = 1 << (b_width_log2(bsize) - 3);
  const int rows = 1 << (b_height_log2(bsize) - 3);
  MACROBLOCKD *const xd = &x->e_mbd;
  ENTROPY_CONTEXT t_above[16], t_left[16];
  int total = 0;
  int row, col;

  vpx_memcpy(t_above, xd->plane[0].above_context,
             sizeof(ENTROPY_CONTEXT) * 8 * cols);
  vpx_memcpy(t_left, xd->plane[0].left_context,
             sizeof(ENTROPY_CONTEXT) * 8 * rows);

  // Raster order over 32x32 blocks: the block number is row * cols + col.
  for (row = 0; row < rows; row++) {
    for (col = 0; col < cols; col++) {
      const int blk = row * cols + col;

      total += cost_coeffs(cm, x, blk * 64, PLANE_TYPE_Y_WITH_DC,
                           t_above + col * 8, t_left + row * 8,
                           TX_32X32, cols * rows * 64);
    }
  }

  return total;
}

770
/* Evaluates the luma plane with the 32x32 transform: sets mbmi.txfm_size
 * to TX_32X32, runs the forward transform and quantizer, then reports
 * distortion, token rate and skippability.  Unlike the smaller transform
 * sizes, the block error is used without a right shift. */
static void super_block_yrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
                                  int *rate, int *distortion, int *skippable,
                                  BLOCK_SIZE_TYPE bsize) {
  const int width_log2 = b_width_log2(bsize);
  const int height_log2 = b_height_log2(bsize);
  const int num_coeffs = 16 << (width_log2 + height_log2);
  MACROBLOCKD *const xd = &x->e_mbd;

  xd->mode_info_context->mbmi.txfm_size = TX_32X32;
  vp9_transform_sby_32x32(x, bsize);
  vp9_quantize_sby_32x32(x, bsize);

  *distortion = block_error_sby(x, num_coeffs, 0);
  *rate = rdcost_sby_32x32(cm, x, bsize);
  *skippable = vp9_sby_is_skippable(xd, bsize);
}

785 786
static void super_block_yrd(VP9_COMP *cpi,
                            MACROBLOCK *x, int *rate, int *distortion,
787
                            int *skip, BLOCK_SIZE_TYPE bs,
788
                            int64_t txfm_cache[NB_TXFM_MODES]) {
789 790
  VP9_COMMON *const cm = &cpi->common;
  int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB];
Ronald S. Bultje's avatar
Ronald S. Bultje committed
791

792
  vp9_subtract_sby(x, bs);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
793

794 795 796
  if (bs >= BLOCK_SIZE_SB32X32)
    super_block_yrd_32x32(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32],
                          bs);
797 798 799
  if (bs >= BLOCK_SIZE_MB16X16)
    super_block_yrd_16x16(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16],
                          bs);
800 801
  super_block_yrd_8x8(cm, x,   &r[TX_8X8][0],   &d[TX_8X8],   &s[TX_8X8],   bs);
  super_block_yrd_4x4(cm, x,   &r[TX_4X4][0],   &d[TX_4X4],   &s[TX_4X4],   bs);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
802 803

  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
804 805 806 807 808
                           TX_32X32 - (bs < BLOCK_SIZE_SB32X32)
#if CONFIG_SB8X8
                           - (bs < BLOCK_SIZE_MB16X16)
#endif
                           );
Ronald S. Bultje's avatar
Ronald S. Bultje committed
809
}
Ronald S. Bultje's avatar
Ronald S. Bultje committed
810

811 812
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
                                     B_PREDICTION_MODE *best_mode,
813 814 815 816
                                     int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
                                     int *bestdistortion) {
John Koleszar's avatar
John Koleszar committed
817
  B_PREDICTION_MODE mode;
Deb Mukherjee's avatar
Deb Mukherjee committed
818
  MACROBLOCKD *xd = &x->e_mbd;
819
  int64_t best_rd = INT64_MAX;
John Koleszar's avatar
John Koleszar committed
820 821
  int rate = 0;
  int distortion;