vp9_rdopt.c 174 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
John Koleszar's avatar
John Koleszar committed
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5 6
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
John Koleszar's avatar
John Koleszar committed
9 10 11 12 13 14 15
 */


#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
16 17
#include "vp9/common/vp9_pragmas.h"

18 19 20 21 22
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
23 24 25 26 27
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
28 29 30 31 32 33
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
34
#include "vpx_mem/vpx_mem.h"
35 36
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
John Koleszar's avatar
John Koleszar committed
37

38 39 40
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
41
#include "vp9_rtcd.h"
42
#include "vp9/common/vp9_mvref_common.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
43
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
44

John Koleszar's avatar
John Koleszar committed
45 46
#define MAXF(a,b)            (((a) > (b)) ? (a) : (b))

47 48
#define INVALID_MV 0x80008000

49 50 51
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

John Koleszar's avatar
John Koleszar committed
52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
/* Threshold table with one entry for each index 0..16.
 * NOTE(review): presumably indexed by the encoder speed setting; no use of
 * this table is visible in this part of the file -- confirm before editing. */
static const int auto_speed_thresh[17] = {
  1000, 200, 150, 130, 150, 125, 120,
  115, 115, 115, 115, 115, 115, 115, 115, 115,
  105
};

72
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
73 74
  {ZEROMV,    LAST_FRAME,   NONE},
  {DC_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
75

76 77
  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARMV,    LAST_FRAME,   NONE},
John Koleszar's avatar
John Koleszar committed
78

79 80
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
81

82 83
  {ZEROMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
84

85 86
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
87

88 89 90 91 92 93 94 95
  {V_PRED,    INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
96

97
  {TM_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
98

99 100 101
  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
102

103 104 105
  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},
106

Yaowu Xu's avatar
Yaowu Xu committed
107
  {I4X4_PRED,    INTRA_FRAME,  NONE},
108
  {I8X8_PRED, INTRA_FRAME,  NONE},
109

John Koleszar's avatar
John Koleszar committed
110 111 112 113
  /* compound prediction modes */
  {ZEROMV,    LAST_FRAME,   GOLDEN_FRAME},
  {NEARESTMV, LAST_FRAME,   GOLDEN_FRAME},
  {NEARMV,    LAST_FRAME,   GOLDEN_FRAME},
114

John Koleszar's avatar
John Koleszar committed
115 116 117
  {ZEROMV,    ALTREF_FRAME, LAST_FRAME},
  {NEARESTMV, ALTREF_FRAME, LAST_FRAME},
  {NEARMV,    ALTREF_FRAME, LAST_FRAME},
118

John Koleszar's avatar
John Koleszar committed
119 120 121
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
122

John Koleszar's avatar
John Koleszar committed
123 124 125
  {NEWMV,     LAST_FRAME,   GOLDEN_FRAME},
  {NEWMV,     ALTREF_FRAME, LAST_FRAME  },
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},
126

John Koleszar's avatar
John Koleszar committed
127 128
  {SPLITMV,   LAST_FRAME,   GOLDEN_FRAME},
  {SPLITMV,   ALTREF_FRAME, LAST_FRAME  },
129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},

#if CONFIG_COMP_INTERINTRA_PRED
  /* compound inter-intra prediction */
  {ZEROMV,    LAST_FRAME,   INTRA_FRAME},
  {NEARESTMV, LAST_FRAME,   INTRA_FRAME},
  {NEARMV,    LAST_FRAME,   INTRA_FRAME},
  {NEWMV,     LAST_FRAME,   INTRA_FRAME},

  {ZEROMV,    GOLDEN_FRAME,   INTRA_FRAME},
  {NEARESTMV, GOLDEN_FRAME,   INTRA_FRAME},
  {NEARMV,    GOLDEN_FRAME,   INTRA_FRAME},
  {NEWMV,     GOLDEN_FRAME,   INTRA_FRAME},

  {ZEROMV,    ALTREF_FRAME,   INTRA_FRAME},
  {NEARESTMV, ALTREF_FRAME,   INTRA_FRAME},
  {NEARMV,    ALTREF_FRAME,   INTRA_FRAME},
  {NEWMV,     ALTREF_FRAME,   INTRA_FRAME},
#endif
John Koleszar's avatar
John Koleszar committed
148 149
};

150 151
static void fill_token_costs(vp9_coeff_count *c,
                             vp9_coeff_probs *p,
152
                             TX_SIZE tx_size) {
153
  int i, j, k, l;
John Koleszar's avatar
John Koleszar committed
154

155
  for (i = 0; i < BLOCK_TYPES; i++)
156 157 158
    for (j = 0; j < REF_TYPES; j++)
      for (k = 0; k < COEF_BANDS; k++)
        for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
159 160 161 162 163 164
          vp9_cost_tokens_skip((int *)(c[i][j][k][l]),
                               p[i][j][k][l],
                               vp9_coef_tree);
        }
}

165 166 167 168
/* Adjustment factors applied to RDMULT in vp9_initialize_rd_consts(): the
 * multiplier is increased by (RDMULT * rd_iifactor[n]) >> 4, where n is the
 * two-pass next_iiratio value (values above 31 use entry 31).  Only the
 * first five entries are non-zero.  The table is only ever read, so it is
 * const. */
static const int rd_iifactor[32] = {
  4, 4, 3, 2, 1, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
};
John Koleszar's avatar
John Koleszar committed
169

170
// 3* dc_qlookup[Q]*dc_qlookup[Q];
171

172
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
173 174 175
/* Lookup tables with one entry per quantizer index.  They are filled by
 * vp9_init_me_luts() and read by vp9_initialize_me_consts(), which copies
 * the entry for the current qindex into cpi->mb.sadperbit16 and
 * cpi->mb.sadperbit4 respectively. */
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

176
/* Populates sad_per_bit16lut[] and sad_per_bit4lut[] for every quantizer
 * index in [0, QINDEX_RANGE).  Each entry is a linear function of
 * vp9_convert_qindex_to_q(i), truncated to int. */
void vp9_init_me_luts(void) {
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    // Convert once; both tables are derived from the same q value.
    const double q = vp9_convert_qindex_to_q(i);

    sad_per_bit16lut[i] = (int)((0.0418 * q) + 2.4107);
    sad_per_bit4lut[i] = (int)((0.063 * q) + 2.742);
  }
}
John Koleszar's avatar
John Koleszar committed
188

189
/* Returns the base rate-distortion multiplier for a quantizer index:
 * (11 * q * q) >> 2, where q = vp9_dc_quant(qindex, 0). */
static int compute_rd_mult(int qindex) {
  const int q = vp9_dc_quant(qindex, 0);

  return (11 * q * q) >> 2;
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
194 195 196
/* Copies the SAD-per-bit constants for the given quantizer index from the
 * lookup tables into cpi->mb.
 *
 * qindex is used directly as an array index and is not range-checked here,
 * so it must lie in [0, QINDEX_RANGE). */
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
  cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
}

199

Dmitry Kovalev's avatar
Dmitry Kovalev committed
200
/* Sets up the rate-distortion constants and cost tables used for mode
 * decisions at the given quantizer index.
 *
 * Steps, in order:
 *   - clamp qindex to [0, MAXQ];
 *   - derive cpi->RDMULT (with a two-pass adjustment on non-key frames) and
 *     cpi->mb.errorperbit (at least 1);
 *   - call vp9_set_speed_features();
 *   - derive cpi->RDDIV and the per-mode thresholds cpi->rd_threshes[] /
 *     cpi->rd_baseline_thresh[] from cpi->sf.thresh_mult[];
 *   - rebuild the coefficient token, partition, mode and (on non-key frames)
 *     motion-vector cost tables from the current frame-context probabilities.
 */
void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
  int q, i;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  qindex = (qindex < 0) ? 0 : ((qindex > MAXQ) ? MAXQ : qindex);

  cpi->RDMULT = compute_rd_mult(qindex);
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    if (cpi->twopass.next_iiratio > 31)
      cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
    else
      cpi->RDMULT +=
          (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
  }
  cpi->mb.errorperbit = cpi->RDMULT >> 6;
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);  // never zero

  vp9_set_speed_features(cpi);

  q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25);
  q <<= 2;
  if (q < 8)
    q = 8;

  if (cpi->RDMULT > 1000) {
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;

    for (i = 0; i < MAX_MODES; i++) {
      if (cpi->sf.thresh_mult[i] < INT_MAX) {
        // Multiply in 64 bits: thresh_mult * q is not range-checked on this
        // path, so doing it in int could overflow.  Saturate at INT_MAX.
        const int64_t scaled = (int64_t)cpi->sf.thresh_mult[i] * q / 100;
        cpi->rd_threshes[i] = scaled > INT_MAX ? INT_MAX : (int)scaled;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }
      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  } else {
    cpi->RDDIV = 100;

    for (i = 0; i < MAX_MODES; i++) {
      if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) {
        cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }
      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  }

  fill_token_costs(cpi->mb.token_costs[TX_4X4],
                   cpi->common.fc.coef_probs_4x4, TX_4X4);
  fill_token_costs(cpi->mb.token_costs[TX_8X8],
                   cpi->common.fc.coef_probs_8x8, TX_8X8);
  fill_token_costs(cpi->mb.token_costs[TX_16X16],
                   cpi->common.fc.coef_probs_16x16, TX_16X16);
  fill_token_costs(cpi->mb.token_costs[TX_32X32],
                   cpi->common.fc.coef_probs_32x32, TX_32X32);

  for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
    vp9_cost_tokens(cpi->mb.partition_cost[i],
                    cpi->common.fc.partition_prob[i],
                    vp9_partition_tree);

  /*rough estimate for costing*/
  cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4;
  vp9_init_mode_costs(cpi);

  if (cpi->common.frame_type != KEY_FRAME) {
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cpi->mb.e_mbd.allow_high_precision_mv ?
        cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cpi->common.fc.nmvc,
        cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
  }
}

282
/* Returns the sum of squared differences between coeff[] and dqcoeff[]
 * over the first block_size entries.
 *
 * The sum is accumulated in 64 bits and saturated at INT_MAX on return:
 * a single squared difference of two int16_t values can already exceed
 * INT_MAX, so accumulating in int would overflow (undefined behaviour).
 * Results that fit in an int are unchanged.
 */
int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) {
  int64_t error = 0;
  int i;

  for (i = 0; i < block_size; i++) {
    const int64_t this_diff = coeff[i] - dqcoeff[i];
    error += this_diff * this_diff;
  }

  return error > INT_MAX ? INT_MAX : (int)error;
}

293
static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
294
                              int ib, PLANE_TYPE type,
295 296
                              ENTROPY_CONTEXT *A,
                              ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
297 298
                              TX_SIZE tx_size,
                              int y_blocks) {
299
  MACROBLOCKD *const xd = &mb->e_mbd;
300 301
  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
  int pt;
302
  int c = 0;
303 304
  int cost = 0, pad;
  const int *scan, *nb;
John Koleszar's avatar
John Koleszar committed
305 306
  const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, ib);
  const int eob = xd->plane[pb_idx.plane].eobs[pb_idx.block];
307 308
  const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff,
                                           pb_idx.block, 16);
309
  const int ref = mbmi->ref_frame != INTRA_FRAME;
310
  unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
311
      mb->token_costs[tx_size][type][ref];
312
  ENTROPY_CONTEXT above_ec, left_ec;
313
  TX_TYPE tx_type = DCT_DCT;
314

315 316 317 318 319 320 321 322
#if CONFIG_CODE_ZEROGROUP
  int last_nz_pos[3] = {-1, -1, -1};  // Encoder only
  int is_eoo_list[3] = {0, 0, 0};
  int is_eoo_negative[3] = {0, 0, 0};
  int is_last_zero[3] = {0, 0, 0};
  int o, rc, skip_coef_val;
  vp9_zpc_probs *zpc_probs;
  uint8_t token_cache_full[1024];
323
#endif
324
  const int segment_id = xd->mode_info_context->mbmi.segment_id;
325 326 327 328
  vp9_prob (*coef_probs)[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS]
                        [ENTROPY_NODES];
  int seg_eob, default_eob;
  uint8_t token_cache[1024];
329

330 331 332 333
#if CONFIG_CODE_ZEROGROUP
  vpx_memset(token_cache, UNKNOWN_TOKEN, sizeof(token_cache));
#endif

334
  // Check for consistency of tx_size with mode info
John Koleszar's avatar
John Koleszar committed
335
  assert((!type && !pb_idx.plane) || (type && pb_idx.plane));
336 337 338 339 340 341 342
  if (type == PLANE_TYPE_Y_WITH_DC) {
    assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
  } else {
    TX_SIZE tx_size_uv = get_uv_tx_size(xd);
    assert(tx_size == tx_size_uv);
  }

343
  switch (tx_size) {
344
    case TX_4X4: {
345 346
      tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_4x4(xd, ib) : DCT_DCT;
347 348
      above_ec = A[0] != 0;
      left_ec = L[0] != 0;
349
      coef_probs = cm->fc.coef_probs_4x4;
350
      seg_eob = 16;
351
      scan = get_scan_4x4(tx_type);
352 353 354
#if CONFIG_CODE_ZEROGROUP
      zpc_probs = &cm->fc.zpc_probs_4x4;
#endif
Daniel Kang's avatar
Daniel Kang committed
355
      break;
356
    }
357 358
    case TX_8X8: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
359
      const int sz = 1 + b_width_log2(sb_type);
360
      const int x = ib & ((1 << sz) - 1), y = ib - x;
361 362
      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT;
363 364
      above_ec = (A[0] + A[1]) != 0;
      left_ec = (L[0] + L[1]) != 0;
365
      scan = get_scan_8x8(tx_type);
366
      coef_probs = cm->fc.coef_probs_8x8;
367
      seg_eob = 64;
368 369 370
#if CONFIG_CODE_ZEROGROUP
      zpc_probs = &cm->fc.zpc_probs_8x8;
#endif
Daniel Kang's avatar
Daniel Kang committed
371
      break;
372 373 374
    }
    case TX_16X16: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
375
      const int sz = 2 + b_width_log2(sb_type);
376
      const int x = ib & ((1 << sz) - 1), y = ib - x;
377 378
      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT;
379
      scan = get_scan_16x16(tx_type);
380
      coef_probs = cm->fc.coef_probs_16x16;
381
      seg_eob = 256;
382 383
      above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
384 385 386
#if CONFIG_CODE_ZEROGROUP
      zpc_probs = &cm->fc.zpc_probs_16x16;
#endif
Daniel Kang's avatar
Daniel Kang committed
387
      break;
388
    }
389 390
    case TX_32X32:
      scan = vp9_default_zig_zag1d_32x32;
391
      coef_probs = cm->fc.coef_probs_32x32;
392
      seg_eob = 1024;
393 394 395
      above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;

396 397 398
#if CONFIG_CODE_ZEROGROUP
      zpc_probs = &cm->fc.zpc_probs_32x32;
#endif
399
      break;
Daniel Kang's avatar
Daniel Kang committed
400
    default:
401
      abort();
Daniel Kang's avatar
Daniel Kang committed
402 403
      break;
  }
John Koleszar's avatar
John Koleszar committed
404
  assert(eob <= seg_eob);
405

406
  pt = combine_entropy_contexts(above_ec, left_ec);
407 408
  nb = vp9_get_coef_neighbors_handle(scan, &pad);
  default_eob = seg_eob;
409

410 411
  if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
    seg_eob = 0;
412

413 414 415 416
  /* sanity check to ensure that we do not have spurious non-zero q values */
  if (eob < seg_eob)
    assert(qcoeff_ptr[scan[eob]] == 0);

417 418 419 420 421 422 423 424 425 426
#if CONFIG_CODE_ZEROGROUP
  vpx_memset(token_cache_full, ZERO_TOKEN, sizeof(token_cache_full));
  for (c = 0; c < eob; ++c) {
    rc = scan[c];
    token_cache_full[rc] = vp9_dct_value_tokens_ptr[qcoeff_ptr[rc]].token;
    o = vp9_get_orientation(rc, tx_size);
    if (qcoeff_ptr[rc] != 0)
      last_nz_pos[o] = c;
  }
#endif
427
  {
428
    for (c = 0; c < eob; c++) {
429
      int v = qcoeff_ptr[scan[c]];
430
      int t = vp9_dct_value_tokens_ptr[v].token;
431 432 433 434 435 436 437 438 439 440 441 442 443 444 445
      int band = get_coef_band(scan, tx_size, c);
      if (c)
        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
#if CONFIG_CODE_ZEROGROUP
      rc = scan[c];
      o = vp9_get_orientation(rc, tx_size);
      skip_coef_val = (token_cache[rc] == ZERO_TOKEN || is_eoo_list[o]);
      if (!skip_coef_val) {
        cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
      } else {
        assert(v == 0);
      }
#else
      cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
#endif
446 447
      if (!c || token_cache[scan[c - 1]])
        cost += vp9_cost_bit(coef_probs[type][ref][band][pt][0], 1);
448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503
      token_cache[scan[c]] = t;
#if CONFIG_CODE_ZEROGROUP
      if (t == ZERO_TOKEN && !skip_coef_val) {
        int eoo = 0, use_eoo;
#if USE_ZPC_EOORIENT == 1
        use_eoo = vp9_use_eoo(c, seg_eob, scan, tx_size,
                              is_last_zero, is_eoo_list);
#else
        use_eoo = 0;
#endif
        if (use_eoo) {
          eoo = vp9_is_eoo(c, eob, scan, tx_size, qcoeff_ptr, last_nz_pos);
          if (eoo && is_eoo_negative[o]) eoo = 0;
          if (eoo) {
            int c_;
            int savings = 0;
            int zsaved = 0;
            savings = vp9_cost_bit((*zpc_probs)[ref]
                                   [coef_to_zpc_band(band)]
                                   [coef_to_zpc_ptok(pt)][0], 1) -
                      vp9_cost_bit((*zpc_probs)[ref]
                                   [coef_to_zpc_band(band)]
                                   [coef_to_zpc_ptok(pt)][0], 0);
            for (c_ = c + 1; c_ < eob; ++c_) {
              if (o == vp9_get_orientation(scan[c_], tx_size)) {
                int pt_ = vp9_get_coef_context(scan, nb, pad,
                                               token_cache_full, c_,
                                               default_eob);
                int band_ = get_coef_band(scan, tx_size, c_);
                assert(token_cache_full[scan[c_]] == ZERO_TOKEN);
                if (!c_ || token_cache_full[scan[c_ - 1]])
                  savings += vp9_cost_bit(
                      coef_probs[type][ref][band_][pt_][0], 1);
                savings += vp9_cost_bit(
                    coef_probs[type][ref][band_][pt_][1], 0);
                zsaved++;
              }
            }
            if (savings < 0) {
            // if (zsaved < ZPC_ZEROSSAVED_EOO) {
              eoo = 0;
              is_eoo_negative[o] = 1;
            }
          }
        }
        if (use_eoo) {
          cost += vp9_cost_bit((*zpc_probs)[ref]
                                           [coef_to_zpc_band(band)]
                                           [coef_to_zpc_ptok(pt)][0], !eoo);
          if (eoo) {
            assert(is_eoo_list[o] == 0);
            is_eoo_list[o] = 1;
          }
        }
      }
      is_last_zero[o] = (t == ZERO_TOKEN);
504
#endif
505
    }
506 507 508 509 510 511 512
    if (c < seg_eob) {
      if (c)
        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
      cost += mb->token_costs[tx_size][type][ref]
          [get_coef_band(scan, tx_size, c)]
          [pt][DCT_EOB_TOKEN];
    }
513 514
  }

515 516 517
  // is eob first coefficient;
  for (pt = 0; pt < (1 << tx_size); pt++) {
    A[pt] = L[pt] = c > 0;
518
  }
519

520 521 522
  return cost;
}

523
/* Picks the transform size for the current block from per-size rate /
 * distortion / skip results and stores it in mbmi->txfm_size.
 *
 *   r              Per-size rates.  On entry r[n][0] holds the rate for size
 *                  n; this function fills r[n][1] with that rate plus the
 *                  cost of signalling size n using cm->prob_tx[].
 *   d, s           Per-size distortion and skip flag.
 *   rate, distortion, skip
 *                  Outputs: the values for the chosen size.  *rate includes
 *                  the signalling cost only when cm->txfm_mode is
 *                  TX_MODE_SELECT.
 *   txfm_cache     Output: RD cost for each transform mode.
 *   max_txfm_size  Largest transform size that was evaluated.
 *
 * NOTE(review): rd[TX_8X8] and rd[TX_16X16] are read unconditionally, so
 * max_txfm_size is expected to be at least TX_16X16; the caller visible in
 * this file appears to satisfy that -- confirm for any new caller.
 */
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                     int (*r)[2], int *rate,
                                     int *d, int *distortion,
                                     int *s, int *skip,
                                     int64_t txfm_cache[NB_TXFM_MODES],
                                     TX_SIZE max_txfm_size) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  vp9_prob skip_prob = vp9_get_pred_prob(cm, xd, PRED_MBSKIP);
  int64_t rd[TX_SIZE_MAX_SB][2];
  int n, m;
  int s0, s1;

  // Rate including transform-size signalling: one "one" bit per smaller
  // size stepped past, then a "zero" bit at size n, which is omitted when n
  // is the largest allowed size.
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    r[n][1] = r[n][0];
    for (m = 0; m <= n - (n == max_txfm_size); m++) {
      if (m == n)
        r[n][1] += vp9_cost_zero(cm->prob_tx[m]);
      else
        r[n][1] += vp9_cost_one(cm->prob_tx[m]);
    }
  }

  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);

  // RD cost per size; a skipped block only pays for the skip flag.
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }
  }

  // Fixed modes force the size; TX_MODE_SELECT takes a larger size only
  // when it is strictly better than every smaller one.
  if (max_txfm_size == TX_32X32 &&
      (cm->txfm_mode == ALLOW_32X32 ||
       (cm->txfm_mode == TX_MODE_SELECT &&
        rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
        rd[TX_32X32][1] < rd[TX_4X4][1]))) {
    mbmi->txfm_size = TX_32X32;
  } else if ( cm->txfm_mode == ALLOW_16X16 ||
             (max_txfm_size == TX_16X16 && cm->txfm_mode == ALLOW_32X32) ||
             (cm->txfm_mode == TX_MODE_SELECT &&
              rd[TX_16X16][1] < rd[TX_8X8][1] &&
              rd[TX_16X16][1] < rd[TX_4X4][1])) {
    mbmi->txfm_size = TX_16X16;
  } else if (cm->txfm_mode == ALLOW_8X8 ||
           (cm->txfm_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) {
    mbmi->txfm_size = TX_8X8;
  } else {
    assert(cm->txfm_mode == ONLY_4X4 || cm->txfm_mode == TX_MODE_SELECT);
    mbmi->txfm_size = TX_4X4;
  }

  *distortion = d[mbmi->txfm_size];
  *rate       = r[mbmi->txfm_size][cm->txfm_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->txfm_size];

  // Record the RD cost each transform mode would have produced.
  txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
  txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
  txfm_cache[ALLOW_16X16] = rd[TX_16X16][0];
  txfm_cache[ALLOW_32X32] = rd[max_txfm_size][0];
  if (max_txfm_size == TX_32X32 &&
      rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1])
    txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  else if (rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])
    txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
  else
    txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
                                 rd[TX_4X4][1] : rd[TX_8X8][1];
}

599 600
/* Returns the sum of squared differences between coeff[] and dqcoeff[]
 * over the first block_size entries, shifted right by `shift` bits and
 * saturated at INT_MAX.
 *
 * Each difference is widened to 64 bits before squaring: the difference of
 * two int16_t values can reach +/-65535, whose square does not fit in int.
 */
static int block_error(int16_t *coeff, int16_t *dqcoeff,
                       int block_size, int shift) {
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    const int64_t this_diff = coeff[i] - dqcoeff[i];
    error += this_diff * this_diff;
  }
  error >>= shift;

  return error > INT_MAX ? INT_MAX : (int)error;
}

613 614 615 616
/* Luma-plane wrapper around block_error(): compares the plane-0
 * coefficients in x with the plane-0 dequantized coefficients in x->e_mbd
 * over block_size entries, applying the given right shift. */
static int block_error_sby(MACROBLOCK *x, int block_size, int shift) {
  int16_t *const src_coeff = x->plane[0].coeff;
  int16_t *const deq_coeff = x->e_mbd.plane[0].dqcoeff;

  return block_error(src_coeff, deq_coeff, block_size, shift);
}
617

618 619 620 621
/* Returns the combined coefficient error of the non-luma planes (planes 1
 * to MAX_MB_PLANE - 1) for a block of size bsize.
 *
 * Each plane contributes block_error() over 16 << (bwl + bhl - subsampling)
 * coefficients, where subsampling is the sum of the plane's x and y
 * subsampling.  The summed error is then shifted right by `shift` and
 * saturated at INT_MAX.
 */
static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
  const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
  int64_t sum = 0;
  int plane;

  for (plane = 1; plane < MAX_MB_PLANE; plane++) {
    const int subsampling = x->e_mbd.plane[plane].subsampling_x +
                            x->e_mbd.plane[plane].subsampling_y;
    // Shift is applied once to the total below, so pass 0 here.
    sum += block_error(x->plane[plane].coeff, x->e_mbd.plane[plane].dqcoeff,
                       16 << (bwl + bhl - subsampling), 0);
  }
  sum >>= shift;
  return sum > INT_MAX ? INT_MAX : (int)sum;
}

633 634
/* Returns the total coefficient cost of the luma plane of a block of size
 * bsize when coded with 4x4 transforms.
 *
 * cost_coeffs() is called once per 4x4 block, in raster order.  It works on
 * local copies of the above/left entropy contexts, so the contexts stored in
 * xd are not modified.
 */
static int rdcost_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x,
                          BLOCK_SIZE_TYPE bsize) {
  const int bwl = b_width_log2(bsize), bw = 1 << bwl;
  const int bh = 1 << b_height_log2(bsize);
  int cost = 0, b;
  MACROBLOCKD *const xd = &x->e_mbd;
  ENTROPY_CONTEXT t_above[16], t_left[16];

  vpx_memcpy(&t_above, xd->plane[0].above_context,
             sizeof(ENTROPY_CONTEXT) * bw);
  vpx_memcpy(&t_left,  xd->plane[0].left_context,
             sizeof(ENTROPY_CONTEXT) * bh);

  for (b = 0; b < bw * bh; b++) {
    // Column / row of this transform block within the larger block.
    const int x_idx = b & (bw - 1), y_idx = b >> bwl;
    cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC,
                        t_above + x_idx, t_left + y_idx,
                        TX_4X4, bw * bh);
  }

  return cost;
}

/* Evaluates the luma plane of a block of size bsize with 4x4 transforms.
 *
 * Sets mbmi.txfm_size to TX_4X4, runs the forward transform and
 * quantization, then reports:
 *   *distortion  coefficient error over 16 << (bwl + bhl) coefficients,
 *                right-shifted by 2;
 *   *rate        coefficient cost from rdcost_sby_4x4();
 *   *skippable   result of vp9_sby_is_skippable().
 */
static void super_block_yrd_4x4(VP9_COMMON *const cm, MACROBLOCK *x,
                                int *rate, int *distortion, int *skippable,
                                BLOCK_SIZE_TYPE bsize) {
  const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
  MACROBLOCKD *const xd = &x->e_mbd;

  xd->mode_info_context->mbmi.txfm_size = TX_4X4;
  vp9_transform_sby_4x4(x, bsize);
  vp9_quantize_sby_4x4(x, bsize);

  *distortion = block_error_sby(x, 16 << (bwl + bhl), 2);
  *rate       = rdcost_sby_4x4(cm, x, bsize);
  *skippable  = vp9_sby_is_skippable(xd, bsize);
}

671 672
/* Returns the total coefficient cost of the luma plane of a block of size
 * bsize when coded with 8x8 transforms.
 *
 * bw and bh count 8x8 transform blocks.  Each of them covers two entropy
 * context entries per direction and four block indices, hence the * 2 and
 * * 4 scaling below.  cost_coeffs() works on local copies of the contexts,
 * so the contexts stored in xd are not modified.
 */
static int rdcost_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x,
                          BLOCK_SIZE_TYPE bsize) {
  const int bwl = b_width_log2(bsize) - 1, bw = 1 << bwl;
  const int bh = 1 << (b_height_log2(bsize) - 1);
  int cost = 0, b;
  MACROBLOCKD *const xd = &x->e_mbd;
  ENTROPY_CONTEXT t_above[16], t_left[16];

  vpx_memcpy(&t_above, xd->plane[0].above_context,
             sizeof(ENTROPY_CONTEXT) * 2 * bw);
  vpx_memcpy(&t_left,  xd->plane[0].left_context,
             sizeof(ENTROPY_CONTEXT) * 2 * bh);

  for (b = 0; b < bw * bh; b++) {
    // Column / row of this transform block within the larger block.
    const int x_idx = b & (bw - 1), y_idx = b >> bwl;
    cost += cost_coeffs(cm, x, b * 4, PLANE_TYPE_Y_WITH_DC,
                        t_above + x_idx * 2, t_left + y_idx * 2,
                        TX_8X8, 4 * bw * bh);
  }

  return cost;
}

/* Evaluates the luma plane of a block of size bsize with 8x8 transforms.
 *
 * Sets mbmi.txfm_size to TX_8X8, runs the forward transform and
 * quantization, then reports:
 *   *distortion  coefficient error over 16 << (bwl + bhl) coefficients,
 *                right-shifted by 2;
 *   *rate        coefficient cost from rdcost_sby_8x8();
 *   *skippable   result of vp9_sby_is_skippable().
 */
static void super_block_yrd_8x8(VP9_COMMON *const cm, MACROBLOCK *x,
                                int *rate, int *distortion, int *skippable,
                                BLOCK_SIZE_TYPE bsize) {
  const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
  MACROBLOCKD *const xd = &x->e_mbd;

  xd->mode_info_context->mbmi.txfm_size = TX_8X8;
  vp9_transform_sby_8x8(x, bsize);
  vp9_quantize_sby_8x8(x, bsize);

  *distortion = block_error_sby(x, 16 << (bhl + bwl), 2);
  *rate       = rdcost_sby_8x8(cm, x, bsize);
  *skippable  = vp9_sby_is_skippable(xd, bsize);
}

709 710
/* Returns the total coefficient cost of the luma plane of a block of size
 * bsize when coded with 16x16 transforms.
 *
 * bw and bh count 16x16 transform blocks.  Each of them covers four entropy
 * context entries per direction and sixteen block indices, hence the * 4
 * and * 16 scaling below.  cost_coeffs() works on local copies of the
 * contexts, so the contexts stored in xd are not modified.
 */
static int rdcost_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
                            BLOCK_SIZE_TYPE bsize) {
  const int bwl = b_width_log2(bsize) - 2, bw = 1 << bwl;
  const int bh = 1 << (b_height_log2(bsize) - 2);
  int cost = 0, b;
  MACROBLOCKD *const xd = &x->e_mbd;
  ENTROPY_CONTEXT t_above[16], t_left[16];

  vpx_memcpy(&t_above, xd->plane[0].above_context,
             sizeof(ENTROPY_CONTEXT) * 4 * bw);
  vpx_memcpy(&t_left,  xd->plane[0].left_context,
             sizeof(ENTROPY_CONTEXT) * 4 * bh);

  for (b = 0; b < bw * bh; b++) {
    // Column / row of this transform block within the larger block.
    const int x_idx = b & (bw - 1), y_idx = b >> bwl;
    cost += cost_coeffs(cm, x, b * 16, PLANE_TYPE_Y_WITH_DC,
                        t_above + x_idx * 4, t_left + y_idx * 4,
                        TX_16X16, bw * bh * 16);
  }

  return cost;
}

/* Evaluates the luma plane of a block of size bsize with 16x16 transforms.
 *
 * Sets mbmi.txfm_size to TX_16X16, runs the forward transform and
 * quantization, then reports:
 *   *distortion  coefficient error over 16 << (bwl + bhl) coefficients,
 *                right-shifted by 2;
 *   *rate        coefficient cost from rdcost_sby_16x16();
 *   *skippable   result of vp9_sby_is_skippable().
 */
static void super_block_yrd_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
                                  int *rate, int *distortion, int *skippable,
                                  BLOCK_SIZE_TYPE bsize) {
  const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
  MACROBLOCKD *const xd = &x->e_mbd;

  xd->mode_info_context->mbmi.txfm_size = TX_16X16;
  vp9_transform_sby_16x16(x, bsize);
  vp9_quantize_sby_16x16(x, bsize);

  *distortion = block_error_sby(x, 16 << (bwl + bhl), 2);
  *rate       = rdcost_sby_16x16(cm, x, bsize);
  *skippable  = vp9_sby_is_skippable(xd, bsize);
}

747 748
/* Returns the total coefficient cost of the luma plane of a block of size
 * bsize when coded with 32x32 transforms.
 *
 * bw and bh count 32x32 transform blocks.  Each of them covers eight
 * entropy context entries per direction and sixty-four block indices, hence
 * the * 8 and * 64 scaling below.  cost_coeffs() works on local copies of
 * the contexts, so the contexts stored in xd are not modified.
 */
static int rdcost_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
                            BLOCK_SIZE_TYPE bsize) {
  const int bwl = b_width_log2(bsize) - 3, bw = 1 << bwl;
  const int bh = 1 << (b_height_log2(bsize) - 3);
  int cost = 0, b;
  MACROBLOCKD * const xd = &x->e_mbd;
  ENTROPY_CONTEXT t_above[16], t_left[16];

  vpx_memcpy(&t_above, xd->plane[0].above_context,
             sizeof(ENTROPY_CONTEXT) * 8 * bw);
  vpx_memcpy(&t_left,  xd->plane[0].left_context,
             sizeof(ENTROPY_CONTEXT) * 8 * bh);

  for (b = 0; b < bw * bh; b++) {
    // Column / row of this transform block within the larger block.
    const int x_idx = b & (bw - 1), y_idx = b >> bwl;
    cost += cost_coeffs(cm, x, b * 64, PLANE_TYPE_Y_WITH_DC,
                        t_above + x_idx * 8, t_left + y_idx * 8,
                        TX_32X32, bw * bh * 64);
  }

  return cost;
}

770
/* Evaluates the luma plane of a block of size bsize with 32x32 transforms.
 *
 * Sets mbmi.txfm_size to TX_32X32, runs the forward transform and
 * quantization, then reports:
 *   *distortion  coefficient error over 16 << (bwl + bhl) coefficients;
 *                unlike the smaller transform sizes, no right shift is
 *                applied here (shift argument is 0);
 *   *rate        coefficient cost from rdcost_sby_32x32();
 *   *skippable   result of vp9_sby_is_skippable().
 */
static void super_block_yrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
                                  int *rate, int *distortion, int *skippable,
                                  BLOCK_SIZE_TYPE bsize) {
  const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
  MACROBLOCKD *const xd = &x->e_mbd;

  xd->mode_info_context->mbmi.txfm_size = TX_32X32;
  vp9_transform_sby_32x32(x, bsize);
  vp9_quantize_sby_32x32(x, bsize);

  *distortion = block_error_sby(x, 16 << (bwl + bhl), 0);
  *rate       = rdcost_sby_32x32(cm, x, bsize);
  *skippable  = vp9_sby_is_skippable(xd, bsize);
}

785 786
static void super_block_yrd(VP9_COMP *cpi,
                            MACROBLOCK *x, int *rate, int *distortion,
787
                            int *skip, BLOCK_SIZE_TYPE bs,
788
                            int64_t txfm_cache[NB_TXFM_MODES]) {
789 790
  VP9_COMMON *const cm = &cpi->common;
  int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB];
Ronald S. Bultje's avatar
Ronald S. Bultje committed
791

792
  vp9_subtract_sby(x, bs);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
793

794 795 796 797 798 799
  if (bs >= BLOCK_SIZE_SB32X32)
    super_block_yrd_32x32(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32],
                          bs);
  super_block_yrd_16x16(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16], bs);
  super_block_yrd_8x8(cm, x,   &r[TX_8X8][0],   &d[TX_8X8],   &s[TX_8X8],   bs);
  super_block_yrd_4x4(cm, x,   &r[TX_4X4][0],   &d[TX_4X4],   &s[TX_4X4],   bs);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
800 801

  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
802
                           TX_32X32 - (bs < BLOCK_SIZE_SB32X32));
Ronald S. Bultje's avatar
Ronald S. Bultje committed
803
}
Ronald S. Bultje's avatar
Ronald S. Bultje committed
804

805 806
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
                                     B_PREDICTION_MODE *best_mode,
807 808 809 810
                                     int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
                                     int *bestdistortion) {
John Koleszar's avatar
John Koleszar committed
811
  B_PREDICTION_MODE mode;
Deb Mukherjee's avatar
Deb Mukherjee committed
812
  MACROBLOCKD *xd = &x->e_mbd;
813
  int64_t best_rd = INT64_MAX;
John Koleszar's avatar
John Koleszar committed
814 815
  int rate = 0;
  int distortion;
816
  VP9_COMMON *const cm = &cpi->common;
John Koleszar's avatar
John Koleszar committed
817 818 819 820
  const int src_stride = x->plane[0].src.stride;
  uint8_t* const src =
      raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib,
                                x->plane[0].src.buf, src_stride);
821 822 823
  int16_t* const src_diff =
      raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, 0, ib,
                                x->plane[0].src_diff);
John Koleszar's avatar
John Koleszar committed
824 825 826
  int16_t* const diff =
      raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, 0, ib,
                                xd->plane[0].diff);