vp9_rdopt.c 204 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
John Koleszar's avatar
John Koleszar committed
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5 6
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
John Koleszar's avatar
John Koleszar committed
9 10 11 12 13 14 15
 */


#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
16 17
#include "vp9/common/vp9_pragmas.h"

18 19 20 21 22
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
23 24 25 26 27
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
28 29 30 31 32 33
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
34
#include "vpx_mem/vpx_mem.h"
35 36
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
John Koleszar's avatar
John Koleszar committed
37

38 39 40
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
41
#include "vp9_rtcd.h"
42
#include "vp9/common/vp9_mvref_common.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
43
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
44

John Koleszar's avatar
John Koleszar committed
45 46
#define MAXF(a,b)            (((a) > (b)) ? (a) : (b))

47 48
#define INVALID_MV 0x80008000

49 50 51
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

John Koleszar's avatar
John Koleszar committed
52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
/* Table of 17 integer thresholds.
 * NOTE(review): presumably indexed by encoder speed setting -- no use of this
 * table is visible in this part of the file; confirm against its users. */
static const int auto_speed_thresh[17] = {
  1000, 200, 150, 130, 150, 125, 120,
  115, 115, 115, 115, 115, 115, 115, 115, 115,
  105
};

72
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
73 74
  {ZEROMV,    LAST_FRAME,   NONE},
  {DC_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
75

76 77
  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARMV,    LAST_FRAME,   NONE},
John Koleszar's avatar
John Koleszar committed
78

79 80
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
81

82 83
  {ZEROMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
84

85 86
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
87

88 89 90 91 92 93 94 95
  {V_PRED,    INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
96

97
  {TM_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
98

99 100 101
  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
102

103 104 105
  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},
106

Yaowu Xu's avatar
Yaowu Xu committed
107
  {I4X4_PRED,    INTRA_FRAME,  NONE},
108
#if !CONFIG_SB8X8
109
  {I8X8_PRED, INTRA_FRAME,  NONE},
110
#endif
111

John Koleszar's avatar
John Koleszar committed
112 113 114 115
  /* compound prediction modes */
  {ZEROMV,    LAST_FRAME,   GOLDEN_FRAME},
  {NEARESTMV, LAST_FRAME,   GOLDEN_FRAME},
  {NEARMV,    LAST_FRAME,   GOLDEN_FRAME},
116

John Koleszar's avatar
John Koleszar committed
117 118 119
  {ZEROMV,    ALTREF_FRAME, LAST_FRAME},
  {NEARESTMV, ALTREF_FRAME, LAST_FRAME},
  {NEARMV,    ALTREF_FRAME, LAST_FRAME},
120

John Koleszar's avatar
John Koleszar committed
121 122 123
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
124

John Koleszar's avatar
John Koleszar committed
125 126 127
  {NEWMV,     LAST_FRAME,   GOLDEN_FRAME},
  {NEWMV,     ALTREF_FRAME, LAST_FRAME  },
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},
128

John Koleszar's avatar
John Koleszar committed
129 130
  {SPLITMV,   LAST_FRAME,   GOLDEN_FRAME},
  {SPLITMV,   ALTREF_FRAME, LAST_FRAME  },
131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},

#if CONFIG_COMP_INTERINTRA_PRED
  /* compound inter-intra prediction */
  {ZEROMV,    LAST_FRAME,   INTRA_FRAME},
  {NEARESTMV, LAST_FRAME,   INTRA_FRAME},
  {NEARMV,    LAST_FRAME,   INTRA_FRAME},
  {NEWMV,     LAST_FRAME,   INTRA_FRAME},

  {ZEROMV,    GOLDEN_FRAME,   INTRA_FRAME},
  {NEARESTMV, GOLDEN_FRAME,   INTRA_FRAME},
  {NEARMV,    GOLDEN_FRAME,   INTRA_FRAME},
  {NEWMV,     GOLDEN_FRAME,   INTRA_FRAME},

  {ZEROMV,    ALTREF_FRAME,   INTRA_FRAME},
  {NEARESTMV, ALTREF_FRAME,   INTRA_FRAME},
  {NEARMV,    ALTREF_FRAME,   INTRA_FRAME},
  {NEWMV,     ALTREF_FRAME,   INTRA_FRAME},
#endif
John Koleszar's avatar
John Koleszar committed
150 151
};

152 153
static void fill_token_costs(vp9_coeff_count *c,
                             vp9_coeff_probs *p,
154
                             TX_SIZE tx_size) {
155
  int i, j, k, l;
John Koleszar's avatar
John Koleszar committed
156

157
  for (i = 0; i < BLOCK_TYPES; i++)
158 159 160
    for (j = 0; j < REF_TYPES; j++)
      for (k = 0; k < COEF_BANDS; k++)
        for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
161 162 163 164 165 166
          vp9_cost_tokens_skip((int *)(c[i][j][k][l]),
                               p[i][j][k][l],
                               vp9_coef_tree);
        }
}

167 168 169 170
/* 32-entry factor table: only the first five entries are non-zero.
 * vp9_initialize_rd_consts() indexes it with twopass.next_iiratio (capped at
 * 31) and adds (RDMULT * factor) >> 4 to RDMULT. */
static int rd_iifactor[32] = {
  4, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
John Koleszar's avatar
John Koleszar committed
171

172
// 3* dc_qlookup[Q]*dc_qlookup[Q];
173

174
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
175 176 177
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

/* Populates both SAD-per-bit lookup tables, one entry per quantizer index.
 * Each entry is a linear function of vp9_convert_qindex_to_q(index),
 * truncated to int.  The tables are derived by formula (rather than being
 * hand-tuned constants) so that experimental changes to the quantizer tables
 * carry through automatically. */
void vp9_init_me_luts() {
  int qindex;

  for (qindex = 0; qindex < QINDEX_RANGE; ++qindex) {
    const double q = vp9_convert_qindex_to_q(qindex);

    sad_per_bit16lut[qindex] = (int)((0.0418 * q) + 2.4107);
    sad_per_bit4lut[qindex] = (int)((0.063 * q) + 2.742);
  }
}
John Koleszar's avatar
John Koleszar committed
190

191
/* Returns (11 * q * q) >> 2, where q = vp9_dc_quant(qindex, 0). */
static int compute_rd_mult(int qindex) {
  const int dc_q = vp9_dc_quant(qindex, 0);
  const int scaled = 11 * dc_q * dc_q;

  return scaled >> 2;
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
196 197 198
/* Copies the SAD-per-bit values for `qindex` from the file-level lookup
 * tables into cpi->mb.  qindex is used directly as a table index and is not
 * range-checked here. */
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  MACROBLOCK *const mb = &cpi->mb;

  mb->sadperbit16 = sad_per_bit16lut[qindex];
  mb->sadperbit4 = sad_per_bit4lut[qindex];
}

201

Dmitry Kovalev's avatar
Dmitry Kovalev committed
202
/* Initializes the rate-distortion constants held in `cpi` for the given
 * quantizer index.
 *
 * In order, this:
 *   - clamps qindex to [0, MAXQ];
 *   - derives RDMULT (with a second-pass adjustment on non-key frames) and
 *     mb.errorperbit (forced to be at least 1);
 *   - refreshes the speed features via vp9_set_speed_features();
 *   - sets RDDIV and the per-mode RD thresholds (rd_threshes and
 *     rd_baseline_thresh);
 *   - rebuilds the token, partition, mode and (on non-key frames) motion
 *     vector cost tables from the current frame-context probabilities.
 *
 * Threshold products that do not fit in an int saturate to INT_MAX. */
void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
  int q, i;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  qindex = (qindex < 0) ? 0 : ((qindex > MAXQ) ? MAXQ : qindex);

  cpi->RDMULT = compute_rd_mult(qindex);
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    if (cpi->twopass.next_iiratio > 31)
      cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
    else
      cpi->RDMULT +=
          (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
  }
  cpi->mb.errorperbit = cpi->RDMULT >> 6;
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);

  vp9_set_speed_features(cpi);

  q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25);
  q <<= 2;
  if (q < 8)
    q = 8;

  if (cpi->RDMULT > 1000) {
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;

    for (i = 0; i < MAX_MODES; i++) {
      if (cpi->sf.thresh_mult[i] < INT_MAX) {
        // Multiply in 64 bits: thresh_mult * q can exceed INT_MAX even when
        // the final quotient fits, and signed int overflow is undefined.
        const int64_t thresh = (int64_t)cpi->sf.thresh_mult[i] * q / 100;
        cpi->rd_threshes[i] = thresh > INT_MAX ? INT_MAX : (int)thresh;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }
      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  } else {
    cpi->RDDIV = 100;

    for (i = 0; i < MAX_MODES; i++) {
      if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) {
        cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }
      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  }

  fill_token_costs(cpi->mb.token_costs[TX_4X4],
                   cpi->common.fc.coef_probs_4x4, TX_4X4);
  fill_token_costs(cpi->mb.token_costs[TX_8X8],
                   cpi->common.fc.coef_probs_8x8, TX_8X8);
  fill_token_costs(cpi->mb.token_costs[TX_16X16],
                   cpi->common.fc.coef_probs_16x16, TX_16X16);
  fill_token_costs(cpi->mb.token_costs[TX_32X32],
                   cpi->common.fc.coef_probs_32x32, TX_32X32);

  for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
    vp9_cost_tokens(cpi->mb.partition_cost[i],
                    cpi->common.fc.partition_prob[i],
                    vp9_partition_tree);

  /*rough estimate for costing*/
  cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4;
  vp9_init_mode_costs(cpi);

  if (cpi->common.frame_type != KEY_FRAME) {
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cpi->mb.e_mbd.allow_high_precision_mv ?
        cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cpi->common.fc.nmvc,
        cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
  }
}

284
/* Returns the sum of squared differences between the first `block_size`
 * entries of `coeff` and `dqcoeff`.
 *
 * The sum is accumulated in 64 bits, because a single int16 difference can
 * reach 65535 and its square already exceeds INT_MAX.  A total that does not
 * fit in an int is returned as INT_MAX.  A block_size <= 0 yields 0. */
int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) {
  int64_t error = 0;
  int i;

  for (i = 0; i < block_size; i++) {
    const int64_t this_diff = (int64_t)coeff[i] - dqcoeff[i];
    error += this_diff * this_diff;
  }

  return error > INT_MAX ? INT_MAX : (int)error;
}

295
static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
296
                              int ib, PLANE_TYPE type,
297 298
                              ENTROPY_CONTEXT *A,
                              ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
299 300
                              TX_SIZE tx_size,
                              int y_blocks) {
301
  MACROBLOCKD *const xd = &mb->e_mbd;
302 303
  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
  int pt;
304
  int c = 0;
305 306
  int cost = 0, pad;
  const int *scan, *nb;
John Koleszar's avatar
John Koleszar committed
307 308
  const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, ib);
  const int eob = xd->plane[pb_idx.plane].eobs[pb_idx.block];
309 310
  const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff,
                                           pb_idx.block, 16);
311
  const int ref = mbmi->ref_frame != INTRA_FRAME;
312
  unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
313
      mb->token_costs[tx_size][type][ref];
314
  ENTROPY_CONTEXT above_ec, left_ec;
315
  TX_TYPE tx_type = DCT_DCT;
316

317 318 319 320 321 322 323 324
#if CONFIG_CODE_ZEROGROUP
  int last_nz_pos[3] = {-1, -1, -1};  // Encoder only
  int is_eoo_list[3] = {0, 0, 0};
  int is_eoo_negative[3] = {0, 0, 0};
  int is_last_zero[3] = {0, 0, 0};
  int o, rc, skip_coef_val;
  vp9_zpc_probs *zpc_probs;
  uint8_t token_cache_full[1024];
325
#endif
326
  const int segment_id = xd->mode_info_context->mbmi.segment_id;
327 328 329 330
  vp9_prob (*coef_probs)[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS]
                        [ENTROPY_NODES];
  int seg_eob, default_eob;
  uint8_t token_cache[1024];
331

332 333 334 335
#if CONFIG_CODE_ZEROGROUP
  vpx_memset(token_cache, UNKNOWN_TOKEN, sizeof(token_cache));
#endif

336
  // Check for consistency of tx_size with mode info
John Koleszar's avatar
John Koleszar committed
337
  assert((!type && !pb_idx.plane) || (type && pb_idx.plane));
338 339 340 341 342 343 344
  if (type == PLANE_TYPE_Y_WITH_DC) {
    assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
  } else {
    TX_SIZE tx_size_uv = get_uv_tx_size(xd);
    assert(tx_size == tx_size_uv);
  }

345
  switch (tx_size) {
346
    case TX_4X4: {
347 348
      tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_4x4(xd, ib) : DCT_DCT;
349 350
      above_ec = A[0] != 0;
      left_ec = L[0] != 0;
351
      coef_probs = cm->fc.coef_probs_4x4;
352
      seg_eob = 16;
353
      scan = get_scan_4x4(tx_type);
354 355 356
#if CONFIG_CODE_ZEROGROUP
      zpc_probs = &cm->fc.zpc_probs_4x4;
#endif
Daniel Kang's avatar
Daniel Kang committed
357
      break;
358
    }
359 360
    case TX_8X8: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
361
      const int sz = 1 + b_width_log2(sb_type);
362
      const int x = ib & ((1 << sz) - 1), y = ib - x;
363 364
      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT;
365 366
      above_ec = (A[0] + A[1]) != 0;
      left_ec = (L[0] + L[1]) != 0;
367
      scan = get_scan_8x8(tx_type);
368
      coef_probs = cm->fc.coef_probs_8x8;
369
      seg_eob = 64;
370 371 372
#if CONFIG_CODE_ZEROGROUP
      zpc_probs = &cm->fc.zpc_probs_8x8;
#endif
Daniel Kang's avatar
Daniel Kang committed
373
      break;
374 375 376
    }
    case TX_16X16: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
377
      const int sz = 2 + b_width_log2(sb_type);
378
      const int x = ib & ((1 << sz) - 1), y = ib - x;
379 380
      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT;
381
      scan = get_scan_16x16(tx_type);
382
      coef_probs = cm->fc.coef_probs_16x16;
383
      seg_eob = 256;
384 385
      above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
386 387 388
#if CONFIG_CODE_ZEROGROUP
      zpc_probs = &cm->fc.zpc_probs_16x16;
#endif
Daniel Kang's avatar
Daniel Kang committed
389
      break;
390
    }
391 392
    case TX_32X32:
      scan = vp9_default_zig_zag1d_32x32;
393
      coef_probs = cm->fc.coef_probs_32x32;
394
      seg_eob = 1024;
395 396 397
      above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;

398 399 400
#if CONFIG_CODE_ZEROGROUP
      zpc_probs = &cm->fc.zpc_probs_32x32;
#endif
401
      break;
Daniel Kang's avatar
Daniel Kang committed
402
    default:
403
      abort();
Daniel Kang's avatar
Daniel Kang committed
404 405
      break;
  }
John Koleszar's avatar
John Koleszar committed
406
  assert(eob <= seg_eob);
407

408
  pt = combine_entropy_contexts(above_ec, left_ec);
409 410
  nb = vp9_get_coef_neighbors_handle(scan, &pad);
  default_eob = seg_eob;
411

412 413
  if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
    seg_eob = 0;
414

415 416 417 418
  /* sanity check to ensure that we do not have spurious non-zero q values */
  if (eob < seg_eob)
    assert(qcoeff_ptr[scan[eob]] == 0);

419 420 421 422 423 424 425 426 427 428
#if CONFIG_CODE_ZEROGROUP
  vpx_memset(token_cache_full, ZERO_TOKEN, sizeof(token_cache_full));
  for (c = 0; c < eob; ++c) {
    rc = scan[c];
    token_cache_full[rc] = vp9_dct_value_tokens_ptr[qcoeff_ptr[rc]].token;
    o = vp9_get_orientation(rc, tx_size);
    if (qcoeff_ptr[rc] != 0)
      last_nz_pos[o] = c;
  }
#endif
429
  {
430
    for (c = 0; c < eob; c++) {
431
      int v = qcoeff_ptr[scan[c]];
432
      int t = vp9_dct_value_tokens_ptr[v].token;
433 434 435 436 437 438 439 440 441 442 443 444 445 446 447
      int band = get_coef_band(scan, tx_size, c);
      if (c)
        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
#if CONFIG_CODE_ZEROGROUP
      rc = scan[c];
      o = vp9_get_orientation(rc, tx_size);
      skip_coef_val = (token_cache[rc] == ZERO_TOKEN || is_eoo_list[o]);
      if (!skip_coef_val) {
        cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
      } else {
        assert(v == 0);
      }
#else
      cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
#endif
448 449
      if (!c || token_cache[scan[c - 1]])
        cost += vp9_cost_bit(coef_probs[type][ref][band][pt][0], 1);
450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505
      token_cache[scan[c]] = t;
#if CONFIG_CODE_ZEROGROUP
      if (t == ZERO_TOKEN && !skip_coef_val) {
        int eoo = 0, use_eoo;
#if USE_ZPC_EOORIENT == 1
        use_eoo = vp9_use_eoo(c, seg_eob, scan, tx_size,
                              is_last_zero, is_eoo_list);
#else
        use_eoo = 0;
#endif
        if (use_eoo) {
          eoo = vp9_is_eoo(c, eob, scan, tx_size, qcoeff_ptr, last_nz_pos);
          if (eoo && is_eoo_negative[o]) eoo = 0;
          if (eoo) {
            int c_;
            int savings = 0;
            int zsaved = 0;
            savings = vp9_cost_bit((*zpc_probs)[ref]
                                   [coef_to_zpc_band(band)]
                                   [coef_to_zpc_ptok(pt)][0], 1) -
                      vp9_cost_bit((*zpc_probs)[ref]
                                   [coef_to_zpc_band(band)]
                                   [coef_to_zpc_ptok(pt)][0], 0);
            for (c_ = c + 1; c_ < eob; ++c_) {
              if (o == vp9_get_orientation(scan[c_], tx_size)) {
                int pt_ = vp9_get_coef_context(scan, nb, pad,
                                               token_cache_full, c_,
                                               default_eob);
                int band_ = get_coef_band(scan, tx_size, c_);
                assert(token_cache_full[scan[c_]] == ZERO_TOKEN);
                if (!c_ || token_cache_full[scan[c_ - 1]])
                  savings += vp9_cost_bit(
                      coef_probs[type][ref][band_][pt_][0], 1);
                savings += vp9_cost_bit(
                    coef_probs[type][ref][band_][pt_][1], 0);
                zsaved++;
              }
            }
            if (savings < 0) {
            // if (zsaved < ZPC_ZEROSSAVED_EOO) {
              eoo = 0;
              is_eoo_negative[o] = 1;
            }
          }
        }
        if (use_eoo) {
          cost += vp9_cost_bit((*zpc_probs)[ref]
                                           [coef_to_zpc_band(band)]
                                           [coef_to_zpc_ptok(pt)][0], !eoo);
          if (eoo) {
            assert(is_eoo_list[o] == 0);
            is_eoo_list[o] = 1;
          }
        }
      }
      is_last_zero[o] = (t == ZERO_TOKEN);
506
#endif
507
    }
508 509 510 511 512 513 514
    if (c < seg_eob) {
      if (c)
        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
      cost += mb->token_costs[tx_size][type][ref]
          [get_coef_band(scan, tx_size, c)]
          [pt][DCT_EOB_TOKEN];
    }
515 516
  }

517 518 519
  // is eob first coefficient;
  for (pt = 0; pt < (1 << tx_size); pt++) {
    A[pt] = L[pt] = c > 0;
520
  }
521

522 523 524
  return cost;
}

525
/* Chooses the transform size for the current block from per-size rate,
 * distortion and skip data, and fills the per-transform-mode RD cache.
 *
 *   r[n][0]     rate for transform size n without transform-size signalling
 *               (input); r[n][1] is written here as r[n][0] plus the cost of
 *               signalling size n using cm->prob_tx.
 *   d[n], s[n]  distortion and skip flag for transform size n (inputs).
 *   rate, distortion, skip
 *               outputs for the selected size; the rate includes signalling
 *               only when cm->txfm_mode is TX_MODE_SELECT.
 *   txfm_cache  output: RD cost that each transform mode would give.
 *   max_txfm_size
 *               largest transform size for which r/d/s are populated.
 *
 * Side effect: sets mbmi->txfm_size of the current mode info. */
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                     int (*r)[2], int *rate,
                                     int *d, int *distortion,
                                     int *s, int *skip,
                                     int64_t txfm_cache[NB_TXFM_MODES],
                                     TX_SIZE max_txfm_size) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  vp9_prob skip_prob = vp9_get_pred_prob(cm, xd, PRED_MBSKIP);
  int64_t rd[TX_SIZE_MAX_SB][2];
  int tx, bit;
  int no_skip_cost, skip_cost;
  TX_SIZE select_tx;

  // Rate including transform-size signalling.  The largest allowed size
  // needs one bit fewer, hence the adjusted upper bound.
  for (tx = TX_4X4; tx <= max_txfm_size; tx++) {
    const int last_bit = tx - (tx == max_txfm_size);

    r[tx][1] = r[tx][0];
    for (bit = 0; bit <= last_bit; bit++) {
      if (bit == tx)
        r[tx][1] += vp9_cost_zero(cm->prob_tx[bit]);
      else
        r[tx][1] += vp9_cost_one(cm->prob_tx[bit]);
    }
  }

  assert(skip_prob > 0);
  no_skip_cost = vp9_cost_bit(skip_prob, 0);
  skip_cost = vp9_cost_bit(skip_prob, 1);

  // RD cost per size, without ([0]) and with ([1]) size signalling.  A
  // skipped block only pays for the skip flag.
  for (tx = TX_4X4; tx <= max_txfm_size; tx++) {
    if (s[tx]) {
      rd[tx][0] = rd[tx][1] = RDCOST(x->rdmult, x->rddiv, skip_cost, d[tx]);
    } else {
      rd[tx][0] = RDCOST(x->rdmult, x->rddiv, r[tx][0] + no_skip_cost, d[tx]);
      rd[tx][1] = RDCOST(x->rdmult, x->rddiv, r[tx][1] + no_skip_cost, d[tx]);
    }
  }

  // Winner when the size is signalled per block: a larger size is taken
  // only if it is strictly cheaper than every smaller one.
  if (max_txfm_size == TX_32X32 &&
      rd[TX_32X32][1] < rd[TX_16X16][1] &&
      rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1]) {
    select_tx = TX_32X32;
  } else if (max_txfm_size >= TX_16X16 &&
             rd[TX_16X16][1] < rd[TX_8X8][1] &&
             rd[TX_16X16][1] < rd[TX_4X4][1]) {
    select_tx = TX_16X16;
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
    select_tx = TX_8X8;
  } else {
    select_tx = TX_4X4;
  }

  switch (cm->txfm_mode) {
    case TX_MODE_SELECT:
      mbmi->txfm_size = select_tx;
      break;
    case ALLOW_32X32:
      if (max_txfm_size == TX_32X32)
        mbmi->txfm_size = TX_32X32;
      else if (max_txfm_size >= TX_16X16)
        mbmi->txfm_size = TX_16X16;
      else
        mbmi->txfm_size = TX_8X8;
      break;
    case ALLOW_16X16:
      mbmi->txfm_size = (max_txfm_size >= TX_16X16) ? TX_16X16 : TX_8X8;
      break;
    case ALLOW_8X8:
      mbmi->txfm_size = TX_8X8;
      break;
    default:
      mbmi->txfm_size = TX_4X4;
      break;
  }

  *distortion = d[mbmi->txfm_size];
  *rate       = r[mbmi->txfm_size][cm->txfm_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->txfm_size];

  txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
  txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
  txfm_cache[ALLOW_16X16] = rd[MIN(max_txfm_size, TX_16X16)][0];
  txfm_cache[ALLOW_32X32] = rd[MIN(max_txfm_size, TX_32X32)][0];
  txfm_cache[TX_MODE_SELECT] = rd[select_tx][1];
}

604 605
/* Returns the sum of squared differences between the first `block_size`
 * entries of `coeff` and `dqcoeff`, shifted right by `shift` bits and
 * saturated to INT_MAX.
 *
 * Each difference is widened to 64 bits before squaring: an int16 difference
 * can reach 65535, whose square does not fit in an int. */
static int block_error(int16_t *coeff, int16_t *dqcoeff,
                       int block_size, int shift) {
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    const int64_t this_diff = (int64_t)coeff[i] - dqcoeff[i];
    error += this_diff * this_diff;
  }
  error >>= shift;

  return error > INT_MAX ? INT_MAX : (int)error;
}

618 619 620 621
/* Coefficient error of plane 0: compares the macroblock's plane-0 transform
 * coefficients with the dequantized ones over `block_size` entries and
 * returns the result of block_error() for the given `shift`. */
static int block_error_sby(MACROBLOCK *x, int block_size, int shift) {
  int16_t *const src_coeff = x->plane[0].coeff;
  int16_t *const deq_coeff = x->e_mbd.plane[0].dqcoeff;

  return block_error(src_coeff, deq_coeff, block_size, shift);
}
622

623 624 625 626
/* Sums block_error() over planes 1..MAX_MB_PLANE-1 for a block of size
 * `bsize`.  Each plane's coefficient count is reduced according to its x and
 * y subsampling.  The total is shifted right by `shift` and saturated to
 * INT_MAX. */
static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
  const int width_log2 = b_width_log2(bsize);
  const int height_log2 = b_height_log2(bsize);
  int64_t total = 0;
  int p;

  for (p = 1; p < MAX_MB_PLANE; ++p) {
    const int ss = x->e_mbd.plane[p].subsampling_x +
                   x->e_mbd.plane[p].subsampling_y;
    const int num_coeffs = 16 << (width_log2 + height_log2 - ss);

    total += block_error(x->plane[p].coeff, x->e_mbd.plane[p].dqcoeff,
                         num_coeffs, 0);
  }

  total >>= shift;
  if (total > INT_MAX)
    return INT_MAX;
  return (int)total;
}

638 639
/* Returns the total token cost of plane 0 for a block of size `bsize` coded
 * with 4x4 transforms.  The above/left entropy contexts are copied into
 * local scratch arrays first, so the contexts stored in `x` are left
 * untouched while cost_coeffs() updates the copies. */
static int rdcost_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x,
                          BLOCK_SIZE_TYPE bsize) {
  const int cols_log2 = b_width_log2(bsize);
  const int cols = 1 << cols_log2;
  const int rows = 1 << b_height_log2(bsize);
  const int num_blocks = cols * rows;
  MACROBLOCKD *const xd = &x->e_mbd;
  ENTROPY_CONTEXT above[16], left[16];
  int total = 0;
  int blk;

  vpx_memcpy(above, xd->plane[0].above_context,
             sizeof(ENTROPY_CONTEXT) * cols);
  vpx_memcpy(left, xd->plane[0].left_context,
             sizeof(ENTROPY_CONTEXT) * rows);

  for (blk = 0; blk < num_blocks; ++blk) {
    const int col = blk & (cols - 1);
    const int row = blk >> cols_log2;

    total += cost_coeffs(cm, x, blk, PLANE_TYPE_Y_WITH_DC,
                         above + col, left + row,
                         TX_4X4, num_blocks);
  }

  return total;
}

/* Evaluates plane 0 of a block with the 4x4 transform: marks the mode info
 * as TX_4X4, transforms and quantizes the block, then reports the
 * coefficient error (shifted right by 2), the token rate and whether the
 * block is skippable. */
static void super_block_yrd_4x4(VP9_COMMON *const cm, MACROBLOCK *x,
                                int *rate, int *distortion, int *skippable,
                                BLOCK_SIZE_TYPE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const int num_coeffs = 16 << (b_width_log2(bsize) + b_height_log2(bsize));

  xd->mode_info_context->mbmi.txfm_size = TX_4X4;
  vp9_transform_sby_4x4(x, bsize);
  vp9_quantize_sby_4x4(x, bsize);

  *distortion = block_error_sby(x, num_coeffs, 2);
  *rate       = rdcost_sby_4x4(cm, x, bsize);
  *skippable  = vp9_sby_is_skippable(xd, bsize);
}

676 677
/* Returns the total token cost of plane 0 for a block of size `bsize` coded
 * with 8x8 transforms.  Each transform block spans two entropy-context
 * entries in each direction and four 4x4 block indices.  Costing works on
 * local copies of the above/left contexts. */
static int rdcost_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x,
                          BLOCK_SIZE_TYPE bsize) {
  const int cols_log2 = b_width_log2(bsize) - 1;
  const int cols = 1 << cols_log2;
  const int rows = 1 << (b_height_log2(bsize) - 1);
  const int num_blocks = cols * rows;
  MACROBLOCKD *const xd = &x->e_mbd;
  ENTROPY_CONTEXT above[16], left[16];
  int total = 0;
  int blk;

  vpx_memcpy(above, xd->plane[0].above_context,
             sizeof(ENTROPY_CONTEXT) * 2 * cols);
  vpx_memcpy(left, xd->plane[0].left_context,
             sizeof(ENTROPY_CONTEXT) * 2 * rows);

  for (blk = 0; blk < num_blocks; ++blk) {
    const int col = blk & (cols - 1);
    const int row = blk >> cols_log2;

    total += cost_coeffs(cm, x, blk * 4, PLANE_TYPE_Y_WITH_DC,
                         above + col * 2, left + row * 2,
                         TX_8X8, 4 * num_blocks);
  }

  return total;
}

/* Evaluates plane 0 of a block with the 8x8 transform: marks the mode info
 * as TX_8X8, transforms and quantizes the block, then reports the
 * coefficient error (shifted right by 2), the token rate and whether the
 * block is skippable. */
static void super_block_yrd_8x8(VP9_COMMON *const cm, MACROBLOCK *x,
                                int *rate, int *distortion, int *skippable,
                                BLOCK_SIZE_TYPE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const int num_coeffs = 16 << (b_width_log2(bsize) + b_height_log2(bsize));

  xd->mode_info_context->mbmi.txfm_size = TX_8X8;
  vp9_transform_sby_8x8(x, bsize);
  vp9_quantize_sby_8x8(x, bsize);

  *distortion = block_error_sby(x, num_coeffs, 2);
  *rate       = rdcost_sby_8x8(cm, x, bsize);
  *skippable  = vp9_sby_is_skippable(xd, bsize);
}

714 715
/* Returns the total token cost of plane 0 for a block of size `bsize` coded
 * with 16x16 transforms.  Each transform block spans four entropy-context
 * entries in each direction and sixteen 4x4 block indices.  Costing works on
 * local copies of the above/left contexts. */
static int rdcost_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
                            BLOCK_SIZE_TYPE bsize) {
  const int cols_log2 = b_width_log2(bsize) - 2;
  const int cols = 1 << cols_log2;
  const int rows = 1 << (b_height_log2(bsize) - 2);
  const int num_blocks = cols * rows;
  MACROBLOCKD *const xd = &x->e_mbd;
  ENTROPY_CONTEXT above[16], left[16];
  int total = 0;
  int blk;

  vpx_memcpy(above, xd->plane[0].above_context,
             sizeof(ENTROPY_CONTEXT) * 4 * cols);
  vpx_memcpy(left, xd->plane[0].left_context,
             sizeof(ENTROPY_CONTEXT) * 4 * rows);

  for (blk = 0; blk < num_blocks; ++blk) {
    const int col = blk & (cols - 1);
    const int row = blk >> cols_log2;

    total += cost_coeffs(cm, x, blk * 16, PLANE_TYPE_Y_WITH_DC,
                         above + col * 4, left + row * 4,
                         TX_16X16, num_blocks * 16);
  }

  return total;
}

/* Evaluates plane 0 of a block with the 16x16 transform: marks the mode info
 * as TX_16X16, transforms and quantizes the block, then reports the
 * coefficient error (shifted right by 2), the token rate and whether the
 * block is skippable. */
static void super_block_yrd_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
                                  int *rate, int *distortion, int *skippable,
                                  BLOCK_SIZE_TYPE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const int num_coeffs = 16 << (b_width_log2(bsize) + b_height_log2(bsize));

  xd->mode_info_context->mbmi.txfm_size = TX_16X16;
  vp9_transform_sby_16x16(x, bsize);
  vp9_quantize_sby_16x16(x, bsize);

  *distortion = block_error_sby(x, num_coeffs, 2);
  *rate       = rdcost_sby_16x16(cm, x, bsize);
  *skippable  = vp9_sby_is_skippable(xd, bsize);
}

752 753
/* Returns the total token cost of plane 0 for a block of size `bsize` coded
 * with 32x32 transforms.  Each transform block spans eight entropy-context
 * entries in each direction and sixty-four 4x4 block indices.  Costing works
 * on local copies of the above/left contexts. */
static int rdcost_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
                            BLOCK_SIZE_TYPE bsize) {
  const int cols_log2 = b_width_log2(bsize) - 3;
  const int cols = 1 << cols_log2;
  const int rows = 1 << (b_height_log2(bsize) - 3);
  const int num_blocks = cols * rows;
  MACROBLOCKD *const xd = &x->e_mbd;
  ENTROPY_CONTEXT above[16], left[16];
  int total = 0;
  int blk;

  vpx_memcpy(above, xd->plane[0].above_context,
             sizeof(ENTROPY_CONTEXT) * 8 * cols);
  vpx_memcpy(left, xd->plane[0].left_context,
             sizeof(ENTROPY_CONTEXT) * 8 * rows);

  for (blk = 0; blk < num_blocks; ++blk) {
    const int col = blk & (cols - 1);
    const int row = blk >> cols_log2;

    total += cost_coeffs(cm, x, blk * 64, PLANE_TYPE_Y_WITH_DC,
                         above + col * 8, left + row * 8,
                         TX_32X32, num_blocks * 64);
  }

  return total;
}

775
/* Evaluates plane 0 of a block with the 32x32 transform: marks the mode info
 * as TX_32X32, transforms and quantizes the block, then reports the
 * coefficient error, the token rate and whether the block is skippable.
 * Unlike the smaller transform sizes, the error is not shifted (shift 0). */
static void super_block_yrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
                                  int *rate, int *distortion, int *skippable,
                                  BLOCK_SIZE_TYPE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const int num_coeffs = 16 << (b_width_log2(bsize) + b_height_log2(bsize));

  xd->mode_info_context->mbmi.txfm_size = TX_32X32;
  vp9_transform_sby_32x32(x, bsize);
  vp9_quantize_sby_32x32(x, bsize);

  *distortion = block_error_sby(x, num_coeffs, 0);
  *rate       = rdcost_sby_32x32(cm, x, bsize);
  *skippable  = vp9_sby_is_skippable(xd, bsize);
}

790 791
static void super_block_yrd(VP9_COMP *cpi,
                            MACROBLOCK *x, int *rate, int *distortion,
792
                            int *skip, BLOCK_SIZE_TYPE bs,
793
                            int64_t txfm_cache[NB_TXFM_MODES]) {
794 795
  VP9_COMMON *const cm = &cpi->common;
  int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB];
Ronald S. Bultje's avatar
Ronald S. Bultje committed
796

797
  vp9_subtract_sby(x, bs);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
798

799 800 801
  if (bs >= BLOCK_SIZE_SB32X32)
    super_block_yrd_32x32(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32],
                          bs);
802 803 804
  if (bs >= BLOCK_SIZE_MB16X16)
    super_block_yrd_16x16(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16],
                          bs);
805 806
  super_block_yrd_8x8(cm, x,   &r[TX_8X8][0],   &d[TX_8X8],   &s[TX_8X8],   bs);
  super_block_yrd_4x4(cm, x,   &r[TX_4X4][0],   &d[TX_4X4],   &s[TX_4X4],   bs);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
807 808

  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
809 810 811 812 813
                           TX_32X32 - (bs < BLOCK_SIZE_SB32X32)
#if CONFIG_SB8X8
                           - (bs < BLOCK_SIZE_MB16X16)
#endif
                           );
Ronald S. Bultje's avatar
Ronald S. Bultje committed
814
}
Ronald S. Bultje's avatar
Ronald S. Bultje committed
815

816 817
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
                                     B_PREDICTION_MODE *best_mode,
818 819 820 821
                                     int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
                                     int *bestdistortion) {
John Koleszar's avatar
John Koleszar committed
822
  B_PREDICTION_MODE mode;
Deb Mukherjee's avatar
Deb Mukherjee committed
823
  MACROBLOCKD *xd = &x->e_mbd;
824
  int64_t best_rd = INT64_MAX;
John Koleszar's avatar
John Koleszar committed
825 826
  int rate = 0;
  int distortion;