vp9_rdopt.c 109 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
16

17
#include "vp9/common/vp9_pragmas.h"
18 19 20 21 22
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
23 24 25 26 27
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
28 29 30 31 32 33
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
34
#include "vpx_mem/vpx_mem.h"
35 36 37 38 39
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
40
#include "vp9_rtcd.h"
41
#include "vp9/common/vp9_mvref_common.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
42
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
43

44 45
#define INVALID_MV 0x80008000

46 47 48
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

John Koleszar's avatar
John Koleszar committed
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
/* Table of 17 threshold values (indices 0..16).
 * NOTE(review): the name suggests one threshold per encoder speed setting;
 * the consumer of this table is not visible in this part of the file. */
static const int auto_speed_thresh[17] = {
  1000,
  200,
  150,
  130,
  150,
  125,
  120,
  115,
  115,
  115,
  115,
  115,
  115,
  115,
  115,
  115,
  105
};

69
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
70 71
  {ZEROMV,    LAST_FRAME,   NONE},
  {DC_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
72

73 74
  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARMV,    LAST_FRAME,   NONE},
John Koleszar's avatar
John Koleszar committed
75

76 77
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
78

79 80
  {ZEROMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
81

82 83
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
84

85 86 87 88 89 90 91 92
  {V_PRED,    INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
93

94
  {TM_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
95

96 97 98
  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
99

100 101 102
  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},
103

Yaowu Xu's avatar
Yaowu Xu committed
104
  {I4X4_PRED,    INTRA_FRAME,  NONE},
105

John Koleszar's avatar
John Koleszar committed
106 107 108 109
  /* compound prediction modes */
  {ZEROMV,    LAST_FRAME,   GOLDEN_FRAME},
  {NEARESTMV, LAST_FRAME,   GOLDEN_FRAME},
  {NEARMV,    LAST_FRAME,   GOLDEN_FRAME},
110

John Koleszar's avatar
John Koleszar committed
111 112 113
  {ZEROMV,    ALTREF_FRAME, LAST_FRAME},
  {NEARESTMV, ALTREF_FRAME, LAST_FRAME},
  {NEARMV,    ALTREF_FRAME, LAST_FRAME},
114

John Koleszar's avatar
John Koleszar committed
115 116 117
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
118

John Koleszar's avatar
John Koleszar committed
119 120 121
  {NEWMV,     LAST_FRAME,   GOLDEN_FRAME},
  {NEWMV,     ALTREF_FRAME, LAST_FRAME  },
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},
122

John Koleszar's avatar
John Koleszar committed
123 124
  {SPLITMV,   LAST_FRAME,   GOLDEN_FRAME},
  {SPLITMV,   ALTREF_FRAME, LAST_FRAME  },
125
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},
John Koleszar's avatar
John Koleszar committed
126 127
};

128 129
static void fill_token_costs(vp9_coeff_count *c,
                             vp9_coeff_probs *p,
130
                             TX_SIZE tx_size) {
131
  int i, j, k, l;
John Koleszar's avatar
John Koleszar committed
132

133
  for (i = 0; i < BLOCK_TYPES; i++)
134 135
    for (j = 0; j < REF_TYPES; j++)
      for (k = 0; k < COEF_BANDS; k++)
136 137
        for (l = 0; l < PREV_COEF_CONTEXTS; l++)
          vp9_cost_tokens_skip((int *)c[i][j][k][l], p[i][j][k][l],
138 139 140
                               vp9_coef_tree);
}

141 142 143 144
/* Scaling factors applied to RDMULT in vp9_initialize_rd_consts(), indexed
 * by cpi->twopass.next_iiratio (values above 31 use entry 31).
 * Only the first five entries are non-zero; the remaining 27 are
 * zero-initialised implicitly. */
static int rd_iifactor[32] = { 4, 4, 3, 2, 1 };
John Koleszar's avatar
John Koleszar committed
145

146
// 3* dc_qlookup[Q]*dc_qlookup[Q];
147

148
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
149 150 151
/* Per-quantizer-index lookup tables, filled by vp9_init_me_luts() and read
 * by vp9_initialize_me_consts(). */
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

/* Populates both sad_per_bit lookup tables for every quantizer index in
 * [0, QINDEX_RANGE). Each entry is a linear function of
 * vp9_convert_qindex_to_q(i), truncated to int. */
void vp9_init_me_luts(void) {
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    sad_per_bit16lut[i] =
      (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107);
    sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742);
  }
}
John Koleszar's avatar
John Koleszar committed
164

165
/* Returns the base rate-distortion multiplier for `qindex`:
 * (11 * q * q) >> 2, where q = vp9_dc_quant(qindex, 0). */
static int compute_rd_mult(int qindex) {
  const int q = vp9_dc_quant(qindex, 0);
  return (11 * q * q) >> 2;
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
170 171 172
/* Copies the SAD-per-bit values for `qindex` from the lookup tables into
 * cpi->mb. The tables must have been filled by vp9_init_me_luts() first.
 * `qindex` is used directly as an array index and is not range-checked. */
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
  cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
}

175

Dmitry Kovalev's avatar
Dmitry Kovalev committed
176
/* Sets up the rate-distortion constants and cost tables in `cpi` for the
 * given quantizer index.
 *
 * Steps, in order:
 *   - clamp qindex to [0, MAXQ] and derive cpi->RDMULT from it; in pass 2 on
 *     non-key frames RDMULT is boosted using rd_iifactor[];
 *   - derive cpi->mb.errorperbit (never left at 0);
 *   - call vp9_set_speed_features();
 *   - choose RDDIV (and rescale RDMULT) and fill rd_threshes[] and
 *     rd_baseline_thresh[] from sf.thresh_mult[], saturating at INT_MAX;
 *   - rebuild the coefficient token costs for all four transform sizes, the
 *     partition costs and the mode costs;
 *   - on non-key frames, rebuild the motion vector cost table.
 */
void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
  int q, i;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  qindex = clamp(qindex, 0, MAXQ);

  cpi->RDMULT = compute_rd_mult(qindex);
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    // NOTE(review): only the upper bound of next_iiratio is checked here;
    // this assumes it is never negative - confirm against where it is set.
    if (cpi->twopass.next_iiratio > 31)
      cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
    else
      cpi->RDMULT +=
          (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
  }
  cpi->mb.errorperbit = cpi->RDMULT >> 6;
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);  // keep it non-zero

  vp9_set_speed_features(cpi);

  q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25);
  q <<= 2;
  if (q < 8)
    q = 8;

  if (cpi->RDMULT > 1000) {
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;

    for (i = 0; i < MAX_MODES; i++) {
      if (cpi->sf.thresh_mult[i] < INT_MAX) {
        // Form the product in 64 bits: thresh_mult * q can exceed INT_MAX
        // even though the final (/ 100) result may or may not fit.
        const int64_t scaled = (int64_t)cpi->sf.thresh_mult[i] * q / 100;
        cpi->rd_threshes[i] = scaled > INT_MAX ? INT_MAX : (int)scaled;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }
      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  } else {
    cpi->RDDIV = 100;

    for (i = 0; i < MAX_MODES; i++) {
      // q >= 8 here, so the division is safe and the product cannot overflow.
      if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) {
        cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }
      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  }

  fill_token_costs(cpi->mb.token_costs[TX_4X4],
                   cpi->common.fc.coef_probs_4x4, TX_4X4);
  fill_token_costs(cpi->mb.token_costs[TX_8X8],
                   cpi->common.fc.coef_probs_8x8, TX_8X8);
  fill_token_costs(cpi->mb.token_costs[TX_16X16],
                   cpi->common.fc.coef_probs_16x16, TX_16X16);
  fill_token_costs(cpi->mb.token_costs[TX_32X32],
                   cpi->common.fc.coef_probs_32x32, TX_32X32);

  for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
    vp9_cost_tokens(cpi->mb.partition_cost[i],
                    cpi->common.fc.partition_prob[i],
                    vp9_partition_tree);

  /*rough estimate for costing*/
  cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4;
  vp9_init_mode_costs(cpi);

  if (cpi->common.frame_type != KEY_FRAME) {
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cpi->mb.e_mbd.allow_high_precision_mv ?
        cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cpi->common.fc.nmvc,
        cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
  }
}

258
/* Returns the sum of squared differences between the first `block_size`
 * entries of `coeff` and `dqcoeff`.
 *
 * The sum is accumulated in 64 bits and saturated to INT_MAX on return:
 * a single pair of int16_t values can differ by up to 65535, whose square
 * already exceeds INT_MAX, so accumulating in a plain int would overflow.
 *
 * coeff       original coefficients
 * dqcoeff     dequantized coefficients
 * block_size  number of coefficients to compare (0 yields 0)
 */
int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) {
  int64_t error = 0;
  int i;

  for (i = 0; i < block_size; i++) {
    const int this_diff = coeff[i] - dqcoeff[i];
    error += (int64_t)this_diff * this_diff;
  }

  return error > INT_MAX ? INT_MAX : (int)error;
}

269
static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
270
                              int plane, int block, PLANE_TYPE type,
271 272
                              ENTROPY_CONTEXT *A,
                              ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
273 274
                              TX_SIZE tx_size,
                              int y_blocks) {
275
  MACROBLOCKD *const xd = &mb->e_mbd;
276 277
  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
  int pt;
278
  int c = 0;
279 280
  int cost = 0, pad;
  const int *scan, *nb;
281 282 283
  const int eob = xd->plane[plane].eobs[block];
  const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff,
                                           block, 16);
284
  const int ref = mbmi->ref_frame != INTRA_FRAME;
285
  unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
286
      mb->token_costs[tx_size][type][ref];
287
  ENTROPY_CONTEXT above_ec, left_ec;
288
  TX_TYPE tx_type = DCT_DCT;
289

290 291 292 293 294 295 296 297
#if CONFIG_CODE_ZEROGROUP
  int last_nz_pos[3] = {-1, -1, -1};  // Encoder only
  int is_eoo_list[3] = {0, 0, 0};
  int is_eoo_negative[3] = {0, 0, 0};
  int is_last_zero[3] = {0, 0, 0};
  int o, rc, skip_coef_val;
  vp9_zpc_probs *zpc_probs;
  uint8_t token_cache_full[1024];
298
#endif
299
  const int segment_id = xd->mode_info_context->mbmi.segment_id;
300 301 302 303
  vp9_prob (*coef_probs)[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS]
                        [ENTROPY_NODES];
  int seg_eob, default_eob;
  uint8_t token_cache[1024];
304

305 306 307 308
#if CONFIG_CODE_ZEROGROUP
  vpx_memset(token_cache, UNKNOWN_TOKEN, sizeof(token_cache));
#endif

309
  // Check for consistency of tx_size with mode info
310
  assert((!type && !plane) || (type && plane));
311 312 313 314 315 316 317
  if (type == PLANE_TYPE_Y_WITH_DC) {
    assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
  } else {
    TX_SIZE tx_size_uv = get_uv_tx_size(xd);
    assert(tx_size == tx_size_uv);
  }

318
  switch (tx_size) {
319
    case TX_4X4: {
320
      tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
321
          get_tx_type_4x4(xd, block) : DCT_DCT;
322 323
      above_ec = A[0] != 0;
      left_ec = L[0] != 0;
324
      coef_probs = cm->fc.coef_probs_4x4;
325
      seg_eob = 16;
326
      scan = get_scan_4x4(tx_type);
327 328 329
#if CONFIG_CODE_ZEROGROUP
      zpc_probs = &cm->fc.zpc_probs_4x4;
#endif
Daniel Kang's avatar
Daniel Kang committed
330
      break;
331
    }
332 333
    case TX_8X8: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
334
      const int sz = 1 + b_width_log2(sb_type);
335
      const int x = block & ((1 << sz) - 1), y = block - x;
336 337
      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT;
338 339
      above_ec = (A[0] + A[1]) != 0;
      left_ec = (L[0] + L[1]) != 0;
340
      scan = get_scan_8x8(tx_type);
341
      coef_probs = cm->fc.coef_probs_8x8;
342
      seg_eob = 64;
343 344 345
#if CONFIG_CODE_ZEROGROUP
      zpc_probs = &cm->fc.zpc_probs_8x8;
#endif
Daniel Kang's avatar
Daniel Kang committed
346
      break;
347 348 349
    }
    case TX_16X16: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
350
      const int sz = 2 + b_width_log2(sb_type);
351
      const int x = block & ((1 << sz) - 1), y = block - x;
352 353
      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT;
354
      scan = get_scan_16x16(tx_type);
355
      coef_probs = cm->fc.coef_probs_16x16;
356
      seg_eob = 256;
357 358
      above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
359 360 361
#if CONFIG_CODE_ZEROGROUP
      zpc_probs = &cm->fc.zpc_probs_16x16;
#endif
Daniel Kang's avatar
Daniel Kang committed
362
      break;
363
    }
364 365
    case TX_32X32:
      scan = vp9_default_zig_zag1d_32x32;
366
      coef_probs = cm->fc.coef_probs_32x32;
367
      seg_eob = 1024;
368 369 370
      above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;

371 372 373
#if CONFIG_CODE_ZEROGROUP
      zpc_probs = &cm->fc.zpc_probs_32x32;
#endif
374
      break;
Daniel Kang's avatar
Daniel Kang committed
375
    default:
376
      abort();
Daniel Kang's avatar
Daniel Kang committed
377 378
      break;
  }
John Koleszar's avatar
John Koleszar committed
379
  assert(eob <= seg_eob);
380

381
  pt = combine_entropy_contexts(above_ec, left_ec);
382 383
  nb = vp9_get_coef_neighbors_handle(scan, &pad);
  default_eob = seg_eob;
384

385 386
  if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
    seg_eob = 0;
387

388 389 390 391
  /* sanity check to ensure that we do not have spurious non-zero q values */
  if (eob < seg_eob)
    assert(qcoeff_ptr[scan[eob]] == 0);

392 393 394 395 396 397 398 399 400 401
#if CONFIG_CODE_ZEROGROUP
  vpx_memset(token_cache_full, ZERO_TOKEN, sizeof(token_cache_full));
  for (c = 0; c < eob; ++c) {
    rc = scan[c];
    token_cache_full[rc] = vp9_dct_value_tokens_ptr[qcoeff_ptr[rc]].token;
    o = vp9_get_orientation(rc, tx_size);
    if (qcoeff_ptr[rc] != 0)
      last_nz_pos[o] = c;
  }
#endif
402
  {
403
    for (c = 0; c < eob; c++) {
404
      int v = qcoeff_ptr[scan[c]];
405
      int t = vp9_dct_value_tokens_ptr[v].token;
406 407 408 409 410 411 412 413 414 415 416 417 418 419 420
      int band = get_coef_band(scan, tx_size, c);
      if (c)
        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
#if CONFIG_CODE_ZEROGROUP
      rc = scan[c];
      o = vp9_get_orientation(rc, tx_size);
      skip_coef_val = (token_cache[rc] == ZERO_TOKEN || is_eoo_list[o]);
      if (!skip_coef_val) {
        cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
      } else {
        assert(v == 0);
      }
#else
      cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
#endif
421 422
      if (!c || token_cache[scan[c - 1]])
        cost += vp9_cost_bit(coef_probs[type][ref][band][pt][0], 1);
423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478
      token_cache[scan[c]] = t;
#if CONFIG_CODE_ZEROGROUP
      if (t == ZERO_TOKEN && !skip_coef_val) {
        int eoo = 0, use_eoo;
#if USE_ZPC_EOORIENT == 1
        use_eoo = vp9_use_eoo(c, seg_eob, scan, tx_size,
                              is_last_zero, is_eoo_list);
#else
        use_eoo = 0;
#endif
        if (use_eoo) {
          eoo = vp9_is_eoo(c, eob, scan, tx_size, qcoeff_ptr, last_nz_pos);
          if (eoo && is_eoo_negative[o]) eoo = 0;
          if (eoo) {
            int c_;
            int savings = 0;
            int zsaved = 0;
            savings = vp9_cost_bit((*zpc_probs)[ref]
                                   [coef_to_zpc_band(band)]
                                   [coef_to_zpc_ptok(pt)][0], 1) -
                      vp9_cost_bit((*zpc_probs)[ref]
                                   [coef_to_zpc_band(band)]
                                   [coef_to_zpc_ptok(pt)][0], 0);
            for (c_ = c + 1; c_ < eob; ++c_) {
              if (o == vp9_get_orientation(scan[c_], tx_size)) {
                int pt_ = vp9_get_coef_context(scan, nb, pad,
                                               token_cache_full, c_,
                                               default_eob);
                int band_ = get_coef_band(scan, tx_size, c_);
                assert(token_cache_full[scan[c_]] == ZERO_TOKEN);
                if (!c_ || token_cache_full[scan[c_ - 1]])
                  savings += vp9_cost_bit(
                      coef_probs[type][ref][band_][pt_][0], 1);
                savings += vp9_cost_bit(
                    coef_probs[type][ref][band_][pt_][1], 0);
                zsaved++;
              }
            }
            if (savings < 0) {
            // if (zsaved < ZPC_ZEROSSAVED_EOO) {
              eoo = 0;
              is_eoo_negative[o] = 1;
            }
          }
        }
        if (use_eoo) {
          cost += vp9_cost_bit((*zpc_probs)[ref]
                                           [coef_to_zpc_band(band)]
                                           [coef_to_zpc_ptok(pt)][0], !eoo);
          if (eoo) {
            assert(is_eoo_list[o] == 0);
            is_eoo_list[o] = 1;
          }
        }
      }
      is_last_zero[o] = (t == ZERO_TOKEN);
479
#endif
480
    }
481 482 483 484 485 486 487
    if (c < seg_eob) {
      if (c)
        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
      cost += mb->token_costs[tx_size][type][ref]
          [get_coef_band(scan, tx_size, c)]
          [pt][DCT_EOB_TOKEN];
    }
488 489
  }

490 491 492
  // is eob first coefficient;
  for (pt = 0; pt < (1 << tx_size); pt++) {
    A[pt] = L[pt] = c > 0;
493
  }
494

495 496 497
  return cost;
}

498
/* Picks the transform size for the current block from per-size rate,
 * distortion and skip results, and fills the per-transform-mode RD cache.
 *
 * r[n][0]        in: rate for transform size n without the size signalling
 * r[n][1]        out: the same rate plus the cost of signalling size n
 * d[n], s[n]     in: distortion and skip flag for transform size n
 * rate, distortion, skip
 *                out: the values belonging to the chosen transform size
 * txfm_cache     out: RD cost under each transform mode
 * max_txfm_size  largest transform size that was evaluated; only entries
 *                TX_4X4..max_txfm_size of r/d/s are read
 *
 * The chosen size is written to mbmi->txfm_size. TX_8X8 entries are read
 * unconditionally, so max_txfm_size is expected to be at least TX_8X8.
 */
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                     int (*r)[2], int *rate,
                                     int *d, int *distortion,
                                     int *s, int *skip,
                                     int64_t txfm_cache[NB_TXFM_MODES],
                                     TX_SIZE max_txfm_size) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  vp9_prob skip_prob = vp9_get_pred_prob(cm, xd, PRED_MBSKIP);
  int64_t rd[TX_SIZE_MAX_SB][2];
  int n, m;
  int s0, s1;

  // Add the cost of signalling each size: one "one" bit per smaller size
  // and a terminating "zero" bit, which is omitted for the largest size.
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    r[n][1] = r[n][0];
    for (m = 0; m <= n - (n == max_txfm_size); m++) {
      if (m == n)
        r[n][1] += vp9_cost_zero(cm->prob_tx[m]);
      else
        r[n][1] += vp9_cost_one(cm->prob_tx[m]);
    }
  }

  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);

  // RD cost per size; a skipped block only pays for the skip flag.
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }
  }

  // Largest size allowed by the frame's transform mode, or the strictly
  // best size when the mode is TX_MODE_SELECT.
  if (max_txfm_size == TX_32X32 &&
      (cm->txfm_mode == ALLOW_32X32 ||
       (cm->txfm_mode == TX_MODE_SELECT &&
        rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
        rd[TX_32X32][1] < rd[TX_4X4][1]))) {
    mbmi->txfm_size = TX_32X32;
  } else if (max_txfm_size >= TX_16X16 &&
             (cm->txfm_mode == ALLOW_16X16 ||
              cm->txfm_mode == ALLOW_32X32 ||
              (cm->txfm_mode == TX_MODE_SELECT &&
               rd[TX_16X16][1] < rd[TX_8X8][1] &&
               rd[TX_16X16][1] < rd[TX_4X4][1]))) {
    mbmi->txfm_size = TX_16X16;
  } else if (cm->txfm_mode == ALLOW_8X8 ||
             cm->txfm_mode == ALLOW_16X16 ||
             cm->txfm_mode == ALLOW_32X32 ||
           (cm->txfm_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) {
    mbmi->txfm_size = TX_8X8;
  } else {
    mbmi->txfm_size = TX_4X4;
  }

  *distortion = d[mbmi->txfm_size];
  *rate       = r[mbmi->txfm_size][cm->txfm_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->txfm_size];

  txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
  txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
  txfm_cache[ALLOW_16X16] = rd[MIN(max_txfm_size, TX_16X16)][0];
  txfm_cache[ALLOW_32X32] = rd[MIN(max_txfm_size, TX_32X32)][0];
  if (max_txfm_size == TX_32X32 &&
      rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1])
    txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  else if (max_txfm_size >= TX_16X16 &&
           rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])
    txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
  else
    txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
                                 rd[TX_4X4][1] : rd[TX_8X8][1];
}

577 578
/* Returns the sum of squared differences between the first `block_size`
 * entries of `coeff` and `dqcoeff`, right-shifted by `shift` and saturated
 * to INT_MAX.
 *
 * The sum is kept in 64 bits so that the shift is applied to the exact
 * total before saturation.
 */
static int block_error(int16_t *coeff, int16_t *dqcoeff,
                       int block_size, int shift) {
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    const int this_diff = coeff[i] - dqcoeff[i];
    // Widen before multiplying: |this_diff| can reach 65535.
    error += (int64_t)this_diff * this_diff;
  }
  error >>= shift;

  return error > INT_MAX ? INT_MAX : (int)error;
}

591 592
/* Returns block_error() over plane 0 of a block of size `bsize`, comparing
 * x->plane[0].coeff with the plane's dqcoeff. The coefficient count is
 * 16 << (b_width_log2(bsize) + b_height_log2(bsize)); `shift` is passed
 * through to block_error(). */
static int block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
  const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
  return block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff,
                     16 << (bwl + bhl), shift);
}
596

597 598 599 600
/* Returns the combined block_error() of planes 1..MAX_MB_PLANE-1 for a
 * block of size `bsize`, right-shifted by `shift` and saturated to INT_MAX.
 *
 * Each plane's coefficient count is reduced according to its subsampling.
 * Per-plane errors are taken unshifted (each one already saturated to
 * INT_MAX by block_error()) and the shift is applied once to the total.
 */
static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
  const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
  int64_t sum = 0;
  int plane;

  for (plane = 1; plane < MAX_MB_PLANE; plane++) {
    const int subsampling = x->e_mbd.plane[plane].subsampling_x +
                            x->e_mbd.plane[plane].subsampling_y;
    sum += block_error(x->plane[plane].coeff, x->e_mbd.plane[plane].dqcoeff,
                       16 << (bwl + bhl - subsampling), 0);
  }
  sum >>= shift;
  return sum > INT_MAX ? INT_MAX : (int)sum;
}

612 613
/* Returns the total coefficient cost of one plane of a block, summing
 * cost_coeffs() over every transform block of size `tx_size` in the plane.
 *
 * The plane's above/left entropy contexts are copied into local arrays
 * first, so the contexts stored in `xd` are not modified; cost_coeffs()
 * updates only the local copies as it walks the transform blocks.
 */
static int rdcost_plane(VP9_COMMON *const cm, MACROBLOCK *x,
                        int plane, BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
  const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y;
  const int bw = 1 << bwl, bh = 1 << bhl;
  // Local context copies; bw and bh must not exceed 16 entries.
  ENTROPY_CONTEXT t_above[16], t_left[16];
  int block, cost;

  vpx_memcpy(&t_above, xd->plane[plane].above_context,
             sizeof(ENTROPY_CONTEXT) * bw);
  vpx_memcpy(&t_left,  xd->plane[plane].left_context,
             sizeof(ENTROPY_CONTEXT) * bh);

  cost = 0;
  // Each transform block advances `block` by 1 << (tx_size * 2).
  for (block = 0; block < bw * bh; block += 1 << (tx_size * 2)) {
    int x_idx, y_idx;

    txfrm_block_to_raster_xy(xd, bsize, plane, block, tx_size * 2,
                             &x_idx, &y_idx);

    cost += cost_coeffs(cm, x, plane, block, xd->plane[plane].plane_type,
                        t_above + x_idx, t_left + y_idx,
                        tx_size, bw * bh);
  }

  return cost;
}

641 642 643
/* Returns the sum of rdcost_plane() over planes 1..MAX_MB_PLANE-1 for the
 * given block size and transform size. */
static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x,
                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  int cost = 0, plane;

  for (plane = 1; plane < MAX_MB_PLANE; plane++) {
    cost += rdcost_plane(cm, x, plane, bsize, tx_size);
  }
  return cost;
}

651 652 653
/* Computes rate, distortion and skippability of plane 0 for one transform
 * size.
 *
 * Sets mbmi.txfm_size to `tx_size` (and leaves it set), runs
 * vp9_xform_quant_sby(), then writes:
 *   *distortion  block_error_sby() with shift 0 for TX_32X32, 2 otherwise
 *   *rate        rdcost_plane() for plane 0
 *   *skippable   vp9_sby_is_skippable()
 */
static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
                                     int *rate, int *distortion, int *skippable,
                                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  xd->mode_info_context->mbmi.txfm_size = tx_size;
  vp9_xform_quant_sby(cm, x, bsize);

  *distortion = block_error_sby(x, bsize, tx_size == TX_32X32 ? 0 : 2);
  *rate       = rdcost_plane(cm, x, 0, bsize, tx_size);
  *skippable  = vp9_sby_is_skippable(xd, bsize);
}

663 664
static void super_block_yrd(VP9_COMP *cpi,
                            MACROBLOCK *x, int *rate, int *distortion,
665
                            int *skip, BLOCK_SIZE_TYPE bs,
666
                            int64_t txfm_cache[NB_TXFM_MODES]) {
667 668
  VP9_COMMON *const cm = &cpi->common;
  int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB];
Ronald S. Bultje's avatar
Ronald S. Bultje committed
669

670
  vp9_subtract_sby(x, bs);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
671

672
  if (bs >= BLOCK_SIZE_SB32X32)
673 674
    super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32],
                             bs, TX_32X32);
675
  if (bs >= BLOCK_SIZE_MB16X16)
676 677 678 679 680 681
    super_block_yrd_for_txfm(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16],
                             bs, TX_16X16);
  super_block_yrd_for_txfm(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], bs,
                           TX_8X8);
  super_block_yrd_for_txfm(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], bs,
                           TX_4X4);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
682 683

  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
684
                           TX_32X32 - (bs < BLOCK_SIZE_SB32X32)
Jingning Han's avatar
Jingning Han committed
685
                           - (bs < BLOCK_SIZE_MB16X16));
Ronald S. Bultje's avatar
Ronald S. Bultje committed
686
}
Ronald S. Bultje's avatar
Ronald S. Bultje committed
687

688 689
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
                                     B_PREDICTION_MODE *best_mode,
690 691 692 693
                                     int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
                                     int *bestdistortion) {
John Koleszar's avatar
John Koleszar committed
694
  B_PREDICTION_MODE mode;
Deb Mukherjee's avatar
Deb Mukherjee committed
695
  MACROBLOCKD *xd = &x->e_mbd;
696
  int64_t best_rd = INT64_MAX;
John Koleszar's avatar
John Koleszar committed
697 698
  int rate = 0;
  int distortion;
699
  VP9_COMMON *const cm = &cpi->common;
John Koleszar's avatar
John Koleszar committed
700 701
  const int src_stride = x->plane[0].src.stride;
  uint8_t* const src =
702 703 704
      raster_block_offset_uint8(xd,
                                BLOCK_SIZE_SB8X8,
                                0, ib,
John Koleszar's avatar
John Koleszar committed
705
                                x->plane[0].src.buf, src_stride);
706
  int16_t* const src_diff =
707 708 709
      raster_block_offset_int16(xd,
                                BLOCK_SIZE_SB8X8,
                                0, ib,
710
                                x->plane[0].src_diff);
John Koleszar's avatar
John Koleszar committed
711
  int16_t* const diff =
712 713 714
      raster_block_offset_int16(xd,
                                BLOCK_SIZE_SB8X8,
                                0, ib,
John Koleszar's avatar
John Koleszar committed
715
                                xd->plane[0].diff);
John Koleszar's avatar
John Koleszar committed
716
  int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, ib, 16);
717
  uint8_t* const dst =
718 719 720
      raster_block_offset_uint8(xd,
                                BLOCK_SIZE_SB8X8,
                                0, ib,
721
                                xd->plane[0].dst.buf, xd->plane[0].dst.stride);
John Koleszar's avatar
John Koleszar committed
722 723
  ENTROPY_CONTEXT ta = *a, tempa = *a;
  ENTROPY_CONTEXT tl = *l, templ = *l;
Deb Mukherjee's avatar
Deb Mukherjee committed
724 725
  TX_TYPE tx_type = DCT_DCT;
  TX_TYPE best_tx_type = DCT_DCT;
John Koleszar's avatar
John Koleszar committed
726 727 728 729 730
  /*
   * The predictor buffer is a 2d buffer with a stride of 16.  Create
   * a temp buffer that meets the stride requirements, but we are only
   * interested in the left 4x4 block
   * */
731
  DECLARE_ALIGNED_ARRAY(16, int16_t, best_dqcoeff, 16);
John Koleszar's avatar
John Koleszar committed
732

Jingning Han's avatar
Jingning Han committed
733
  assert(ib < 4);
734

735
  xd->mode_info_context->mbmi.txfm_size = TX_4X4;
736
  for (mode = B_DC_PRED; mode < LEFT4X4; mode++) {
737 738
    int64_t this_rd;
    int ratey;
739

Scott LaVarnway's avatar
Scott LaVarnway committed
740
    xd->mode_info_context->bmi[ib].as_mode.first = mode;
741
    rate = bmode_costs[mode];
John Koleszar's avatar
John Koleszar committed
742

743 744 745
    vp9_intra4x4_predict(xd, ib,
                         BLOCK_SIZE_SB8X8,
                         mode, dst, xd->plane[0].dst.stride);
Jingning Han's avatar
Jingning Han committed
746
    vp9_subtract_block(4, 4, src_diff, 8,
John Koleszar's avatar
John Koleszar committed
747
                       src, src_stride,
748
                       dst, xd->plane[0].dst.stride);
John Koleszar's avatar
John Koleszar committed
749

Scott LaVarnway's avatar
Scott LaVarnway committed
750
    xd->mode_info_context->bmi[ib].as_mode.first = mode;
John Koleszar's avatar
John Koleszar committed
751
    tx_type = get_tx_type_4x4(xd, ib);
752
    if (tx_type != DCT_DCT) {
Jingning Han's avatar
Jingning Han committed
753
      vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
754
      x->quantize_b_4x4(x, ib, tx_type, 16);
755
    } else {
Jingning Han's avatar
Jingning Han committed
756
      x->fwd_txm4x4(src_diff, coeff, 16);
757
      x->quantize_b_4x4(x, ib, tx_type, 16);
758
    }
John Koleszar's avatar
John Koleszar committed
759

760 761
    tempa = ta;
    templ = tl;
Jingning Han's avatar
Jingning Han committed
762

763
    ratey = cost_coeffs(cm, x, 0, ib,
John Koleszar's avatar
John Koleszar committed
764
                        PLANE_TYPE_Y_WITH_DC, &tempa, &templ, TX_4X4, 16);
765
    rate += ratey;
John Koleszar's avatar
John Koleszar committed
766
    distortion = vp9_block_error(coeff,
767 768
                                 BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16),
                                 16) >> 2;
Jingning Han's avatar
Jingning Han committed
769

770
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
Jingning Han's avatar
Jingning Han committed
771

772 773 774 775 776 777 778 779 780
    if (this_rd < best_rd) {
      *bestrate = rate;
      *bestratey = ratey;
      *bestdistortion = distortion;
      best_rd = this_rd;
      *best_mode = mode;
      best_tx_type = tx_type;
      *a = tempa;
      *l = templ;
781
      vpx_memcpy(best_dqcoeff, BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16), 32);
John Koleszar's avatar
John Koleszar committed
782
    }
John Koleszar's avatar
John Koleszar committed
783
  }
Scott LaVarnway's avatar
Scott LaVarnway committed
784 785
  xd->mode_info_context->bmi[ib].as_mode.first =
    (B_PREDICTION_MODE)(*best_mode);
786

Jingning Han's avatar
Jingning Han committed
787
  // inverse transform
Deb Mukherjee's avatar
Deb Mukherjee committed
788
  if (best_tx_type != DCT_DCT)
Jingning Han's avatar
Jingning Han committed
789
    vp9_short_iht4x4(best_dqcoeff, diff, 8, best_tx_type);
790
  else
Jingning Han's avatar
Jingning Han committed
791
    xd->inv_txm4x4(best_dqcoeff, diff, 16);
Jingning Han's avatar
Jingning Han committed
792

793 794 795
  vp9_intra4x4_predict(xd, ib,