/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
16

17
#include "vp9/common/vp9_pragmas.h"
18 19 20 21 22
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
23 24 25 26 27
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
28 29 30 31 32 33
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
34
#include "vpx_mem/vpx_mem.h"
35 36 37 38 39
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
40
#include "vp9_rtcd.h"
41
#include "vp9/common/vp9_mvref_common.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
42
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
43

44 45
/* NOTE(review): presumably a sentinel bit pattern for an unset motion
 * vector; it is not referenced in the portion of the file reviewed here. */
#define INVALID_MV 0x80008000

/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

John Koleszar's avatar
John Koleszar committed
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
/* Threshold table with 17 entries (indices 0..16).
 * NOTE(review): not referenced in the portion of the file reviewed here;
 * the meaning of the index is defined by its users. */
static const int auto_speed_thresh[17] = {
  1000, 200, 150, 130, 150, 125, 120, 115,
  115,  115, 115, 115, 115, 115, 115, 115,
  105
};

69
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
70 71
  {ZEROMV,    LAST_FRAME,   NONE},
  {DC_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
72

73 74
  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARMV,    LAST_FRAME,   NONE},
John Koleszar's avatar
John Koleszar committed
75

76 77
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
78

79 80
  {ZEROMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
81

82 83
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
84

85 86 87 88 89 90 91 92
  {V_PRED,    INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
93

94
  {TM_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
95

96 97 98
  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
99

100 101 102
  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},
103

Yaowu Xu's avatar
Yaowu Xu committed
104
  {I4X4_PRED,    INTRA_FRAME,  NONE},
105

John Koleszar's avatar
John Koleszar committed
106 107 108 109
  /* compound prediction modes */
  {ZEROMV,    LAST_FRAME,   GOLDEN_FRAME},
  {NEARESTMV, LAST_FRAME,   GOLDEN_FRAME},
  {NEARMV,    LAST_FRAME,   GOLDEN_FRAME},
110

John Koleszar's avatar
John Koleszar committed
111 112 113
  {ZEROMV,    ALTREF_FRAME, LAST_FRAME},
  {NEARESTMV, ALTREF_FRAME, LAST_FRAME},
  {NEARMV,    ALTREF_FRAME, LAST_FRAME},
114

John Koleszar's avatar
John Koleszar committed
115 116 117
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
118

John Koleszar's avatar
John Koleszar committed
119 120 121
  {NEWMV,     LAST_FRAME,   GOLDEN_FRAME},
  {NEWMV,     ALTREF_FRAME, LAST_FRAME  },
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},
122

John Koleszar's avatar
John Koleszar committed
123 124
  {SPLITMV,   LAST_FRAME,   GOLDEN_FRAME},
  {SPLITMV,   ALTREF_FRAME, LAST_FRAME  },
125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},

#if CONFIG_COMP_INTERINTRA_PRED
  /* compound inter-intra prediction */
  {ZEROMV,    LAST_FRAME,   INTRA_FRAME},
  {NEARESTMV, LAST_FRAME,   INTRA_FRAME},
  {NEARMV,    LAST_FRAME,   INTRA_FRAME},
  {NEWMV,     LAST_FRAME,   INTRA_FRAME},

  {ZEROMV,    GOLDEN_FRAME,   INTRA_FRAME},
  {NEARESTMV, GOLDEN_FRAME,   INTRA_FRAME},
  {NEARMV,    GOLDEN_FRAME,   INTRA_FRAME},
  {NEWMV,     GOLDEN_FRAME,   INTRA_FRAME},

  {ZEROMV,    ALTREF_FRAME,   INTRA_FRAME},
  {NEARESTMV, ALTREF_FRAME,   INTRA_FRAME},
  {NEARMV,    ALTREF_FRAME,   INTRA_FRAME},
  {NEWMV,     ALTREF_FRAME,   INTRA_FRAME},
#endif
John Koleszar's avatar
John Koleszar committed
144 145
};

146 147
static void fill_token_costs(vp9_coeff_count *c,
                             vp9_coeff_probs *p,
148
                             TX_SIZE tx_size) {
149
  int i, j, k, l;
John Koleszar's avatar
John Koleszar committed
150

151
  for (i = 0; i < BLOCK_TYPES; i++)
152 153
    for (j = 0; j < REF_TYPES; j++)
      for (k = 0; k < COEF_BANDS; k++)
154 155
        for (l = 0; l < PREV_COEF_CONTEXTS; l++)
          vp9_cost_tokens_skip((int *)c[i][j][k][l], p[i][j][k][l],
156 157 158
                               vp9_coef_tree);
}

159 160 161 162
/* Per-index adjustment factor (in 1/16ths) applied to RDMULT; indexed by
 * the two-pass next_iiratio clamped to 31. Only the first five entries are
 * non-zero; the remaining 27 are zero via implicit initialization. */
static int rd_iifactor[32] = { 4, 4, 3, 2, 1 };
John Koleszar's avatar
John Koleszar committed
163

164
// 3* dc_qlookup[Q]*dc_qlookup[Q];
165

166
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
167 168 169
/* SAD-per-bit lookup tables, one entry per quantizer index. They are filled
 * by vp9_init_me_luts() and read by vp9_initialize_me_consts(). */
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

/*
 * Populates both SAD-per-bit tables for every quantizer index in
 * [0, QINDEX_RANGE).
 */
void vp9_init_me_luts() {
  int qindex;

  // Initialize the sad lut tables using a formulaic calculation for now
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (qindex = 0; qindex < QINDEX_RANGE; ++qindex) {
    // Each entry is a linear function of the real-valued quantizer,
    // truncated to an integer.
    sad_per_bit16lut[qindex] =
        (int)((0.0418 * vp9_convert_qindex_to_q(qindex)) + 2.4107);
    sad_per_bit4lut[qindex] =
        (int)(0.063 * vp9_convert_qindex_to_q(qindex) + 2.742);
  }
}
John Koleszar's avatar
John Koleszar committed
182

183
/*
 * Returns the base rate-distortion multiplier for a quantizer index:
 * (11 * q^2) / 4, where q is vp9_dc_quant(qindex, 0).
 */
static int compute_rd_mult(int qindex) {
  const int dc_q = vp9_dc_quant(qindex, 0);
  const int scaled = 11 * dc_q * dc_q;

  return scaled >> 2;
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
188 189 190
/*
 * Copies the SAD-per-bit constants for `qindex` from the lookup tables into
 * the encoder's macroblock state (cpi->mb).
 *
 * qindex is used directly as a table index; the caller must pass a value in
 * [0, QINDEX_RANGE).
 */
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  const int per_bit16 = sad_per_bit16lut[qindex];
  const int per_bit4 = sad_per_bit4lut[qindex];

  cpi->mb.sadperbit16 = per_bit16;
  cpi->mb.sadperbit4 = per_bit4;
}

193

Dmitry Kovalev's avatar
Dmitry Kovalev committed
194
/*
 * Recomputes the rate-distortion constants held in `cpi` for the given
 * quantizer index. In order, it:
 *   1. clamps qindex to [0, MAXQ] and derives cpi->RDMULT from it, boosted
 *      by rd_iifactor[] on non-key frames in pass 2;
 *   2. derives cpi->mb.errorperbit (never left at zero);
 *   3. refreshes the speed features, then scales the per-mode thresholds in
 *      cpi->sf.thresh_mult[] into cpi->rd_threshes[] and
 *      cpi->rd_baseline_thresh[], choosing RDDIV / RDMULT scaling based on
 *      the size of RDMULT;
 *   4. rebuilds the token, partition, mode and (on non-key frames) motion
 *      vector cost tables from the current frame context.
 */
void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
  int thresh_q;
  int mode;
  int ctx;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  qindex = clamp(qindex, 0, MAXQ);

  cpi->RDMULT = compute_rd_mult(qindex);
  if (cpi->pass == 2 && cpi->common.frame_type != KEY_FRAME) {
    // Ratios above 31 share the last table entry.
    cpi->RDMULT +=
        (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio > 31
                                       ? 31
                                       : cpi->twopass.next_iiratio]) >> 4;
  }

  cpi->mb.errorperbit = cpi->RDMULT >> 6;
  if (cpi->mb.errorperbit == 0)
    cpi->mb.errorperbit = 1;

  vp9_set_speed_features(cpi);

  // Threshold scale: ((dc_quant / 4) ^ 1.25) * 4, floored at 8.
  thresh_q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25) << 2;
  if (thresh_q < 8)
    thresh_q = 8;

  if (cpi->RDMULT > 1000) {
    // Large multiplier: divide it (and the thresholds) by 100.
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;

    for (mode = 0; mode < MAX_MODES; mode++) {
      if (cpi->sf.thresh_mult[mode] < INT_MAX)
        cpi->rd_threshes[mode] = cpi->sf.thresh_mult[mode] * thresh_q / 100;
      else
        cpi->rd_threshes[mode] = INT_MAX;
      cpi->rd_baseline_thresh[mode] = cpi->rd_threshes[mode];
    }
  } else {
    cpi->RDDIV = 100;

    for (mode = 0; mode < MAX_MODES; mode++) {
      // Saturate rather than let the product exceed INT_MAX.
      if (cpi->sf.thresh_mult[mode] < (INT_MAX / thresh_q))
        cpi->rd_threshes[mode] = cpi->sf.thresh_mult[mode] * thresh_q;
      else
        cpi->rd_threshes[mode] = INT_MAX;
      cpi->rd_baseline_thresh[mode] = cpi->rd_threshes[mode];
    }
  }

  fill_token_costs(cpi->mb.token_costs[TX_4X4],
                   cpi->common.fc.coef_probs_4x4, TX_4X4);
  fill_token_costs(cpi->mb.token_costs[TX_8X8],
                   cpi->common.fc.coef_probs_8x8, TX_8X8);
  fill_token_costs(cpi->mb.token_costs[TX_16X16],
                   cpi->common.fc.coef_probs_16x16, TX_16X16);
  fill_token_costs(cpi->mb.token_costs[TX_32X32],
                   cpi->common.fc.coef_probs_32x32, TX_32X32);

  for (ctx = 0; ctx < NUM_PARTITION_CONTEXTS; ctx++) {
    vp9_cost_tokens(cpi->mb.partition_cost[ctx],
                    cpi->common.fc.partition_prob[ctx],
                    vp9_partition_tree);
  }

  /*rough estimate for costing*/
  cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4;
  vp9_init_mode_costs(cpi);

  if (cpi->common.frame_type != KEY_FRAME) {
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cpi->mb.e_mbd.allow_high_precision_mv ?
        cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cpi->common.fc.nmvc,
        cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
  }
}

276
/*
 * Returns the sum of squared differences between `coeff` and `dqcoeff`
 * over the first `block_size` entries.
 *
 * The sum is accumulated in 64 bits and saturated to INT_MAX on return.
 * A single difference of two int16_t values can reach 65535, whose square
 * already exceeds INT_MAX, so squaring or accumulating in plain `int`
 * would be signed overflow (undefined behaviour).
 *
 * coeff      - original transform coefficients.
 * dqcoeff    - dequantized coefficients.
 * block_size - number of entries to compare; 0 or less yields 0.
 */
int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) {
  int64_t error = 0;
  int i;

  for (i = 0; i < block_size; i++) {
    const int this_diff = coeff[i] - dqcoeff[i];
    error += (int64_t)this_diff * this_diff;
  }

  return error > INT_MAX ? INT_MAX : (int)error;
}

287
static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
288
                              int plane, int block, PLANE_TYPE type,
289 290
                              ENTROPY_CONTEXT *A,
                              ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
291 292
                              TX_SIZE tx_size,
                              int y_blocks) {
293
  MACROBLOCKD *const xd = &mb->e_mbd;
294 295
  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
  int pt;
296
  int c = 0;
297 298
  int cost = 0, pad;
  const int *scan, *nb;
299 300 301
  const int eob = xd->plane[plane].eobs[block];
  const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff,
                                           block, 16);
302
  const int ref = mbmi->ref_frame != INTRA_FRAME;
303
  unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
304
      mb->token_costs[tx_size][type][ref];
305
  ENTROPY_CONTEXT above_ec, left_ec;
306
  TX_TYPE tx_type = DCT_DCT;
307

308 309 310 311 312 313 314 315
#if CONFIG_CODE_ZEROGROUP
  int last_nz_pos[3] = {-1, -1, -1};  // Encoder only
  int is_eoo_list[3] = {0, 0, 0};
  int is_eoo_negative[3] = {0, 0, 0};
  int is_last_zero[3] = {0, 0, 0};
  int o, rc, skip_coef_val;
  vp9_zpc_probs *zpc_probs;
  uint8_t token_cache_full[1024];
316
#endif
317
  const int segment_id = xd->mode_info_context->mbmi.segment_id;
318 319 320 321
  vp9_prob (*coef_probs)[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS]
                        [ENTROPY_NODES];
  int seg_eob, default_eob;
  uint8_t token_cache[1024];
322

323 324 325 326
#if CONFIG_CODE_ZEROGROUP
  vpx_memset(token_cache, UNKNOWN_TOKEN, sizeof(token_cache));
#endif

327
  // Check for consistency of tx_size with mode info
328
  assert((!type && !plane) || (type && plane));
329 330 331 332 333 334 335
  if (type == PLANE_TYPE_Y_WITH_DC) {
    assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
  } else {
    TX_SIZE tx_size_uv = get_uv_tx_size(xd);
    assert(tx_size == tx_size_uv);
  }

336
  switch (tx_size) {
337
    case TX_4X4: {
338
      tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
339
          get_tx_type_4x4(xd, block) : DCT_DCT;
340 341
      above_ec = A[0] != 0;
      left_ec = L[0] != 0;
342
      coef_probs = cm->fc.coef_probs_4x4;
343
      seg_eob = 16;
344
      scan = get_scan_4x4(tx_type);
345 346 347
#if CONFIG_CODE_ZEROGROUP
      zpc_probs = &cm->fc.zpc_probs_4x4;
#endif
Daniel Kang's avatar
Daniel Kang committed
348
      break;
349
    }
350 351
    case TX_8X8: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
352
      const int sz = 1 + b_width_log2(sb_type);
353
      const int x = block & ((1 << sz) - 1), y = block - x;
354 355
      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT;
356 357
      above_ec = (A[0] + A[1]) != 0;
      left_ec = (L[0] + L[1]) != 0;
358
      scan = get_scan_8x8(tx_type);
359
      coef_probs = cm->fc.coef_probs_8x8;
360
      seg_eob = 64;
361 362 363
#if CONFIG_CODE_ZEROGROUP
      zpc_probs = &cm->fc.zpc_probs_8x8;
#endif
Daniel Kang's avatar
Daniel Kang committed
364
      break;
365 366 367
    }
    case TX_16X16: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
368
      const int sz = 2 + b_width_log2(sb_type);
369
      const int x = block & ((1 << sz) - 1), y = block - x;
370 371
      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT;
372
      scan = get_scan_16x16(tx_type);
373
      coef_probs = cm->fc.coef_probs_16x16;
374
      seg_eob = 256;
375 376
      above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
377 378 379
#if CONFIG_CODE_ZEROGROUP
      zpc_probs = &cm->fc.zpc_probs_16x16;
#endif
Daniel Kang's avatar
Daniel Kang committed
380
      break;
381
    }
382 383
    case TX_32X32:
      scan = vp9_default_zig_zag1d_32x32;
384
      coef_probs = cm->fc.coef_probs_32x32;
385
      seg_eob = 1024;
386 387 388
      above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;

389 390 391
#if CONFIG_CODE_ZEROGROUP
      zpc_probs = &cm->fc.zpc_probs_32x32;
#endif
392
      break;
Daniel Kang's avatar
Daniel Kang committed
393
    default:
394
      abort();
Daniel Kang's avatar
Daniel Kang committed
395 396
      break;
  }
John Koleszar's avatar
John Koleszar committed
397
  assert(eob <= seg_eob);
398

399
  pt = combine_entropy_contexts(above_ec, left_ec);
400 401
  nb = vp9_get_coef_neighbors_handle(scan, &pad);
  default_eob = seg_eob;
402

403 404
  if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
    seg_eob = 0;
405

406 407 408 409
  /* sanity check to ensure that we do not have spurious non-zero q values */
  if (eob < seg_eob)
    assert(qcoeff_ptr[scan[eob]] == 0);

410 411 412 413 414 415 416 417 418 419
#if CONFIG_CODE_ZEROGROUP
  vpx_memset(token_cache_full, ZERO_TOKEN, sizeof(token_cache_full));
  for (c = 0; c < eob; ++c) {
    rc = scan[c];
    token_cache_full[rc] = vp9_dct_value_tokens_ptr[qcoeff_ptr[rc]].token;
    o = vp9_get_orientation(rc, tx_size);
    if (qcoeff_ptr[rc] != 0)
      last_nz_pos[o] = c;
  }
#endif
420
  {
421
    for (c = 0; c < eob; c++) {
422
      int v = qcoeff_ptr[scan[c]];
423
      int t = vp9_dct_value_tokens_ptr[v].token;
424 425 426 427 428 429 430 431 432 433 434 435 436 437 438
      int band = get_coef_band(scan, tx_size, c);
      if (c)
        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
#if CONFIG_CODE_ZEROGROUP
      rc = scan[c];
      o = vp9_get_orientation(rc, tx_size);
      skip_coef_val = (token_cache[rc] == ZERO_TOKEN || is_eoo_list[o]);
      if (!skip_coef_val) {
        cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
      } else {
        assert(v == 0);
      }
#else
      cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
#endif
439 440
      if (!c || token_cache[scan[c - 1]])
        cost += vp9_cost_bit(coef_probs[type][ref][band][pt][0], 1);
441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496
      token_cache[scan[c]] = t;
#if CONFIG_CODE_ZEROGROUP
      if (t == ZERO_TOKEN && !skip_coef_val) {
        int eoo = 0, use_eoo;
#if USE_ZPC_EOORIENT == 1
        use_eoo = vp9_use_eoo(c, seg_eob, scan, tx_size,
                              is_last_zero, is_eoo_list);
#else
        use_eoo = 0;
#endif
        if (use_eoo) {
          eoo = vp9_is_eoo(c, eob, scan, tx_size, qcoeff_ptr, last_nz_pos);
          if (eoo && is_eoo_negative[o]) eoo = 0;
          if (eoo) {
            int c_;
            int savings = 0;
            int zsaved = 0;
            savings = vp9_cost_bit((*zpc_probs)[ref]
                                   [coef_to_zpc_band(band)]
                                   [coef_to_zpc_ptok(pt)][0], 1) -
                      vp9_cost_bit((*zpc_probs)[ref]
                                   [coef_to_zpc_band(band)]
                                   [coef_to_zpc_ptok(pt)][0], 0);
            for (c_ = c + 1; c_ < eob; ++c_) {
              if (o == vp9_get_orientation(scan[c_], tx_size)) {
                int pt_ = vp9_get_coef_context(scan, nb, pad,
                                               token_cache_full, c_,
                                               default_eob);
                int band_ = get_coef_band(scan, tx_size, c_);
                assert(token_cache_full[scan[c_]] == ZERO_TOKEN);
                if (!c_ || token_cache_full[scan[c_ - 1]])
                  savings += vp9_cost_bit(
                      coef_probs[type][ref][band_][pt_][0], 1);
                savings += vp9_cost_bit(
                    coef_probs[type][ref][band_][pt_][1], 0);
                zsaved++;
              }
            }
            if (savings < 0) {
            // if (zsaved < ZPC_ZEROSSAVED_EOO) {
              eoo = 0;
              is_eoo_negative[o] = 1;
            }
          }
        }
        if (use_eoo) {
          cost += vp9_cost_bit((*zpc_probs)[ref]
                                           [coef_to_zpc_band(band)]
                                           [coef_to_zpc_ptok(pt)][0], !eoo);
          if (eoo) {
            assert(is_eoo_list[o] == 0);
            is_eoo_list[o] = 1;
          }
        }
      }
      is_last_zero[o] = (t == ZERO_TOKEN);
497
#endif
498
    }
499 500 501 502 503 504 505
    if (c < seg_eob) {
      if (c)
        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
      cost += mb->token_costs[tx_size][type][ref]
          [get_coef_band(scan, tx_size, c)]
          [pt][DCT_EOB_TOKEN];
    }
506 507
  }

508 509 510
  // is eob first coefficient;
  for (pt = 0; pt < (1 << tx_size); pt++) {
    A[pt] = L[pt] = c > 0;
511
  }
512

513 514 515
  return cost;
}

516
/*
 * Picks the transform size for the current block from per-size
 * rate / distortion / skip results and reports the winner's figures.
 *
 * r[n][0]       - in: rate for transform size n without the size signal.
 * r[n][1]       - out: r[n][0] plus the cost of signalling size n.
 * d, s          - in: per-size distortion and skip flag.
 * rate, distortion, skip - out: values of the chosen size. The rate
 *                 includes the signalling cost only in TX_MODE_SELECT.
 * txfm_cache    - out: RD cost that each transform mode would achieve.
 * max_txfm_size - largest size with valid entries in r / d / s.
 *
 * Side effect: stores the chosen size in the block's mbmi.txfm_size.
 */
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                     int (*r)[2], int *rate,
                                     int *d, int *distortion,
                                     int *s, int *skip,
                                     int64_t txfm_cache[NB_TXFM_MODES],
                                     TX_SIZE max_txfm_size) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  vp9_prob skip_prob = vp9_get_pred_prob(cm, xd, PRED_MBSKIP);
  int64_t rd[TX_SIZE_MAX_SB][2];
  int tx, bit;
  int no_skip_cost, skip_cost;
  int select_mode, rd32_wins, rd16_wins;

  // Add the cost of signalling each size: a "one" per smaller size and a
  // closing "zero", which is omitted for the largest allowed size.
  for (tx = TX_4X4; tx <= max_txfm_size; tx++) {
    r[tx][1] = r[tx][0];
    for (bit = 0; bit <= tx - (tx == max_txfm_size); bit++) {
      r[tx][1] += (bit == tx) ? vp9_cost_zero(cm->prob_tx[bit])
                              : vp9_cost_one(cm->prob_tx[bit]);
    }
  }

  assert(skip_prob > 0);
  no_skip_cost = vp9_cost_bit(skip_prob, 0);
  skip_cost = vp9_cost_bit(skip_prob, 1);

  // RD cost per size, without ([0]) and with ([1]) the size signal.
  for (tx = TX_4X4; tx <= max_txfm_size; tx++) {
    if (s[tx]) {
      // Skipped block: only the skip flag is costed.
      rd[tx][0] = rd[tx][1] = RDCOST(x->rdmult, x->rddiv, skip_cost, d[tx]);
    } else {
      rd[tx][0] = RDCOST(x->rdmult, x->rddiv, r[tx][0] + no_skip_cost, d[tx]);
      rd[tx][1] = RDCOST(x->rdmult, x->rddiv, r[tx][1] + no_skip_cost, d[tx]);
    }
  }

  // The size checks come first so that rd[] entries above max_txfm_size
  // (never written) are not read.
  select_mode = cm->txfm_mode == TX_MODE_SELECT;
  rd32_wins = max_txfm_size == TX_32X32 &&
              rd[TX_32X32][1] < rd[TX_16X16][1] &&
              rd[TX_32X32][1] < rd[TX_8X8][1] &&
              rd[TX_32X32][1] < rd[TX_4X4][1];
  rd16_wins = max_txfm_size >= TX_16X16 &&
              rd[TX_16X16][1] < rd[TX_8X8][1] &&
              rd[TX_16X16][1] < rd[TX_4X4][1];

  if (max_txfm_size == TX_32X32 &&
      (cm->txfm_mode == ALLOW_32X32 || (select_mode && rd32_wins))) {
    mbmi->txfm_size = TX_32X32;
  } else if (max_txfm_size >= TX_16X16 &&
             (cm->txfm_mode == ALLOW_16X16 ||
              cm->txfm_mode == ALLOW_32X32 ||
              (select_mode && rd16_wins))) {
    mbmi->txfm_size = TX_16X16;
  } else if (cm->txfm_mode == ALLOW_8X8 ||
             cm->txfm_mode == ALLOW_16X16 ||
             cm->txfm_mode == ALLOW_32X32 ||
             (select_mode && rd[TX_8X8][1] < rd[TX_4X4][1])) {
    mbmi->txfm_size = TX_8X8;
  } else {
    mbmi->txfm_size = TX_4X4;
  }

  *distortion = d[mbmi->txfm_size];
  *rate       = r[mbmi->txfm_size][cm->txfm_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->txfm_size];

  txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
  txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
  txfm_cache[ALLOW_16X16] = rd[MIN(max_txfm_size, TX_16X16)][0];
  txfm_cache[ALLOW_32X32] = rd[MIN(max_txfm_size, TX_32X32)][0];
  if (rd32_wins) {
    txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  } else if (rd16_wins) {
    txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
  } else {
    txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
                                 rd[TX_4X4][1] : rd[TX_8X8][1];
  }
}

595 596
/*
 * Returns the sum of squared differences between `coeff` and `dqcoeff`
 * over `block_size` entries, right-shifted by `shift` and saturated to
 * INT_MAX.
 */
static int block_error(int16_t *coeff, int16_t *dqcoeff,
                       int block_size, int shift) {
  int64_t sse = 0;
  int idx = 0;

  while (idx < block_size) {
    const int delta = coeff[idx] - dqcoeff[idx];
    // Squaring in unsigned arithmetic avoids signed overflow: |delta| is
    // at most 65535, so the square fits in 32 unsigned bits.
    sse += (unsigned)delta * delta;
    ++idx;
  }
  sse >>= shift;

  if (sse > INT_MAX)
    return INT_MAX;
  return (int)sse;
}

609 610
/*
 * Luma (plane 0) coefficient error for a block of size `bsize`: the squared
 * difference between the transform coefficients and their dequantized
 * values, right-shifted by `shift`.
 */
static int block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
  const int bwl = b_width_log2(bsize);
  const int bhl = b_height_log2(bsize);
  // Number of coefficients covered by the block: 16 per 4x4 unit.
  const int num_coeffs = 16 << (bwl + bhl);

  return block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff,
                     num_coeffs, shift);
}
614

615 616 617 618
/*
 * Chroma coefficient error for a block of size `bsize`: the per-plane
 * squared errors of planes 1..MAX_MB_PLANE-1 are summed, the total is
 * right-shifted by `shift` and saturated to INT_MAX.
 */
static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
  const int bwl = b_width_log2(bsize);
  const int bhl = b_height_log2(bsize);
  int64_t total = 0;
  int plane = 1;

  while (plane < MAX_MB_PLANE) {
    // Each subsampled dimension halves the number of coefficients.
    const int ss = x->e_mbd.plane[plane].subsampling_x +
                   x->e_mbd.plane[plane].subsampling_y;
    const int num_coeffs = 16 << (bwl + bhl - ss);

    // The shift is applied once to the total, not per plane.
    total += block_error(x->plane[plane].coeff, x->e_mbd.plane[plane].dqcoeff,
                         num_coeffs, 0);
    ++plane;
  }
  total >>= shift;

  if (total > INT_MAX)
    return INT_MAX;
  return (int)total;
}

630 631
/*
 * Returns the total coefficient rate cost of one plane of a block of size
 * `bsize`, coded with transform size `tx_size`.
 *
 * The plane's above/left entropy contexts are copied into local buffers
 * and cost_coeffs() updates those copies, so the contexts stored in `xd`
 * are not modified by this function.
 */
static int rdcost_plane(VP9_COMMON *const cm, MACROBLOCK *x,
                        int plane, BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
  const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y;
  const int bw = 1 << bwl;
  const int bh = 1 << bhl;
  const int num_blocks = bw * bh;
  // Number of 4x4 units covered by one transform block.
  const int step = 1 << (tx_size * 2);
  ENTROPY_CONTEXT above_ctx[16], left_ctx[16];
  int total = 0;
  int block;

  vpx_memcpy(above_ctx, xd->plane[plane].above_context,
             sizeof(ENTROPY_CONTEXT) * bw);
  vpx_memcpy(left_ctx, xd->plane[plane].left_context,
             sizeof(ENTROPY_CONTEXT) * bh);

  for (block = 0; block < num_blocks; block += step) {
    int col, row;

    txfrm_block_to_raster_xy(xd, bsize, plane, block, tx_size * 2,
                             &col, &row);

    total += cost_coeffs(cm, x, plane, block, xd->plane[plane].plane_type,
                         above_ctx + col, left_ctx + row,
                         tx_size, num_blocks);
  }

  return total;
}

659 660 661
/*
 * Returns the combined coefficient rate cost of the chroma planes
 * (planes 1..MAX_MB_PLANE-1) for a block of size `bsize` and transform
 * size `tx_size`.
 */
static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x,
                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  int total = 0;
  int plane = 1;

  while (plane < MAX_MB_PLANE) {
    total += rdcost_plane(cm, x, plane, bsize, tx_size);
    ++plane;
  }

  return total;
}

669 670 671
/*
 * Evaluates the luma plane of a block with one specific transform size.
 *
 * Sets the block's mbmi.txfm_size to `tx_size`, runs the forward transform
 * and quantization on the luma plane, then reports:
 *   *distortion - coefficient-domain error (unshifted for TX_32X32,
 *                 right-shifted by 2 for the other sizes);
 *   *rate       - coefficient rate cost of plane 0;
 *   *skippable  - result of vp9_sby_is_skippable().
 *
 * mbmi.txfm_size is left at `tx_size` on return.
 */
static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
                                     int *rate, int *distortion, int *skippable,
                                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const int dist_shift = (tx_size == TX_32X32) ? 0 : 2;

  xd->mode_info_context->mbmi.txfm_size = tx_size;
  vp9_xform_quant_sby(cm, x, bsize);

  *distortion = block_error_sby(x, bsize, dist_shift);
  *rate       = rdcost_plane(cm, x, 0, bsize, tx_size);
  *skippable  = vp9_sby_is_skippable(xd, bsize);
}

681 682
static void super_block_yrd(VP9_COMP *cpi,
                            MACROBLOCK *x, int *rate, int *distortion,
683
                            int *skip, BLOCK_SIZE_TYPE bs,
684
                            int64_t txfm_cache[NB_TXFM_MODES]) {
685 686
  VP9_COMMON *const cm = &cpi->common;
  int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB];
Ronald S. Bultje's avatar
Ronald S. Bultje committed
687

688
  vp9_subtract_sby(x, bs);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
689

690
  if (bs >= BLOCK_SIZE_SB32X32)
691 692
    super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32],
                             bs, TX_32X32);
693
  if (bs >= BLOCK_SIZE_MB16X16)
694 695 696 697 698 699
    super_block_yrd_for_txfm(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16],
                             bs, TX_16X16);
  super_block_yrd_for_txfm(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], bs,
                           TX_8X8);
  super_block_yrd_for_txfm(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], bs,
                           TX_4X4);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
700 701

  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
702
                           TX_32X32 - (bs < BLOCK_SIZE_SB32X32)
Jingning Han's avatar
Jingning Han committed
703
                           - (bs < BLOCK_SIZE_MB16X16));
Ronald S. Bultje's avatar
Ronald S. Bultje committed
704
}
Ronald S. Bultje's avatar
Ronald S. Bultje committed
705

706 707
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
                                     B_PREDICTION_MODE *best_mode,
708 709 710 711
                                     int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
                                     int *bestdistortion) {
John Koleszar's avatar
John Koleszar committed
712
  B_PREDICTION_MODE mode;
Deb Mukherjee's avatar
Deb Mukherjee committed
713
  MACROBLOCKD *xd = &x->e_mbd;
714
  int64_t best_rd = INT64_MAX;
John Koleszar's avatar
John Koleszar committed
715 716
  int rate = 0;
  int distortion;
717
  VP9_COMMON *const cm = &cpi->common;
John Koleszar's avatar
John Koleszar committed
718 719
  const int src_stride = x->plane[0].src.stride;
  uint8_t* const src =
720 721 722
      raster_block_offset_uint8(xd,
                                BLOCK_SIZE_SB8X8,
                                0, ib,
John Koleszar's avatar
John Koleszar committed
723
                                x->plane[0].src.buf, src_stride);
724
  int16_t* const src_diff =
725 726 727
      raster_block_offset_int16(xd,
                                BLOCK_SIZE_SB8X8,
                                0, ib,
728
                                x->plane[0].src_diff);
John Koleszar's avatar
John Koleszar committed
729
  int16_t* const diff =
730 731 732
      raster_block_offset_int16(xd,
                                BLOCK_SIZE_SB8X8,
                                0, ib,
John Koleszar's avatar
John Koleszar committed
733
                                xd->plane[0].diff);
John Koleszar's avatar
John Koleszar committed
734
  int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, ib, 16);
735
  uint8_t* const dst =
736 737 738
      raster_block_offset_uint8(xd,
                                BLOCK_SIZE_SB8X8,
                                0, ib,
739
                                xd->plane[0].dst.buf, xd->plane[0].dst.stride);
John Koleszar's avatar
John Koleszar committed
740 741
  ENTROPY_CONTEXT ta = *a, tempa = *a;
  ENTROPY_CONTEXT tl = *l, templ = *l;
Deb Mukherjee's avatar
Deb Mukherjee committed
742 743
  TX_TYPE tx_type = DCT_DCT;
  TX_TYPE best_tx_type = DCT_DCT;
John Koleszar's avatar
John Koleszar committed
744 745 746 747 748
  /*
   * The predictor buffer is a 2d buffer with a stride of 16.  Create
   * a temp buffer that meets the stride requirements, but we are only
   * interested in the left 4x4 block
   * */
749
  DECLARE_ALIGNED_ARRAY(16, int16_t, best_dqcoeff, 16);
John Koleszar's avatar
John Koleszar committed
750

Jingning Han's avatar
Jingning Han committed
751
  assert(ib < 4);
752
#if CONFIG_NEWBINTRAMODES
Scott LaVarnway's avatar
Scott LaVarnway committed
753
  xd->mode_info_context->bmi[ib].as_mode.context =
754
    vp9_find_bpred_context(xd, ib, dst, xd->plane[0].dst.stride);
755
#endif
756
  xd->mode_info_context->mbmi.txfm_size = TX_4X4;
757
  for (mode = B_DC_PRED; mode < LEFT4X4; mode++) {
758 759
    int64_t this_rd;
    int ratey;
760 761

#if CONFIG_NEWBINTRAMODES
762 763 764 765 766 767 768
    if (xd->frame_type == KEY_FRAME) {
      if (mode == B_CONTEXT_PRED) continue;
    } else {
      if (mode >= B_CONTEXT_PRED - CONTEXT_PRED_REPLACEMENTS &&
          mode < B_CONTEXT_PRED)
        continue;
    }
769
#endif
770

Scott LaVarnway's avatar
Scott LaVarnway committed
771
    xd->mode_info_context->bmi[ib].as_mode.first = mode;
772
#if CONFIG_NEWBINTRAMODES
773 774
    rate = bmode_costs[
        mode == B_CONTEXT_PRED ? mode - CONTEXT_PRED_REPLACEMENTS : mode];
775
#else
776
    rate = bmode_costs[mode];
777
#endif
John Koleszar's avatar
John Koleszar committed
778

779 780 781
    vp9_intra4x4_predict(xd, ib,
                         BLOCK_SIZE_SB8X8,
                         mode, dst, xd->plane[0].dst.stride);
Jingning Han's avatar
Jingning Han committed
782
    vp9_subtract_block(4, 4, src_diff, 8,
John Koleszar's avatar
John Koleszar committed
783
                       src, src_stride,
784
                       dst, xd->plane[0].dst.stride);
John Koleszar's avatar
John Koleszar committed
785

Scott LaVarnway's avatar
Scott LaVarnway committed
786
    xd->mode_info_context->bmi[ib].as_mode.first = mode;
John Koleszar's avatar
John Koleszar committed
787
    tx_type = get_tx_type_4x4(xd, ib);
788
    if (tx_type != DCT_DCT) {
Jingning Han's avatar
Jingning Han committed
789
      vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
790
      x->quantize_b_4x4(x, ib, tx_type, 16);
791
    } else {
Jingning Han's avatar
Jingning Han committed
792
      x->fwd_txm4x4(src_diff, coeff, 16);
793
      x->quantize_b_4x4(x, ib, tx_type, 16);
794
    }
John Koleszar's avatar
John Koleszar committed
795

796 797
    tempa = ta;
    templ = tl;
Jingning Han's avatar
Jingning Han committed
798

799
    ratey = cost_coeffs(cm, x, 0, ib,
John Koleszar's avatar
John Koleszar committed
800
                        PLANE_TYPE_Y_WITH_DC, &tempa, &templ, TX_4X4, 16);
801
    rate += ratey;
John Koleszar's avatar
John Koleszar committed
802
    distortion = vp9_block_error(coeff,
803 804
                                 BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16),
                                 16) >> 2;
Jingning Han's avatar
Jingning Han committed
805

806
    this_rd = RDCOST