vp9_rdopt.c 117 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
John Koleszar's avatar
John Koleszar committed
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5 6
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
John Koleszar's avatar
John Koleszar committed
9 10 11 12 13 14 15
 */


#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
16

17
#include "vp9/common/vp9_pragmas.h"
18 19 20 21 22
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
23 24 25 26 27
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
28 29 30 31 32 33
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
34
#include "vpx_mem/vpx_mem.h"
35 36 37 38 39
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
40
#include "vp9_rtcd.h"
41
#include "vp9/common/vp9_mvref_common.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
42
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
43

44 45
#define INVALID_MV 0x80008000

46 47 48
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

49 50 51
DECLARE_ALIGNED(16, extern const uint8_t,
                vp9_pt_energy_class[MAX_ENTROPY_TOKENS]);

Ronald S. Bultje's avatar
Ronald S. Bultje committed
52
#define I4X4_PRED 0x8000
Ronald S. Bultje's avatar
Ronald S. Bultje committed
53
#define SPLITMV 0x10000
Ronald S. Bultje's avatar
Ronald S. Bultje committed
54

55
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
56 57
  {ZEROMV,    LAST_FRAME,   NONE},
  {DC_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
58

59 60
  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARMV,    LAST_FRAME,   NONE},
John Koleszar's avatar
John Koleszar committed
61

62 63
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
64

65 66
  {ZEROMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
67

68 69
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
70

71 72 73 74 75 76 77 78
  {V_PRED,    INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
79

80
  {TM_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
81

82 83 84
  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
85

86 87 88
  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},
89

Ronald S. Bultje's avatar
Ronald S. Bultje committed
90
  {I4X4_PRED, INTRA_FRAME,  NONE},
91

John Koleszar's avatar
John Koleszar committed
92 93 94 95
  /* compound prediction modes */
  {ZEROMV,    LAST_FRAME,   GOLDEN_FRAME},
  {NEARESTMV, LAST_FRAME,   GOLDEN_FRAME},
  {NEARMV,    LAST_FRAME,   GOLDEN_FRAME},
96

John Koleszar's avatar
John Koleszar committed
97 98 99
  {ZEROMV,    ALTREF_FRAME, LAST_FRAME},
  {NEARESTMV, ALTREF_FRAME, LAST_FRAME},
  {NEARMV,    ALTREF_FRAME, LAST_FRAME},
100

John Koleszar's avatar
John Koleszar committed
101 102 103
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
104

John Koleszar's avatar
John Koleszar committed
105 106 107
  {NEWMV,     LAST_FRAME,   GOLDEN_FRAME},
  {NEWMV,     ALTREF_FRAME, LAST_FRAME  },
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},
108

John Koleszar's avatar
John Koleszar committed
109 110
  {SPLITMV,   LAST_FRAME,   GOLDEN_FRAME},
  {SPLITMV,   ALTREF_FRAME, LAST_FRAME  },
111
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},
John Koleszar's avatar
John Koleszar committed
112 113
};

114
#if CONFIG_BALANCED_COEFTREE
115 116 117
static void fill_token_costs(vp9_coeff_count (*c)[BLOCK_TYPES],
                             vp9_coeff_count (*cnoskip)[BLOCK_TYPES],
                             vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
118
  int i, j, k, l;
119 120 121 122 123 124 125 126 127 128 129 130 131 132 133
  TX_SIZE t;
  for (t = TX_4X4; t <= TX_32X32; t++)
    for (i = 0; i < BLOCK_TYPES; i++)
      for (j = 0; j < REF_TYPES; j++)
        for (k = 0; k < COEF_BANDS; k++)
          for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
            vp9_prob probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
            vp9_cost_tokens((int *)cnoskip[t][i][j][k][l], probs,
                            vp9_coef_tree);
            // Replace the eob node prob with a very small value so that the
            // cost approximately equals the cost without the eob node
            probs[1] = 1;
            vp9_cost_tokens((int *)c[t][i][j][k][l], probs, vp9_coef_tree);
          }
134 135
}
#else
136 137
static void fill_token_costs(vp9_coeff_count (*c)[BLOCK_TYPES],
                             vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
138
  int i, j, k, l;
139 140 141 142 143 144 145 146 147 148 149
  TX_SIZE t;
  for (t = TX_4X4; t <= TX_32X32; t++)
    for (i = 0; i < BLOCK_TYPES; i++)
      for (j = 0; j < REF_TYPES; j++)
        for (k = 0; k < COEF_BANDS; k++)
          for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
            vp9_prob probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
            vp9_cost_tokens_skip((int *)c[t][i][j][k][l], probs,
                                 vp9_coef_tree);
          }
150
}
151
#endif
152

153 154 155 156
static int rd_iifactor[32] =  { 4, 4, 3, 2, 1, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, };
John Koleszar's avatar
John Koleszar committed
157

158
// 3* dc_qlookup[Q]*dc_qlookup[Q];
159

160
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
161 162 163
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

164
void vp9_init_me_luts() {
John Koleszar's avatar
John Koleszar committed
165 166 167 168 169 170 171
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    sad_per_bit16lut[i] =
172
      (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107);
173
    sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742);
John Koleszar's avatar
John Koleszar committed
174
  }
Paul Wilkins's avatar
Paul Wilkins committed
175
}
John Koleszar's avatar
John Koleszar committed
176

177
static int compute_rd_mult(int qindex) {
178
  const int q = vp9_dc_quant(qindex, 0);
179
  return (11 * q * q) >> 2;
180 181
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
182 183 184
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
  cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
John Koleszar's avatar
John Koleszar committed
185 186
}

187

Dmitry Kovalev's avatar
Dmitry Kovalev committed
188
void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
189
  int q, i;
John Koleszar's avatar
John Koleszar committed
190

191
  vp9_clear_system_state();  // __asm emms;
John Koleszar's avatar
John Koleszar committed
192

John Koleszar's avatar
John Koleszar committed
193 194 195 196
  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
197
  qindex = clamp(qindex, 0, MAXQ);
198

Dmitry Kovalev's avatar
Dmitry Kovalev committed
199
  cpi->RDMULT = compute_rd_mult(qindex);
John Koleszar's avatar
John Koleszar committed
200 201 202 203 204
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    if (cpi->twopass.next_iiratio > 31)
      cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
    else
      cpi->RDMULT +=
Dmitry Kovalev's avatar
Dmitry Kovalev committed
205
          (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
John Koleszar's avatar
John Koleszar committed
206
  }
207
  cpi->mb.errorperbit = cpi->RDMULT >> 6;
John Koleszar's avatar
John Koleszar committed
208
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);
209

210
  vp9_set_speed_features(cpi);
John Koleszar's avatar
John Koleszar committed
211

Dmitry Kovalev's avatar
Dmitry Kovalev committed
212 213
  q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25);
  q <<= 2;
John Koleszar's avatar
John Koleszar committed
214 215
  if (q < 8)
    q = 8;
216

John Koleszar's avatar
John Koleszar committed
217 218 219
  if (cpi->RDMULT > 1000) {
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;
John Koleszar's avatar
John Koleszar committed
220

John Koleszar's avatar
John Koleszar committed
221 222 223 224 225 226 227
    for (i = 0; i < MAX_MODES; i++) {
      if (cpi->sf.thresh_mult[i] < INT_MAX) {
        cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }
      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
John Koleszar's avatar
John Koleszar committed
228
    }
John Koleszar's avatar
John Koleszar committed
229 230
  } else {
    cpi->RDDIV = 100;
John Koleszar's avatar
John Koleszar committed
231

John Koleszar's avatar
John Koleszar committed
232 233 234 235 236 237 238
    for (i = 0; i < MAX_MODES; i++) {
      if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) {
        cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }
      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
John Koleszar's avatar
John Koleszar committed
239
    }
John Koleszar's avatar
John Koleszar committed
240
  }
John Koleszar's avatar
John Koleszar committed
241

242
#if CONFIG_BALANCED_COEFTREE
243 244 245
  fill_token_costs(cpi->mb.token_costs,
                   cpi->mb.token_costs_noskip,
                   cpi->common.fc.coef_probs);
246
#else
247 248
  fill_token_costs(cpi->mb.token_costs,
                   cpi->common.fc.coef_probs);
249
#endif
250

251
  for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
252 253 254 255
    vp9_cost_tokens(cpi->mb.partition_cost[i],
                    cpi->common.fc.partition_prob[i],
                    vp9_partition_tree);

John Koleszar's avatar
John Koleszar committed
256
  /*rough estimate for costing*/
257
  vp9_init_mode_costs(cpi);
John Koleszar's avatar
John Koleszar committed
258

259
  if (cpi->common.frame_type != KEY_FRAME) {
260
    vp9_build_nmv_cost_table(
261 262 263 264 265 266
        cpi->mb.nmvjointcost,
        cpi->mb.e_mbd.allow_high_precision_mv ?
        cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cpi->common.fc.nmvc,
        cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
  }
John Koleszar's avatar
John Koleszar committed
267 268
}

269
int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) {
270
  int i, error = 0;
John Koleszar's avatar
John Koleszar committed
271

272
  for (i = 0; i < block_size; i++) {
John Koleszar's avatar
John Koleszar committed
273 274 275
    int this_diff = coeff[i] - dqcoeff[i];
    error += this_diff * this_diff;
  }
John Koleszar's avatar
John Koleszar committed
276

John Koleszar's avatar
John Koleszar committed
277
  return error;
John Koleszar's avatar
John Koleszar committed
278 279
}

280
static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
281
                              int plane, int block, PLANE_TYPE type,
282 283
                              ENTROPY_CONTEXT *A,
                              ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
284 285
                              TX_SIZE tx_size,
                              int y_blocks) {
286
  MACROBLOCKD *const xd = &mb->e_mbd;
287 288
  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
  int pt;
289
  int c = 0;
290 291
  int cost = 0, pad;
  const int *scan, *nb;
292 293 294
  const int eob = xd->plane[plane].eobs[block];
  const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff,
                                           block, 16);
295
  const int ref = mbmi->ref_frame != INTRA_FRAME;
296
  unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
297
      mb->token_costs[tx_size][type][ref];
298
  ENTROPY_CONTEXT above_ec, left_ec;
299
  TX_TYPE tx_type = DCT_DCT;
300

301
  const int segment_id = xd->mode_info_context->mbmi.segment_id;
302 303 304 305
#if CONFIG_BALANCED_COEFTREE
  unsigned int (*token_costs_noskip)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
      mb->token_costs_noskip[tx_size][type][ref];
#else
306
  vp9_prob coef_probs[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
307 308
#endif

309 310
  int seg_eob, default_eob;
  uint8_t token_cache[1024];
Paul Wilkins's avatar
Paul Wilkins committed
311
  const uint8_t * band_translate;
312 313

  // Check for consistency of tx_size with mode info
314
  assert((!type && !plane) || (type && plane));
315 316 317 318 319 320 321
  if (type == PLANE_TYPE_Y_WITH_DC) {
    assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
  } else {
    TX_SIZE tx_size_uv = get_uv_tx_size(xd);
    assert(tx_size == tx_size_uv);
  }

322 323 324
#if !CONFIG_BALANCED_COEFTREE
  vp9_model_to_full_probs_sb(cm->fc.coef_probs[tx_size][type][ref], coef_probs);
#endif
325
  switch (tx_size) {
326
    case TX_4X4: {
327
      tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
328
          get_tx_type_4x4(xd, block) : DCT_DCT;
329 330
      above_ec = A[0] != 0;
      left_ec = L[0] != 0;
331
      seg_eob = 16;
332
      scan = get_scan_4x4(tx_type);
Paul Wilkins's avatar
Paul Wilkins committed
333
      band_translate = vp9_coefband_trans_4x4;
Daniel Kang's avatar
Daniel Kang committed
334
      break;
335
    }
336 337
    case TX_8X8: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
338
      const int sz = 1 + b_width_log2(sb_type);
339
      const int x = block & ((1 << sz) - 1), y = block - x;
340 341
      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT;
342 343
      above_ec = (A[0] + A[1]) != 0;
      left_ec = (L[0] + L[1]) != 0;
344
      scan = get_scan_8x8(tx_type);
345
      seg_eob = 64;
Paul Wilkins's avatar
Paul Wilkins committed
346
      band_translate = vp9_coefband_trans_8x8plus;
Daniel Kang's avatar
Daniel Kang committed
347
      break;
348 349 350
    }
    case TX_16X16: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
351
      const int sz = 2 + b_width_log2(sb_type);
352
      const int x = block & ((1 << sz) - 1), y = block - x;
353 354
      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT;
355
      scan = get_scan_16x16(tx_type);
356
      seg_eob = 256;
357 358
      above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
Paul Wilkins's avatar
Paul Wilkins committed
359
      band_translate = vp9_coefband_trans_8x8plus;
Daniel Kang's avatar
Daniel Kang committed
360
      break;
361
    }
362
    case TX_32X32:
Paul Wilkins's avatar
Paul Wilkins committed
363
      scan = vp9_default_scan_32x32;
364
      seg_eob = 1024;
365 366
      above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;
Paul Wilkins's avatar
Paul Wilkins committed
367
      band_translate = vp9_coefband_trans_8x8plus;
368
      break;
Daniel Kang's avatar
Daniel Kang committed
369
    default:
370
      abort();
Daniel Kang's avatar
Daniel Kang committed
371 372
      break;
  }
John Koleszar's avatar
John Koleszar committed
373
  assert(eob <= seg_eob);
374

375
  pt = combine_entropy_contexts(above_ec, left_ec);
376 377
  nb = vp9_get_coef_neighbors_handle(scan, &pad);
  default_eob = seg_eob;
378

379 380
  if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
    seg_eob = 0;
381

382 383 384 385
  /* sanity check to ensure that we do not have spurious non-zero q values */
  if (eob < seg_eob)
    assert(qcoeff_ptr[scan[eob]] == 0);

386
  {
387
    for (c = 0; c < eob; c++) {
388
      int v = qcoeff_ptr[scan[c]];
389
      int t = vp9_dct_value_tokens_ptr[v].token;
Paul Wilkins's avatar
Paul Wilkins committed
390
      int band = get_coef_band(band_translate, c);
391 392
      if (c)
        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
393

394 395 396 397 398 399
#if CONFIG_BALANCED_COEFTREE
      if (!c || token_cache[scan[c - 1]])  // do not skip eob
        cost += token_costs_noskip[band][pt][t] + vp9_dct_value_cost_ptr[v];
      else
        cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
#else
400
      cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
401
      if (!c || token_cache[scan[c - 1]])
402
        cost += vp9_cost_bit(coef_probs[band][pt][0], 1);
403
#endif
404
      token_cache[scan[c]] = vp9_pt_energy_class[t];
405
    }
406 407 408
    if (c < seg_eob) {
      if (c)
        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
409 410 411 412 413
#if CONFIG_BALANCED_COEFTREE
      cost += mb->token_costs_noskip[tx_size][type][ref]
          [get_coef_band(band_translate, c)]
          [pt][DCT_EOB_TOKEN];
#else
414
      cost += mb->token_costs[tx_size][type][ref]
Paul Wilkins's avatar
Paul Wilkins committed
415
          [get_coef_band(band_translate, c)]
416
          [pt][DCT_EOB_TOKEN];
417
#endif
418
    }
419 420
  }

421 422 423
  // is eob first coefficient;
  for (pt = 0; pt < (1 << tx_size); pt++) {
    A[pt] = L[pt] = c > 0;
424
  }
425

426 427 428
  return cost;
}

429
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
430 431 432 433 434
                                     int (*r)[2], int *rate,
                                     int *d, int *distortion,
                                     int *s, int *skip,
                                     int64_t txfm_cache[NB_TXFM_MODES],
                                     TX_SIZE max_txfm_size) {
435 436 437
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
438
  vp9_prob skip_prob = vp9_get_pred_prob(cm, xd, PRED_MBSKIP);
439 440
  int64_t rd[TX_SIZE_MAX_SB][2];
  int n, m;
441
  int s0, s1;
442 443 444 445 446 447 448 449 450 451

  for (n = TX_4X4; n <= max_txfm_size; n++) {
    r[n][1] = r[n][0];
    for (m = 0; m <= n - (n == max_txfm_size); m++) {
      if (m == n)
        r[n][1] += vp9_cost_zero(cm->prob_tx[m]);
      else
        r[n][1] += vp9_cost_one(cm->prob_tx[m]);
    }
  }
452

453 454 455
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
456

457 458 459 460 461 462
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
463 464 465
    }
  }

466 467 468 469 470 471
  if (max_txfm_size == TX_32X32 &&
      (cm->txfm_mode == ALLOW_32X32 ||
       (cm->txfm_mode == TX_MODE_SELECT &&
        rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
        rd[TX_32X32][1] < rd[TX_4X4][1]))) {
    mbmi->txfm_size = TX_32X32;
472 473 474 475 476 477
  } else if (max_txfm_size >= TX_16X16 &&
             (cm->txfm_mode == ALLOW_16X16 ||
              cm->txfm_mode == ALLOW_32X32 ||
              (cm->txfm_mode == TX_MODE_SELECT &&
               rd[TX_16X16][1] < rd[TX_8X8][1] &&
               rd[TX_16X16][1] < rd[TX_4X4][1]))) {
478
    mbmi->txfm_size = TX_16X16;
479
  } else if (cm->txfm_mode == ALLOW_8X8 ||
480 481
             cm->txfm_mode == ALLOW_16X16 ||
             cm->txfm_mode == ALLOW_32X32 ||
482
           (cm->txfm_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) {
483 484 485 486 487
    mbmi->txfm_size = TX_8X8;
  } else {
    mbmi->txfm_size = TX_4X4;
  }

488
  *distortion = d[mbmi->txfm_size];
489
  *rate       = r[mbmi->txfm_size][cm->txfm_mode == TX_MODE_SELECT];
490 491
  *skip       = s[mbmi->txfm_size];

492 493
  txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
  txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
494 495
  txfm_cache[ALLOW_16X16] = rd[MIN(max_txfm_size, TX_16X16)][0];
  txfm_cache[ALLOW_32X32] = rd[MIN(max_txfm_size, TX_32X32)][0];
496 497 498 499
  if (max_txfm_size == TX_32X32 &&
      rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1])
    txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
500 501
  else if (max_txfm_size >= TX_16X16 &&
           rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])
502
    txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
503
  else
504 505
    txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
                                 rd[TX_4X4][1] : rd[TX_8X8][1];
506 507
}

508 509
static int block_error(int16_t *coeff, int16_t *dqcoeff,
                       int block_size, int shift) {
510 511 512 513
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
514
    int this_diff = coeff[i] - dqcoeff[i];
515
    error += (unsigned)this_diff * this_diff;
516
  }
517
  error >>= shift;
518

Frank Galligan's avatar
Frank Galligan committed
519
  return error > INT_MAX ? INT_MAX : (int)error;
520 521
}

522 523
static int block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
  const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
524
  return block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff,
525
                     16 << (bwl + bhl), shift);
526
}
527

528 529 530 531
static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
  const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
  int64_t sum = 0;
  int plane;
532

533 534 535 536 537 538 539 540
  for (plane = 1; plane < MAX_MB_PLANE; plane++) {
    const int subsampling = x->e_mbd.plane[plane].subsampling_x +
                            x->e_mbd.plane[plane].subsampling_y;
    sum += block_error(x->plane[plane].coeff, x->e_mbd.plane[plane].dqcoeff,
                       16 << (bwl + bhl - subsampling), 0);
  }
  sum >>= shift;
  return sum > INT_MAX ? INT_MAX : (int)sum;
541 542
}

543 544
static int rdcost_plane(VP9_COMMON *const cm, MACROBLOCK *x,
                        int plane, BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
545
  MACROBLOCKD *const xd = &x->e_mbd;
546 547 548
  const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
  const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y;
  const int bw = 1 << bwl, bh = 1 << bhl;
549
  ENTROPY_CONTEXT t_above[16], t_left[16];
550
  int block, cost;
551

552
  vpx_memcpy(&t_above, xd->plane[plane].above_context,
553
             sizeof(ENTROPY_CONTEXT) * bw);
554
  vpx_memcpy(&t_left,  xd->plane[plane].left_context,
555
             sizeof(ENTROPY_CONTEXT) * bh);
556

557 558 559
  cost = 0;
  for (block = 0; block < bw * bh; block += 1 << (tx_size * 2)) {
    int x_idx, y_idx;
560

561 562
    txfrm_block_to_raster_xy(xd, bsize, plane, block, tx_size * 2,
                             &x_idx, &y_idx);
563

564 565 566
    cost += cost_coeffs(cm, x, plane, block, xd->plane[plane].plane_type,
                        t_above + x_idx, t_left + y_idx,
                        tx_size, bw * bh);
567
  }
568 569 570 571

  return cost;
}

572 573 574
static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x,
                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  int cost = 0, plane;
575

576 577
  for (plane = 1; plane < MAX_MB_PLANE; plane++) {
    cost += rdcost_plane(cm, x, plane, bsize, tx_size);
578 579
  }
  return cost;
580 581
}

582 583 584
static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
                                     int *rate, int *distortion, int *skippable,
                                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
585
  MACROBLOCKD *const xd = &x->e_mbd;
586 587
  xd->mode_info_context->mbmi.txfm_size = tx_size;
  vp9_xform_quant_sby(cm, x, bsize);
588

589 590
  *distortion = block_error_sby(x, bsize, tx_size == TX_32X32 ? 0 : 2);
  *rate       = rdcost_plane(cm, x, 0, bsize, tx_size);
591
  *skippable  = vp9_sby_is_skippable(xd, bsize);
592 593
}

594 595
static void super_block_yrd(VP9_COMP *cpi,
                            MACROBLOCK *x, int *rate, int *distortion,
596
                            int *skip, BLOCK_SIZE_TYPE bs,
597
                            int64_t txfm_cache[NB_TXFM_MODES]) {
598 599
  VP9_COMMON *const cm = &cpi->common;
  int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB];
Jim Bankoski's avatar
Jim Bankoski committed
600 601
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
602

603
  vp9_subtract_sby(x, bs);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
604

Jim Bankoski's avatar
Jim Bankoski committed
605 606 607 608 609 610 611 612 613 614 615 616 617 618
  if (cpi->speed > 4) {
    if (bs >= BLOCK_SIZE_SB32X32) {
      mbmi->txfm_size = TX_32X32;
    } else if (bs >= BLOCK_SIZE_MB16X16) {
      mbmi->txfm_size = TX_16X16;
    } else if (bs >= BLOCK_SIZE_SB8X8) {
      mbmi->txfm_size = TX_8X8;
    } else {
      mbmi->txfm_size = TX_4X4;
    }
    super_block_yrd_for_txfm(cm, x, rate, distortion, skip, bs,
                             mbmi->txfm_size);
    return;
  }
619
  if (bs >= BLOCK_SIZE_SB32X32)
620 621
    super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32],
                             bs, TX_32X32);
622
  if (bs >= BLOCK_SIZE_MB16X16)
623 624 625 626 627 628
    super_block_yrd_for_txfm(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16],
                             bs, TX_16X16);
  super_block_yrd_for_txfm(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], bs,
                           TX_8X8);
  super_block_yrd_for_txfm(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], bs,
                           TX_4X4);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
629 630

  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
631
                           TX_32X32 - (bs < BLOCK_SIZE_SB32X32)
Jingning Han's avatar
Jingning Han committed
632
                           - (bs < BLOCK_SIZE_MB16X16));
Ronald S. Bultje's avatar
Ronald S. Bultje committed
633
}
Ronald S. Bultje's avatar
Ronald S. Bultje committed
634

635
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
Yaowu Xu's avatar
Yaowu Xu committed
636
                                     MB_PREDICTION_MODE *best_mode,
637 638 639
                                     int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
640 641 642
                                     int *bestdistortion,
                                     BLOCK_SIZE_TYPE bsize) {
  MB_PREDICTION_MODE mode;
Deb Mukherjee's avatar
Deb Mukherjee committed
643
  MACROBLOCKD *xd = &x->e_mbd;
644
  int64_t best_rd = INT64_MAX;
John Koleszar's avatar
John Koleszar committed
645 646
  int rate = 0;
  int distortion;
647
  VP9_COMMON *const cm = &cpi->common;
John Koleszar's avatar
John Koleszar committed
648
  const int src_stride = x->plane[0].src.stride;
649 650 651 652 653
  uint8_t *src, *dst;
  int16_t *src_diff, *coeff;

  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];
Deb Mukherjee's avatar
Deb Mukherjee committed
654 655
  TX_TYPE tx_type = DCT_DCT;
  TX_TYPE best_tx_type = DCT_DCT;
656 657 658 659
  int bw = 1 << b_width_log2(bsize);
  int bh = 1 << b_height_log2(bsize);
  int idx, idy, block;
  DECLARE_ALIGNED(16, int16_t, best_dqcoeff[4][16]);
John Koleszar's avatar
John Koleszar committed
660

Jingning Han's avatar
Jingning Han committed
661
  assert(ib < 4);
662

663 664
  vpx_memcpy(ta, a, sizeof(ta));
  vpx_memcpy(tl, l, sizeof(tl));
665
  xd->mode_info_context->mbmi.txfm_size = TX_4X4;
666 667

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
668
    int64_t this_rd;
669
    int ratey = 0;
670

671
    rate = bmode_costs[mode];
672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688
    distortion = 0;

    vpx_memcpy(tempa, ta, sizeof(ta));
    vpx_memcpy(templ, tl, sizeof(tl));

    for (idy = 0; idy < bh; ++idy) {
      for (idx = 0; idx < bw; ++idx) {
        block = ib + idy * 2 + idx;
        xd->mode_info_context->bmi[block].as_mode.first = mode;
        src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
                                        x->plane[0].src.buf, src_stride);
        src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, block,
                                             x->plane[0].src_diff);
        coeff = BLOCK_OFFSET(x->plane[0].coeff, block, 16);
        dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
                                        xd->plane[0].dst.buf,
                                        xd->plane[0].dst.stride);
689 690
        vp9_intra4x4_predict(xd, block, BLOCK_SIZE_SB8X8, mode,
                             dst, xd->plane[0].dst.stride);
691 692 693 694 695 696 697 698 699 700 701 702
        vp9_subtract_block(4, 4, src_diff, 8,
                           src, src_stride,
                           dst, xd->plane[0].dst.stride);

        tx_type = get_tx_type_4x4(xd, block);
        if (tx_type != DCT_DCT) {
          vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
          x->quantize_b_4x4(x, block, tx_type, 16);
        } else {
          x->fwd_txm4x4(src_diff, coeff, 16);
          x->quantize_b_4x4(x, block, tx_type, 16);
        }
John Koleszar's avatar
John Koleszar committed
703

704 705 706 707
        ratey += cost_coeffs(cm, x, 0, block, PLANE_TYPE_Y_WITH_DC,
                             tempa + idx, templ + idy, TX_4X4, 16);
        distortion += vp9_block_error(coeff, BLOCK_OFFSET(xd->plane[0].dqcoeff,
                                                         block, 16), 16) >> 2;
John Koleszar's avatar
John Koleszar committed
708

709 710 711 712 713 714 715 716
        if (best_tx_type != DCT_DCT)
          vp9_short_iht4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
                               dst, xd->plane[0].dst.stride, best_tx_type);
        else
          xd->inv_txm4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
                             dst, xd->plane[0].dst.stride);
      }
    }
Jingning Han's avatar
Jingning Han committed
717

718 719
    rate += ratey;
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
Jingning Han's avatar
Jingning Han committed
720

721 722 723 724 725 726 727
    if (this_rd < best_rd) {
      *bestrate = rate;
      *bestratey = ratey;
      *bestdistortion = distortion;
      best_rd = this_rd;
      *best_mode = mode;
      best_tx_type = tx_type;
728 729 730 731 732 733 734 735 736 737
      vpx_memcpy(a, tempa, sizeof(tempa));
      vpx_memcpy(l, templ, sizeof(templ));
      for (idy = 0; idy < bh; ++idy) {
        for (idx = 0; idx < bw; ++idx) {
          block = ib + idy * 2 + idx;
          vpx_memcpy(best_dqcoeff[idy * 2 + idx],
                     BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
                     sizeof(best_dqcoeff[0]));
        }
      }
John Koleszar's avatar
John Koleszar committed
738
    }
John Koleszar's avatar
John Koleszar committed
739
  }
740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758

  for (idy = 0; idy < bh; ++idy) {
    for (idx = 0; idx < bw; ++idx) {
      block = ib + idy * 2 + idx;
      xd->mode_info_context->bmi[block].as_mode.first = *best_mode;
      dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
                                      xd->plane[0].dst.buf,
                                      xd->plane[0].dst.stride);

      vp9_intra4x4_predict(xd, block, BLOCK_SIZE_SB8X8, *best_mode,
                           dst, xd->plane[0].dst.stride);
      // inverse transform
      if (best_tx_type != DCT_DCT)
        vp9_short_iht4x4_add(best_dqcoeff[idy * 2 + idx], dst,
                             xd->plane[0].dst.stride, best_tx_type);
      else
        xd->inv_txm4x4_add(best_dqcoeff[idy * 2 + idx], dst,
                           xd->plane[0].dst.stride);
    }
Scott LaVarnway's avatar
Scott LaVarnway committed
759
  }
John Koleszar's avatar
John Koleszar committed
760

John Koleszar's avatar
John Koleszar committed
761
  return best_rd;
John Koleszar's avatar
John Koleszar committed
762 763
}

764 765
static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
                                         int *Rate, int *rate_y,
766
                                         int *Distortion, int64_t best_rd) {
767
  int i, j;
John Koleszar's avatar
John Koleszar committed
768
  MACROBLOCKD *const xd = &mb->e_mbd;
769 770 771 772
  BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
  int bw = 1 << b_width_log2(bsize);
  int bh = 1 << b_height_log2(bsize);
  int idx, idy;
773
  int cost = 0;
John Koleszar's avatar
John Koleszar committed
774 775 776
  int distortion = 0;
  int tot_rate_y = 0;
  int64_t total_rd = 0;
777
  ENTROPY_CONTEXT t_above[4], t_left[4];
John Koleszar's avatar
John Koleszar committed
778
  int *bmode_costs;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
779
  MODE_INFO *const mic = xd->mode_info_context;
John Koleszar's avatar
John Koleszar committed
780

781 782
  vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
  vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
John Koleszar's avatar
John Koleszar committed
783

784
  bmode_costs = mb->mbmode_cost;
785

786 787 788
  for (idy = 0; idy < 2; idy += bh) {
    for (idx = 0; idx < 2; idx += bw) {
      const int mis = xd->mode_info_stride;
Yaowu Xu's avatar
Yaowu Xu committed