vp9_rdopt.c 116 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
John Koleszar's avatar
John Koleszar committed
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5 6
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
John Koleszar's avatar
John Koleszar committed
9 10 11 12 13 14 15
 */


#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
16

17
#include "vp9/common/vp9_pragmas.h"
18 19 20 21 22
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
23 24 25 26 27
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
28 29 30 31 32 33
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
34
#include "vpx_mem/vpx_mem.h"
35 36 37 38 39
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
40
#include "vp9_rtcd.h"
41
#include "vp9/common/vp9_mvref_common.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
42
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
43

44 45
#define INVALID_MV 0x80008000

46 47 48
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

49 50 51
DECLARE_ALIGNED(16, extern const uint8_t,
                vp9_pt_energy_class[MAX_ENTROPY_TOKENS]);

Ronald S. Bultje's avatar
Ronald S. Bultje committed
52
#define I4X4_PRED 0x8000
Ronald S. Bultje's avatar
Ronald S. Bultje committed
53
#define SPLITMV 0x10000
Ronald S. Bultje's avatar
Ronald S. Bultje committed
54

55
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
56 57
  {ZEROMV,    LAST_FRAME,   NONE},
  {DC_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
58

59 60
  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARMV,    LAST_FRAME,   NONE},
John Koleszar's avatar
John Koleszar committed
61

62 63
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
64

65 66
  {ZEROMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
67

68 69
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
70

71 72 73 74 75 76 77 78
  {V_PRED,    INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
79

80
  {TM_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
81

82 83 84
  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
85

86 87 88
  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},
89

Ronald S. Bultje's avatar
Ronald S. Bultje committed
90
  {I4X4_PRED, INTRA_FRAME,  NONE},
91

John Koleszar's avatar
John Koleszar committed
92 93 94 95
  /* compound prediction modes */
  {ZEROMV,    LAST_FRAME,   GOLDEN_FRAME},
  {NEARESTMV, LAST_FRAME,   GOLDEN_FRAME},
  {NEARMV,    LAST_FRAME,   GOLDEN_FRAME},
96

John Koleszar's avatar
John Koleszar committed
97 98 99
  {ZEROMV,    ALTREF_FRAME, LAST_FRAME},
  {NEARESTMV, ALTREF_FRAME, LAST_FRAME},
  {NEARMV,    ALTREF_FRAME, LAST_FRAME},
100

John Koleszar's avatar
John Koleszar committed
101 102 103
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
104

John Koleszar's avatar
John Koleszar committed
105 106 107
  {NEWMV,     LAST_FRAME,   GOLDEN_FRAME},
  {NEWMV,     ALTREF_FRAME, LAST_FRAME  },
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},
108

John Koleszar's avatar
John Koleszar committed
109 110
  {SPLITMV,   LAST_FRAME,   GOLDEN_FRAME},
  {SPLITMV,   ALTREF_FRAME, LAST_FRAME  },
111
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},
John Koleszar's avatar
John Koleszar committed
112 113
};

114 115 116
// Builds the coefficient token cost tables from the model probabilities in
// `p`, for every transform size, block type, reference type, coefficient
// band and previous-token context.
//   cnoskip - receives the costs produced by vp9_cost_tokens().
//   c       - receives the costs produced by vp9_cost_tokens_skip(), or,
//             when CONFIG_BALANCED_COEFTREE is set, by vp9_cost_tokens()
//             with the eob node probability forced to 1.
static void fill_token_costs(vp9_coeff_count (*c)[BLOCK_TYPES],
                             vp9_coeff_count (*cnoskip)[BLOCK_TYPES],
                             vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
  TX_SIZE tx;
  int type, ref, band, ctx;

  for (tx = TX_4X4; tx <= TX_32X32; tx++) {
    for (type = 0; type < BLOCK_TYPES; type++) {
      for (ref = 0; ref < REF_TYPES; ref++) {
        for (band = 0; band < COEF_BANDS; band++) {
          for (ctx = 0; ctx < PREV_COEF_CONTEXTS; ctx++) {
            vp9_prob full_probs[ENTROPY_NODES];
            int *const skip_costs = (int *)c[tx][type][ref][band][ctx];
            int *const noskip_costs = (int *)cnoskip[tx][type][ref][band][ctx];

            vp9_model_to_full_probs(p[tx][type][ref][band][ctx], full_probs);
            vp9_cost_tokens(noskip_costs, full_probs, vp9_coef_tree);
#if CONFIG_BALANCED_COEFTREE
            // Replace the eob node prob with a very small value so that the
            // cost approximately equals the cost without the eob node
            full_probs[1] = 1;
            vp9_cost_tokens(skip_costs, full_probs, vp9_coef_tree);
#else
            vp9_cost_tokens_skip(skip_costs, full_probs, vp9_coef_tree);
            // Both tables are expected to agree on the end-of-block cost.
            assert(c[tx][type][ref][band][ctx][DCT_EOB_TOKEN] ==
                   cnoskip[tx][type][ref][band][ctx][DCT_EOB_TOKEN]);
#endif
          }
        }
      }
    }
  }
}

142 143 144 145
// Per-ratio adjustment factors, indexed by twopass.next_iiratio (ratios
// above 31 use the last entry). vp9_initialize_rd_consts() scales RDMULT by
// factor/16 in the second pass on non-key frames. The table is read-only.
static const int rd_iifactor[32] =  { 4, 4, 3, 2, 1, 0, 0, 0,
                                      0, 0, 0, 0, 0, 0, 0, 0,
                                      0, 0, 0, 0, 0, 0, 0, 0,
                                      0, 0, 0, 0, 0, 0, 0, 0, };
John Koleszar's avatar
John Koleszar committed
146

147
// 3* dc_qlookup[Q]*dc_qlookup[Q];
148

149
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
150 151 152
// SAD-per-bit lookup tables, one entry per quantizer index. Filled by
// vp9_init_me_luts() and read by vp9_initialize_me_consts().
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

153
void vp9_init_me_luts() {
John Koleszar's avatar
John Koleszar committed
154 155 156 157 158 159 160
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    sad_per_bit16lut[i] =
161
      (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107);
162
    sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742);
John Koleszar's avatar
John Koleszar committed
163
  }
Paul Wilkins's avatar
Paul Wilkins committed
164
}
John Koleszar's avatar
John Koleszar committed
165

166
// Returns the base rate-distortion multiplier for `qindex`:
// (11 * q * q) >> 2, where q = vp9_dc_quant(qindex, 0).
static int compute_rd_mult(int qindex) {
  const int dc_q = vp9_dc_quant(qindex, 0);
  const int scaled = 11 * dc_q * dc_q;

  return scaled >> 2;
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
171 172 173
// Loads the SAD-per-bit constants for `qindex` from the lookup tables into
// cpi->mb. `qindex` is used directly as a table index and is not range
// checked here.
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  MACROBLOCK *const mb = &cpi->mb;

  mb->sadperbit16 = sad_per_bit16lut[qindex];
  mb->sadperbit4 = sad_per_bit4lut[qindex];
}

176

Dmitry Kovalev's avatar
Dmitry Kovalev committed
177
// Initializes the rate-distortion constants and cost tables for a frame
// coded at `qindex` (clamped to [0, MAXQ]):
//   - RDMULT / RDDIV and mb.errorperbit,
//   - the per-mode RD thresholds (rd_threshes / rd_baseline_thresh),
//   - coefficient token costs, partition costs and mode costs,
//   - motion vector costs, on non-key frames only.
// Also calls vp9_set_speed_features(), whose thresh_mult values feed the
// thresholds computed here.
void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
  int q, i;

  vp9_clear_system_state();  // __asm emms;

  // TODO: further tests required to see if the optimum is different
  // for key frames, golden frames and arf frames.
  qindex = clamp(qindex, 0, MAXQ);

  cpi->RDMULT = compute_rd_mult(qindex);
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    // Ratios beyond the end of the table share its last entry.
    const int iifactor = cpi->twopass.next_iiratio > 31
                             ? rd_iifactor[31]
                             : rd_iifactor[cpi->twopass.next_iiratio];
    cpi->RDMULT += (cpi->RDMULT * iifactor) >> 4;
  }
  cpi->mb.errorperbit = cpi->RDMULT >> 6;
  // Never let errorperbit be zero.
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);

  vp9_set_speed_features(cpi);

  q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25);
  q <<= 2;
  if (q < 8)
    q = 8;

  if (cpi->RDMULT > 1000) {
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;

    for (i = 0; i < MAX_MODES; i++) {
      if (cpi->sf.thresh_mult[i] < INT_MAX) {
        // Multiply in 64 bits: the guard above only excludes INT_MAX itself,
        // so the product could otherwise overflow int before the division.
        const int64_t thresh = (int64_t)cpi->sf.thresh_mult[i] * q / 100;
        cpi->rd_threshes[i] = thresh < INT_MAX ? (int)thresh : INT_MAX;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }
      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  } else {
    cpi->RDDIV = 100;

    for (i = 0; i < MAX_MODES; i++) {
      // Saturate instead of overflowing when the product would not fit.
      if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) {
        cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }
      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  }

  fill_token_costs(cpi->mb.token_costs,
                   cpi->mb.token_costs_noskip,
                   cpi->common.fc.coef_probs);

  for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
    vp9_cost_tokens(cpi->mb.partition_cost[i],
                    cpi->common.fc.partition_prob[i],
                    vp9_partition_tree);

  /*rough estimate for costing*/
  vp9_init_mode_costs(cpi);

  if (cpi->common.frame_type != KEY_FRAME) {
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cpi->mb.e_mbd.allow_high_precision_mv ?
        cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cpi->common.fc.nmvc,
        cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
  }
}

253
// Returns the sum of squared differences between the first `block_size`
// entries of `coeff` and `dqcoeff`.
// The sum is accumulated in 64 bits and saturated to INT_MAX, so large
// differences cannot trigger signed integer overflow.
int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) {
  int64_t error = 0;
  int i;

  for (i = 0; i < block_size; i++) {
    const int64_t this_diff = coeff[i] - dqcoeff[i];
    error += this_diff * this_diff;
  }

  return error > INT_MAX ? INT_MAX : (int)error;
}

264
static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
265
                              int plane, int block, PLANE_TYPE type,
266 267
                              ENTROPY_CONTEXT *A,
                              ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
268 269
                              TX_SIZE tx_size,
                              int y_blocks) {
270
  MACROBLOCKD *const xd = &mb->e_mbd;
271 272
  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
  int pt;
273
  int c = 0;
274 275
  int cost = 0, pad;
  const int *scan, *nb;
276 277 278
  const int eob = xd->plane[plane].eobs[block];
  const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff,
                                           block, 16);
279
  const int ref = mbmi->ref_frame != INTRA_FRAME;
280
  unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
281
      mb->token_costs[tx_size][type][ref];
282
  ENTROPY_CONTEXT above_ec, left_ec;
283
  TX_TYPE tx_type = DCT_DCT;
284

285
  const int segment_id = xd->mode_info_context->mbmi.segment_id;
286 287 288
  unsigned int (*token_costs_noskip)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
      mb->token_costs_noskip[tx_size][type][ref];

289 290
  int seg_eob, default_eob;
  uint8_t token_cache[1024];
Paul Wilkins's avatar
Paul Wilkins committed
291
  const uint8_t * band_translate;
292 293

  // Check for consistency of tx_size with mode info
294
  assert((!type && !plane) || (type && plane));
295 296 297 298 299 300 301
  if (type == PLANE_TYPE_Y_WITH_DC) {
    assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
  } else {
    TX_SIZE tx_size_uv = get_uv_tx_size(xd);
    assert(tx_size == tx_size_uv);
  }

302
  switch (tx_size) {
303
    case TX_4X4: {
304
      tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
305
          get_tx_type_4x4(xd, block) : DCT_DCT;
306 307
      above_ec = A[0] != 0;
      left_ec = L[0] != 0;
308
      seg_eob = 16;
309
      scan = get_scan_4x4(tx_type);
Paul Wilkins's avatar
Paul Wilkins committed
310
      band_translate = vp9_coefband_trans_4x4;
Daniel Kang's avatar
Daniel Kang committed
311
      break;
312
    }
313 314
    case TX_8X8: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
315
      const int sz = 1 + b_width_log2(sb_type);
316
      const int x = block & ((1 << sz) - 1), y = block - x;
317 318
      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT;
319 320
      above_ec = (A[0] + A[1]) != 0;
      left_ec = (L[0] + L[1]) != 0;
321
      scan = get_scan_8x8(tx_type);
322
      seg_eob = 64;
Paul Wilkins's avatar
Paul Wilkins committed
323
      band_translate = vp9_coefband_trans_8x8plus;
Daniel Kang's avatar
Daniel Kang committed
324
      break;
325 326 327
    }
    case TX_16X16: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
328
      const int sz = 2 + b_width_log2(sb_type);
329
      const int x = block & ((1 << sz) - 1), y = block - x;
330 331
      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT;
332
      scan = get_scan_16x16(tx_type);
333
      seg_eob = 256;
334 335
      above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
Paul Wilkins's avatar
Paul Wilkins committed
336
      band_translate = vp9_coefband_trans_8x8plus;
Daniel Kang's avatar
Daniel Kang committed
337
      break;
338
    }
339
    case TX_32X32:
Paul Wilkins's avatar
Paul Wilkins committed
340
      scan = vp9_default_scan_32x32;
341
      seg_eob = 1024;
342 343
      above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;
Paul Wilkins's avatar
Paul Wilkins committed
344
      band_translate = vp9_coefband_trans_8x8plus;
345
      break;
Daniel Kang's avatar
Daniel Kang committed
346
    default:
347
      abort();
Daniel Kang's avatar
Daniel Kang committed
348 349
      break;
  }
John Koleszar's avatar
John Koleszar committed
350
  assert(eob <= seg_eob);
351

352
  pt = combine_entropy_contexts(above_ec, left_ec);
353 354
  nb = vp9_get_coef_neighbors_handle(scan, &pad);
  default_eob = seg_eob;
355

356 357
  if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
    seg_eob = 0;
358

359 360 361 362
  /* sanity check to ensure that we do not have spurious non-zero q values */
  if (eob < seg_eob)
    assert(qcoeff_ptr[scan[eob]] == 0);

363
  {
364
    for (c = 0; c < eob; c++) {
365
      int v = qcoeff_ptr[scan[c]];
366
      int t = vp9_dct_value_tokens_ptr[v].token;
Paul Wilkins's avatar
Paul Wilkins committed
367
      int band = get_coef_band(band_translate, c);
368 369
      if (c)
        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
370

371 372 373 374
      if (!c || token_cache[scan[c - 1]])  // do not skip eob
        cost += token_costs_noskip[band][pt][t] + vp9_dct_value_cost_ptr[v];
      else
        cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
375
      token_cache[scan[c]] = vp9_pt_energy_class[t];
376
    }
377 378 379
    if (c < seg_eob) {
      if (c)
        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
380 381 382
      cost += mb->token_costs_noskip[tx_size][type][ref]
          [get_coef_band(band_translate, c)]
          [pt][DCT_EOB_TOKEN];
383
    }
384 385
  }

386 387 388
  // is eob first coefficient;
  for (pt = 0; pt < (1 << tx_size); pt++) {
    A[pt] = L[pt] = c > 0;
389
  }
390

391 392 393
  return cost;
}

394
// Picks the transform size for the current block from per-size RD results
// and fills the per-transform-mode RD cache.
//   r[n][0]        - in: rate for transform size n without size signalling.
//   r[n][1]        - out: r[n][0] plus the cost of signalling size n.
//   d[n], s[n]     - in: distortion and skip flag for size n.
//   rate, distortion, skip - out: values for the chosen size.
//   txfm_cache     - out: RD cost for each transform mode.
//   max_txfm_size  - largest size for which r/d/s hold valid data.
// The chosen size is also written to the block's mbmi.txfm_size.
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                     int (*r)[2], int *rate,
                                     int *d, int *distortion,
                                     int *s, int *skip,
                                     int64_t txfm_cache[NB_TXFM_MODES],
                                     TX_SIZE max_txfm_size) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  vp9_prob skip_prob = vp9_get_pred_prob(cm, xd, PRED_MBSKIP);
  int64_t rd[TX_SIZE_MAX_SB][2];
  int tx, bit;
  int noskip_cost, skip_cost;
  int select_32x32, select_16x16;
  TX_SIZE chosen;

  // Rate including the transform size signalling: one bit per size up to
  // the coded one, with no terminating bit for the largest allowed size.
  for (tx = TX_4X4; tx <= max_txfm_size; tx++) {
    const int last_bit = tx - (tx == max_txfm_size);
    int with_size = r[tx][0];

    for (bit = 0; bit <= last_bit; bit++) {
      if (bit == tx)
        with_size += vp9_cost_zero(cm->prob_tx[bit]);
      else
        with_size += vp9_cost_one(cm->prob_tx[bit]);
    }
    r[tx][1] = with_size;
  }

  assert(skip_prob > 0);
  noskip_cost = vp9_cost_bit(skip_prob, 0);
  skip_cost = vp9_cost_bit(skip_prob, 1);

  // RD cost per size, without ([0]) and with ([1]) size signalling. A
  // skipped block only pays for the skip flag.
  for (tx = TX_4X4; tx <= max_txfm_size; tx++) {
    if (s[tx]) {
      rd[tx][0] = rd[tx][1] = RDCOST(x->rdmult, x->rddiv, skip_cost, d[tx]);
    } else {
      rd[tx][0] = RDCOST(x->rdmult, x->rddiv, r[tx][0] + noskip_cost, d[tx]);
      rd[tx][1] = RDCOST(x->rdmult, x->rddiv, r[tx][1] + noskip_cost, d[tx]);
    }
  }

  // A larger size wins under TX_MODE_SELECT only when it strictly beats
  // every smaller size. The size guards come first so that rd[] entries
  // above max_txfm_size are never read.
  select_32x32 = max_txfm_size == TX_32X32 &&
                 rd[TX_32X32][1] < rd[TX_16X16][1] &&
                 rd[TX_32X32][1] < rd[TX_8X8][1] &&
                 rd[TX_32X32][1] < rd[TX_4X4][1];
  select_16x16 = max_txfm_size >= TX_16X16 &&
                 rd[TX_16X16][1] < rd[TX_8X8][1] &&
                 rd[TX_16X16][1] < rd[TX_4X4][1];

  switch (cm->txfm_mode) {
    case TX_MODE_SELECT:
      if (select_32x32)
        chosen = TX_32X32;
      else if (select_16x16)
        chosen = TX_16X16;
      else if (rd[TX_8X8][1] < rd[TX_4X4][1])
        chosen = TX_8X8;
      else
        chosen = TX_4X4;
      break;
    case ALLOW_32X32:
      if (max_txfm_size == TX_32X32)
        chosen = TX_32X32;
      else if (max_txfm_size >= TX_16X16)
        chosen = TX_16X16;
      else
        chosen = TX_8X8;
      break;
    case ALLOW_16X16:
      chosen = (max_txfm_size >= TX_16X16) ? TX_16X16 : TX_8X8;
      break;
    case ALLOW_8X8:
      chosen = TX_8X8;
      break;
    default:
      chosen = TX_4X4;
      break;
  }
  mbmi->txfm_size = chosen;

  *distortion = d[chosen];
  *rate       = r[chosen][cm->txfm_mode == TX_MODE_SELECT];
  *skip       = s[chosen];

  txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
  txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
  txfm_cache[ALLOW_16X16] = rd[MIN(max_txfm_size, TX_16X16)][0];
  txfm_cache[ALLOW_32X32] = rd[MIN(max_txfm_size, TX_32X32)][0];
  if (select_32x32)
    txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  else if (select_16x16)
    txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
  else
    txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
                                 rd[TX_4X4][1] : rd[TX_8X8][1];
}

473 474
// Returns the sum of squared differences between the first `block_size`
// entries of `coeff` and `dqcoeff`, shifted right by `shift` and saturated
// to INT_MAX.
static int block_error(int16_t *coeff, int16_t *dqcoeff,
                       int block_size, int shift) {
  int64_t sse = 0;
  int n = 0;

  while (n < block_size) {
    const int delta = coeff[n] - dqcoeff[n];
    // Unsigned multiply: the square of a 16-bit difference fits in 32 bits.
    sse += (unsigned)delta * delta;
    ++n;
  }
  sse >>= shift;

  if (sse > INT_MAX)
    return INT_MAX;
  return (int)sse;
}

487 488
// Returns block_error() over the luma plane (plane 0) coefficients of a
// block of size `bsize`, with the result shifted right by `shift`.
static int block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
  const int bwl = b_width_log2(bsize);
  const int bhl = b_height_log2(bsize);
  // 16 coefficients per 4x4 unit.
  const int num_coeffs = 16 << (bwl + bhl);

  return block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff,
                     num_coeffs, shift);
}
492

493 494 495 496
// Returns the combined block_error() of planes 1..MAX_MB_PLANE-1 for a
// block of size `bsize`. The per-plane errors are summed unshifted; the
// total is then shifted right by `shift` and saturated to INT_MAX.
static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
  const int bwl = b_width_log2(bsize);
  const int bhl = b_height_log2(bsize);
  int64_t total = 0;
  int plane = 1;

  while (plane < MAX_MB_PLANE) {
    // Each subsampled dimension halves the coefficient count of the plane.
    const int ss = x->e_mbd.plane[plane].subsampling_x +
                   x->e_mbd.plane[plane].subsampling_y;
    const int num_coeffs = 16 << (bwl + bhl - ss);

    total += block_error(x->plane[plane].coeff, x->e_mbd.plane[plane].dqcoeff,
                         num_coeffs, 0);
    ++plane;
  }
  total >>= shift;

  if (total > INT_MAX)
    return INT_MAX;
  return (int)total;
}

508 509
// Returns the summed cost_coeffs() of every transform block of size
// `tx_size` in plane `plane` of a block of size `bsize`.
// Costing runs on local copies of the plane's above/left entropy contexts,
// so the contexts stored in `xd` are not modified here.
static int rdcost_plane(VP9_COMMON *const cm, MACROBLOCK *x,
                        int plane, BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
  const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y;
  const int cols = 1 << bwl;
  const int rows = 1 << bhl;
  const int num_blocks = cols * rows;
  const int block_step = 1 << (tx_size * 2);
  ENTROPY_CONTEXT above_ctx[16], left_ctx[16];
  int total_cost = 0;
  int block;

  vpx_memcpy(above_ctx, xd->plane[plane].above_context,
             sizeof(ENTROPY_CONTEXT) * cols);
  vpx_memcpy(left_ctx, xd->plane[plane].left_context,
             sizeof(ENTROPY_CONTEXT) * rows);

  for (block = 0; block < num_blocks; block += block_step) {
    int x_idx, y_idx;

    txfrm_block_to_raster_xy(xd, bsize, plane, block, tx_size * 2,
                             &x_idx, &y_idx);

    total_cost += cost_coeffs(cm, x, plane, block,
                              xd->plane[plane].plane_type,
                              above_ctx + x_idx, left_ctx + y_idx,
                              tx_size, num_blocks);
  }

  return total_cost;
}

537 538 539
// Returns the summed rdcost_plane() of planes 1..MAX_MB_PLANE-1 for a block
// of size `bsize` coded with transform size `tx_size`.
static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x,
                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  int total = 0;
  int plane = 1;

  while (plane < MAX_MB_PLANE) {
    total += rdcost_plane(cm, x, plane, bsize, tx_size);
    ++plane;
  }

  return total;
}

547 548 549
// Computes rate, distortion and skippability of the luma plane for one
// transform size. Sets the block's mbmi.txfm_size to `tx_size` and runs the
// forward transform and quantization via vp9_xform_quant_sby() before
// measuring.
static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
                                     int *rate, int *distortion, int *skippable,
                                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  // The error is shifted right by 2 for every size except 32x32.
  const int error_shift = (tx_size == TX_32X32) ? 0 : 2;

  xd->mode_info_context->mbmi.txfm_size = tx_size;
  vp9_xform_quant_sby(cm, x, bsize);

  *distortion = block_error_sby(x, bsize, error_shift);
  *rate       = rdcost_plane(cm, x, 0, bsize, tx_size);
  *skippable  = vp9_sby_is_skippable(xd, bsize);
}

559 560
static void super_block_yrd(VP9_COMP *cpi,
                            MACROBLOCK *x, int *rate, int *distortion,
561
                            int *skip, BLOCK_SIZE_TYPE bs,
562
                            int64_t txfm_cache[NB_TXFM_MODES]) {
563 564
  VP9_COMMON *const cm = &cpi->common;
  int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB];
Jim Bankoski's avatar
Jim Bankoski committed
565 566
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
567

568
  vp9_subtract_sby(x, bs);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
569

Jim Bankoski's avatar
Jim Bankoski committed
570 571 572 573 574 575 576 577 578 579 580 581 582 583
  if (cpi->speed > 4) {
    if (bs >= BLOCK_SIZE_SB32X32) {
      mbmi->txfm_size = TX_32X32;
    } else if (bs >= BLOCK_SIZE_MB16X16) {
      mbmi->txfm_size = TX_16X16;
    } else if (bs >= BLOCK_SIZE_SB8X8) {
      mbmi->txfm_size = TX_8X8;
    } else {
      mbmi->txfm_size = TX_4X4;
    }
    super_block_yrd_for_txfm(cm, x, rate, distortion, skip, bs,
                             mbmi->txfm_size);
    return;
  }
584
  if (bs >= BLOCK_SIZE_SB32X32)
585 586
    super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32],
                             bs, TX_32X32);
587
  if (bs >= BLOCK_SIZE_MB16X16)
588 589 590 591 592 593
    super_block_yrd_for_txfm(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16],
                             bs, TX_16X16);
  super_block_yrd_for_txfm(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], bs,
                           TX_8X8);
  super_block_yrd_for_txfm(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], bs,
                           TX_4X4);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
594 595

  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
596
                           TX_32X32 - (bs < BLOCK_SIZE_SB32X32)
Jingning Han's avatar
Jingning Han committed
597
                           - (bs < BLOCK_SIZE_MB16X16));
Ronald S. Bultje's avatar
Ronald S. Bultje committed
598
}
Ronald S. Bultje's avatar
Ronald S. Bultje committed
599

600
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
Yaowu Xu's avatar
Yaowu Xu committed
601
                                     MB_PREDICTION_MODE *best_mode,
602 603 604
                                     int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
605 606 607
                                     int *bestdistortion,
                                     BLOCK_SIZE_TYPE bsize) {
  MB_PREDICTION_MODE mode;
Deb Mukherjee's avatar
Deb Mukherjee committed
608
  MACROBLOCKD *xd = &x->e_mbd;
609
  int64_t best_rd = INT64_MAX;
John Koleszar's avatar
John Koleszar committed
610 611
  int rate = 0;
  int distortion;
612
  VP9_COMMON *const cm = &cpi->common;
John Koleszar's avatar
John Koleszar committed
613
  const int src_stride = x->plane[0].src.stride;
614 615 616 617 618
  uint8_t *src, *dst;
  int16_t *src_diff, *coeff;

  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];
Deb Mukherjee's avatar
Deb Mukherjee committed
619 620
  TX_TYPE tx_type = DCT_DCT;
  TX_TYPE best_tx_type = DCT_DCT;
621 622 623 624
  int bw = 1 << b_width_log2(bsize);
  int bh = 1 << b_height_log2(bsize);
  int idx, idy, block;
  DECLARE_ALIGNED(16, int16_t, best_dqcoeff[4][16]);
John Koleszar's avatar
John Koleszar committed
625

Jingning Han's avatar
Jingning Han committed
626
  assert(ib < 4);
627

628 629
  vpx_memcpy(ta, a, sizeof(ta));
  vpx_memcpy(tl, l, sizeof(tl));
630
  xd->mode_info_context->mbmi.txfm_size = TX_4X4;
631 632

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
633
    int64_t this_rd;
634
    int ratey = 0;
635

636
    rate = bmode_costs[mode];
637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653
    distortion = 0;

    vpx_memcpy(tempa, ta, sizeof(ta));
    vpx_memcpy(templ, tl, sizeof(tl));

    for (idy = 0; idy < bh; ++idy) {
      for (idx = 0; idx < bw; ++idx) {
        block = ib + idy * 2 + idx;
        xd->mode_info_context->bmi[block].as_mode.first = mode;
        src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
                                        x->plane[0].src.buf, src_stride);
        src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, block,
                                             x->plane[0].src_diff);
        coeff = BLOCK_OFFSET(x->plane[0].coeff, block, 16);
        dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
                                        xd->plane[0].dst.buf,
                                        xd->plane[0].dst.stride);
654 655
        vp9_intra4x4_predict(xd, block, BLOCK_SIZE_SB8X8, mode,
                             dst, xd->plane[0].dst.stride);
656 657 658 659 660 661 662 663 664 665 666 667
        vp9_subtract_block(4, 4, src_diff, 8,
                           src, src_stride,
                           dst, xd->plane[0].dst.stride);

        tx_type = get_tx_type_4x4(xd, block);
        if (tx_type != DCT_DCT) {
          vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
          x->quantize_b_4x4(x, block, tx_type, 16);
        } else {
          x->fwd_txm4x4(src_diff, coeff, 16);
          x->quantize_b_4x4(x, block, tx_type, 16);
        }
John Koleszar's avatar
John Koleszar committed
668

669 670 671 672
        ratey += cost_coeffs(cm, x, 0, block, PLANE_TYPE_Y_WITH_DC,
                             tempa + idx, templ + idy, TX_4X4, 16);
        distortion += vp9_block_error(coeff, BLOCK_OFFSET(xd->plane[0].dqcoeff,
                                                         block, 16), 16) >> 2;
John Koleszar's avatar
John Koleszar committed
673

674 675 676 677 678 679 680 681
        if (best_tx_type != DCT_DCT)
          vp9_short_iht4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
                               dst, xd->plane[0].dst.stride, best_tx_type);
        else
          xd->inv_txm4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
                             dst, xd->plane[0].dst.stride);
      }
    }
Jingning Han's avatar
Jingning Han committed
682

683 684
    rate += ratey;
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
Jingning Han's avatar
Jingning Han committed
685

686 687 688 689 690 691 692
    if (this_rd < best_rd) {
      *bestrate = rate;
      *bestratey = ratey;
      *bestdistortion = distortion;
      best_rd = this_rd;
      *best_mode = mode;
      best_tx_type = tx_type;
693 694 695 696 697 698 699 700 701 702
      vpx_memcpy(a, tempa, sizeof(tempa));
      vpx_memcpy(l, templ, sizeof(templ));
      for (idy = 0; idy < bh; ++idy) {
        for (idx = 0; idx < bw; ++idx) {
          block = ib + idy * 2 + idx;
          vpx_memcpy(best_dqcoeff[idy * 2 + idx],
                     BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
                     sizeof(best_dqcoeff[0]));
        }
      }
John Koleszar's avatar
John Koleszar committed
703
    }
John Koleszar's avatar
John Koleszar committed
704
  }
705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723

  for (idy = 0; idy < bh; ++idy) {
    for (idx = 0; idx < bw; ++idx) {
      block = ib + idy * 2 + idx;
      xd->mode_info_context->bmi[block].as_mode.first = *best_mode;
      dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
                                      xd->plane[0].dst.buf,
                                      xd->plane[0].dst.stride);

      vp9_intra4x4_predict(xd, block, BLOCK_SIZE_SB8X8, *best_mode,
                           dst, xd->plane[0].dst.stride);
      // inverse transform
      if (best_tx_type != DCT_DCT)
        vp9_short_iht4x4_add(best_dqcoeff[idy * 2 + idx], dst,
                             xd->plane[0].dst.stride, best_tx_type);
      else
        xd->inv_txm4x4_add(best_dqcoeff[idy * 2 + idx], dst,
                           xd->plane[0].dst.stride);
    }
Scott LaVarnway's avatar
Scott LaVarnway committed
724
  }
John Koleszar's avatar
John Koleszar committed
725

John Koleszar's avatar
John Koleszar committed
726
  return best_rd;
John Koleszar's avatar
John Koleszar committed
727 728
}

729 730
static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
                                         int *Rate, int *rate_y,
731
                                         int *Distortion, int64_t best_rd) {
732
  int i, j;
John Koleszar's avatar
John Koleszar committed
733
  MACROBLOCKD *const xd = &mb->e_mbd;
734 735 736 737
  BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
  int bw = 1 << b_width_log2(bsize);
  int bh = 1 << b_height_log2(bsize);
  int idx, idy;
738
  int cost = 0;
John Koleszar's avatar
John Koleszar committed
739 740 741
  int distortion = 0;
  int tot_rate_y = 0;
  int64_t total_rd = 0;
742
  ENTROPY_CONTEXT t_above[4], t_left[4];
John Koleszar's avatar
John Koleszar committed
743
  int *bmode_costs;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
744
  MODE_INFO *const mic = xd->mode_info_context;
John Koleszar's avatar
John Koleszar committed
745

746 747
  vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
  vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
John Koleszar's avatar
John Koleszar committed
748

749
  bmode_costs = mb->mbmode_cost;
750

751 752 753
  for (idy = 0; idy < 2; idy += bh) {
    for (idx = 0; idx < 2; idx += bw) {
      const int mis = xd->mode_info_stride;
Yaowu Xu's avatar
Yaowu Xu committed
754
      MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
755 756 757 758 759 760 761 762 763
      int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry);
      int UNINITIALIZED_IS_SAFE(d);
      i = idy * 2 + idx;

      if (xd->frame_type == KEY_FRAME) {
        const MB_PREDICTION_MODE A = above_block_mode(mic, i, mis);
        const MB_PREDICTION_MODE L = (xd->left_available || idx) ?
                                     left_block_mode(mic, i) : DC_PRED;

764
        bmode_costs  = mb->y_mode_costs[A][L];
765
      }
766

767 768 769 770 771 772 773 774 775 776 777 778 779 780 781
      total_rd += rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
                                        t_above + idx, t_left + idy,
                                        &r, &ry, &d, bsize);
      cost += r;
      distortion += d;
      tot_rate_y += ry;

      mic->bmi[i].as_mode.first = best_mode;
      for (j = 1; j < bh; ++j)
        mic->bmi[i + j * 2].as_mode.first = best_mode;
      for (j = 1; j < bw; ++j)
        mic->bmi[i + j].as_mode.first = best_mode;

      if (total_rd >= best_rd)
        break;
John Koleszar's avatar
John Koleszar committed
782 783
    }
  }
John Koleszar's avatar
John Koleszar committed
784

785
  if (total_rd >= best_rd)
786
    return INT64_MAX;
787

John Koleszar's avatar
John Koleszar committed
788
  *Rate = cost;
789
  *rate_y = tot_rate_y;
John Koleszar's avatar
John Koleszar committed
790
  *Distortion = distortion;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
791
  xd->mode_info_context->mbmi.mode = mic->bmi[3].as_mode.first;
John Koleszar's avatar
John Koleszar committed
792

John Koleszar's avatar
John Koleszar committed
793
  return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);</