/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
16

17
#include "vp9/common/vp9_pragmas.h"
18 19 20 21 22
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
23 24 25 26 27
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
28 29 30 31 32 33
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
34
#include "vpx_mem/vpx_mem.h"
35 36 37 38 39
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
40
#include "vp9_rtcd.h"
41
#include "vp9/common/vp9_mvref_common.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
42
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
43

44 45
#define INVALID_MV 0x80008000

46 47 48
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

49
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
50 51
  {ZEROMV,    LAST_FRAME,   NONE},
  {DC_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
52

53 54
  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARMV,    LAST_FRAME,   NONE},
John Koleszar's avatar
John Koleszar committed
55

56 57
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
58

59 60
  {ZEROMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
61

62 63
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
64

65 66 67 68 69 70 71 72
  {V_PRED,    INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
73

74
  {TM_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
75

76 77 78
  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
79

80 81 82
  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},
83

Yaowu Xu's avatar
Yaowu Xu committed
84
  {I4X4_PRED,    INTRA_FRAME,  NONE},
85

John Koleszar's avatar
John Koleszar committed
86 87 88 89
  /* compound prediction modes */
  {ZEROMV,    LAST_FRAME,   GOLDEN_FRAME},
  {NEARESTMV, LAST_FRAME,   GOLDEN_FRAME},
  {NEARMV,    LAST_FRAME,   GOLDEN_FRAME},
90

John Koleszar's avatar
John Koleszar committed
91 92 93
  {ZEROMV,    ALTREF_FRAME, LAST_FRAME},
  {NEARESTMV, ALTREF_FRAME, LAST_FRAME},
  {NEARMV,    ALTREF_FRAME, LAST_FRAME},
94

John Koleszar's avatar
John Koleszar committed
95 96 97
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
98

John Koleszar's avatar
John Koleszar committed
99 100 101
  {NEWMV,     LAST_FRAME,   GOLDEN_FRAME},
  {NEWMV,     ALTREF_FRAME, LAST_FRAME  },
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},
102

John Koleszar's avatar
John Koleszar committed
103 104
  {SPLITMV,   LAST_FRAME,   GOLDEN_FRAME},
  {SPLITMV,   ALTREF_FRAME, LAST_FRAME  },
105
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},
John Koleszar's avatar
John Koleszar committed
106 107
};

108
static void fill_token_costs(vp9_coeff_count *c,
109
                             vp9_coeff_probs_model *p,
110
                             TX_SIZE tx_size) {
111
  int i, j, k, l;
John Koleszar's avatar
John Koleszar committed
112

113
  for (i = 0; i < BLOCK_TYPES; i++)
114 115
    for (j = 0; j < REF_TYPES; j++)
      for (k = 0; k < COEF_BANDS; k++)
116 117
        for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
          vp9_prob probs[ENTROPY_NODES];
118
          vp9_model_to_full_probs(p[i][j][k][l], probs);
119 120 121
          vp9_cost_tokens_skip((int *)c[i][j][k][l], probs,
                               vp9_coef_tree);
        }
122 123
}

124 125 126 127
// Boost factors, in sixteenths, applied to RDMULT in
// vp9_initialize_rd_consts(): RDMULT += (RDMULT * rd_iifactor[n]) >> 4,
// where n is twopass.next_iiratio capped at 31.  Read-only lookup data.
static const int rd_iifactor[32] = { 4, 4, 3, 2, 1, 0, 0, 0,
                                     0, 0, 0, 0, 0, 0, 0, 0,
                                     0, 0, 0, 0, 0, 0, 0, 0,
                                     0, 0, 0, 0, 0, 0, 0, 0, };
John Koleszar's avatar
John Koleszar committed
128

129
// 3* dc_qlookup[Q]*dc_qlookup[Q];
130

131
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
132 133 134
// Lookup tables with one entry per quantizer index.  They are filled by
// vp9_init_me_luts() and copied into cpi->mb.sadperbit16 / sadperbit4 by
// vp9_initialize_me_consts().
static int sad_per_bit16lut[QINDEX_RANGE], sad_per_bit4lut[QINDEX_RANGE];

135
void vp9_init_me_luts() {
John Koleszar's avatar
John Koleszar committed
136 137 138 139 140 141 142
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    sad_per_bit16lut[i] =
143
      (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107);
144
    sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742);
John Koleszar's avatar
John Koleszar committed
145
  }
Paul Wilkins's avatar
Paul Wilkins committed
146
}
John Koleszar's avatar
John Koleszar committed
147

148
// Returns the base rate-distortion multiplier for `qindex`:
// (11 * q * q) / 4, where q is vp9_dc_quant(qindex, 0).
static int compute_rd_mult(int qindex) {
  const int q = vp9_dc_quant(qindex, 0);

  return (11 * q * q) >> 2;
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
153 154 155
// Loads the SAD-per-bit constants for `qindex` into the encoder's macroblock
// state from the tables built by vp9_init_me_luts().
// `qindex` is used directly as a table index and is not range-checked here;
// NOTE(review): callers presumably pass a value in [0, QINDEX_RANGE).
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
  cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
}

158

Dmitry Kovalev's avatar
Dmitry Kovalev committed
159
// Sets up the rate-distortion constants for a frame coded at `qindex`.
//
// In order, this function:
//   - clamps qindex to [0, MAXQ] and derives cpi->RDMULT from it, boosting it
//     via rd_iifactor[] on non-key frames in pass 2;
//   - derives cpi->mb.errorperbit (never left at 0);
//   - calls vp9_set_speed_features();
//   - fills cpi->rd_threshes[] / cpi->rd_baseline_thresh[] from
//     cpi->sf.thresh_mult[] and selects RDDIV (rescaling RDMULT when large);
//   - rebuilds the coefficient token costs, partition costs, mode costs and,
//     on non-key frames, the motion vector cost tables.
void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
  int q, i;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  qindex = clamp(qindex, 0, MAXQ);

  cpi->RDMULT = compute_rd_mult(qindex);
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    if (cpi->twopass.next_iiratio > 31)
      cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
    else
      cpi->RDMULT +=
          (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
  }
  cpi->mb.errorperbit = cpi->RDMULT >> 6;
  // Bump a zero result up to 1.
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);

  vp9_set_speed_features(cpi);

  q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25);
  q <<= 2;
  if (q < 8)
    q = 8;

  if (cpi->RDMULT > 1000) {
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;

    for (i = 0; i < MAX_MODES; i++) {
      if (cpi->sf.thresh_mult[i] < INT_MAX) {
        // Multiply in 64 bits: thresh_mult * q can exceed INT_MAX before the
        // division by 100 brings it back into range.  Saturate if the final
        // value still does not fit.
        const int64_t scaled = (int64_t)cpi->sf.thresh_mult[i] * q / 100;

        cpi->rd_threshes[i] = scaled > INT_MAX ? INT_MAX : (int)scaled;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }
      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  } else {
    cpi->RDDIV = 100;

    for (i = 0; i < MAX_MODES; i++) {
      if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) {
        cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }
      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  }

  fill_token_costs(cpi->mb.token_costs[TX_4X4],
                   cpi->common.fc.coef_probs_4x4, TX_4X4);
  fill_token_costs(cpi->mb.token_costs[TX_8X8],
                   cpi->common.fc.coef_probs_8x8, TX_8X8);
  fill_token_costs(cpi->mb.token_costs[TX_16X16],
                   cpi->common.fc.coef_probs_16x16, TX_16X16);
  fill_token_costs(cpi->mb.token_costs[TX_32X32],
                   cpi->common.fc.coef_probs_32x32, TX_32X32);

  for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
    vp9_cost_tokens(cpi->mb.partition_cost[i],
                    cpi->common.fc.partition_prob[i],
                    vp9_partition_tree);

  /*rough estimate for costing*/
  cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4;
  vp9_init_mode_costs(cpi);

  if (cpi->common.frame_type != KEY_FRAME) {
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cpi->mb.e_mbd.allow_high_precision_mv ?
        cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cpi->common.fc.nmvc,
        cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
  }
}

241
// Returns the sum of squared differences between the first `block_size`
// entries of `coeff` and `dqcoeff`.
//
// The sum is accumulated in 64 bits and saturated at INT_MAX, so extreme
// inputs cannot trigger signed integer overflow; block_error() in this file
// saturates the same way.  A non-positive block_size yields 0.
int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) {
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    // |diff| <= 65535, so diff * diff always fits in int64_t.
    const int64_t this_diff = coeff[i] - dqcoeff[i];

    error += this_diff * this_diff;
  }

  return error > INT_MAX ? INT_MAX : (int)error;
}

252
static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
253
                              int plane, int block, PLANE_TYPE type,
254 255
                              ENTROPY_CONTEXT *A,
                              ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
256 257
                              TX_SIZE tx_size,
                              int y_blocks) {
258
  MACROBLOCKD *const xd = &mb->e_mbd;
259 260
  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
  int pt;
261
  int c = 0;
262 263
  int cost = 0, pad;
  const int *scan, *nb;
264 265 266
  const int eob = xd->plane[plane].eobs[block];
  const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff,
                                           block, 16);
267
  const int ref = mbmi->ref_frame != INTRA_FRAME;
268
  unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
269
      mb->token_costs[tx_size][type][ref];
270
  ENTROPY_CONTEXT above_ec, left_ec;
271
  TX_TYPE tx_type = DCT_DCT;
272

273
  const int segment_id = xd->mode_info_context->mbmi.segment_id;
274
  vp9_prob coef_probs[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
275 276
  int seg_eob, default_eob;
  uint8_t token_cache[1024];
Paul Wilkins's avatar
Paul Wilkins committed
277
  const uint8_t * band_translate;
278 279

  // Check for consistency of tx_size with mode info
280
  assert((!type && !plane) || (type && plane));
281 282 283 284 285 286 287
  if (type == PLANE_TYPE_Y_WITH_DC) {
    assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
  } else {
    TX_SIZE tx_size_uv = get_uv_tx_size(xd);
    assert(tx_size == tx_size_uv);
  }

288
  switch (tx_size) {
289
    case TX_4X4: {
290
      tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
291
          get_tx_type_4x4(xd, block) : DCT_DCT;
292 293
      above_ec = A[0] != 0;
      left_ec = L[0] != 0;
294
      vp9_model_to_full_probs_sb(cm->fc.coef_probs_4x4[type][ref],
295
                                 coef_probs);
296
      seg_eob = 16;
297
      scan = get_scan_4x4(tx_type);
Paul Wilkins's avatar
Paul Wilkins committed
298
      band_translate = vp9_coefband_trans_4x4;
Daniel Kang's avatar
Daniel Kang committed
299
      break;
300
    }
301 302
    case TX_8X8: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
303
      const int sz = 1 + b_width_log2(sb_type);
304
      const int x = block & ((1 << sz) - 1), y = block - x;
305 306
      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT;
307 308
      above_ec = (A[0] + A[1]) != 0;
      left_ec = (L[0] + L[1]) != 0;
309
      scan = get_scan_8x8(tx_type);
310
      vp9_model_to_full_probs_sb(cm->fc.coef_probs_8x8[type][ref],
311
                                 coef_probs);
312
      seg_eob = 64;
Paul Wilkins's avatar
Paul Wilkins committed
313
      band_translate = vp9_coefband_trans_8x8plus;
Daniel Kang's avatar
Daniel Kang committed
314
      break;
315 316 317
    }
    case TX_16X16: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
318
      const int sz = 2 + b_width_log2(sb_type);
319
      const int x = block & ((1 << sz) - 1), y = block - x;
320 321
      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT;
322
      scan = get_scan_16x16(tx_type);
323
      vp9_model_to_full_probs_sb(cm->fc.coef_probs_16x16[type][ref],
324
                                 coef_probs);
325
      seg_eob = 256;
326 327
      above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
Paul Wilkins's avatar
Paul Wilkins committed
328
      band_translate = vp9_coefband_trans_8x8plus;
Daniel Kang's avatar
Daniel Kang committed
329
      break;
330
    }
331
    case TX_32X32:
Paul Wilkins's avatar
Paul Wilkins committed
332
      scan = vp9_default_scan_32x32;
333
      vp9_model_to_full_probs_sb(cm->fc.coef_probs_32x32[type][ref],
334
                                 coef_probs);
335
      seg_eob = 1024;
336 337
      above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;
Paul Wilkins's avatar
Paul Wilkins committed
338
      band_translate = vp9_coefband_trans_8x8plus;
339
      break;
Daniel Kang's avatar
Daniel Kang committed
340
    default:
341
      abort();
Daniel Kang's avatar
Daniel Kang committed
342 343
      break;
  }
John Koleszar's avatar
John Koleszar committed
344
  assert(eob <= seg_eob);
345

346
  pt = combine_entropy_contexts(above_ec, left_ec);
347 348
  nb = vp9_get_coef_neighbors_handle(scan, &pad);
  default_eob = seg_eob;
349

350 351
  if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
    seg_eob = 0;
352

353 354 355 356
  /* sanity check to ensure that we do not have spurious non-zero q values */
  if (eob < seg_eob)
    assert(qcoeff_ptr[scan[eob]] == 0);

357
  {
358
    for (c = 0; c < eob; c++) {
359
      int v = qcoeff_ptr[scan[c]];
360
      int t = vp9_dct_value_tokens_ptr[v].token;
Paul Wilkins's avatar
Paul Wilkins committed
361
      int band = get_coef_band(band_translate, c);
362 363
      if (c)
        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
364

365
      cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
366

367
      if (!c || token_cache[scan[c - 1]])
368
        cost += vp9_cost_bit(coef_probs[band][pt][0], 1);
369
      token_cache[scan[c]] = t;
370
    }
371 372 373 374
    if (c < seg_eob) {
      if (c)
        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
      cost += mb->token_costs[tx_size][type][ref]
Paul Wilkins's avatar
Paul Wilkins committed
375
          [get_coef_band(band_translate, c)]
376 377
          [pt][DCT_EOB_TOKEN];
    }
378 379
  }

380 381 382
  // is eob first coefficient;
  for (pt = 0; pt < (1 << tx_size); pt++) {
    A[pt] = L[pt] = c > 0;
383
  }
384

385 386 387
  return cost;
}

388
// Chooses the transform size for the current block from per-size rate,
// distortion and skip results, and reports the RD cost per transform mode.
//
// r:             per-size rates; r[n][0] is the input rate, r[n][1] is
//                overwritten with that rate plus the cost of signalling size n.
// d, s:          per-size distortion and skip flags (inputs).
// rate, distortion, skip: outputs for the size that was selected.
// txfm_cache:    receives the RD cost for each transform mode.
// max_txfm_size: largest size to consider; entries of r/d/s above it are
//                never read.
//
// The selected size is stored in mbmi->txfm_size.
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                     int (*r)[2], int *rate,
                                     int *d, int *distortion,
                                     int *s, int *skip,
                                     int64_t txfm_cache[NB_TXFM_MODES],
                                     TX_SIZE max_txfm_size) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  vp9_prob skip_prob = vp9_get_pred_prob(cm, xd, PRED_MBSKIP);
  int64_t rd[TX_SIZE_MAX_SB][2];
  int n, m;
  int s0, s1;
  int pick_32x32, pick_16x16;

  // r[n][1] = r[n][0] + cost of the bits that signal transform size n.
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    r[n][1] = r[n][0];
    for (m = 0; m <= n - (n == max_txfm_size); m++) {
      if (m == n)
        r[n][1] += vp9_cost_zero(cm->prob_tx[m]);
      else
        r[n][1] += vp9_cost_one(cm->prob_tx[m]);
    }
  }

  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);

  for (n = TX_4X4; n <= max_txfm_size; n++) {
    if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }
  }

  // "Is this size strictly best among the allowed sizes?"  The leading
  // max_txfm_size test short-circuits so unset rd[] rows are never read.
  pick_32x32 = max_txfm_size == TX_32X32 &&
               rd[TX_32X32][1] < rd[TX_16X16][1] &&
               rd[TX_32X32][1] < rd[TX_8X8][1] &&
               rd[TX_32X32][1] < rd[TX_4X4][1];
  pick_16x16 = max_txfm_size >= TX_16X16 &&
               rd[TX_16X16][1] < rd[TX_8X8][1] &&
               rd[TX_16X16][1] < rd[TX_4X4][1];

  if (max_txfm_size == TX_32X32 &&
      (cm->txfm_mode == ALLOW_32X32 ||
       (cm->txfm_mode == TX_MODE_SELECT && pick_32x32))) {
    mbmi->txfm_size = TX_32X32;
  } else if (max_txfm_size >= TX_16X16 &&
             (cm->txfm_mode == ALLOW_16X16 ||
              cm->txfm_mode == ALLOW_32X32 ||
              (cm->txfm_mode == TX_MODE_SELECT && pick_16x16))) {
    mbmi->txfm_size = TX_16X16;
  } else if (cm->txfm_mode == ALLOW_8X8 ||
             cm->txfm_mode == ALLOW_16X16 ||
             cm->txfm_mode == ALLOW_32X32 ||
             (cm->txfm_mode == TX_MODE_SELECT &&
              rd[TX_8X8][1] < rd[TX_4X4][1])) {
    mbmi->txfm_size = TX_8X8;
  } else {
    mbmi->txfm_size = TX_4X4;
  }

  *distortion = d[mbmi->txfm_size];
  *rate       = r[mbmi->txfm_size][cm->txfm_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->txfm_size];

  txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
  txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
  txfm_cache[ALLOW_16X16] = rd[MIN(max_txfm_size, TX_16X16)][0];
  txfm_cache[ALLOW_32X32] = rd[MIN(max_txfm_size, TX_32X32)][0];
  if (pick_32x32)
    txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  else if (pick_16x16)
    txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
  else
    txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
                                 rd[TX_4X4][1] : rd[TX_8X8][1];
}

467 468
// Returns the sum of squared differences between the first `block_size`
// entries of `coeff` and `dqcoeff`, right-shifted by `shift` and saturated
// at INT_MAX.
static int block_error(int16_t *coeff, int16_t *dqcoeff,
                       int block_size, int shift) {
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    int this_diff = coeff[i] - dqcoeff[i];
    // Multiply as unsigned: |this_diff| <= 65535, so the square fits in 32
    // unsigned bits but may not fit in a signed int.
    error += (unsigned)this_diff * this_diff;
  }
  error >>= shift;

  return error > INT_MAX ? INT_MAX : (int)error;
}

481 482
// Returns block_error() over plane 0 of a block of size `bsize`: the
// coefficient count is 16 << (bwl + bhl), where bwl / bhl are the width and
// height logs returned by b_width_log2() / b_height_log2().
static int block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
  const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);

  return block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff,
                     16 << (bwl + bhl), shift);
}
486

487 488 489 490
// Returns the combined block_error() of planes 1..MAX_MB_PLANE-1 for a block
// of size `bsize`.  Each plane's coefficient count is reduced by that plane's
// x and y subsampling.  The per-plane errors are summed unshifted; the sum is
// then shifted right by `shift` and saturated at INT_MAX.
static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
  const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
  int64_t sum = 0;
  int plane;

  for (plane = 1; plane < MAX_MB_PLANE; plane++) {
    const int subsampling = x->e_mbd.plane[plane].subsampling_x +
                            x->e_mbd.plane[plane].subsampling_y;

    sum += block_error(x->plane[plane].coeff, x->e_mbd.plane[plane].dqcoeff,
                       16 << (bwl + bhl - subsampling), 0);
  }
  sum >>= shift;

  return sum > INT_MAX ? INT_MAX : (int)sum;
}

502 503
// Returns the summed cost_coeffs() of every transform block of size `tx_size`
// in `plane` of a block of size `bsize`.
//
// The plane's above / left entropy contexts are copied into local scratch
// arrays first, so the contexts stored in xd are not modified; cost_coeffs()
// updates only the scratch copies as it walks the blocks.
static int rdcost_plane(VP9_COMMON *const cm, MACROBLOCK *x,
                        int plane, BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
  const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y;
  const int bw = 1 << bwl, bh = 1 << bhl;
  ENTROPY_CONTEXT t_above[16], t_left[16];
  int block, cost;

  vpx_memcpy(&t_above, xd->plane[plane].above_context,
             sizeof(ENTROPY_CONTEXT) * bw);
  vpx_memcpy(&t_left,  xd->plane[plane].left_context,
             sizeof(ENTROPY_CONTEXT) * bh);

  cost = 0;
  // Step by the number of 4x4 units covered by one transform block.
  for (block = 0; block < bw * bh; block += 1 << (tx_size * 2)) {
    int x_idx, y_idx;

    txfrm_block_to_raster_xy(xd, bsize, plane, block, tx_size * 2,
                             &x_idx, &y_idx);

    cost += cost_coeffs(cm, x, plane, block, xd->plane[plane].plane_type,
                        t_above + x_idx, t_left + y_idx,
                        tx_size, bw * bh);
  }

  return cost;
}

531 532 533
// Returns the sum of rdcost_plane() over planes 1..MAX_MB_PLANE-1.
static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x,
                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  int cost = 0, plane;

  for (plane = 1; plane < MAX_MB_PLANE; plane++) {
    cost += rdcost_plane(cm, x, plane, bsize, tx_size);
  }

  return cost;
}

541 542 543
// Evaluates plane 0 of the current block with transform size `tx_size`.
//
// Sets mbmi.txfm_size to tx_size, runs vp9_xform_quant_sby(), then reports:
//   *distortion: block_error_sby() (shift 0 for TX_32X32, 2 otherwise),
//   *rate:       rdcost_plane() for plane 0,
//   *skippable:  vp9_sby_is_skippable().
// mbmi.txfm_size is left set to tx_size on return.
static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
                                     int *rate, int *distortion, int *skippable,
                                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;

  xd->mode_info_context->mbmi.txfm_size = tx_size;
  vp9_xform_quant_sby(cm, x, bsize);

  *distortion = block_error_sby(x, bsize, tx_size == TX_32X32 ? 0 : 2);
  *rate       = rdcost_plane(cm, x, 0, bsize, tx_size);
  *skippable  = vp9_sby_is_skippable(xd, bsize);
}

553 554
static void super_block_yrd(VP9_COMP *cpi,
                            MACROBLOCK *x, int *rate, int *distortion,
555
                            int *skip, BLOCK_SIZE_TYPE bs,
556
                            int64_t txfm_cache[NB_TXFM_MODES]) {
557 558
  VP9_COMMON *const cm = &cpi->common;
  int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB];
Ronald S. Bultje's avatar
Ronald S. Bultje committed
559

560
  vp9_subtract_sby(x, bs);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
561

562
  if (bs >= BLOCK_SIZE_SB32X32)
563 564
    super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32],
                             bs, TX_32X32);
565
  if (bs >= BLOCK_SIZE_MB16X16)
566 567 568 569 570 571
    super_block_yrd_for_txfm(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16],
                             bs, TX_16X16);
  super_block_yrd_for_txfm(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], bs,
                           TX_8X8);
  super_block_yrd_for_txfm(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], bs,
                           TX_4X4);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
572 573

  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
574
                           TX_32X32 - (bs < BLOCK_SIZE_SB32X32)
Jingning Han's avatar
Jingning Han committed
575
                           - (bs < BLOCK_SIZE_MB16X16));
Ronald S. Bultje's avatar
Ronald S. Bultje committed
576
}
Ronald S. Bultje's avatar
Ronald S. Bultje committed
577

578 579
// Searches the intra modes DC_PRED..TM_PRED for the sub-8x8 partition that
// starts at 4x4 block `ib` and returns the best RD cost found.
//
// best_mode:      receives the winning mode.
// bmode_costs:    per-mode rate table used on key frames; other frames use
//                 x->mbmode_cost[frame_type].
// a, l:           above / left entropy contexts; overwritten with the
//                 contexts produced by the winning mode.
// bestrate, bestratey, bestdistortion: rate (mode + coefficients),
//                 coefficient-only rate and distortion of the winning mode.
// bsize:          partition size; the 4x4 blocks it covers are all coded
//                 with the same mode.
//
// On return the destination buffer holds the reconstruction for the winning
// mode and bmi[].as_mode.first is set to it for every covered 4x4 block.
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
                                     B_PREDICTION_MODE *best_mode,
                                     int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
                                     int *bestdistortion,
                                     BLOCK_SIZE_TYPE bsize) {
  MB_PREDICTION_MODE mode;
  MACROBLOCKD *xd = &x->e_mbd;
  int64_t best_rd = INT64_MAX;
  int rate = 0;
  int distortion;
  VP9_COMMON *const cm = &cpi->common;
  const int src_stride = x->plane[0].src.stride;
  uint8_t *src, *dst;
  int16_t *src_diff, *coeff;

  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];
  TX_TYPE tx_type = DCT_DCT;
  TX_TYPE best_tx_type = DCT_DCT;
  int bw = 1 << b_width_log2(bsize);
  int bh = 1 << b_height_log2(bsize);
  int idx, idy, block;
  DECLARE_ALIGNED(16, int16_t, best_dqcoeff[4][16]);

  assert(ib < 4);

  vpx_memcpy(ta, a, sizeof(ta));
  vpx_memcpy(tl, l, sizeof(tl));
  xd->mode_info_context->mbmi.txfm_size = TX_4X4;

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    int64_t this_rd;
    int ratey = 0;

    xd->mode_info_context->bmi[ib].as_mode.first = mode;
    if (cm->frame_type == KEY_FRAME)
      rate = bmode_costs[mode];
    else
      rate = x->mbmode_cost[cm->frame_type][mode];
    distortion = 0;

    // Every candidate starts from the caller's contexts.
    vpx_memcpy(tempa, ta, sizeof(ta));
    vpx_memcpy(templ, tl, sizeof(tl));

    for (idy = 0; idy < bh; ++idy) {
      for (idx = 0; idx < bw; ++idx) {
        block = ib + idy * 2 + idx;
        xd->mode_info_context->bmi[block].as_mode.first = mode;
        src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
                                        x->plane[0].src.buf, src_stride);
        src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, block,
                                             x->plane[0].src_diff);
        coeff = BLOCK_OFFSET(x->plane[0].coeff, block, 16);
        dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
                                        xd->plane[0].dst.buf,
                                        xd->plane[0].dst.stride);
        vp9_intra4x4_predict(xd, block,
                             BLOCK_SIZE_SB8X8,
                             mode, dst, xd->plane[0].dst.stride);
        vp9_subtract_block(4, 4, src_diff, 8,
                           src, src_stride,
                           dst, xd->plane[0].dst.stride);

        tx_type = get_tx_type_4x4(xd, block);
        if (tx_type != DCT_DCT)
          vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
        else
          x->fwd_txm4x4(src_diff, coeff, 16);
        x->quantize_b_4x4(x, block, tx_type, 16);

        ratey += cost_coeffs(cm, x, 0, block, PLANE_TYPE_Y_WITH_DC,
                             tempa + idx, templ + idy, TX_4X4, 16);
        distortion += vp9_block_error(coeff, BLOCK_OFFSET(xd->plane[0].dqcoeff,
                                                         block, 16), 16) >> 2;

        // Reconstruct this block for the mode under test so that following
        // blocks of the partition can use it.  dst still holds the `mode`
        // prediction written above (NOTE(review): assumes vp9_subtract_block
        // only reads its prediction argument), so only the residual is added,
        // using the transform type the coefficients were produced with.
        // Previously this re-predicted with *best_mode / best_tx_type, which
        // is unset on the first pass and belongs to a different candidate.
        if (tx_type != DCT_DCT)
          vp9_short_iht4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
                               dst, xd->plane[0].dst.stride, tx_type);
        else
          xd->inv_txm4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
                             dst, xd->plane[0].dst.stride);
      }
    }

    rate += ratey;
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

    if (this_rd < best_rd) {
      *bestrate = rate;
      *bestratey = ratey;
      *bestdistortion = distortion;
      best_rd = this_rd;
      *best_mode = mode;
      best_tx_type = tx_type;
      vpx_memcpy(a, tempa, sizeof(tempa));
      vpx_memcpy(l, templ, sizeof(templ));
      // Keep the winner's dequantized coefficients for the final rebuild.
      for (idy = 0; idy < bh; ++idy) {
        for (idx = 0; idx < bw; ++idx) {
          block = ib + idy * 2 + idx;
          vpx_memcpy(best_dqcoeff[idy * 2 + idx],
                     BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
                     sizeof(best_dqcoeff[0]));
        }
      }
    }
  }

  // Rebuild the destination with the winning mode and its coefficients.
  for (idy = 0; idy < bh; ++idy) {
    for (idx = 0; idx < bw; ++idx) {
      block = ib + idy * 2 + idx;
      xd->mode_info_context->bmi[block].as_mode.first = *best_mode;
      dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
                                      xd->plane[0].dst.buf,
                                      xd->plane[0].dst.stride);

      vp9_intra4x4_predict(xd, block, BLOCK_SIZE_SB8X8, *best_mode,
                           dst, xd->plane[0].dst.stride);
      // inverse transform
      if (best_tx_type != DCT_DCT)
        vp9_short_iht4x4_add(best_dqcoeff[idy * 2 + idx], dst,
                             xd->plane[0].dst.stride, best_tx_type);
      else
        xd->inv_txm4x4_add(best_dqcoeff[idy * 2 + idx], dst,
                           xd->plane[0].dst.stride);
    }
  }

  return best_rd;
}

715 716
static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
                                         int *Rate, int *rate_y,
717
                                         int *Distortion, int64_t best_rd) {
718
  int i, j;
John Koleszar's avatar
John Koleszar committed
719
  MACROBLOCKD *const xd = &mb->e_mbd;
720 721 722 723
  BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
  int bw = 1 << b_width_log2(bsize);
  int bh = 1 << b_height_log2(bsize);
  int idx, idy;
724
  int cost = 0;
John Koleszar's avatar
John Koleszar committed
725 726 727
  int distortion = 0;
  int tot_rate_y = 0;
  int64_t total_rd = 0;
728
  ENTROPY_CONTEXT t_above[4], t_left[4];
John Koleszar's avatar
John Koleszar committed
729 730
  int *bmode_costs;

731 732
  vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
  vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
John Koleszar's avatar
John Koleszar committed
733

Yaowu Xu's avatar
Yaowu Xu committed
734
  xd->mode_info_context->mbmi.mode = I4X4_PRED;
John Koleszar's avatar
John Koleszar committed
735
  bmode_costs = mb->inter_bmode_costs;
736

737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752
  for (idy = 0; idy < 2; idy += bh) {
    for (idx = 0; idx < 2; idx += bw) {
      MODE_INFO *const mic = xd->mode_info_context;
      const int mis = xd->mode_info_stride;
      B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
      int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry);
      int UNINITIALIZED_IS_SAFE(d);
      i = idy * 2 + idx;

      if (xd->frame_type == KEY_FRAME) {
        const MB_PREDICTION_MODE A = above_block_mode(mic, i, mis);
        const MB_PREDICTION_MODE L = (xd->left_available || idx) ?
                                     left_block_mode(mic, i) : DC_PRED;

        bmode_costs  = mb->bmode_costs[A][L];
      }
753

754 755 756 757 758 759 760 761 762 763 764 765 766 767 768
      total_rd += rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
                                        t_above + idx, t_left + idy,
                                        &r, &ry, &d, bsize);
      cost += r;
      distortion += d;
      tot_rate_y += ry;

      mic->bmi[i].as_mode.first = best_mode;
      for (j = 1; j < bh; ++j)
        mic->bmi[i + j * 2].as_mode.first = best_mode;
      for (j = 1; j < bw; ++j)
        mic->bmi[i + j].as_mode.first = best_mode;

      if (total_rd >= best_rd)
        break;
John Koleszar's avatar
John Koleszar committed
769 770
    }
  }
John Koleszar's avatar
John Koleszar committed
771

772
  if (total_rd >= best_rd)
773
    return INT64_MAX;
774

John Koleszar's avatar
John Koleszar committed
775
  *Rate = cost;
776
  *rate_y = tot_rate_y;
John Koleszar's avatar
John Koleszar committed