vp9_rdopt.c 154 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
John Koleszar's avatar
John Koleszar committed
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5 6
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
John Koleszar's avatar
John Koleszar committed
9 10 11 12 13 14 15
 */


#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
16

17
#include "vp9/common/vp9_pragmas.h"
18 19 20 21 22
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
23 24 25 26 27
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
28 29 30 31 32 33
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
34
#include "vpx_mem/vpx_mem.h"
35 36 37 38 39
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
40
#include "vp9_rtcd.h"
41
#include "vp9/common/vp9_mvref_common.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
42
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
43

44 45
#define INVALID_MV 0x80008000

46 47 48
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

49 50 51
DECLARE_ALIGNED(16, extern const uint8_t,
                vp9_pt_energy_class[MAX_ENTROPY_TOKENS]);

Ronald S. Bultje's avatar
Ronald S. Bultje committed
52
#define I4X4_PRED 0x8000
Ronald S. Bultje's avatar
Ronald S. Bultje committed
53
#define SPLITMV 0x10000
Ronald S. Bultje's avatar
Ronald S. Bultje committed
54

55
// Table of MAX_MODES candidate entries. Each row holds a prediction mode
// followed by two reference-frame slots; NONE in the last slot marks rows
// that name only one reference frame.
// The row index shares its range with the per-mode arrays filled in
// vp9_initialize_rd_consts() (cpi->sf.thresh_mult[], cpi->rd_threshes[][]).
// NOTE(review): presumably the row order is the order in which the RD mode
// search evaluates candidates - confirm against the mode loop.
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
56
  {NEARESTMV, LAST_FRAME,   NONE},
57 58
  {DC_PRED,   INTRA_FRAME,  NONE},

Paul Wilkins's avatar
Paul Wilkins committed
59 60 61 62
  {NEARESTMV, ALTREF_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},
  {NEWMV,     LAST_FRAME,   NONE},
  {NEARESTMV, LAST_FRAME,   ALTREF_FRAME},
63
  {NEARMV,    LAST_FRAME,   NONE},
Paul Wilkins's avatar
Paul Wilkins committed
64
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
John Koleszar's avatar
John Koleszar committed
65

Paul Wilkins's avatar
Paul Wilkins committed
66 67
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
68
  {NEARMV,    ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
69

70 71
  {TM_PRED,   INTRA_FRAME,  NONE},

Paul Wilkins's avatar
Paul Wilkins committed
72 73 74 75 76
  {NEARMV,    LAST_FRAME,   ALTREF_FRAME},
  {NEWMV,     LAST_FRAME,   ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},
John Koleszar's avatar
John Koleszar committed
77

78 79 80
  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},
Paul Wilkins's avatar
Paul Wilkins committed
81 82
  {SPLITMV,   LAST_FRAME,   ALTREF_FRAME},
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},
83

Paul Wilkins's avatar
Paul Wilkins committed
84 85 86
  {ZEROMV,    LAST_FRAME,   NONE},
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {ZEROMV,    ALTREF_FRAME, NONE},
Ronald S. Bultje's avatar
Ronald S. Bultje committed
87
  {ZEROMV,    LAST_FRAME,   ALTREF_FRAME},
John Koleszar's avatar
John Koleszar committed
88
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
89

Paul Wilkins's avatar
Paul Wilkins committed
90 91 92 93 94 95 96 97 98
  {I4X4_PRED, INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {V_PRED,    INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
99 100
};

101 102 103 104
// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block size; it has one entry per
// block size (BLOCK_SIZES entries).
// The factors are stored scaled by 4 (<< 2), i.e. 2 = x0.5, 4 = x1,
// 32 = x8 etc. vp9_initialize_rd_consts() divides by 4 to undo the scaling.
105
static int rd_thresh_block_size_factor[BLOCK_SIZES] =
106 107 108 109 110 111
  {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};

// Initial values stored in cpi->rd_thresh_freq_fact[][] by
// vp9_initialize_rd_consts(): BASE when cpi->sf.adaptive_rd_thresh is off,
// MAX when it is on.
#define BASE_RD_THRESH_FREQ_FACT 16
#define MAX_RD_THRESH_FREQ_FACT 32
// NOTE(review): not referenced in this part of the file; presumably the step
// by which the frequency factor is adjusted - confirm at the use site.
#define MAX_RD_THRESH_FREQ_INC 1

112
static void fill_token_costs(vp9_coeff_cost *c,
113
                             vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
114
  int i, j, k, l;
115 116 117 118 119 120 121 122
  TX_SIZE t;
  for (t = TX_4X4; t <= TX_32X32; t++)
    for (i = 0; i < BLOCK_TYPES; i++)
      for (j = 0; j < REF_TYPES; j++)
        for (k = 0; k < COEF_BANDS; k++)
          for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
            vp9_prob probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
123
            vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
124
                            vp9_coef_tree);
125
            vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
126
                                 vp9_coef_tree);
127 128
            assert(c[t][i][j][k][0][l][DCT_EOB_TOKEN] ==
                   c[t][i][j][k][1][l][DCT_EOB_TOKEN]);
129
          }
130 131
}

132 133 134 135 136 137
// Extra weighting applied to cpi->RDMULT in vp9_initialize_rd_consts() for
// non-key frames in the second pass. The table is indexed by
// cpi->twopass.next_iiratio (values above 31 use entry 31) and each entry is
// in units of 1/16: RDMULT += (RDMULT * rd_iifactor[idx]) >> 4.
static const int rd_iifactor[32] = {
  4, 4, 3, 2, 1, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
};
John Koleszar's avatar
John Koleszar committed
138

139
// 3* dc_qlookup[Q]*dc_qlookup[Q];
140

141
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
142 143 144
// SAD-per-bit lookup tables indexed by qindex. They are filled in by
// vp9_init_me_luts() and read by vp9_initialize_me_consts().
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

145
void vp9_init_me_luts() {
John Koleszar's avatar
John Koleszar committed
146 147 148 149 150 151 152
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    sad_per_bit16lut[i] =
153
      (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107);
154
    sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742);
John Koleszar's avatar
John Koleszar committed
155
  }
Paul Wilkins's avatar
Paul Wilkins committed
156
}
John Koleszar's avatar
John Koleszar committed
157

158
// Returns the base rate-distortion multiplier for qindex:
// (11 * dcq * dcq) >> 2, where dcq = vp9_dc_quant(qindex, 0).
static int compute_rd_mult(int qindex) {
  const int dc_q = vp9_dc_quant(qindex, 0);
  const int scaled = 11 * dc_q * dc_q;

  return scaled >> 2;
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
163 164 165
// Copies the SAD-per-bit values for qindex from the lookup tables into the
// macroblock state of cpi. qindex is used as a table index without a range
// check.
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  MACROBLOCK *const mb = &cpi->mb;

  mb->sadperbit16 = sad_per_bit16lut[qindex];
  mb->sadperbit4 = sad_per_bit4lut[qindex];
}

168

Dmitry Kovalev's avatar
Dmitry Kovalev committed
169
// Fills cpi->rd_threshes[][], cpi->rd_baseline_thresh[][] and
// cpi->rd_thresh_freq_fact[][] for every block size and mode.
//   q       - quantizer-derived scale applied to cpi->sf.thresh_mult[].
//   divisor - final divisor of the threshold; it folds in the x4 scaling of
//             rd_thresh_block_size_factor[] (and the extra /100 used when
//             RDMULT has been scaled down).
static void init_rd_thresholds(VP9_COMP *cpi, int q, int divisor) {
  const int freq_fact = cpi->sf.adaptive_rd_thresh ? MAX_RD_THRESH_FREQ_FACT
                                                   : BASE_RD_THRESH_FREQ_FACT;
  int bsize, i;

  for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
    const int size_factor = rd_thresh_block_size_factor[bsize];
    // Threshold here seems unnecessarily harsh but fine given actual
    // range of values used for cpi->sf.thresh_mult[]
    const int thresh_max = INT_MAX / (q * size_factor);

    for (i = 0; i < MAX_MODES; ++i) {
      if ((int64_t)cpi->sf.thresh_mult[i] < thresh_max) {
        cpi->rd_threshes[bsize][i] =
            cpi->sf.thresh_mult[i] * q * size_factor / divisor;
      } else {
        // The product would not fit in an int: saturate.
        cpi->rd_threshes[bsize][i] = INT_MAX;
      }
      cpi->rd_baseline_thresh[bsize][i] = cpi->rd_threshes[bsize][i];
      cpi->rd_thresh_freq_fact[bsize][i] = freq_fact;
    }
  }
}

// Initializes the rate-distortion constants of cpi for the given qindex:
// RDMULT / RDDIV, errorperbit, the per-mode RD thresholds, and the token,
// partition, mode and (for non-key frames) motion vector / inter mode cost
// tables. qindex is clamped to [0, MAXQ] before use.
void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
  int q, i;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  qindex = clamp(qindex, 0, MAXQ);

  cpi->RDMULT = compute_rd_mult(qindex);
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    if (cpi->twopass.next_iiratio > 31)
      cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
    else
      cpi->RDMULT +=
          (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
  }
  cpi->mb.errorperbit = cpi->RDMULT >> 6;
  // Never let errorperbit be zero.
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);

  vp9_set_speed_features(cpi);

  q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25);
  q <<= 2;
  if (q < 8)
    q = 8;

  if (cpi->RDMULT > 1000) {
    // Large multipliers are scaled down by 100; the thresholds get the same
    // /100 so they stay comparable. The *4 relates to the scaling of
    // rd_thresh_block_size_factor[].
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;
    init_rd_thresholds(cpi, q, 4 * 100);
  } else {
    cpi->RDDIV = 100;
    init_rd_thresholds(cpi, q, 4);
  }

  fill_token_costs(cpi->mb.token_costs, cpi->common.fc.coef_probs);

  for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
    vp9_cost_tokens(cpi->mb.partition_cost[i],
                    cpi->common.fc.partition_prob[cpi->common.frame_type][i],
                    vp9_partition_tree);

  /*rough estimate for costing*/
  vp9_init_mode_costs(cpi);

  if (cpi->common.frame_type != KEY_FRAME) {
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cpi->mb.e_mbd.allow_high_precision_mv ?
        cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cpi->common.fc.nmvc,
        cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);

    for (i = 0; i < INTER_MODE_CONTEXTS; i++) {
      MB_PREDICTION_MODE m;

      for (m = NEARESTMV; m < MB_MODE_COUNT; m++)
        cpi->mb.inter_mode_cost[i][m - NEARESTMV] =
            cost_token(vp9_inter_mode_tree,
                       cpi->common.fc.inter_mode_probs[i],
                       vp9_inter_mode_encodings - NEARESTMV + m);
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
280
// Linearly interpolates two tables of ntab entries at the same position.
// The table position is x * inv_step. Positions at or beyond the last
// entry return the last entry of each table. Results are written to *v1
// (from tab1) and *v2 (from tab2). The caller must pass x >= 0.
static INLINE void linear_interpolate2(double x, int ntab, int inv_step,
                                       const double *tab1, const double *tab2,
                                       double *v1, double *v2) {
  const double pos = x * inv_step;
  const int lo = (int) pos;
  const int last = ntab - 1;

  if (lo < last) {
    // Blend the two neighbouring entries by the fractional position.
    const double frac = pos - lo;

    *v1 = tab1[lo] * (1 - frac) + tab1[lo + 1] * frac;
    *v2 = tab2[lo] * (1 - frac) + tab2[lo + 1] * frac;
    return;
  }

  *v1 = tab1[last];
  *v2 = tab2[last];
}

295 296 297 298 299
// Looks up the normalized rate (*R) and normalized distortion (*D) of the
// Laplacian source model at x = qpstep / sqrt(variance), x >= 0.
// Both values are linearly interpolated from tables sampled at a step of
// 1 / inv_tab_step in x.
static void model_rd_norm(double x, double *R, double *D) {
  static const int inv_tab_step = 8;
  // NOTE: The tables below must be of the same size
  //
  // Normalized rate
  // This table models the rate for a Laplacian source
  // with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const double rate_tab[] = {
    64.00, 4.944, 3.949, 3.372, 2.966, 2.655, 2.403, 2.194,
    2.014, 1.858, 1.720, 1.596, 1.485, 1.384, 1.291, 1.206,
    1.127, 1.054, 0.986, 0.923, 0.863, 0.808, 0.756, 0.708,
    0.662, 0.619, 0.579, 0.541, 0.506, 0.473, 0.442, 0.412,
    0.385, 0.359, 0.335, 0.313, 0.291, 0.272, 0.253, 0.236,
    0.220, 0.204, 0.190, 0.177, 0.165, 0.153, 0.142, 0.132,
    0.123, 0.114, 0.106, 0.099, 0.091, 0.085, 0.079, 0.073,
    0.068, 0.063, 0.058, 0.054, 0.050, 0.047, 0.043, 0.040,
    0.037, 0.034, 0.032, 0.029, 0.027, 0.025, 0.023, 0.022,
    0.020, 0.019, 0.017, 0.016, 0.015, 0.014, 0.013, 0.012,
    0.011, 0.010, 0.009, 0.008, 0.008, 0.007, 0.007, 0.006,
    0.006, 0.005, 0.005, 0.005, 0.004, 0.004, 0.004, 0.003,
    0.003, 0.003, 0.003, 0.002, 0.002, 0.002, 0.002, 0.002,
    0.002, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001,
    0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.000,
  };
  // Normalized distortion
  // This table models the normalized distortion for a Laplacian source
  // with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance)
  // Note the actual distortion is Dn * variance.
  static const double dist_tab[] = {
    0.000, 0.001, 0.005, 0.012, 0.021, 0.032, 0.045, 0.061,
    0.079, 0.098, 0.119, 0.142, 0.166, 0.190, 0.216, 0.242,
    0.269, 0.296, 0.324, 0.351, 0.378, 0.405, 0.432, 0.458,
    0.484, 0.509, 0.534, 0.557, 0.580, 0.603, 0.624, 0.645,
    0.664, 0.683, 0.702, 0.719, 0.735, 0.751, 0.766, 0.780,
    0.794, 0.807, 0.819, 0.830, 0.841, 0.851, 0.861, 0.870,
    0.878, 0.886, 0.894, 0.901, 0.907, 0.913, 0.919, 0.925,
    0.930, 0.935, 0.939, 0.943, 0.947, 0.951, 0.954, 0.957,
    0.960, 0.963, 0.966, 0.968, 0.971, 0.973, 0.975, 0.976,
    0.978, 0.980, 0.981, 0.982, 0.984, 0.985, 0.986, 0.987,
    0.988, 0.989, 0.990, 0.990, 0.991, 0.992, 0.992, 0.993,
    0.993, 0.994, 0.994, 0.995, 0.995, 0.996, 0.996, 0.996,
    0.996, 0.997, 0.997, 0.997, 0.997, 0.998, 0.998, 0.998,
    0.998, 0.998, 0.998, 0.999, 0.999, 0.999, 0.999, 0.999,
    0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 1.000,
  };
  // Derive the entry count from the table itself so it cannot drift out of
  // step with the data.
  const int tab_size = (int)(sizeof(rate_tab) / sizeof(rate_tab[0]));

  assert(sizeof(rate_tab) == sizeof(dist_tab));
  assert(x >= 0.0);
  linear_interpolate2(x, tab_size, inv_tab_step,
                      rate_tab, dist_tab, R, D);
}

// Models the rate and distortion of a Laplacian source with the given
// variance when quantized with a uniform quantizer of the given step size.
// The closed form expressions are in:
// Hang and Chen, "Source Model for transform video coder and its
// application - Part I: Fundamental Theory", IEEE Trans. Circ.
// Sys. for Video Tech., April 1997.
//   var   - summed variance (or sse) over the n samples.
//   n     - number of samples.
//   qstep - quantizer step size.
//   rate  - out: modelled rate, (n << 8) times the normalized rate, rounded.
//   dist  - out: modelled distortion, var times the normalized distortion,
//           rounded.
// Both outputs are 0 when var or n is 0.
static void model_rd_from_var_lapndz(int var, int n, int qstep,
                                     int *rate, int64_t *dist) {
  vp9_clear_system_state();
  if (var != 0 && n != 0) {
    double norm_rate, norm_dist;
    const double per_sample_var = (double) var / n;
    const double norm_step = qstep / sqrt(per_sample_var);

    model_rd_norm(norm_step, &norm_rate, &norm_dist);
    *rate = (int)((n << 8) * norm_rate + 0.5);
    *dist = (int64_t)(var * norm_dist + 0.5);
  } else {
    *rate = 0;
    *dist = 0;
  }
  vp9_clear_system_state();
}

// Estimates rate and distortion of a whole block from the prediction error
// of each plane, using the Laplacian model instead of a real transform.
//   out_rate_sum - out: sum of the modelled per-plane rates.
//   out_dist_sum - out: sum of the modelled per-plane distortions, << 4.
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  int i, rate_sum = 0;
  // Accumulate in 64 bits: the per-plane distortion is int64_t and the sum
  // is shifted left by 4 below, which could overflow an int.
  int64_t dist_sum = 0;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
    const BLOCK_SIZE_TYPE bs = get_plane_block_size(bsize, pd);
    unsigned int sse;
    int rate;
    int64_t dist;
    // Only the sse output is used; the returned variance is discarded.
    (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
                              pd->dst.buf, pd->dst.stride, &sse);
    // sse works better than var, since there is no dc prediction used
    model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
                             pd->dequant[1] >> 3, &rate, &dist);

    rate_sum += rate;
    dist_sum += dist;
  }

  *out_rate_sum = rate_sum;
  *out_dist_sum = dist_sum << 4;
}

// Estimates luma rate and distortion of a block for one transform size by
// applying the Laplacian model to every transform-sized tile of plane 0.
//   out_rate_sum - out: sum of the modelled tile rates.
//   out_dist_sum - out: sum of the modelled tile distortions, << 4.
//   out_skip     - out: 1 only if every tile has a modelled rate below 1024.
static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
                                 TX_SIZE tx_size,
                                 MACROBLOCK *x, MACROBLOCKD *xd,
                                 int *out_rate_sum, int64_t *out_dist_sum,
                                 int *out_skip) {
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const int width = plane_block_width(bsize, pd);
  const int height = plane_block_height(bsize, pd);
  BLOCK_SIZE_TYPE tile_bs = BLOCK_4X4;
  int step = 4;
  int row, col;
  int total_rate = 0;
  int64_t total_dist = 0;

  // Map the transform size to the tile size and matching variance function.
  switch (tx_size) {
    case TX_4X4:
      tile_bs = BLOCK_4X4;
      step = 4;
      break;
    case TX_8X8:
      tile_bs = BLOCK_8X8;
      step = 8;
      break;
    case TX_16X16:
      tile_bs = BLOCK_16X16;
      step = 16;
      break;
    case TX_32X32:
      tile_bs = BLOCK_32X32;
      step = 32;
      break;
    default:
      assert(0);
      break;
  }

  *out_skip = 1;
  for (row = 0; row < height; row += step) {
    for (col = 0; col < width; col += step) {
      unsigned int sse;
      int tile_rate;
      int64_t tile_dist;

      (void) cpi->fn_ptr[tile_bs].vf(p->src.buf + row * p->src.stride + col,
                                     p->src.stride,
                                     pd->dst.buf + row * pd->dst.stride + col,
                                     pd->dst.stride, &sse);
      // sse works better than var, since there is no dc prediction used
      model_rd_from_var_lapndz(sse, step * step, pd->dequant[1] >> 3,
                               &tile_rate, &tile_dist);
      total_rate += tile_rate;
      total_dist += tile_dist;
      *out_skip &= (tile_rate < 1024);
    }
  }

  *out_rate_sum = total_rate;
  *out_dist_sum = (total_dist << 4);
}

Ronald S. Bultje's avatar
Ronald S. Bultje committed
461
// Returns the sum of squared differences between coeff[] and dqcoeff[] over
// block_size entries, and stores the sum of squared coeff[] values in *ssz.
int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int64_t sq_error = 0;
  int64_t sq_source = 0;
  int idx = 0;

  while (idx < block_size) {
    const int src = coeff[idx];
    const int delta = src - dqcoeff[idx];

    // Squares are formed in unsigned arithmetic: the largest possible
    // difference, 65535, squared still fits in 32 unsigned bits.
    sq_error += (unsigned)delta * delta;
    sq_source += (unsigned)src * src;
    ++idx;
  }

  *ssz = sq_source;
  return sq_error;
}

476 477 478 479 480
/* Number of coefficients in each coefficient band, one row per transform
 * size. cost_coeffs() starts reading at element [1] because it costs the
 * DC coefficient (the single entry of the first band) separately.
 * The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). */
481
static const int16_t band_counts[TX_SIZES][8] = {
482 483 484 485
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
486 487
};

488
// Returns the token cost of the quantized coefficients of one transform
// block and updates the entropy contexts of that block.
//   mb           - macroblock; supplies token_costs and, through e_mbd, the
//                  eob count and quantized coefficients.
//   plane, block - select the eob count and the qcoeff pointer.
//   type         - plane type; indexes mb->token_costs.
//   A, L         - above / left entropy contexts. They are read as booleans
//                  to form the initial context, and both are overwritten
//                  with 1 if at least one coefficient was coded (eob > 0),
//                  else 0.
//   tx_size      - transform size; selects the cost table and the
//                  band_counts row.
//   scan, nb     - scan order and neighbor table handed to
//                  get_coef_context().
static INLINE int cost_coeffs(MACROBLOCK *mb,
489
                              int plane, int block, PLANE_TYPE type,
490
                              ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
491
                              TX_SIZE tx_size,
492
                              const int16_t *scan, const int16_t *nb) {
493
  MACROBLOCKD *const xd = &mb->e_mbd;
494
  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
495
  int pt, c, cost;
496
  const int16_t *band_count = &band_counts[tx_size][1];
497
  const int eob = xd->plane[plane].eobs[block];
498
  const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, block);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
499
  const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
500
  unsigned int (*token_costs)[2][PREV_COEF_CONTEXTS]
501
                    [MAX_ENTROPY_TOKENS] = mb->token_costs[tx_size][type][ref];
502
  ENTROPY_CONTEXT above_ec = !!*A, left_ec = !!*L;
503
  uint8_t token_cache[1024];
504 505

  // Check for consistency of tx_size with mode info
506
  assert((!type && !plane) || (type && plane));
507 508 509
  if (type == PLANE_TYPE_Y_WITH_DC) {
    assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
  } else {
510
    assert(tx_size == get_uv_tx_size(mbmi));
511 512
  }

513
  pt = combine_entropy_contexts(above_ec, left_ec);
514

515 516
  if (eob == 0) {
    // single eob token
517 518
    cost = token_costs[0][0][pt][DCT_EOB_TOKEN];
    c = 0;
519
  } else {
520
    int v, prev_t, band_left = *band_count++;
521 522 523

    // dc token
    v = qcoeff_ptr[0];
524
    prev_t = vp9_dct_value_tokens_ptr[v].token;
525
    cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
526
    token_cache[0] = vp9_pt_energy_class[prev_t];
527
    // DC has a band of its own: move on to the cost table of the next band.
    ++token_costs;
528 529 530 531

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
532
      int t;
533 534 535

      v = qcoeff_ptr[rc];
      t = vp9_dct_value_tokens_ptr[v].token;
536
      pt = get_coef_context(nb, token_cache, c);
537
      // The second table (index 1, filled by vp9_cost_tokens_skip) is used
      // when the previous token value was 0.
      cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
538
      token_cache[rc] = vp9_pt_energy_class[t];
539
      prev_t = t;
540
      // Band exhausted: load the next band count and cost table.
      if (!--band_left) {
541 542
        band_left = *band_count++;
        ++token_costs;
543
      }
544
    }
545 546

    // eob token
547
    // band_left is 0 only after the band_counts terminator was loaded, i.e.
    // when the block is completely full and no EOB token is coded.
    if (band_left) {
548
      pt = get_coef_context(nb, token_cache, c);
549
      cost += (*token_costs)[0][pt][DCT_EOB_TOKEN];
550
    }
551 552
  }

553
  // Both contexts become 1 when any coefficient was coded (c > 0), else 0.
554
  *A = *L = c > 0;
555

556 557 558
  return cost;
}

Deb Mukherjee's avatar
Deb Mukherjee committed
559 560 561 562 563 564 565 566 567 568 569 570 571
// State shared by the per-transform-block callbacks (dist_block, rate_block,
// block_yrd_txfm). The field order matters: the struct is filled with
// positional initializers.
struct rdcost_block_args {
  VP9_COMMON *cm;               // forwarded to the encode callbacks
  MACROBLOCK *x;                // macroblock being evaluated
  ENTROPY_CONTEXT t_above[16];  // working copy of the above entropy contexts
  ENTROPY_CONTEXT t_left[16];   // working copy of the left entropy contexts
  TX_SIZE tx_size;              // transform size under evaluation
  int bw;                       // plane block width in 4x4 units
  int bh;                       // plane block height in 4x4 units
  int rate;                     // accumulated by rate_block()
  int64_t dist;                 // accumulated by dist_block()
  int64_t sse;                  // accumulated by dist_block()
  int64_t best_rd;              // block_yrd_txfm() gives up above this cost
  int skip;                     // set once best_rd has been exceeded
572
  // Scan order and neighbor table passed on to cost_coeffs().
  const int16_t *scan, *nb;
Deb Mukherjee's avatar
Deb Mukherjee committed
573 574 575 576 577 578 579 580 581 582 583
};

// Per-transform-block callback: adds the distortion and the sse of one
// transform block to args->dist and args->sse.
//   plane, block  - plane index and block offset of the transform block.
//   bsize         - unused here.
//   ss_txfrm_size - log2 of the number of 4x4 units in the transform block;
//                   the block holds 16 << ss_txfrm_size coefficients.
//   arg           - struct rdcost_block_args * carrying the accumulators.
static void dist_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
                       int ss_txfrm_size, void *arg) {
  struct rdcost_block_args *const args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  // Use the plane that was asked for (the same way rate_block() does)
  // instead of always reading plane 0.
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  int64_t this_sse;
  // 32x32 results are used as is; all other sizes are scaled down by 4.
  const int shift = args->tx_size == TX_32X32 ? 0 : 2;
  int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);

  (void) bsize;

  args->dist += vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                                &this_sse) >> shift;
  args->sse += this_sse >> shift;

  if (x->skip_encode &&
      xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME) {
    // TODO(jingning): tune the model to better capture the distortion.
    // Widen before multiplying so the product cannot overflow an int.
    const int64_t penalty = ((int64_t)pd->dequant[1] * pd->dequant[1] *
                             (1 << ss_txfrm_size)) >> shift;
    args->dist += penalty;
    args->sse  += penalty;
  }
}

// Per-transform-block callback: adds the coefficient token cost of one
// transform block to args->rate, and updates the above / left entropy
// context copies held in args.
static void rate_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
                       int ss_txfrm_size, void *arg) {
  struct rdcost_block_args *const args = arg;
  MACROBLOCK *const mb = args->x;
  MACROBLOCKD *const xd = &mb->e_mbd;
  int col, row;
  int block_cost;

  // Locate the block inside the plane to pick its context entries.
  txfrm_block_to_raster_xy(xd, bsize, plane, block, args->tx_size * 2, &col,
                           &row);

  block_cost = cost_coeffs(mb, plane, block, xd->plane[plane].plane_type,
                           args->t_above + col, args->t_left + row,
                           args->tx_size, args->scan, args->nb);
  args->rate += block_cost;
}

615 616
// FIXME(jingning): need to make the rd test of chroma components consistent
// with that of luma component. this function should be deprecated afterwards.
Deb Mukherjee's avatar
Deb Mukherjee committed
617 618
static int rdcost_plane(VP9_COMMON * const cm, MACROBLOCK *x, int plane,
                        BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
619 620 621 622 623
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblockd_plane *pd = &xd->plane[plane];
  const BLOCK_SIZE_TYPE bs = get_plane_block_size(bsize, pd);
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bs];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bs];
624
  int i;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
625 626 627
  struct rdcost_block_args args = { cm, x, { 0 }, { 0 }, tx_size,
                                    num_4x4_blocks_wide, num_4x4_blocks_high,
                                    0, 0, 0, INT64_MAX, 0 };
Deb Mukherjee's avatar
Deb Mukherjee committed
628

629 630
  switch (tx_size) {
    case TX_4X4:
Dmitry Kovalev's avatar
Dmitry Kovalev committed
631 632 633 634
      vpx_memcpy(&args.t_above, pd->above_context,
                 sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide);
      vpx_memcpy(&args.t_left, pd->left_context,
                 sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high);
635 636 637 638
      args.scan = vp9_default_scan_4x4;
      args.nb = vp9_default_scan_4x4_neighbors;
      break;
    case TX_8X8:
Dmitry Kovalev's avatar
Dmitry Kovalev committed
639 640 641 642
      for (i = 0; i < num_4x4_blocks_wide; i += 2)
        args.t_above[i] = !!*(uint16_t *)&pd->above_context[i];
      for (i = 0; i < num_4x4_blocks_high; i += 2)
        args.t_left[i] = !!*(uint16_t *)&pd->left_context[i];
643 644 645 646
      args.scan = vp9_default_scan_8x8;
      args.nb = vp9_default_scan_8x8_neighbors;
      break;
    case TX_16X16:
Dmitry Kovalev's avatar
Dmitry Kovalev committed
647 648 649 650
      for (i = 0; i < num_4x4_blocks_wide; i += 4)
        args.t_above[i] = !!*(uint32_t *)&pd->above_context[i];
      for (i = 0; i < num_4x4_blocks_high; i += 4)
        args.t_left[i] = !!*(uint32_t *)&pd->left_context[i];
651 652 653 654
      args.scan = vp9_default_scan_16x16;
      args.nb = vp9_default_scan_16x16_neighbors;
      break;
    case TX_32X32:
Dmitry Kovalev's avatar
Dmitry Kovalev committed
655 656 657 658
      for (i = 0; i < num_4x4_blocks_wide; i += 8)
        args.t_above[i] = !!*(uint64_t *)&pd->above_context[i];
      for (i = 0; i < num_4x4_blocks_high; i += 8)
        args.t_left[i] = !!*(uint64_t *)&pd->left_context[i];
659 660 661 662 663 664
      args.scan = vp9_default_scan_32x32;
      args.nb = vp9_default_scan_32x32_neighbors;
      break;
    default:
      assert(0);
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686

  foreach_transformed_block_in_plane(xd, bsize, plane, rate_block, &args);
  return args.rate;
}

// Returns the summed coefficient token cost of all planes after the first
// one (planes 1 .. MAX_MB_PLANE - 1) for the given transform size.
static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x,
                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  int plane = 1;
  int total = 0;

  while (plane < MAX_MB_PLANE) {
    total += rdcost_plane(cm, x, plane, bsize, tx_size);
    ++plane;
  }
  return total;
}

// Returns the summed coefficient error of planes 1 .. MAX_MB_PLANE - 1,
// shifted right by shift. *sse receives the summed squared source
// coefficients of the same planes, shifted the same way.
static int64_t block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize,
                                int shift, int64_t *sse) {
  int64_t total_error = 0;
  int plane;

  *sse = 0;
  for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
    struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
    const BLOCK_SIZE_TYPE plane_bs = get_plane_block_size(bsize, pd);
    const int num_coeffs = 1 << num_pels_log2_lookup[plane_bs];
    int64_t plane_sse;

    total_error += vp9_block_error(x->plane[plane].coeff, pd->dqcoeff,
                                   num_coeffs, &plane_sse);
    *sse += plane_sse;
  }

  *sse >>= shift;
  return total_error >> shift;
}

/* Per-transform-block callback; super_block_yrd_for_txfm() hands it to
 * foreach_transformed_block_in_plane() with a struct rdcost_block_args as
 * `arg`.
 *
 * Before doing any work on this block it checks the running totals in `arg`:
 * if the cheaper of
 *   - RDCOST(rate so far, dist so far), and
 *   - RDCOST(0, sse so far)
 * already exceeds args->best_rd, the search is abandoned: args->skip is set
 * and rate/dist/sse are saturated to INT_MAX / INT64_MAX.  Once skip is set,
 * every later invocation returns immediately.
 *
 * Otherwise the block is processed with encode_block_intra() when the first
 * reference frame is INTRA_FRAME, or xform_quant() when it is not, followed
 * by dist_block() and rate_block() on the same arguments. */
static void block_yrd_txfm(int plane, int block, BLOCK_SIZE_TYPE bsize,
                           int ss_txfrm_size, void *arg) {
  struct rdcost_block_args *const rd_args = arg;
  MACROBLOCK *const x = rd_args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct encode_b_args encode_args = {rd_args->cm, x, NULL};
  int64_t coded_rd, zero_rate_rd, lowest_rd;

  /* An earlier block already exceeded the budget. */
  if (rd_args->skip)
    return;

  coded_rd = RDCOST(x->rdmult, x->rddiv, rd_args->rate, rd_args->dist);
  zero_rate_rd = RDCOST(x->rdmult, x->rddiv, 0, rd_args->sse);
  lowest_rd = MIN(coded_rd, zero_rate_rd);

  if (lowest_rd > rd_args->best_rd) {
    /* Flag the early exit and saturate the outputs. */
    rd_args->skip = 1;
    rd_args->rate = INT_MAX;
    rd_args->dist = INT64_MAX;
    rd_args->sse  = INT64_MAX;
    return;
  }

  if (xd->mode_info_context->mbmi.ref_frame[0] != INTRA_FRAME)
    xform_quant(plane, block, bsize, ss_txfrm_size, &encode_args);
  else
    encode_block_intra(plane, block, bsize, ss_txfrm_size, &encode_args);

  dist_block(plane, block, bsize, ss_txfrm_size, rd_args);
  rate_block(plane, block, bsize, ss_txfrm_size, rd_args);
}

static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
                                     int *rate, int64_t *distortion,
                                     int *skippable, int64_t *sse,
730
                                     int64_t ref_best_rd,
Deb Mukherjee's avatar
Deb Mukherjee committed
731 732 733
                                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblockd_plane *const pd = &xd->plane[0];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
734 735 736
  const BLOCK_SIZE_TYPE bs = get_plane_block_size(bsize, pd);
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bs];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bs];
737
  int i;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
738 739
  struct rdcost_block_args args = { cm, x, { 0 }, { 0 }, tx_size,
                                    num_4x4_blocks_wide, num_4x4_blocks_high,
740
                                    0, 0, 0, ref_best_rd, 0 };
Deb Mukherjee's avatar
Deb Mukherjee committed
741
  xd->mode_info_context->mbmi.txfm_size = tx_size;
742 743 744
  switch (tx_size) {
    case TX_4X4:
      vpx_memcpy(&args.t_above, pd->above_context,
Dmitry Kovalev's avatar
Dmitry Kovalev committed
745
                 sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide);
746
      vpx_memcpy(&args.t_left, pd->left_context,
Dmitry Kovalev's avatar
Dmitry Kovalev committed
747
                 sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high);
748 749
      get_scan_nb_4x4(get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, 0),
                      &args.scan, &args.nb);
750 751
      break;
    case TX_8X8:
Dmitry Kovalev's avatar
Dmitry Kovalev committed
752
      for (i = 0; i < num_4x4_blocks_wide; i += 2)
753
        args.t_above[i] = !!*(uint16_t *)&pd->above_context[i];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
754
      for (i = 0; i < num_4x4_blocks_high; i += 2)
755
        args.t_left[i] = !!*(uint16_t *)&pd->left_context[i];
756 757
      get_scan_nb_8x8(get_tx_type_8x8(PLANE_TYPE_Y_WITH_DC, xd),
                      &args.scan, &args.nb);
758 759
      break;
    case TX_16X16:
Dmitry Kovalev's avatar
Dmitry Kovalev committed
760
      for (i = 0; i < num_4x4_blocks_wide; i += 4)
761
        args.t_above[i] = !!*(uint32_t *)&pd->above_context[i];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
762
      for (i = 0; i < num_4x4_blocks_high; i += 4)
763
        args.t_left[i] = !!*(uint32_t *)&pd->left_context[i];
764 765
      get_scan_nb_16x16(get_tx_type_16x16(PLANE_TYPE_Y_WITH_DC, xd),
                        &args.scan, &args.nb);
766 767
      break;
    case TX_32X32:
Dmitry Kovalev's avatar
Dmitry Kovalev committed
768
      for (i = 0; i < num_4x4_blocks_wide; i += 8)
769
        args.t_above[i] = !!*(uint64_t *)&pd->above_context[i];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
770
      for (i = 0; i < num_4x4_blocks_high; i += 8)
771 772 773 774 775 776 777
        args.t_left[i] = !!*(uint64_t *)&pd->left_context[i];
      args.scan = vp9_default_scan_32x32;
      args.nb = vp9_default_scan_32x32_neighbors;
      break;
    default:
      assert(0);
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
778 779 780 781 782

  foreach_transformed_block_in_plane(xd, bsize, 0, block_yrd_txfm, &args);
  *distortion = args.dist;
  *rate       = args.rate;
  *sse        = args.sse;
783
  *skippable  = vp9_sby_is_skippable(xd, bsize) && (!args.skip);
Deb Mukherjee's avatar
Deb Mukherjee committed
784 785 786 787 788
}

static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
                                     int *rate, int64_t *distortion,
                                     int *skip, int64_t *sse,
789
                                     int64_t ref_best_rd,
Deb Mukherjee's avatar
Deb Mukherjee committed
790 791
                                     BLOCK_SIZE_TYPE bs) {
  const TX_SIZE max_txfm_size = TX_32X32
792
      - (bs < BLOCK_32X32) - (bs < BLOCK_16X16);
Deb Mukherjee's avatar
Deb Mukherjee committed
793 794 795 796
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  if (max_txfm_size == TX_32X32 &&
797 798
      (cm->tx_mode == ALLOW_32X32 ||
       cm->tx_mode == TX_MODE_SELECT)) {
Deb Mukherjee's avatar
Deb Mukherjee committed
799 800
    mbmi->txfm_size = TX_32X32;
  } else if (max_txfm_size >= TX_16X16 &&
801 802 803
             (cm->tx_mode == ALLOW_16X16 ||
              cm->tx_mode == ALLOW_32X32 ||
              cm->tx_mode == TX_MODE_SELECT)) {
Deb Mukherjee's avatar
Deb Mukherjee committed
804
    mbmi->txfm_size = TX_16X16;
805
  } else if (cm->tx_mode != ONLY_4X4) {
Deb Mukherjee's avatar
Deb Mukherjee committed
806 807 808 809 810
    mbmi->txfm_size = TX_8X8;
  } else {
    mbmi->txfm_size = TX_4X4;
  }
  super_block_yrd_for_txfm(cm, x, rate, distortion, skip,
811
                           &sse[mbmi->txfm_size], ref_best_rd, bs,
Deb Mukherjee's avatar
Deb Mukherjee committed
812 813 814 815
                           mbmi->txfm_size);
  cpi->txfm_stepdown_count[0]++;
}

816
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
817
                                     int (*r)[2], int *rate,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
818
                                     int64_t *d, int64_t *distortion,
819
                                     int *s, int *skip,
820
                                     int64_t tx_cache[TX_MODES],
Deb Mukherjee's avatar
Deb Mukherjee committed
821
                                     BLOCK_SIZE_TYPE bs) {
822
  const TX_SIZE max_tx_size = TX_32X32
823
      - (bs < BLOCK_32X32) - (bs < BLOCK_16X16);
824 825 826
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
827
  vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
828
  int64_t rd[TX_SIZES][2];
829
  int n, m;
830
  int s0, s1;
831

832
  const vp9_prob *tx_probs = get_tx_probs2(xd, &cm->fc.tx_probs);
833

834
  for (n = TX_4X4; n <= max_tx_size; n++) {
835
    r[n][1] = r[n][0];
836 837
    if (r[n][0] == INT_MAX)
      continue;
838
    for (m = 0; m <= n - (n == max_tx_size); m++) {
839
      if (m == n)
840
        r[n][1] += vp9_cost_zero(tx_probs[m]);
841
      else
842
        r[n][1] += vp9_cost_one(tx_probs[m]);
843 844
    }
  }
845

846 847 848
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
849

850
  for (n = TX_4X4; n <= max_tx_size; n++) {
851 852 853 854
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
      continue;
    }
855 856 857 858 859
    if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
860 861 862
    }
  }

863
  if (max_tx_size == TX_32X32 &&
864 865
      (cm->tx_mode == ALLOW_32X32 ||
       (cm->tx_mode == TX_MODE_SELECT &&
866 867 868
        rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
        rd[TX_32X32][1] < rd[TX_4X4][1]))) {
    mbmi->txfm_size = TX_32X32;
869
  } else if (max_tx_size >= TX_16X16 &&
870 871 872
             (cm->tx_mode == ALLOW_16X16 ||
              cm->tx_mode == ALLOW_32X32 ||
              (cm->tx_mode == TX_MODE_SELECT &&
873 874
               rd[TX_16X16][1] < rd[TX_8X8][1] &&
               rd[TX_16X16][1] < rd[TX_4X4][1]))) {
875
    mbmi->txfm_size = TX_16X16;
876 877 878 879
  } else if (cm->tx_mode == ALLOW_8X8 ||
             cm->tx_mode == ALLOW_16X16 ||
             cm->tx_mode == ALLOW_32X32 ||
           (cm->tx_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) {
880 881 882 883 884
    mbmi->txfm_size = TX_8X8;
  } else {
    mbmi->txfm_size = TX_4X4;
  }

885
  *distortion = d[mbmi->txfm_size];
886
  *rate       = r[mbmi->txfm_size][cm->tx_mode == TX_MODE_SELECT];
887 888
  *skip       = s[mbmi->txfm_size];

889 890 891 892 893
  tx_cache[ONLY_4X4] = rd[TX_4X4][0];
  tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
  tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
  tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
  if (max_tx_size == TX_32X32 &&
894 895
      rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1])