/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
15

16
#include "vp9/common/vp9_pragmas.h"
17 18 19
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
20 21 22 23 24
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
25 26 27 28 29 30
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
31
#include "vpx_mem/vpx_mem.h"
32 33 34 35 36
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
37
#include "./vp9_rtcd.h"
38
#include "vp9/common/vp9_mvref_common.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
39
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
40

41 42
#define INVALID_MV 0x80008000

43 44 45
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

46 47 48
#define LAST_FRAME_MODE_MASK    0xFFEDCD60
#define GOLDEN_FRAME_MODE_MASK  0xFFDA3BB0
#define ALT_REF_MODE_MASK       0xFFC648D0
49

Paul Wilkins's avatar
Paul Wilkins committed
50 51
#define MIN_EARLY_TERM_INDEX    3

52
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},

  {DC_PRED,   INTRA_FRAME,  NONE},

  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},

  {NEARMV,    LAST_FRAME,   NONE},
  {NEARMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, LAST_FRAME,   ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},

  {TM_PRED,   INTRA_FRAME,  NONE},

  {NEARMV,    LAST_FRAME,   ALTREF_FRAME},
  {NEWMV,     LAST_FRAME,   ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},

  {ZEROMV,    LAST_FRAME,   NONE},
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {ZEROMV,    ALTREF_FRAME, NONE},
  {ZEROMV,    LAST_FRAME,   ALTREF_FRAME},
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},

  {H_PRED,    INTRA_FRAME,  NONE},
  {V_PRED,    INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D207_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
};

const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
  {LAST_FRAME,   NONE},
  {GOLDEN_FRAME, NONE},
  {ALTREF_FRAME, NONE},
  {LAST_FRAME,   ALTREF_FRAME},
  {GOLDEN_FRAME, ALTREF_FRAME},
  {INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
99 100
};

101 102 103 104
// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
105
static int rd_thresh_block_size_factor[BLOCK_SIZES] =
106 107
  {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};

Deb Mukherjee's avatar
Deb Mukherjee committed
108 109 110
#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC      1
#define RD_THRESH_POW      1.25
111
#define RD_MULT_EPB_RATIO  64
Deb Mukherjee's avatar
Deb Mukherjee committed
112

113 114
#define MV_COST_WEIGHT      108
#define MV_COST_WEIGHT_SUB  120
115

116 117 118 119 120 121 122 123 124
// Splits raster_block into a row (the bits above the low
// b_width_log2(plane_bsize) bits) and a column (those low bits), scales both
// by 4, and returns row * stride + column.
static int raster_block_offset(BLOCK_SIZE plane_bsize,
                               int raster_block, int stride) {
  const int width_log2 = b_width_log2(plane_bsize);
  const int col_mask = (1 << width_log2) - 1;
  const int row = 4 * (raster_block >> width_log2);
  const int col = 4 * (raster_block & col_mask);
  return row * stride + col;
}
// Returns a pointer into base for the given raster block, using a stride of
// 4 * num_4x4_blocks_wide_lookup[plane_bsize] elements per row.
static int16_t* raster_block_offset_int16(BLOCK_SIZE plane_bsize,
                                          int raster_block, int16_t *base) {
  const int row_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int offset =
      raster_block_offset(plane_bsize, raster_block, row_stride);
  return &base[offset];
}

129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
// Converts the intra mode and switchable interpolation filter probabilities
// into the token cost tables stored in c->mb.
static void fill_mode_costs(VP9_COMP *c) {
  VP9_COMMON *const cm = &c->common;
  int a, b, ctx;

  // One cost table per [a][b] entry of the key-frame luma mode probabilities.
  for (a = 0; a < INTRA_MODES; ++a) {
    for (b = 0; b < INTRA_MODES; ++b) {
      vp9_cost_tokens((int *)c->mb.y_mode_costs[a][b],
                      vp9_kf_y_mode_prob[a][b], vp9_intra_mode_tree);
    }
  }

  // TODO(rbultje) separate tables for superblock costing?
  vp9_cost_tokens(c->mb.mbmode_cost, cm->fc.y_mode_prob[1],
                  vp9_intra_mode_tree);

  // Chroma mode costs: index 1 uses the frame context probabilities,
  // index 0 the key-frame defaults.
  vp9_cost_tokens(c->mb.intra_uv_mode_cost[1],
                  cm->fc.uv_mode_prob[INTRA_MODES - 1],
                  vp9_intra_mode_tree);
  vp9_cost_tokens(c->mb.intra_uv_mode_cost[0],
                  vp9_kf_uv_mode_prob[INTRA_MODES - 1],
                  vp9_intra_mode_tree);

  for (ctx = 0; ctx < SWITCHABLE_FILTER_CONTEXTS; ++ctx) {
    vp9_cost_tokens((int *)c->mb.switchable_interp_costs[ctx],
                    cm->fc.switchable_interp_prob[ctx],
                    vp9_switchable_interp_tree);
  }
}

153
static void fill_token_costs(vp9_coeff_cost *c,
154
                             vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
155
  int i, j, k, l;
156
  TX_SIZE t;
157
  for (t = TX_4X4; t <= TX_32X32; ++t)
158
    for (i = 0; i < PLANE_TYPES; ++i)
159 160 161
      for (j = 0; j < REF_TYPES; ++j)
        for (k = 0; k < COEF_BANDS; ++k)
          for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
162 163
            vp9_prob probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
164
            vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
165
                            vp9_coef_tree);
166
            vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
167
                                 vp9_coef_tree);
168 169
            assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
                   c[t][i][j][k][1][l][EOB_TOKEN]);
170
          }
171 172
}

173 174 175 176 177 178
static const int rd_iifactor[32] = {
  4, 4, 3, 2, 1, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
};
John Koleszar's avatar
John Koleszar committed
179

180
// 3* dc_qlookup[Q]*dc_qlookup[Q];
181

182
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
183 184 185
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

186
void vp9_init_me_luts() {
John Koleszar's avatar
John Koleszar committed
187 188 189 190 191 192 193
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    sad_per_bit16lut[i] =
194
      (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107);
195
    sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742);
John Koleszar's avatar
John Koleszar committed
196
  }
Paul Wilkins's avatar
Paul Wilkins committed
197
}
John Koleszar's avatar
John Koleszar committed
198

199
// Returns the rate-distortion multiplier for qindex: 88 * q^2 / 25, where q
// is vp9_dc_quant(qindex, 0). In pass 2 on non-key frames the value is
// increased by rd_iifactor[next_iiratio] / 16 of itself, with ratios above
// 31 using the last table entry.
int vp9_compute_rd_mult(VP9_COMP *cpi, int qindex) {
  const int q = vp9_dc_quant(qindex, 0);
  // TODO(debargha): Adjust the function below
  int rdmult = 88 * q * q / 25;

  if (cpi->pass == 2 && cpi->common.frame_type != KEY_FRAME) {
    const int boost = cpi->twopass.next_iiratio > 31
                          ? rd_iifactor[31]
                          : rd_iifactor[cpi->twopass.next_iiratio];
    rdmult += (rdmult * boost) >> 4;
  }

  return rdmult;
}

// Returns (dc_quant / 4) ^ RD_THRESH_POW * 5.12 truncated to int, with a
// lower bound of 8.
static int compute_rd_thresh_factor(int qindex) {
  // TODO(debargha): Adjust the function below
  const int factor =
      (int)(pow(vp9_dc_quant(qindex, 0) / 4.0, RD_THRESH_POW) * 5.12);
  return factor < 8 ? 8 : factor;
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
221 222 223
// Copies the SAD-per-bit values for qindex from the lookup tables into
// cpi->mb.
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  const int per_bit16 = sad_per_bit16lut[qindex];
  const int per_bit4 = sad_per_bit4lut[qindex];

  cpi->mb.sadperbit16 = per_bit16;
  cpi->mb.sadperbit4 = per_bit4;
}

226 227 228 229 230 231 232 233 234
// Fills cpi->rd_threshes and cpi->rd_thresh_sub8x8 for every segment and
// block size. Each entry is thresh_mult * q * block_size_factor / 4, where q
// comes from compute_rd_thresh_factor() for the segment's clamped qindex;
// multipliers too large for that product are stored as INT_MAX.
static void set_block_thresholds(VP9_COMP *cpi) {
  VP9_COMMON *cm = &cpi->common;
  int seg, bs, idx;

  for (seg = 0; seg < MAX_SEGMENTS; ++seg) {
    const int seg_base_q = vp9_get_qindex(&cm->seg, seg, cm->base_qindex);
    const int seg_q = clamp(seg_base_q + cm->y_dc_delta_q, 0, MAXQ);
    const int q = compute_rd_thresh_factor(seg_q);

    for (bs = 0; bs < BLOCK_SIZES; ++bs) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[]
      const int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bs]);

      for (idx = 0; idx < MAX_MODES; ++idx) {
        if (cpi->sf.thresh_mult[idx] >= thresh_max) {
          cpi->rd_threshes[seg][bs][idx] = INT_MAX;
        } else {
          cpi->rd_threshes[seg][bs][idx] =
              cpi->sf.thresh_mult[idx] * q *
              rd_thresh_block_size_factor[bs] / 4;
        }
      }

      for (idx = 0; idx < MAX_REFS; ++idx) {
        if (cpi->sf.thresh_mult_sub8x8[idx] >= thresh_max) {
          cpi->rd_thresh_sub8x8[seg][bs][idx] = INT_MAX;
        } else {
          cpi->rd_thresh_sub8x8[seg][bs][idx] =
              cpi->sf.thresh_mult_sub8x8[idx] * q *
              rd_thresh_block_size_factor[bs] / 4;
        }
      }
    }
  }
}

264 265 266
// Sets up the per-frame rate-distortion constants in cpi: RDDIV/RDMULT,
// errorperbit, speed features, block thresholds, and the token, partition,
// mode and (for non-intra-only frames) motion vector / inter mode cost
// tables.
void vp9_initialize_rd_consts(VP9_COMP *cpi) {
  VP9_COMMON *cm = &cpi->common;
  int qindex, i;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames
  // (i.e. when refresh_golden_frame or refresh_alt_ref_frame is set).
  qindex = clamp(cm->base_qindex + cm->y_dc_delta_q, 0, MAXQ);

  cpi->RDDIV = RDDIV_BITS;  // in bits (to multiply D by 128)
  cpi->RDMULT = vp9_compute_rd_mult(cpi, qindex);

  // Keep errorperbit at a minimum of 1.
  cpi->mb.errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO;
  if (cpi->mb.errorperbit == 0)
    cpi->mb.errorperbit = 1;

  vp9_set_speed_features(cpi);

  // Transform size selection is disabled only for USE_LARGESTALL on
  // non-key frames.
  cpi->mb.select_txfm_size =
      (cpi->sf.tx_size_search_method != USE_LARGESTALL ||
       cm->frame_type == KEY_FRAME);

  set_block_thresholds(cpi);

  fill_token_costs(cpi->mb.token_costs, cm->fc.coef_probs);

  for (i = 0; i < PARTITION_CONTEXTS; ++i) {
    vp9_cost_tokens(cpi->mb.partition_cost[i], get_partition_probs(cm, i),
                    vp9_partition_tree);
  }

  /*rough estimate for costing*/
  fill_mode_costs(cpi);

  if (frame_is_intra_only(cm))
    return;

  vp9_build_nmv_cost_table(
      cpi->mb.nmvjointcost,
      cm->allow_high_precision_mv ? cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
      &cm->fc.nmvc,
      cm->allow_high_precision_mv, 1, 1);

  for (i = 0; i < INTER_MODE_CONTEXTS; ++i) {
    vp9_cost_tokens((int *)cpi->mb.inter_mode_cost[i],
                    cm->fc.inter_mode_probs[i], vp9_inter_mode_tree);
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
312
// Looks up two tables of ntab entries at position x * inv_step with linear
// interpolation between neighbouring entries, writing the results to *v1
// (from tab1) and *v2 (from tab2). Positions at or beyond the last entry
// return the last entry of each table.
static INLINE void linear_interpolate2(double x, int ntab, int inv_step,
                                       const double *tab1, const double *tab2,
                                       double *v1, double *v2) {
  const double y = x * inv_step;

  // Clamp in floating point *before* converting to int: converting a double
  // that does not fit in an int (or a NaN) is undefined behaviour, so the
  // range check must not depend on the converted value. The negated
  // comparison also routes NaN to the clamped branch.
  if (!(y < ntab - 1)) {
    *v1 = tab1[ntab - 1];
    *v2 = tab2[ntab - 1];
  } else {
    const int d = (int)y;
    const double a = y - d;
    *v1 = tab1[d] * (1 - a) + tab1[d + 1] * a;
    *v2 = tab2[d] * (1 - a) + tab2[d + 1] * a;
  }
}

327 328 329 330 331
// Looks up the normalized rate (*R) and normalized distortion (*D) for the
// ratio x by linear interpolation in two tables sampled at steps of
// 1 / kInvTabStep. x must be non-negative.
static void model_rd_norm(double x, double *R, double *D) {
  static const int kInvTabStep = 8;
  static const int kTabSize = 120;
  // NOTE: The tables below must be of the same size (kTabSize entries each).
  //
  // Normalized rate
  // This table models the rate for a Laplacian source
  // with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const double kNormRate[] = {
    64.00, 4.944, 3.949, 3.372, 2.966, 2.655, 2.403, 2.194,
    2.014, 1.858, 1.720, 1.596, 1.485, 1.384, 1.291, 1.206,
    1.127, 1.054, 0.986, 0.923, 0.863, 0.808, 0.756, 0.708,
    0.662, 0.619, 0.579, 0.541, 0.506, 0.473, 0.442, 0.412,
    0.385, 0.359, 0.335, 0.313, 0.291, 0.272, 0.253, 0.236,
    0.220, 0.204, 0.190, 0.177, 0.165, 0.153, 0.142, 0.132,
    0.123, 0.114, 0.106, 0.099, 0.091, 0.085, 0.079, 0.073,
    0.068, 0.063, 0.058, 0.054, 0.050, 0.047, 0.043, 0.040,
    0.037, 0.034, 0.032, 0.029, 0.027, 0.025, 0.023, 0.022,
    0.020, 0.019, 0.017, 0.016, 0.015, 0.014, 0.013, 0.012,
    0.011, 0.010, 0.009, 0.008, 0.008, 0.007, 0.007, 0.006,
    0.006, 0.005, 0.005, 0.005, 0.004, 0.004, 0.004, 0.003,
    0.003, 0.003, 0.003, 0.002, 0.002, 0.002, 0.002, 0.002,
    0.002, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001,
    0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.000,
  };
  // Normalized distortion
  // This table models the normalized distortion for a Laplacian source
  // with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance)
  // Note the actual distortion is Dn * variance.
  static const double kNormDist[] = {
    0.000, 0.001, 0.005, 0.012, 0.021, 0.032, 0.045, 0.061,
    0.079, 0.098, 0.119, 0.142, 0.166, 0.190, 0.216, 0.242,
    0.269, 0.296, 0.324, 0.351, 0.378, 0.405, 0.432, 0.458,
    0.484, 0.509, 0.534, 0.557, 0.580, 0.603, 0.624, 0.645,
    0.664, 0.683, 0.702, 0.719, 0.735, 0.751, 0.766, 0.780,
    0.794, 0.807, 0.819, 0.830, 0.841, 0.851, 0.861, 0.870,
    0.878, 0.886, 0.894, 0.901, 0.907, 0.913, 0.919, 0.925,
    0.930, 0.935, 0.939, 0.943, 0.947, 0.951, 0.954, 0.957,
    0.960, 0.963, 0.966, 0.968, 0.971, 0.973, 0.975, 0.976,
    0.978, 0.980, 0.981, 0.982, 0.984, 0.985, 0.986, 0.987,
    0.988, 0.989, 0.990, 0.990, 0.991, 0.992, 0.992, 0.993,
    0.993, 0.994, 0.994, 0.995, 0.995, 0.996, 0.996, 0.996,
    0.996, 0.997, 0.997, 0.997, 0.997, 0.998, 0.998, 0.998,
    0.998, 0.998, 0.998, 0.999, 0.999, 0.999, 0.999, 0.999,
    0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 1.000,
  };

  assert(x >= 0.0);
  linear_interpolate2(x, kTabSize, kInvTabStep,
                      kNormRate, kNormDist, R, D);
}

// Models the rate and distortion for a Laplacian source with given
// variance when quantized with a uniform quantizer with given stepsize.
// The closed form expressions are in:
// Hang and Chen, "Source Model for transform video coder and its
// application - Part I: Fundamental Theory", IEEE Trans. Circ.
// Sys. for Video Tech., April 1997.
//
// var is the total over n samples and qstep the quantizer step. *rate
// receives n * 256 * (normalized rate) and *dist receives
// var * (normalized distortion), both rounded to nearest. Both outputs are
// 0 when var or n is 0.
static void model_rd_from_var_lapndz(int var, int n, int qstep,
                                     int *rate, int64_t *dist) {
  vp9_clear_system_state();

  if (var != 0 && n != 0) {
    double norm_rate, norm_dist;
    const double per_sample_var = (double)var / n;
    const double step_ratio = qstep / sqrt(per_sample_var);

    model_rd_norm(step_ratio, &norm_rate, &norm_dist);
    *rate = (int)((n << 8) * norm_rate + 0.5);
    *dist = (int)(var * norm_dist + 0.5);
  } else {
    *rate = 0;
    *dist = 0;
  }

  vp9_clear_system_state();
}

413
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
Deb Mukherjee's avatar
Deb Mukherjee committed
414 415 416 417 418 419 420 421 422 423
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  int i, rate_sum = 0, dist_sum = 0;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
424
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
Deb Mukherjee's avatar
Deb Mukherjee committed
425 426 427 428 429 430
    unsigned int sse;
    int rate;
    int64_t dist;
    (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
                              pd->dst.buf, pd->dst.stride, &sse);
    // sse works better than var, since there is no dc prediction used
Dmitry Kovalev's avatar
Dmitry Kovalev committed
431
    model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
432
                             pd->dequant[1] >> 3, &rate, &dist);
Deb Mukherjee's avatar
Deb Mukherjee committed
433 434

    rate_sum += rate;
Yaowu Xu's avatar
Yaowu Xu committed
435
    dist_sum += (int)dist;
Deb Mukherjee's avatar
Deb Mukherjee committed
436 437 438 439 440 441
  }

  *out_rate_sum = rate_sum;
  *out_dist_sum = dist_sum << 4;
}

442
static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize,
Deb Mukherjee's avatar
Deb Mukherjee committed
443 444 445 446
                                 TX_SIZE tx_size,
                                 MACROBLOCK *x, MACROBLOCKD *xd,
                                 int *out_rate_sum, int64_t *out_dist_sum,
                                 int *out_skip) {
447
  int j, k;
448
  BLOCK_SIZE bs;
Deb Mukherjee's avatar
Deb Mukherjee committed
449 450
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
451 452
  const int width = 4 << num_4x4_blocks_wide_lookup[bsize];
  const int height = 4 << num_4x4_blocks_high_lookup[bsize];
Deb Mukherjee's avatar
Deb Mukherjee committed
453 454
  int rate_sum = 0;
  int64_t dist_sum = 0;
455
  const int t = 4 << tx_size;
Deb Mukherjee's avatar
Deb Mukherjee committed
456 457 458 459 460 461 462 463 464 465 466 467

  if (tx_size == TX_4X4) {
    bs = BLOCK_4X4;
  } else if (tx_size == TX_8X8) {
    bs = BLOCK_8X8;
  } else if (tx_size == TX_16X16) {
    bs = BLOCK_16X16;
  } else if (tx_size == TX_32X32) {
    bs = BLOCK_32X32;
  } else {
    assert(0);
  }
468

Deb Mukherjee's avatar
Deb Mukherjee committed
469
  *out_skip = 1;
Jim Bankoski's avatar
Jim Bankoski committed
470 471
  for (j = 0; j < height; j += t) {
    for (k = 0; k < width; k += t) {
Deb Mukherjee's avatar
Deb Mukherjee committed
472 473 474
      int rate;
      int64_t dist;
      unsigned int sse;
475 476 477
      cpi->fn_ptr[bs].vf(&p->src.buf[j * p->src.stride + k], p->src.stride,
                         &pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride,
                         &sse);
Deb Mukherjee's avatar
Deb Mukherjee committed
478
      // sse works better than var, since there is no dc prediction used
479
      model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3, &rate, &dist);
Deb Mukherjee's avatar
Deb Mukherjee committed
480 481 482 483 484
      rate_sum += rate;
      dist_sum += dist;
      *out_skip &= (rate < 1024);
    }
  }
485

Deb Mukherjee's avatar
Deb Mukherjee committed
486
  *out_rate_sum = rate_sum;
487
  *out_dist_sum = dist_sum << 4;
Deb Mukherjee's avatar
Deb Mukherjee committed
488 489
}

Ronald S. Bultje's avatar
Ronald S. Bultje committed
490
// Returns the sum of squared differences between coeff[] and dqcoeff[] over
// block_size entries, and stores the sum of squared coeff[] values in *ssz.
int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int64_t sum_sq_err = 0;
  int64_t sum_sq_coeff = 0;
  int idx = 0;

  while (idx < block_size) {
    const int c = coeff[idx];
    const int diff = c - dqcoeff[idx];

    // Squares are formed in unsigned arithmetic: the largest possible
    // difference (65535) squared exceeds INT_MAX but fits in 32 unsigned
    // bits.
    sum_sq_err += (unsigned)diff * diff;
    sum_sq_coeff += (unsigned)c * c;
    ++idx;
  }

  *ssz = sum_sq_coeff;
  return sum_sq_err;
}

505 506 507 508 509
/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). */
510
static const int16_t band_counts[TX_SIZES][8] = {
511 512 513 514
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
515 516
};

517
static INLINE int cost_coeffs(MACROBLOCK *x,
518
                              int plane, int block,
519
                              ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
520
                              TX_SIZE tx_size,
521
                              const int16_t *scan, const int16_t *nb) {
522
  MACROBLOCKD *const xd = &x->e_mbd;
523
  MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
524
  struct macroblock_plane *p = &x->plane[plane];
525 526
  struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
527
  const int16_t *band_count = &band_counts[tx_size][1];
528
  const int eob = p->eobs[block];
529
  const int16_t *const qcoeff_ptr = BLOCK_OFFSET(p->qcoeff, block);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
530
  const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
531
  unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
532
                   x->token_costs[tx_size][type][ref];
533
  const ENTROPY_CONTEXT above_ec = !!*A, left_ec = !!*L;
534
  uint8_t *p_tok = x->token_cache;
535 536
  int pt = combine_entropy_contexts(above_ec, left_ec);
  int c, cost;
537 538

  // Check for consistency of tx_size with mode info
539
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
540
                                      : get_uv_tx_size(mbmi) == tx_size);
541

542 543
  if (eob == 0) {
    // single eob token
544
    cost = token_costs[0][0][pt][EOB_TOKEN];
545
    c = 0;
546
  } else {
547
    int band_left = *band_count++;
548 549

    // dc token
550 551
    int v = qcoeff_ptr[0];
    int prev_t = vp9_dct_value_tokens_ptr[v].token;
552
    cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
553
    p_tok[0] = vp9_pt_energy_class[prev_t];
554
    ++token_costs;
555 556 557 558

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
559
      int t;
560 561 562

      v = qcoeff_ptr[rc];
      t = vp9_dct_value_tokens_ptr[v].token;
563
      pt = get_coef_context(nb, p_tok, c);
564
      cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
565
      p_tok[rc] = vp9_pt_energy_class[t];
566
      prev_t = t;
567
      if (!--band_left) {
568 569
        band_left = *band_count++;
        ++token_costs;
570
      }
571
    }
572 573

    // eob token
574
    if (band_left) {
575
      pt = get_coef_context(nb, p_tok, c);
576
      cost += (*token_costs)[0][pt][EOB_TOKEN];
577
    }
578 579
  }

580
  // is eob first coefficient;
581
  *A = *L = (c > 0);
582

583 584 585
  return cost;
}

586
// Computes the distortion (args->dist) and the source energy (args->sse) of
// one transform block from its coefficients and dequantized coefficients.
// Both are right-shifted by 2 unless args->tx_size is TX_32X32. For
// non-inter blocks with x->skip_encode set, a term derived from the squared
// quantizer step is added to both.
static void dist_block(int plane, int block, TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ss_txfrm_size = tx_size << 1;
  const int shift = args->tx_size == TX_32X32 ? 0 : 2;
  int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  int64_t this_sse;

  // 16 << ss_txfrm_size is the number of coefficients in the block.
  args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                               &this_sse) >> shift;
  args->sse  = this_sse >> shift;

  if (x->skip_encode && !is_inter_block(&xd->mi_8x8[0]->mbmi)) {
    // TODO(jingning): tune the model to better capture the distortion.
    const int64_t penalty = (pd->dequant[1] * pd->dequant[1] *
                                (1 << ss_txfrm_size)) >> (shift + 2);
    args->dist += (penalty >> 4);
    args->sse  += penalty;
  }
}

610
static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
611
                       TX_SIZE tx_size, void *arg) {
Deb Mukherjee's avatar
Deb Mukherjee committed
612
  struct rdcost_block_args* args = arg;
613

Deb Mukherjee's avatar
Deb Mukherjee committed
614
  int x_idx, y_idx;
615
  txfrm_block_to_raster_xy(plane_bsize, args->tx_size, block, &x_idx, &y_idx);
Deb Mukherjee's avatar
Deb Mukherjee committed
616

617 618 619
  args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
                           args->t_left + y_idx, args->tx_size,
                           args->scan, args->nb);
Deb Mukherjee's avatar
Deb Mukherjee committed
620 621
}

622 623
// Per-transform-block callback: transforms and quantizes the block (or runs
// the intra encode for non-inter blocks), computes its rate and distortion,
// and accumulates them into args. Does nothing once args->skip is set, and
// sets args->skip when the accumulated RD cost exceeds args->best_rd.
static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
                          TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct encode_b_args encode_args = {x, NULL};
  int64_t rd_coded, rd_zeroed, rd;

  if (args->skip)
    return;

  if (is_inter_block(&xd->mi_8x8[0]->mbmi))
    vp9_xform_quant(plane, block, plane_bsize, tx_size, &encode_args);
  else
    vp9_encode_block_intra(plane, block, plane_bsize, tx_size, &encode_args);

  dist_block(plane, block, tx_size, args);
  rate_block(plane, block, plane_bsize, tx_size, args);

  // Cost of coding the coefficients vs. cost of dropping them all
  // (zero rate, distortion equal to the source energy).
  rd_coded = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
  rd_zeroed = RDCOST(x->rdmult, x->rddiv, 0, args->sse);
  rd = MIN(rd_coded, rd_zeroed);

  // TODO(jingning): temporarily enabled only for luma component
  if (plane == 0) {
    x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
                                    (rd_coded > rd_zeroed && !xd->lossless);
  }

  args->this_rate += args->rate;
  args->this_dist += args->dist;
  args->this_sse  += args->sse;
  args->this_rd   += rd;

  // Flag the remaining blocks to be skipped once the budget is exceeded.
  if (args->this_rd > args->best_rd)
    args->skip = 1;
}

660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692
// Returns 1 if any of the count entries starting at ctx is non-zero.
static int any_entropy_context_set(const ENTROPY_CONTEXT *ctx, int count) {
  int k;
  int acc = 0;
  for (k = 0; k < count; ++k)
    acc |= ctx[k];
  return acc != 0;
}

// For every span-th position i in [0, num_4x4), sets dst[i] to 1 if any of
// src[i .. i + span - 1] is non-zero and to 0 otherwise.
static void collapse_entropy_contexts(ENTROPY_CONTEXT *dst,
                                      const ENTROPY_CONTEXT *src,
                                      int num_4x4, int span) {
  int i;
  for (i = 0; i < num_4x4; i += span)
    dst[i] = any_entropy_context_set(&src[i], span);
}

// Builds the working above/left entropy contexts for a transform size.
// For TX_4X4 the contexts are copied as-is. For larger transforms each group
// of 2, 4 or 8 consecutive 4x4 contexts is reduced to a single 0/1 flag
// stored at the first index of the group; other entries are left untouched.
//
// The groups are tested byte by byte rather than by reading the context
// array through uint16_t/uint32_t/uint64_t pointers, which would violate the
// effective-type rule and could perform misaligned loads.
void vp9_get_entropy_contexts(TX_SIZE tx_size,
    ENTROPY_CONTEXT t_above[16], ENTROPY_CONTEXT t_left[16],
    const ENTROPY_CONTEXT *above, const ENTROPY_CONTEXT *left,
    int num_4x4_w, int num_4x4_h) {
  switch (tx_size) {
    case TX_4X4:
      vpx_memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      vpx_memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      break;
    case TX_8X8:
      collapse_entropy_contexts(t_above, above, num_4x4_w, 2);
      collapse_entropy_contexts(t_left, left, num_4x4_h, 2);
      break;
    case TX_16X16:
      collapse_entropy_contexts(t_above, above, num_4x4_w, 4);
      collapse_entropy_contexts(t_left, left, num_4x4_h, 4);
      break;
    case TX_32X32:
      collapse_entropy_contexts(t_above, above, num_4x4_w, 8);
      collapse_entropy_contexts(t_left, left, num_4x4_h, 8);
      break;
    default:
      assert(!"Invalid transform size.");
      break;
  }
}

693 694 695 696 697 698 699 700 701 702 703 704
// Zeroes *arg and then records the macroblock, transform size, block
// dimensions (in 4x4 units) and the reference RD cost in it.
static void init_rdcost_stack(MACROBLOCK *x, TX_SIZE tx_size,
                              const int num_4x4_w, const int num_4x4_h,
                              const int64_t ref_rdcost,
                              struct rdcost_block_args *arg) {
  vpx_memset(arg, 0, sizeof(*arg));

  arg->x       = x;
  arg->tx_size = tx_size;
  arg->bw      = num_4x4_w;
  arg->bh      = num_4x4_h;
  arg->best_rd = ref_rdcost;
}

705
static void txfm_rd_in_plane(MACROBLOCK *x,
706
                             struct rdcost_block_args *rd_stack,
707 708 709
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
710
                             BLOCK_SIZE bsize, TX_SIZE tx_size) {
Deb Mukherjee's avatar
Deb Mukherjee committed
711
  MACROBLOCKD *const xd = &x->e_mbd;
712
  struct macroblockd_plane *const pd = &xd->plane[plane];
713
  const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
714 715
  const int num_4x4_w = num_4x4_blocks_wide_lookup[bs];
  const int num_4x4_h = num_4x4_blocks_high_lookup[bs];
716
  const scan_order *so;
717

718 719
  init_rdcost_stack(x, tx_size, num_4x4_w, num_4x4_h,
                    ref_best_rd, rd_stack);
720
  if (plane == 0)
721
    xd->mi_8x8[0]->mbmi.tx_size = tx_size;
722

723
  vp9_get_entropy_contexts(tx_size, rd_stack->t_above, rd_stack->t_left,
724 725
                           pd->above_context, pd->left_context,
                           num_4x4_w, num_4x4_h);
726

727 728 729
  so = get_scan(xd, tx_size, pd->plane_type, 0);
  rd_stack->scan = so->scan;
  rd_stack->nb = so->neighbors;
Deb Mukherjee's avatar
Deb Mukherjee committed
730

731
  foreach_transformed_block_in_plane(xd, bsize, plane,
732
                                     block_rd_txfm, rd_stack);
733
  if (rd_stack->skip) {
734 735 736 737 738
    *rate       = INT_MAX;
    *distortion = INT64_MAX;
    *sse        = INT64_MAX;
    *skippable  = 0;
  } else {
739 740 741
    *distortion = rd_stack->this_dist;
    *rate       = rd_stack->this_rate;
    *sse        = rd_stack->this_sse;
742
    *skippable  = vp9_is_skippable_in_plane(x, bsize, plane);
743
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
744 745 746 747 748
}

static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
                                     int *rate, int64_t *distortion,
                                     int *skip, int64_t *sse,
749
                                     int64_t ref_best_rd,
750
                                     BLOCK_SIZE bs) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
751
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
Deb Mukherjee's avatar
Deb Mukherjee committed
752
  VP9_COMMON *const cm = &cpi->common;
Yaowu Xu's avatar
Yaowu Xu committed
753
  const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
Deb Mukherjee's avatar
Deb Mukherjee committed
754
  MACROBLOCKD *const xd = &x->e_mbd;
755
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
Yaowu Xu's avatar
Yaowu Xu committed
756 757 758

  mbmi->tx_size = MIN(max_tx_size, largest_tx_size);

759
  txfm_rd_in_plane(x, &cpi->rdcost_stack, rate, distortion, skip,
760 761
                   &sse[mbmi->tx_size], ref_best_rd, 0, bs,
                   mbmi->tx_size);
Dmitry Kovalev's avatar
Dmitry Kovalev committed
762
  cpi->tx_stepdown_count[0]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
763 764
}

765
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
766
                                     int (*r)[2], int *rate,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
767
                                     int64_t *d, int64_t *distortion,
768
                                     int *s, int *skip,
769
                                     int64_t tx_cache[TX_MODES],
770
                                     BLOCK_SIZE bs) {
771
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
772 773
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
774
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
775
  vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
776
  int64_t rd[TX_SIZES][2];
777
  int n, m;
778
  int s0, s1;
779 780 781
  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  int64_t best_rd = INT64_MAX;
  TX_SIZE best_tx = TX_4X4;
782

783
  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
784 785 786
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
787

788
  for (n = TX_4X4; n <= max_tx_size; n++) {
789 790 791 792 793 794 795 796 797
    r[n][1] = r[n][0];
    if (r[n][0] < INT_MAX) {
      for (m = 0; m <= n - (n == max_tx_size); m++) {
        if (m == n)
          r[n][1] += vp9_cost_zero(tx_probs[m]);
        else
          r[n][1] += vp9_cost_one(tx_probs[m]);
      }
    }
798 799
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
800
    } else if (s[n]) {
801 802 803 804
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
805
    }
806 807 808 809 810

    if (rd[n][1] < best_rd) {
      best_tx = n;
      best_rd = rd[n][1];
    }
811
  }
812 813
  mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
                      best_tx : MIN(max_tx_size, max_mode_tx_size);
814 815


816 817 818
  *distortion = d[mbmi->tx_size];
  *rate       = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->tx_size];
819

820 821 822 823
  tx_cache[ONLY_4X4] = rd[TX_4X4][0];
  tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
  tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
  tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
824

825 826
  if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
    tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
827
    cpi->tx_stepdown_count[0]++;
828 829
  } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
    tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
830
    cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
831
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
832
    tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
833
    cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
834
  } else {
835
    tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
836
    cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
837
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
838
}
839

Deb Mukherjee's avatar
Deb Mukherjee committed
840 841 842 843
static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
                                          int (*r)[2], int *rate,
                                          int64_t *d, int64_t *distortion,
                                          int *s, int *skip, int64_t *sse,
844
                                          int64_t ref_best_rd,
845
                                          BLOCK_SIZE bs) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
846
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
Deb Mukherjee's avatar
Deb Mukherjee committed
847 848
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
849
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
850
  vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
851
  int64_t rd[TX_SIZES][2];
Deb Mukherjee's avatar
Deb Mukherjee committed
852 853
  int n, m;
  int s0, s1;
854
  double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00};
855 856 857
  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  int64_t best_rd = INT64_MAX;
  TX_SIZE best_tx = TX_4X4;
858

859
  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
860 861 862
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
863

Dmitry Kovalev's avatar
Dmitry Kovalev committed
864
  for (n = TX_4X4; n <= max_tx_size; n++) {
865
    double scale = scale_rd[n];
Deb Mukherjee's avatar
Deb Mukherjee committed
866
    r[n][1] = r[n][0];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
867
    for (m = 0; m <= n - (n == max_tx_size); m++) {
Deb Mukherjee's avatar
Deb Mukherjee committed
868 869 870 871 872 873
      if (m == n)
        r[n][1] += vp9_cost_zero(tx_probs[m]);
      else
        r[n][1] += vp9_cost_one(tx_probs[m]);
    }
    if (s[n]) {
874
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]) * scale;
Deb Mukherjee's avatar
Deb Mukherjee committed
875
    } else {
876 877 878 879 880 881
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]) * scale;
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]) * scale;
    }
    if (rd[n][1] < best_rd) {
      best_rd = rd[n][1];
      best_tx = n;
Deb Mukherjee's avatar
Deb Mukherjee committed