vp9_rdopt.c 166 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
15

16
#include "vp9/common/vp9_pragmas.h"
17
18
19
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
20
21
22
23
24
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
25
26
27
28
29
30
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
31
#include "vpx_mem/vpx_mem.h"
32
33
34
35
36
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
37
#include "./vp9_rtcd.h"
38
#include "vp9/common/vp9_mvref_common.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
39
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
40

41
42
#define INVALID_MV 0x80008000

43
44
45
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

46
47
48
#define LAST_FRAME_MODE_MASK    0xFFEDCD60
#define GOLDEN_FRAME_MODE_MASK  0xFFDA3BB0
#define ALT_REF_MODE_MASK       0xFFC648D0
49

Paul Wilkins's avatar
Paul Wilkins committed
50
51
#define MIN_EARLY_TERM_INDEX    3

52
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},

  {DC_PRED,   INTRA_FRAME,  NONE},

  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},

  {NEARMV,    LAST_FRAME,   NONE},
  {NEARMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, LAST_FRAME,   ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},

  {TM_PRED,   INTRA_FRAME,  NONE},

  {NEARMV,    LAST_FRAME,   ALTREF_FRAME},
  {NEWMV,     LAST_FRAME,   ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},

  {ZEROMV,    LAST_FRAME,   NONE},
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {ZEROMV,    ALTREF_FRAME, NONE},
  {ZEROMV,    LAST_FRAME,   ALTREF_FRAME},
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},

  {H_PRED,    INTRA_FRAME,  NONE},
  {V_PRED,    INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D207_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
};

// Table of reference-frame combinations: three single-frame entries, two
// entries pairing a frame with ALTREF_FRAME, and a final INTRA_FRAME entry.
// NOTE(review): presumably the slots are {ref_frame, second_ref_frame} --
// confirm against the REF_DEFINITION declaration.
const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
  {LAST_FRAME,   NONE},
  {GOLDEN_FRAME, NONE},
  {ALTREF_FRAME, NONE},
  {LAST_FRAME,   ALTREF_FRAME},
  {GOLDEN_FRAME, ALTREF_FRAME},
  {INTRA_FRAME,  NONE},
};

101
102
103
104
// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for blocks size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
105
static int rd_thresh_block_size_factor[BLOCK_SIZES] =
106
107
  {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};

Deb Mukherjee's avatar
Deb Mukherjee committed
108
109
110
#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC      1
#define RD_THRESH_POW      1.25
111
#define RD_MULT_EPB_RATIO  64
Deb Mukherjee's avatar
Deb Mukherjee committed
112

113
114
#define MV_COST_WEIGHT      108
#define MV_COST_WEIGHT_SUB  120
115

116
117
118
119
120
121
122
123
124
// Converts a raster-order 4x4 block index inside a plane block into an
// element offset for a buffer with the given stride. The index is split into
// a row and a column using the plane block's log2 width (in 4x4 units), and
// each is scaled by 4 to get sample coordinates.
static int raster_block_offset(BLOCK_SIZE plane_bsize,
                               int raster_block, int stride) {
  const int width_log2 = b_width_log2(plane_bsize);
  const int col_mask = (1 << width_log2) - 1;
  const int row_4x4 = raster_block >> width_log2;
  const int col_4x4 = raster_block & col_mask;
  return (4 * row_4x4) * stride + (4 * col_4x4);
}
// Returns a pointer to the first sample of 4x4 block `raster_block` inside
// `base`, a buffer whose stride equals the plane block's width in samples
// (4 samples per 4x4 column).
static int16_t *raster_block_offset_int16(BLOCK_SIZE plane_bsize,
                                          int raster_block, int16_t *base) {
  const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  return base + raster_block_offset(plane_bsize, raster_block, stride);
}

129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
// Rebuilds the mode cost tables stored in c->mb from probability tables:
//  - y_mode_costs[a][b]         from vp9_kf_y_mode_prob[a][b]
//  - mbmode_cost                from the frame context's y_mode_prob[1]
//  - intra_uv_mode_cost[1]      from the frame context's uv_mode_prob
//  - intra_uv_mode_cost[0]      from vp9_kf_uv_mode_prob
//  - switchable_interp_costs[k] from the frame context's
//                               switchable_interp_prob[k]
// Both uv tables use the probability row at index INTRA_MODES - 1.
static void fill_mode_costs(VP9_COMP *c) {
  VP9_COMMON *const cm = &c->common;
  int above, left, ctx;

  for (above = 0; above < INTRA_MODES; ++above) {
    for (left = 0; left < INTRA_MODES; ++left) {
      vp9_cost_tokens((int *)c->mb.y_mode_costs[above][left],
                      vp9_kf_y_mode_prob[above][left],
                      vp9_intra_mode_tree);
    }
  }

  // TODO(rbultje) separate tables for superblock costing?
  vp9_cost_tokens(c->mb.mbmode_cost, cm->fc.y_mode_prob[1],
                  vp9_intra_mode_tree);
  vp9_cost_tokens(c->mb.intra_uv_mode_cost[1],
                  cm->fc.uv_mode_prob[INTRA_MODES - 1],
                  vp9_intra_mode_tree);
  vp9_cost_tokens(c->mb.intra_uv_mode_cost[0],
                  vp9_kf_uv_mode_prob[INTRA_MODES - 1],
                  vp9_intra_mode_tree);

  for (ctx = 0; ctx < SWITCHABLE_FILTER_CONTEXTS; ++ctx) {
    vp9_cost_tokens((int *)c->mb.switchable_interp_costs[ctx],
                    cm->fc.switchable_interp_prob[ctx],
                    vp9_switchable_interp_tree);
  }
}

153
static void fill_token_costs(vp9_coeff_cost *c,
154
                             vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
155
  int i, j, k, l;
156
  TX_SIZE t;
157
  for (t = TX_4X4; t <= TX_32X32; ++t)
158
    for (i = 0; i < PLANE_TYPES; ++i)
159
160
161
      for (j = 0; j < REF_TYPES; ++j)
        for (k = 0; k < COEF_BANDS; ++k)
          for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
162
163
            vp9_prob probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
164
            vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
165
                            vp9_coef_tree);
166
            vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
167
                                 vp9_coef_tree);
168
169
            assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
                   c[t][i][j][k][1][l][EOB_TOKEN]);
170
          }
171
172
}

173
174
175
176
177
178
// Lookup table of 32 adjustment factors. Only the first five entries are
// non-zero; the remaining 27 are zero (left to implicit zero-initialization).
static const int rd_iifactor[32] = { 4, 4, 3, 2, 1 };
John Koleszar's avatar
John Koleszar committed
179

180
// 3* dc_qlookup[Q]*dc_qlookup[Q];
181

182
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
183
184
185
// Per-qindex lookup tables filled by vp9_init_me_luts() and read by
// vp9_initialize_me_consts().
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

// Populates both SAD-per-bit tables for every quantizer index.
void vp9_init_me_luts(void) {
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    // Both tables are linear in the same converted quantizer value, so the
    // conversion is done once per index.
    const double q = vp9_convert_qindex_to_q(i);
    sad_per_bit16lut[i] = (int)((0.0418 * q) + 2.4107);
    sad_per_bit4lut[i] = (int)(0.063 * q + 2.742);
  }
}
John Koleszar's avatar
John Koleszar committed
198

199
// Computes the rate-distortion multiplier for a quantizer index.
// The base value is 88 * q^2 / 25, where q = vp9_dc_quant(qindex, 0).
// In pass 2, for non-key frames, the value is increased by
// rd_iifactor[n] / 16 of itself, where n is cpi->twopass.next_iiratio
// clamped from above to 31 (the last table index).
int vp9_compute_rd_mult(VP9_COMP *cpi, int qindex) {
  const int q = vp9_dc_quant(qindex, 0);
  // TODO(debargha): Adjust the function below
  int rdmult = 88 * q * q / 25;
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    if (cpi->twopass.next_iiratio > 31)
      rdmult += (rdmult * rd_iifactor[31]) >> 4;
    else
      rdmult += (rdmult * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
  }
  return rdmult;
}

// Maps a quantizer index to an RD threshold scale factor:
//   (vp9_dc_quant(qindex, 0) / 4) ^ RD_THRESH_POW * 5.12,
// truncated to int and never less than 8.
static int compute_rd_thresh_factor(int qindex) {
  int q;
  // TODO(debargha): Adjust the function below
  q = (int)(pow(vp9_dc_quant(qindex, 0) / 4.0, RD_THRESH_POW) * 5.12);
  if (q < 8)
    q = 8;
  return q;
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
221
222
223
// Copies the SAD-per-bit values for `qindex` from the lookup tables into
// cpi->mb. `qindex` is used directly as a table index, so it must be within
// [0, QINDEX_RANGE).
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
  cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
}

226
227
228
229
230
231
232
233
234
// Fills cpi->rd_threshes and cpi->rd_thresh_sub8x8 for every segment and
// block size. Each threshold is
//   thresh_mult * q * rd_thresh_block_size_factor[bsize] / 4,
// where q comes from compute_rd_thresh_factor() for the segment's clamped
// qindex. Multipliers too large to scale without exceeding INT_MAX are
// stored as INT_MAX.
static void set_block_thresholds(VP9_COMP *cpi) {
  int i, bsize, segment_id;
  VP9_COMMON *cm = &cpi->common;

  for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
    int q;
    int segment_qindex = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
    segment_qindex = clamp(segment_qindex + cm->y_dc_delta_q, 0, MAXQ);
    q = compute_rd_thresh_factor(segment_qindex);

    for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
      // Combined scale for this segment and block size; shared by both
      // threshold tables below.
      const int scale = q * rd_thresh_block_size_factor[bsize];
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[]
      const int thresh_max = INT_MAX / scale;

      for (i = 0; i < MAX_MODES; ++i) {
        if (cpi->sf.thresh_mult[i] < thresh_max) {
          cpi->rd_threshes[segment_id][bsize][i] =
              cpi->sf.thresh_mult[i] * scale / 4;
        } else {
          cpi->rd_threshes[segment_id][bsize][i] = INT_MAX;
        }
      }

      for (i = 0; i < MAX_REFS; ++i) {
        if (cpi->sf.thresh_mult_sub8x8[i] < thresh_max) {
          cpi->rd_thresh_sub8x8[segment_id][bsize][i] =
              cpi->sf.thresh_mult_sub8x8[i] * scale / 4;
        } else {
          cpi->rd_thresh_sub8x8[segment_id][bsize][i] = INT_MAX;
        }
      }
    }
  }
}

264
265
266
// Per-frame setup of the rate-distortion constants and cost tables in `cpi`:
//  - RDDIV, RDMULT and mb.errorperbit (forced to at least 1),
//  - speed features and the transform-size selection flag,
//  - RD thresholds, token costs, partition costs and mode costs,
//  - for frames that are not intra-only, the MV cost table and the inter
//    mode costs.
void vp9_initialize_rd_consts(VP9_COMP *cpi) {
  VP9_COMMON *cm = &cpi->common;
  int qindex, i;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  qindex = clamp(cm->base_qindex + cm->y_dc_delta_q, 0, MAXQ);

  cpi->RDDIV = RDDIV_BITS;  // in bits (to multiply D by 128)
  cpi->RDMULT = vp9_compute_rd_mult(cpi, qindex);

  cpi->mb.errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO;
  // Never let errorperbit be zero.
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);

  vp9_set_speed_features(cpi);

  // Transform size selection is disabled only when the search method is
  // USE_LARGESTALL on a non-key frame.
  cpi->mb.select_txfm_size =
      !(cpi->sf.tx_size_search_method == USE_LARGESTALL &&
        cm->frame_type != KEY_FRAME);

  set_block_thresholds(cpi);

  fill_token_costs(cpi->mb.token_costs, cm->fc.coef_probs);

  for (i = 0; i < PARTITION_CONTEXTS; i++)
    vp9_cost_tokens(cpi->mb.partition_cost[i], get_partition_probs(cm, i),
                    vp9_partition_tree);

  /*rough estimate for costing*/
  fill_mode_costs(cpi);

  if (!frame_is_intra_only(cm)) {
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cm->allow_high_precision_mv ? cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cm->fc.nmvc,
        cm->allow_high_precision_mv, 1, 1);

    for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
      vp9_cost_tokens((int *)cpi->mb.inter_mode_cost[i],
                      cm->fc.inter_mode_probs[i], vp9_inter_mode_tree);
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
312
// Linearly interpolates two equally sized tables at the same position.
//   x         position to evaluate; callers are expected to pass x >= 0
//   ntab      number of entries in each table
//   inv_step  table entries per unit of x (table index = x * inv_step)
//   tab1/tab2 the tables
//   v1/v2     receive the interpolated values from tab1 and tab2
// Positions at or beyond the last entry return the last entry.
static INLINE void linear_interpolate2(double x, int ntab, int inv_step,
                                       const double *tab1, const double *tab2,
                                       double *v1, double *v2) {
  const double y = x * inv_step;
  // Range-check in floating point BEFORE converting to int: converting a
  // double that does not fit in an int (or a NaN) is undefined behavior.
  // The negated comparison also routes NaN to the clamped branch.
  if (!(y < ntab - 1)) {
    *v1 = tab1[ntab - 1];
    *v2 = tab2[ntab - 1];
  } else {
    const int d = (int)y;
    const double a = y - d;
    *v1 = tab1[d] * (1 - a) + tab1[d + 1] * a;
    *v2 = tab2[d] * (1 - a) + tab2[d + 1] * a;
  }
}

327
328
329
330
331
// Looks up the normalized rate *R and normalized distortion *D for a
// normalized quantizer step x (x = qpstep / sqrt(variance), x >= 0) by
// linear interpolation in the two tables below. The tables are sampled at
// 8 entries per unit of x; x beyond the table range returns the last entry.
static void model_rd_norm(double x, double *R, double *D) {
  static const int inv_tab_step = 8;
  static const int tab_size = 120;
  // NOTE: The tables below must be of the same size
  //
  // Normalized rate
  // This table models the rate for a Laplacian source with given variance
  // when quantized with a uniform quantizer with given stepsize. The closed
  // form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const double rate_tab[] = {
    64.00, 4.944, 3.949, 3.372, 2.966, 2.655, 2.403, 2.194,
    2.014, 1.858, 1.720, 1.596, 1.485, 1.384, 1.291, 1.206,
    1.127, 1.054, 0.986, 0.923, 0.863, 0.808, 0.756, 0.708,
    0.662, 0.619, 0.579, 0.541, 0.506, 0.473, 0.442, 0.412,
    0.385, 0.359, 0.335, 0.313, 0.291, 0.272, 0.253, 0.236,
    0.220, 0.204, 0.190, 0.177, 0.165, 0.153, 0.142, 0.132,
    0.123, 0.114, 0.106, 0.099, 0.091, 0.085, 0.079, 0.073,
    0.068, 0.063, 0.058, 0.054, 0.050, 0.047, 0.043, 0.040,
    0.037, 0.034, 0.032, 0.029, 0.027, 0.025, 0.023, 0.022,
    0.020, 0.019, 0.017, 0.016, 0.015, 0.014, 0.013, 0.012,
    0.011, 0.010, 0.009, 0.008, 0.008, 0.007, 0.007, 0.006,
    0.006, 0.005, 0.005, 0.005, 0.004, 0.004, 0.004, 0.003,
    0.003, 0.003, 0.003, 0.002, 0.002, 0.002, 0.002, 0.002,
    0.002, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001,
    0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.000,
  };
  // Normalized distortion
  // This table models the normalized distortion for a Laplacian source
  // with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance)
  // Note the actual distortion is Dn * variance.
  static const double dist_tab[] = {
    0.000, 0.001, 0.005, 0.012, 0.021, 0.032, 0.045, 0.061,
    0.079, 0.098, 0.119, 0.142, 0.166, 0.190, 0.216, 0.242,
    0.269, 0.296, 0.324, 0.351, 0.378, 0.405, 0.432, 0.458,
    0.484, 0.509, 0.534, 0.557, 0.580, 0.603, 0.624, 0.645,
    0.664, 0.683, 0.702, 0.719, 0.735, 0.751, 0.766, 0.780,
    0.794, 0.807, 0.819, 0.830, 0.841, 0.851, 0.861, 0.870,
    0.878, 0.886, 0.894, 0.901, 0.907, 0.913, 0.919, 0.925,
    0.930, 0.935, 0.939, 0.943, 0.947, 0.951, 0.954, 0.957,
    0.960, 0.963, 0.966, 0.968, 0.971, 0.973, 0.975, 0.976,
    0.978, 0.980, 0.981, 0.982, 0.984, 0.985, 0.986, 0.987,
    0.988, 0.989, 0.990, 0.990, 0.991, 0.992, 0.992, 0.993,
    0.993, 0.994, 0.994, 0.995, 0.995, 0.996, 0.996, 0.996,
    0.996, 0.997, 0.997, 0.997, 0.997, 0.998, 0.998, 0.998,
    0.998, 0.998, 0.998, 0.999, 0.999, 0.999, 0.999, 0.999,
    0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 1.000,
  };
  // Guard the "same size" requirement stated above: both tables must hold
  // exactly tab_size entries, or the interpolation would read out of bounds.
  assert(sizeof(rate_tab) == tab_size * sizeof(rate_tab[0]));
  assert(sizeof(dist_tab) == tab_size * sizeof(dist_tab[0]));
  assert(sizeof(rate_tab) == sizeof(dist_tab));
  assert(x >= 0.0);
  linear_interpolate2(x, tab_size, inv_tab_step,
                      rate_tab, dist_tab, R, D);
}

// Estimates rate and distortion from a block's variance (or sse).
//   var    variance/sse summed over the block
//   n      number of samples in the block
//   qstep  effective quantizer step size
//   rate   receives n * 256 * Rn, rounded to nearest
//   dist   receives var * Dn, rounded to nearest
// Rn and Dn are the normalized values from model_rd_norm() evaluated at
// qstep / sqrt(var / n). Both outputs are 0 when var or n is 0.
static void model_rd_from_var_lapndz(int var, int n, int qstep,
                                     int *rate, int64_t *dist) {
  // This function models the rate and distortion for a Laplacian
  // source with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expressions are in:
  // Hang and Chen, "Source Model for transform video coder and its
  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
  // Sys. for Video Tech., April 1997.
  vp9_clear_system_state();
  if (var == 0 || n == 0) {
    *rate = 0;
    *dist = 0;
  } else {
    double D, R;
    double s2 = (double) var / n;  // per-sample variance
    double x = qstep / sqrt(s2);   // normalized quantizer step
    model_rd_norm(x, &R, &D);
    *rate = (int)((n << 8) * R + 0.5);
    *dist = (int)(var * D + 0.5);
  }
  vp9_clear_system_state();
}

413
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
Deb Mukherjee's avatar
Deb Mukherjee committed
414
415
416
417
418
419
420
421
422
423
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  int i, rate_sum = 0, dist_sum = 0;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
424
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
Deb Mukherjee's avatar
Deb Mukherjee committed
425
426
427
    int rate;
    int64_t dist;
    (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
428
                              pd->dst.buf, pd->dst.stride, &x->pred_sse);
Deb Mukherjee's avatar
Deb Mukherjee committed
429
    // sse works better than var, since there is no dc prediction used
430
    model_rd_from_var_lapndz(x->pred_sse, 1 << num_pels_log2_lookup[bs],
431
                             pd->dequant[1] >> 3, &rate, &dist);
Deb Mukherjee's avatar
Deb Mukherjee committed
432
433

    rate_sum += rate;
Yaowu Xu's avatar
Yaowu Xu committed
434
    dist_sum += (int)dist;
Deb Mukherjee's avatar
Deb Mukherjee committed
435
436
437
438
439
440
  }

  *out_rate_sum = rate_sum;
  *out_dist_sum = dist_sum << 4;
}

441
static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize,
Deb Mukherjee's avatar
Deb Mukherjee committed
442
443
444
445
                                 TX_SIZE tx_size,
                                 MACROBLOCK *x, MACROBLOCKD *xd,
                                 int *out_rate_sum, int64_t *out_dist_sum,
                                 int *out_skip) {
446
  int j, k;
447
  BLOCK_SIZE bs;
Deb Mukherjee's avatar
Deb Mukherjee committed
448
449
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
450
451
  const int width = 4 << num_4x4_blocks_wide_lookup[bsize];
  const int height = 4 << num_4x4_blocks_high_lookup[bsize];
Deb Mukherjee's avatar
Deb Mukherjee committed
452
453
  int rate_sum = 0;
  int64_t dist_sum = 0;
454
  const int t = 4 << tx_size;
Deb Mukherjee's avatar
Deb Mukherjee committed
455
456
457
458
459
460
461
462
463
464
465
466

  if (tx_size == TX_4X4) {
    bs = BLOCK_4X4;
  } else if (tx_size == TX_8X8) {
    bs = BLOCK_8X8;
  } else if (tx_size == TX_16X16) {
    bs = BLOCK_16X16;
  } else if (tx_size == TX_32X32) {
    bs = BLOCK_32X32;
  } else {
    assert(0);
  }
467

Deb Mukherjee's avatar
Deb Mukherjee committed
468
  *out_skip = 1;
Jim Bankoski's avatar
Jim Bankoski committed
469
470
  for (j = 0; j < height; j += t) {
    for (k = 0; k < width; k += t) {
Deb Mukherjee's avatar
Deb Mukherjee committed
471
472
473
      int rate;
      int64_t dist;
      unsigned int sse;
474
475
476
      cpi->fn_ptr[bs].vf(&p->src.buf[j * p->src.stride + k], p->src.stride,
                         &pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride,
                         &sse);
Deb Mukherjee's avatar
Deb Mukherjee committed
477
      // sse works better than var, since there is no dc prediction used
478
      model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3, &rate, &dist);
Deb Mukherjee's avatar
Deb Mukherjee committed
479
480
481
482
483
      rate_sum += rate;
      dist_sum += dist;
      *out_skip &= (rate < 1024);
    }
  }
484

Deb Mukherjee's avatar
Deb Mukherjee committed
485
  *out_rate_sum = rate_sum;
486
  *out_dist_sum = dist_sum << 4;
Deb Mukherjee's avatar
Deb Mukherjee committed
487
488
}

Ronald S. Bultje's avatar
Ronald S. Bultje committed
489
// Returns the sum of squared differences between `coeff` and `dqcoeff` over
// `block_size` entries, and stores the sum of squared `coeff` values in
// *ssz. Both sums are accumulated in 64 bits.
int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  intptr_t i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    // Widen before squaring: a 16-bit difference squared can reach
    // 65535^2, which does not fit in a signed 32-bit int.
    const int64_t this_diff = coeff[i] - dqcoeff[i];
    const int64_t this_coeff = coeff[i];
    error += this_diff * this_diff;
    sqcoeff += this_coeff * this_coeff;
  }

  *ssz = sqcoeff;
  return error;
}

504
505
506
507
508
/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). */
509
static const int16_t band_counts[TX_SIZES][8] = {
510
511
512
513
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
514
515
};

516
static INLINE int cost_coeffs(MACROBLOCK *x,
517
                              int plane, int block,
518
                              ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
519
                              TX_SIZE tx_size,
520
                              const int16_t *scan, const int16_t *nb) {
521
  MACROBLOCKD *const xd = &x->e_mbd;
522
  MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
523
  struct macroblock_plane *p = &x->plane[plane];
524
525
  struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
526
  const int16_t *band_count = &band_counts[tx_size][1];
527
  const int eob = p->eobs[block];
528
  const int16_t *const qcoeff_ptr = BLOCK_OFFSET(p->qcoeff, block);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
529
  const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
530
  unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
531
                   x->token_costs[tx_size][type][ref];
532
  const ENTROPY_CONTEXT above_ec = !!*A, left_ec = !!*L;
533
  uint8_t *p_tok = x->token_cache;
534
535
  int pt = combine_entropy_contexts(above_ec, left_ec);
  int c, cost;
536
537

  // Check for consistency of tx_size with mode info
538
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
539
                                      : get_uv_tx_size(mbmi) == tx_size);
540

541
542
  if (eob == 0) {
    // single eob token
543
    cost = token_costs[0][0][pt][EOB_TOKEN];
544
    c = 0;
545
  } else {
546
    int band_left = *band_count++;
547
548

    // dc token
549
550
    int v = qcoeff_ptr[0];
    int prev_t = vp9_dct_value_tokens_ptr[v].token;
551
    cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
552
    p_tok[0] = vp9_pt_energy_class[prev_t];
553
    ++token_costs;
554
555
556
557

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
558
      int t;
559
560
561

      v = qcoeff_ptr[rc];
      t = vp9_dct_value_tokens_ptr[v].token;
562
      pt = get_coef_context(nb, p_tok, c);
563
      cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
564
      p_tok[rc] = vp9_pt_energy_class[t];
565
      prev_t = t;
566
      if (!--band_left) {
567
568
        band_left = *band_count++;
        ++token_costs;
569
      }
570
    }
571
572

    // eob token
573
    if (band_left) {
574
      pt = get_coef_context(nb, p_tok, c);
575
      cost += (*token_costs)[0][pt][EOB_TOKEN];
576
    }
577
578
  }

579
  // is eob first coefficient;
580
  *A = *L = (c > 0);
581

582
583
584
  return cost;
}

585
// Computes the distortion and sse of one transform block and stores them in
// args->dist and args->sse (`arg` is a struct rdcost_block_args *).
// Both come from vp9_block_error() on the block's original and dequantized
// coefficients, right-shifted by 2 unless args->tx_size is TX_32X32.
// When x->skip_encode is set and the block is not inter, a term derived
// from the squared dequant[1] value is added to both.
static void dist_block(int plane, int block, TX_SIZE tx_size, void *arg) {
  // 16 << ss_txfrm_size is the number of coefficients in the transform.
  const int ss_txfrm_size = tx_size << 1;
  struct rdcost_block_args* args = arg;
  MACROBLOCK* const x = args->x;
  MACROBLOCKD* const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  int64_t this_sse;
  int shift = args->tx_size == TX_32X32 ? 0 : 2;
  int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                               &this_sse) >> shift;
  args->sse  = this_sse >> shift;

  if (x->skip_encode && !is_inter_block(&xd->mi_8x8[0]->mbmi)) {
    // TODO(jingning): tune the model to better capture the distortion.
    // Computed in 64 bits so the product cannot overflow an int.
    const int64_t dq = pd->dequant[1];
    const int64_t extra = (dq * dq * (1 << ss_txfrm_size)) >> (shift + 2);
    args->dist += (extra >> 4);
    args->sse  += extra;
  }
}

609
static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
610
                       TX_SIZE tx_size, void *arg) {
Deb Mukherjee's avatar
Deb Mukherjee committed
611
  struct rdcost_block_args* args = arg;
612

Deb Mukherjee's avatar
Deb Mukherjee committed
613
  int x_idx, y_idx;
614
  txfrm_block_to_raster_xy(plane_bsize, args->tx_size, block, &x_idx, &y_idx);
Deb Mukherjee's avatar
Deb Mukherjee committed
615

616
617
618
  args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
                           args->t_left + y_idx, args->tx_size,
                           args->scan, args->nb);
Deb Mukherjee's avatar
Deb Mukherjee committed
619
620
}

621
622
// Per-transform-block callback that accumulates rate, distortion, sse and
// RD cost into `arg` (a struct rdcost_block_args *).
// The block is encoded (intra) or transformed and quantized (inter), then
// costed. Its RD contribution is the smaller of coding the coefficients
// (rd1) and dropping them (rd2). For plane 0, x->zcoeff_blk records whether
// the block is empty or cheaper to drop. Once the running RD total exceeds
// args->best_rd, args->skip is set and later calls return immediately.
static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
                          TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct encode_b_args encode_args = {x, NULL};
  int64_t rd1, rd2, rd;

  if (args->skip)
    return;

  if (!is_inter_block(&xd->mi_8x8[0]->mbmi))
    vp9_encode_block_intra(plane, block, plane_bsize, tx_size, &encode_args);
  else
    vp9_xform_quant(plane, block, plane_bsize, tx_size, &encode_args);

  dist_block(plane, block, tx_size, args);
  rate_block(plane, block, plane_bsize, tx_size, args);
  rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);

  // TODO(jingning): temporarily enabled only for luma component
  rd = MIN(rd1, rd2);
  if (plane == 0)
    x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
                                    (rd1 > rd2 && !xd->lossless);

  args->this_rate += args->rate;
  args->this_dist += args->dist;
  args->this_sse  += args->sse;
  args->this_rd += rd;

  // Already worse than the best known RD cost: stop evaluating.
  if (args->this_rd > args->best_rd)
    args->skip = 1;
}

659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
// Writes into dst[0], dst[step], dst[2 * step], ... (for indices below
// `count`) whether any of the `step` bytes starting at the same index of
// `src` is non-zero. The bytes are copied into a zeroed 64-bit word rather
// than read through a cast pointer, which avoids strict-aliasing and
// alignment problems. `step` must be at most 8.
static void merge_entropy_contexts(ENTROPY_CONTEXT *dst,
                                   const ENTROPY_CONTEXT *src,
                                   int count, int step) {
  int i;
  for (i = 0; i < count; i += step) {
    uint64_t bits = 0;
    vpx_memcpy(&bits, &src[i], step);
    dst[i] = bits != 0;
  }
}

// Builds the working above/left entropy contexts for a given transform size.
//   tx_size           transform size
//   t_above, t_left   output context arrays
//   above, left       the plane's current context arrays
//   num_4x4_w/_h      block width/height in 4x4 units
// For TX_4X4 the contexts are copied as-is. For larger transforms the first
// entry of each group of 2, 4 or 8 (TX_8X8, TX_16X16, TX_32X32) is set to
// whether any byte of that group is non-zero; other entries are not written.
void vp9_get_entropy_contexts(TX_SIZE tx_size,
    ENTROPY_CONTEXT t_above[16], ENTROPY_CONTEXT t_left[16],
    const ENTROPY_CONTEXT *above, const ENTROPY_CONTEXT *left,
    int num_4x4_w, int num_4x4_h) {
  switch (tx_size) {
    case TX_4X4:
      vpx_memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      vpx_memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      break;
    case TX_8X8:
      merge_entropy_contexts(t_above, above, num_4x4_w, 2);
      merge_entropy_contexts(t_left, left, num_4x4_h, 2);
      break;
    case TX_16X16:
      merge_entropy_contexts(t_above, above, num_4x4_w, 4);
      merge_entropy_contexts(t_left, left, num_4x4_h, 4);
      break;
    case TX_32X32:
      merge_entropy_contexts(t_above, above, num_4x4_w, 8);
      merge_entropy_contexts(t_left, left, num_4x4_h, 8);
      break;
    default:
      assert(0 && "Invalid transform size.");
      break;
  }
}

692
693
694
695
696
697
698
699
700
701
702
703
// Resets `arg` to all zeros and then records the macroblock, the transform
// size, the block dimensions in 4x4 units and the reference RD cost that
// the per-block RD accumulation is compared against.
static void init_rdcost_stack(MACROBLOCK *x, TX_SIZE tx_size,
                              const int num_4x4_w, const int num_4x4_h,
                              const int64_t ref_rdcost,
                              struct rdcost_block_args *arg) {
  vpx_memset(arg, 0, sizeof(*arg));

  arg->best_rd = ref_rdcost;
  arg->bh = num_4x4_h;
  arg->bw = num_4x4_w;
  arg->tx_size = tx_size;
  arg->x = x;
}

704
static void txfm_rd_in_plane(MACROBLOCK *x,
705
                             struct rdcost_block_args *rd_stack,
706
707
708
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
709
                             BLOCK_SIZE bsize, TX_SIZE tx_size) {
Deb Mukherjee's avatar
Deb Mukherjee committed
710
  MACROBLOCKD *const xd = &x->e_mbd;
711
  struct macroblockd_plane *const pd = &xd->plane[plane];
712
  const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
713
714
  const int num_4x4_w = num_4x4_blocks_wide_lookup[bs];
  const int num_4x4_h = num_4x4_blocks_high_lookup[bs];
715
  const scan_order *so;
716

717
718
  init_rdcost_stack(x, tx_size, num_4x4_w, num_4x4_h,
                    ref_best_rd, rd_stack);
719
  if (plane == 0)
720
    xd->mi_8x8[0]->mbmi.tx_size = tx_size;
721

722
  vp9_get_entropy_contexts(tx_size, rd_stack->t_above, rd_stack->t_left,
723
724
                           pd->above_context, pd->left_context,
                           num_4x4_w, num_4x4_h);
725

726
727
728
  so = get_scan(xd, tx_size, pd->plane_type, 0);
  rd_stack->scan = so->scan;
  rd_stack->nb = so->neighbors;
Deb Mukherjee's avatar
Deb Mukherjee committed
729

730
  foreach_transformed_block_in_plane(xd, bsize, plane,
731
                                     block_rd_txfm, rd_stack);
732
  if (rd_stack->skip) {
733
734
735
736
737
    *rate       = INT_MAX;
    *distortion = INT64_MAX;
    *sse        = INT64_MAX;
    *skippable  = 0;
  } else {
738
739
740
    *distortion = rd_stack->this_dist;
    *rate       = rd_stack->this_rate;
    *sse        = rd_stack->this_sse;
741
    *skippable  = vp9_is_skippable_in_plane(x, bsize, plane);
742
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
743
744
745
746
747
}

static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
                                     int *rate, int64_t *distortion,
                                     int *skip, int64_t *sse,
748
                                     int64_t ref_best_rd,
749
                                     BLOCK_SIZE bs) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
750
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
Deb Mukherjee's avatar
Deb Mukherjee committed
751
  VP9_COMMON *const cm = &cpi->common;
Yaowu Xu's avatar
Yaowu Xu committed
752
  const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
Deb Mukherjee's avatar
Deb Mukherjee committed
753
  MACROBLOCKD *const xd = &x->e_mbd;
754
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
Yaowu Xu's avatar
Yaowu Xu committed
755
756
757

  mbmi->tx_size = MIN(max_tx_size, largest_tx_size);

758
  txfm_rd_in_plane(x, &cpi->rdcost_stack, rate, distortion, skip,
759
760
                   &sse[mbmi->tx_size], ref_best_rd, 0, bs,
                   mbmi->tx_size);
Dmitry Kovalev's avatar
Dmitry Kovalev committed
761
  cpi->tx_stepdown_count[0]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
762
763
}

764
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
765
                                     int (*r)[2], int *rate,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
766
                                     int64_t *d, int64_t *distortion,
767
                                     int *s, int *skip,
768
                                     int64_t tx_cache[TX_MODES],
769
                                     BLOCK_SIZE bs) {
770
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
771
772
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
773
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
774
  vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
775
  int64_t rd[TX_SIZES][2];
776
  int n, m;
777
  int s0, s1;
778
779
780
  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  int64_t best_rd = INT64_MAX;
  TX_SIZE best_tx = TX_4X4;
781

782
  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
783
784
785
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
786

787
  for (n = TX_4X4; n <= max_tx_size; n++) {
788
789
790
791
792
793
794
795
796
    r[n][1] = r[n][0];
    if (r[n][0] < INT_MAX) {
      for (m = 0; m <= n - (n == max_tx_size); m++) {
        if (m == n)
          r[n][1] += vp9_cost_zero(tx_probs[m]);
        else
          r[n][1] += vp9_cost_one(tx_probs[m]);
      }
    }
797
798
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
799
    } else if (s[n]) {
800
801
802
803
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
804
    }
805
806
807
808
809

    if (rd[n][1] < best_rd) {
      best_tx = n;
      best_rd = rd[n][1];
    }
810
  }
811
812
  mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
                      best_tx : MIN(max_tx_size, max_mode_tx_size);
813
814


815
816
817
  *distortion = d[mbmi->tx_size];
  *rate       = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->tx_size];
818

819
820
821
822
  tx_cache[ONLY_4X4] = rd[TX_4X4][0];
  tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
  tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
  tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
823

824
825
  if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
    tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
826
    cpi->tx_stepdown_count[0]++;
827
828
  } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
    tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
829
    cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
830
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
831
    tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
832
    cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
833
  } else {
834
    tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
835
    cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
836
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
837
}
838

Deb Mukherjee's avatar
Deb Mukherjee committed
839
840
841
842
static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
                                          int (*r)[2], int *rate,
                                          int64_t *d, int64_t *distortion,
                                          int *s, int *skip, int64_t *sse,
843
                                          int64_t ref_best_rd,
844
                                          BLOCK_SIZE bs) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
845
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
Deb Mukherjee's avatar
Deb Mukherjee committed
846
847
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
848
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
849
  vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
850
  int64_t rd[TX_SIZES][2];
Deb Mukherjee's avatar
Deb Mukherjee committed
851
852
  int n, m;
  int s0, s1;
853
  double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00};
854
855
856
  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  int64_t best_rd = INT64_MAX;
  TX_SIZE best_tx = TX_4X4;
857

858
  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
859
860
861
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
862

Dmitry Kovalev's avatar
Dmitry Kovalev committed
863
  for (n = TX_4X4; n <= max_tx_size; n++) {
864
    double scale = scale_rd[n];
Deb Mukherjee's avatar
Deb Mukherjee committed
865
    r[n][1] = r[n][0];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
866
    for (m = 0; m <= n - (n == max_tx_size); m++) {
Deb Mukherjee's avatar
Deb Mukherjee committed
867
868
869
870
871
872
      if (m == n)
        r[n][1] += vp9_cost_zero(tx_probs[m]);
      else
        r[n][1] += vp9_cost_one(tx_probs[m]);
    }
    if (s[n]) {
873
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]) * scale;
Deb Mukherjee's avatar
Deb Mukherjee committed
874
    } else {
875
876
877
878
879
880
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]) * scale;
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]) * scale;
    }
    if (rd[n][1] < best_rd) {
      best_rd = rd[n][1];
      best_tx = n;
Deb Mukherjee's avatar
Deb Mukherjee committed
881
882
    }
  }
883

884
885
  mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
                      best_tx : MIN(max_tx_size, max_mode_tx_size);
886

887
888
  // Actually encode using the chosen mode if a model was used, but do not
  // update the r, d costs
889
890
  txfm_rd_in_plane(x, &cpi->rdcost_stack, rate, distortion, skip,
                   &sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size);
891

892
  if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
893
    cpi->tx_stepdown_count[0]++;
894
  } else if (max_tx_size >= TX_16X16 &&  best_tx == TX_16X16) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
895
    cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
896
  } else if (rd[TX_8X8][1] <= rd[TX_4X4][1]) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
897
    cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
898
  } else {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
899
    cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
900
  }