/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
15

16
#include "vp9/common/vp9_pragmas.h"
17
18
19
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
20
21
22
23
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_quant_common.h"
24
25
26
27
28
29
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
30
#include "vpx_mem/vpx_mem.h"
31
32
33
34
35
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
36
#include "./vp9_rtcd.h"
37
#include "vp9/common/vp9_mvref_common.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
38
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
39

40
41
// Sentinel bit pattern for a motion vector (going by the name).
// NOTE(review): no user is visible in this part of the file - confirm usage.
#define INVALID_MV 0x80008000

/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

// Per-reference-frame mode bit masks.
// NOTE(review): presumably one bit per vp9_mode_order[] index - confirm
// against the code that consumes these masks.
#define LAST_FRAME_MODE_MASK    0xFFEDCD60
#define GOLDEN_FRAME_MODE_MASK  0xFFDA3BB0
#define ALT_REF_MODE_MASK       0xFFC648D0

// NOTE(review): minimum index for early termination, judging by the name;
// the consumer is outside this part of the file.
#define MIN_EARLY_TERM_INDEX    3

51
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},

  {DC_PRED,   INTRA_FRAME,  NONE},

  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},

  {NEARMV,    LAST_FRAME,   NONE},
  {NEARMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, LAST_FRAME,   ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},

  {TM_PRED,   INTRA_FRAME,  NONE},

  {NEARMV,    LAST_FRAME,   ALTREF_FRAME},
  {NEWMV,     LAST_FRAME,   ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},

  {ZEROMV,    LAST_FRAME,   NONE},
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {ZEROMV,    ALTREF_FRAME, NONE},
  {ZEROMV,    LAST_FRAME,   ALTREF_FRAME},
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},

  {H_PRED,    INTRA_FRAME,  NONE},
  {V_PRED,    INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D207_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
};

// Table of reference-frame combinations: each row holds two reference-frame
// slots, with NONE in the second slot on single-reference and intra rows.
// NOTE(review): presumably indexes cpi->sf.thresh_mult_sub8x8[] and
// cpi->rd_thresh_sub8x8[][][] (both sized MAX_REFS) - confirm.
const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
  {LAST_FRAME,   NONE},
  {GOLDEN_FRAME, NONE},
  {ALTREF_FRAME, NONE},
  {LAST_FRAME,   ALTREF_FRAME},
  {GOLDEN_FRAME, ALTREF_FRAME},
  {INTRA_FRAME,  NONE},
};

100
101
102
103
// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for blocks size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
104
static int rd_thresh_block_size_factor[BLOCK_SIZES] =
105
106
  {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};

Deb Mukherjee's avatar
Deb Mukherjee committed
107
108
109
// NOTE(review): RD_THRESH_MAX_FACT and RD_THRESH_INC have no user in this
// part of the file - confirm their meaning at the point of use.
#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC      1
// Exponent applied to (dc quantizer / 4) in compute_rd_thresh_factor().
#define RD_THRESH_POW      1.25
// errorperbit is derived as RDMULT / RD_MULT_EPB_RATIO in
// vp9_initialize_rd_consts().
#define RD_MULT_EPB_RATIO  64

// NOTE(review): motion-vector cost weights, judging by the names; the
// consumers are outside this part of the file.
#define MV_COST_WEIGHT      108
#define MV_COST_WEIGHT_SUB  120
114

115
116
117
118
119
120
121
122
123
// Converts a raster-order 4x4 block index inside a plane block into an
// element offset for a buffer with the given stride. The row and column of
// the 4x4 block are recovered from the log2 block width, then scaled by 4.
static int raster_block_offset(BLOCK_SIZE plane_bsize,
                               int raster_block, int stride) {
  const int cols_log2 = b_width_log2(plane_bsize);
  const int col_mask = (1 << cols_log2) - 1;
  const int row_px = 4 * (raster_block >> cols_log2);
  const int col_px = 4 * (raster_block & col_mask);

  return row_px * stride + col_px;
}
// Returns a pointer to the first element of the given raster-order 4x4 block
// inside an int16_t buffer whose row stride equals the block width
// (4 * number of 4x4 columns in plane_bsize).
static int16_t *raster_block_offset_int16(BLOCK_SIZE plane_bsize,
                                          int raster_block, int16_t *base) {
  const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  return base + raster_block_offset(plane_bsize, raster_block, stride);
}

128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
// Populates the mode-related cost tables in c->mb from the key-frame
// probability tables and the current frame context (cm->fc):
//  - y_mode_costs[][]         from vp9_kf_y_mode_prob[][]
//  - mbmode_cost              from fc.y_mode_prob[1]
//  - intra_uv_mode_cost[1]    from fc.uv_mode_prob[INTRA_MODES - 1]
//  - intra_uv_mode_cost[0]    from vp9_kf_uv_mode_prob[INTRA_MODES - 1]
//  - switchable_interp_costs  from fc.switchable_interp_prob[]
static void fill_mode_costs(VP9_COMP *c) {
  VP9_COMMON *const cm = &c->common;
  int row, col, ctx;

  for (row = 0; row < INTRA_MODES; ++row) {
    for (col = 0; col < INTRA_MODES; ++col) {
      vp9_cost_tokens((int *)c->mb.y_mode_costs[row][col],
                      vp9_kf_y_mode_prob[row][col],
                      vp9_intra_mode_tree);
    }
  }

  // TODO(rbultje) separate tables for superblock costing?
  vp9_cost_tokens(c->mb.mbmode_cost,
                  cm->fc.y_mode_prob[1],
                  vp9_intra_mode_tree);
  vp9_cost_tokens(c->mb.intra_uv_mode_cost[1],
                  cm->fc.uv_mode_prob[INTRA_MODES - 1],
                  vp9_intra_mode_tree);
  vp9_cost_tokens(c->mb.intra_uv_mode_cost[0],
                  vp9_kf_uv_mode_prob[INTRA_MODES - 1],
                  vp9_intra_mode_tree);

  for (ctx = 0; ctx < SWITCHABLE_FILTER_CONTEXTS; ++ctx) {
    vp9_cost_tokens((int *)c->mb.switchable_interp_costs[ctx],
                    cm->fc.switchable_interp_prob[ctx],
                    vp9_switchable_interp_tree);
  }
}

152
static void fill_token_costs(vp9_coeff_cost *c,
153
                             vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
154
  int i, j, k, l;
155
  TX_SIZE t;
156
  for (t = TX_4X4; t <= TX_32X32; ++t)
157
    for (i = 0; i < PLANE_TYPES; ++i)
158
159
160
      for (j = 0; j < REF_TYPES; ++j)
        for (k = 0; k < COEF_BANDS; ++k)
          for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
161
162
            vp9_prob probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
163
            vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
164
                            vp9_coef_tree);
165
            vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
166
                                 vp9_coef_tree);
167
168
            assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
                   c[t][i][j][k][1][l][EOB_TOKEN]);
169
          }
170
171
}

172
173
174
175
176
177
// Adjustment factors indexed by the two-pass next_iiratio value (see
// vp9_compute_rd_mult()). Only the first five entries are non-zero; the
// remaining 27 are implicitly zero-initialised.
static const int rd_iifactor[32] = { 4, 4, 3, 2, 1 };
John Koleszar's avatar
John Koleszar committed
178

179
// 3* dc_qlookup[Q]*dc_qlookup[Q];
180

181
/* SAD-per-bit lookup tables, one entry per quantizer index. The values are
 * correlated to the quantizer: they are filled in by vp9_init_me_luts() and
 * read by vp9_initialize_me_consts(). */
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

185
void vp9_init_me_luts() {
John Koleszar's avatar
John Koleszar committed
186
187
188
189
190
191
192
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    sad_per_bit16lut[i] =
193
      (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107);
194
    sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742);
John Koleszar's avatar
John Koleszar committed
195
  }
Paul Wilkins's avatar
Paul Wilkins committed
196
}
John Koleszar's avatar
John Koleszar committed
197

198
// Computes the rate-distortion multiplier for a quantizer index.
// The base value is 88 * q^2 / 25 with q = vp9_dc_quant(qindex, 0). In the
// second pass on non-key frames it is scaled up by rd_iifactor[]/16, indexed
// by twopass.next_iiratio; ratios above 31 use the last table entry.
int vp9_compute_rd_mult(VP9_COMP *cpi, int qindex) {
  const int q = vp9_dc_quant(qindex, 0);
  // TODO(debargha): Adjust the function below
  int rdmult = 88 * q * q / 25;
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    if (cpi->twopass.next_iiratio > 31)
      rdmult += (rdmult * rd_iifactor[31]) >> 4;
    else
      rdmult += (rdmult * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
  }
  return rdmult;
}

// Maps a quantizer index to the rd threshold scale factor:
// (dc_quant / 4) ^ RD_THRESH_POW * 5.12, truncated to int and floored at 8.
static int compute_rd_thresh_factor(int qindex) {
  int q;
  // TODO(debargha): Adjust the function below
  q = (int)(pow(vp9_dc_quant(qindex, 0) / 4.0, RD_THRESH_POW) * 5.12);
  if (q < 8)
    q = 8;
  return q;
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
220
221
222
// Loads the SAD-per-bit constants for the given quantizer index from the
// lookup tables into cpi->mb.
// qindex is used unchecked as a table index; it must be in
// [0, QINDEX_RANGE).
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
  cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
}

225
226
227
228
229
230
231
232
233
// Fills cpi->rd_threshes[][][] and cpi->rd_thresh_sub8x8[][][] for every
// segment and block size.
// Each threshold is thresh_mult * q * block_size_factor / 4, where q is the
// rd threshold factor for the segment's clamped quantizer index. A
// multiplier at or above thresh_max would overflow int in that product, so
// the threshold is set to INT_MAX instead.
static void set_block_thresholds(VP9_COMP *cpi) {
  int i, bsize, segment_id;
  VP9_COMMON *cm = &cpi->common;

  for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
    int q;
    int segment_qindex = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
    segment_qindex = clamp(segment_qindex + cm->y_dc_delta_q, 0, MAXQ);
    q = compute_rd_thresh_factor(segment_qindex);

    for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[]
      int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);

      for (i = 0; i < MAX_MODES; ++i) {
        if (cpi->sf.thresh_mult[i] < thresh_max) {
          cpi->rd_threshes[segment_id][bsize][i] =
              cpi->sf.thresh_mult[i] * q *
              rd_thresh_block_size_factor[bsize] / 4;
        } else {
          cpi->rd_threshes[segment_id][bsize][i] = INT_MAX;
        }
      }

      for (i = 0; i < MAX_REFS; ++i) {
        if (cpi->sf.thresh_mult_sub8x8[i] < thresh_max) {
          cpi->rd_thresh_sub8x8[segment_id][bsize][i] =
              cpi->sf.thresh_mult_sub8x8[i] * q *
              rd_thresh_block_size_factor[bsize] / 4;
        } else {
          cpi->rd_thresh_sub8x8[segment_id][bsize][i] = INT_MAX;
        }
      }
    }
  }
}

263
264
265
// Sets up the rate-distortion constants and cost tables.
// In order: derives RDDIV/RDMULT and errorperbit from the clamped base
// quantizer index, refreshes the speed features, selects whether transform
// size is searched, fills the rd thresholds, and rebuilds the token,
// partition and mode cost tables. For frames that are not intra-only it also
// rebuilds the motion-vector and inter-mode cost tables.
void vp9_initialize_rd_consts(VP9_COMP *cpi) {
  VP9_COMMON *cm = &cpi->common;
  int qindex, i;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  qindex = clamp(cm->base_qindex + cm->y_dc_delta_q, 0, MAXQ);

  cpi->RDDIV = RDDIV_BITS;  // in bits (to multiply D by 128)
  cpi->RDMULT = vp9_compute_rd_mult(cpi, qindex);

  cpi->mb.errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO;
  // Never let errorperbit be zero.
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);

  vp9_set_speed_features(cpi);

  // Transform size selection is disabled only when the largest size is
  // forced on a non-key frame.
  cpi->mb.select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
                              cm->frame_type != KEY_FRAME) ?
                              0 : 1;

  set_block_thresholds(cpi);

  fill_token_costs(cpi->mb.token_costs, cm->fc.coef_probs);

  for (i = 0; i < PARTITION_CONTEXTS; i++)
    vp9_cost_tokens(cpi->mb.partition_cost[i], get_partition_probs(cm, i),
                    vp9_partition_tree);

  /*rough estimate for costing*/
  fill_mode_costs(cpi);

  if (!frame_is_intra_only(cm)) {
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cm->allow_high_precision_mv ? cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cm->fc.nmvc,
        cm->allow_high_precision_mv, 1, 1);

    for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
      vp9_cost_tokens((int *)cpi->mb.inter_mode_cost[i],
                      cm->fc.inter_mode_probs[i], vp9_inter_mode_tree);
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
311
// Linearly interpolates two tables at the same abscissa.
//   x        - sample position, must be >= 0
//   ntab     - number of entries in each of tab1 and tab2
//   inv_step - table entries per unit of x (entry k corresponds to
//              x = k / inv_step)
//   v1, v2   - receive the interpolated values of tab1 and tab2
// Positions at or beyond the last entry are clamped to the last entry.
static INLINE void linear_interpolate2(double x, int ntab, int inv_step,
                                       const double *tab1, const double *tab2,
                                       double *v1, double *v2) {
  const double y = x * inv_step;

  // Range-check in floating point before converting to int: converting a
  // double that does not fit in int is undefined behaviour.
  if (y >= ntab - 1) {
    *v1 = tab1[ntab - 1];
    *v2 = tab2[ntab - 1];
  } else {
    const int d = (int) y;
    const double a = y - d;
    *v1 = tab1[d] * (1 - a) + tab1[d + 1] * a;
    *v2 = tab2[d] * (1 - a) + tab2[d + 1] * a;
  }
}

326
327
328
329
330
// Looks up the normalized rate (*R) and normalized distortion (*D) of a
// Laplacian source for x = qpstep / sqrt(variance), x >= 0.
// Both tables are sampled every 1 / inv_tab_step in x and linearly
// interpolated; x beyond the end of the tables is clamped to the last entry.
static void model_rd_norm(double x, double *R, double *D) {
  static const int inv_tab_step = 8;
  static const int tab_size = 120;
  // NOTE: The tables below must be of the same size
  //
  // Normalized rate
  // This table models the rate for a Laplacian source with given variance
  // when quantized with a uniform quantizer with given stepsize. The closed
  // form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const double rate_tab[] = {
    64.00, 4.944, 3.949, 3.372, 2.966, 2.655, 2.403, 2.194,
    2.014, 1.858, 1.720, 1.596, 1.485, 1.384, 1.291, 1.206,
    1.127, 1.054, 0.986, 0.923, 0.863, 0.808, 0.756, 0.708,
    0.662, 0.619, 0.579, 0.541, 0.506, 0.473, 0.442, 0.412,
    0.385, 0.359, 0.335, 0.313, 0.291, 0.272, 0.253, 0.236,
    0.220, 0.204, 0.190, 0.177, 0.165, 0.153, 0.142, 0.132,
    0.123, 0.114, 0.106, 0.099, 0.091, 0.085, 0.079, 0.073,
    0.068, 0.063, 0.058, 0.054, 0.050, 0.047, 0.043, 0.040,
    0.037, 0.034, 0.032, 0.029, 0.027, 0.025, 0.023, 0.022,
    0.020, 0.019, 0.017, 0.016, 0.015, 0.014, 0.013, 0.012,
    0.011, 0.010, 0.009, 0.008, 0.008, 0.007, 0.007, 0.006,
    0.006, 0.005, 0.005, 0.005, 0.004, 0.004, 0.004, 0.003,
    0.003, 0.003, 0.003, 0.002, 0.002, 0.002, 0.002, 0.002,
    0.002, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001,
    0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.000,
  };
  // Normalized distortion
  // This table models the normalized distortion for a Laplacian source with
  // given variance when quantized with a uniform quantizer with given
  // stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance)
  // Note the actual distortion is Dn * variance.
  static const double dist_tab[] = {
    0.000, 0.001, 0.005, 0.012, 0.021, 0.032, 0.045, 0.061,
    0.079, 0.098, 0.119, 0.142, 0.166, 0.190, 0.216, 0.242,
    0.269, 0.296, 0.324, 0.351, 0.378, 0.405, 0.432, 0.458,
    0.484, 0.509, 0.534, 0.557, 0.580, 0.603, 0.624, 0.645,
    0.664, 0.683, 0.702, 0.719, 0.735, 0.751, 0.766, 0.780,
    0.794, 0.807, 0.819, 0.830, 0.841, 0.851, 0.861, 0.870,
    0.878, 0.886, 0.894, 0.901, 0.907, 0.913, 0.919, 0.925,
    0.930, 0.935, 0.939, 0.943, 0.947, 0.951, 0.954, 0.957,
    0.960, 0.963, 0.966, 0.968, 0.971, 0.973, 0.975, 0.976,
    0.978, 0.980, 0.981, 0.982, 0.984, 0.985, 0.986, 0.987,
    0.988, 0.989, 0.990, 0.990, 0.991, 0.992, 0.992, 0.993,
    0.993, 0.994, 0.994, 0.995, 0.995, 0.996, 0.996, 0.996,
    0.996, 0.997, 0.997, 0.997, 0.997, 0.998, 0.998, 0.998,
    0.998, 0.998, 0.998, 0.999, 0.999, 0.999, 0.999, 0.999,
    0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 1.000,
  };
  // Guard the "same size" requirement above against future table edits.
  assert(sizeof(rate_tab) == tab_size * sizeof(rate_tab[0]));
  assert(sizeof(dist_tab) == tab_size * sizeof(dist_tab[0]));
  assert(sizeof(rate_tab) == sizeof(dist_tab));
  assert(x >= 0.0);
  linear_interpolate2(x, tab_size, inv_tab_step,
                      rate_tab, dist_tab, R, D);
}

// Models rate and distortion of a block from its variance.
//   var   - summed squared error (or variance sum) over the block
//   n     - number of samples in the block
//   qstep - effective quantizer step size
//   rate  - receives (n << 8) * Rn, rounded to nearest
//   dist  - receives var * Dn, rounded to nearest
// Rn and Dn are the normalized values from model_rd_norm(). Both outputs are
// 0 when var or n is 0.
static void model_rd_from_var_lapndz(int var, int n, int qstep,
                                     int *rate, int64_t *dist) {
  // This function models the rate and distortion for a Laplacian
  // source with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expressions are in:
  // Hang and Chen, "Source Model for transform video coder and its
  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
  // Sys. for Video Tech., April 1997.
  vp9_clear_system_state();
  if (var == 0 || n == 0) {
    *rate = 0;
    *dist = 0;
  } else {
    double D, R;
    double s2 = (double) var / n;  // per-sample variance
    double x = qstep / sqrt(s2);
    model_rd_norm(x, &R, &D);
    *rate = (int)((n << 8) * R + 0.5);
    *dist = (int)(var * D + 0.5);
  }
  vp9_clear_system_state();
}

412
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
Deb Mukherjee's avatar
Deb Mukherjee committed
413
414
415
416
417
418
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  int i, rate_sum = 0, dist_sum = 0;
419
420
  int ref = xd->mi_8x8[0]->mbmi.ref_frame[0];
  unsigned int sse;
Deb Mukherjee's avatar
Deb Mukherjee committed
421
422
423
424

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
425
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
Deb Mukherjee's avatar
Deb Mukherjee committed
426
427
428
    int rate;
    int64_t dist;
    (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
429
430
431
                              pd->dst.buf, pd->dst.stride, &sse);
    if (i == 0)
      x->pred_sse[ref] = sse;
Deb Mukherjee's avatar
Deb Mukherjee committed
432
    // sse works better than var, since there is no dc prediction used
433
    model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
434
                             pd->dequant[1] >> 3, &rate, &dist);
Deb Mukherjee's avatar
Deb Mukherjee committed
435
436

    rate_sum += rate;
Yaowu Xu's avatar
Yaowu Xu committed
437
    dist_sum += (int)dist;
Deb Mukherjee's avatar
Deb Mukherjee committed
438
439
440
441
442
443
  }

  *out_rate_sum = rate_sum;
  *out_dist_sum = dist_sum << 4;
}

444
static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize,
Deb Mukherjee's avatar
Deb Mukherjee committed
445
446
447
448
                                 TX_SIZE tx_size,
                                 MACROBLOCK *x, MACROBLOCKD *xd,
                                 int *out_rate_sum, int64_t *out_dist_sum,
                                 int *out_skip) {
449
  int j, k;
450
  BLOCK_SIZE bs;
Deb Mukherjee's avatar
Deb Mukherjee committed
451
452
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
453
454
  const int width = 4 << num_4x4_blocks_wide_lookup[bsize];
  const int height = 4 << num_4x4_blocks_high_lookup[bsize];
Deb Mukherjee's avatar
Deb Mukherjee committed
455
456
  int rate_sum = 0;
  int64_t dist_sum = 0;
457
  const int t = 4 << tx_size;
Deb Mukherjee's avatar
Deb Mukherjee committed
458
459
460
461
462
463
464
465
466
467
468
469

  if (tx_size == TX_4X4) {
    bs = BLOCK_4X4;
  } else if (tx_size == TX_8X8) {
    bs = BLOCK_8X8;
  } else if (tx_size == TX_16X16) {
    bs = BLOCK_16X16;
  } else if (tx_size == TX_32X32) {
    bs = BLOCK_32X32;
  } else {
    assert(0);
  }
470

Deb Mukherjee's avatar
Deb Mukherjee committed
471
  *out_skip = 1;
Jim Bankoski's avatar
Jim Bankoski committed
472
473
  for (j = 0; j < height; j += t) {
    for (k = 0; k < width; k += t) {
Deb Mukherjee's avatar
Deb Mukherjee committed
474
475
476
      int rate;
      int64_t dist;
      unsigned int sse;
477
478
479
      cpi->fn_ptr[bs].vf(&p->src.buf[j * p->src.stride + k], p->src.stride,
                         &pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride,
                         &sse);
Deb Mukherjee's avatar
Deb Mukherjee committed
480
      // sse works better than var, since there is no dc prediction used
481
      model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3, &rate, &dist);
Deb Mukherjee's avatar
Deb Mukherjee committed
482
483
484
485
486
      rate_sum += rate;
      dist_sum += dist;
      *out_skip &= (rate < 1024);
    }
  }
487

Deb Mukherjee's avatar
Deb Mukherjee committed
488
  *out_rate_sum = rate_sum;
489
  *out_dist_sum = dist_sum << 4;
Deb Mukherjee's avatar
Deb Mukherjee committed
490
491
}

Ronald S. Bultje's avatar
Ronald S. Bultje committed
492
// Returns the sum of squared differences between coeff[] and dqcoeff[] over
// block_size entries, and stores the sum of squared coeff[] values in *ssz.
// Both sums are 0 when block_size is 0.
int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    // Square in 64 bits: a difference of two int16_t values can reach
    // +/-65535, whose square does not fit in a signed 32-bit int.
    const int64_t this_diff = coeff[i] - dqcoeff[i];
    const int64_t this_coeff = coeff[i];
    error += this_diff * this_diff;
    sqcoeff += this_coeff * this_coeff;
  }

  *ssz = sqcoeff;
  return error;
}

507
508
509
510
511
/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). */
512
static const int16_t band_counts[TX_SIZES][8] = {
513
514
515
516
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
517
518
};

519
static INLINE int cost_coeffs(MACROBLOCK *x,
520
                              int plane, int block,
521
                              ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
522
                              TX_SIZE tx_size,
523
                              const int16_t *scan, const int16_t *nb) {
524
  MACROBLOCKD *const xd = &x->e_mbd;
525
  MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
526
  struct macroblock_plane *p = &x->plane[plane];
527
528
  struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
529
  const int16_t *band_count = &band_counts[tx_size][1];
530
  const int eob = p->eobs[block];
531
  const int16_t *const qcoeff_ptr = BLOCK_OFFSET(p->qcoeff, block);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
532
  const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
533
  unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
534
                   x->token_costs[tx_size][type][ref];
535
  const ENTROPY_CONTEXT above_ec = !!*A, left_ec = !!*L;
536
  uint8_t *p_tok = x->token_cache;
537
538
  int pt = combine_entropy_contexts(above_ec, left_ec);
  int c, cost;
539
540

  // Check for consistency of tx_size with mode info
541
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
542
                                      : get_uv_tx_size(mbmi) == tx_size);
543

544
545
  if (eob == 0) {
    // single eob token
546
    cost = token_costs[0][0][pt][EOB_TOKEN];
547
    c = 0;
548
  } else {
549
    int band_left = *band_count++;
550
551

    // dc token
552
553
    int v = qcoeff_ptr[0];
    int prev_t = vp9_dct_value_tokens_ptr[v].token;
554
    cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
555
    p_tok[0] = vp9_pt_energy_class[prev_t];
556
    ++token_costs;
557
558
559
560

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
561
      int t;
562
563
564

      v = qcoeff_ptr[rc];
      t = vp9_dct_value_tokens_ptr[v].token;
565
      pt = get_coef_context(nb, p_tok, c);
566
      cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
567
      p_tok[rc] = vp9_pt_energy_class[t];
568
      prev_t = t;
569
      if (!--band_left) {
570
571
        band_left = *band_count++;
        ++token_costs;
572
      }
573
    }
574
575

    // eob token
576
    if (band_left) {
577
      pt = get_coef_context(nb, p_tok, c);
578
      cost += (*token_costs)[0][pt][EOB_TOKEN];
579
    }
580
581
  }

582
  // is eob first coefficient;
583
  *A = *L = (c > 0);
584

585
586
587
  return cost;
}

588
// Computes the distortion and SSE of one transform block in the transform
// domain and stores them in args->dist and args->sse (arg is a
// struct rdcost_block_args *).
// Both values are scaled down by 4 except for 32x32 transforms. When
// x->skip_encode is set on an intra block, a quantizer-dependent term is
// added to both.
static void dist_block(int plane, int block, TX_SIZE tx_size, void *arg) {
  const int ss_txfrm_size = tx_size << 1;
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  int64_t this_sse;
  int shift = args->tx_size == TX_32X32 ? 0 : 2;
  int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  // 16 << ss_txfrm_size is the number of coefficients in the block.
  args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                               &this_sse) >> shift;
  args->sse  = this_sse >> shift;

  if (x->skip_encode && !is_inter_block(&xd->mi_8x8[0]->mbmi)) {
    // TODO(jingning): tune the model to better capture the distortion.
    // Computed in 64 bits, and named so it does not shadow the plane
    // pointer `p` above.
    const int64_t dq = pd->dequant[1];
    const int64_t penalty = (dq * dq * (1 << ss_txfrm_size)) >> (shift + 2);
    args->dist += (penalty >> 4);
    args->sse  += penalty;
  }
}

612
static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
613
                       TX_SIZE tx_size, void *arg) {
Deb Mukherjee's avatar
Deb Mukherjee committed
614
  struct rdcost_block_args* args = arg;
615

Deb Mukherjee's avatar
Deb Mukherjee committed
616
  int x_idx, y_idx;
617
  txfrm_block_to_raster_xy(plane_bsize, args->tx_size, block, &x_idx, &y_idx);
Deb Mukherjee's avatar
Deb Mukherjee committed
618

619
620
621
  args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
                           args->t_left + y_idx, args->tx_size,
                           args->scan, args->nb);
Deb Mukherjee's avatar
Deb Mukherjee committed
622
623
}

624
625
// Per-transform-block callback that accumulates rate, distortion, SSE and RD
// cost into the struct rdcost_block_args passed as arg.
// It does nothing once args->skip is set, and sets args->skip as soon as the
// accumulated RD cost exceeds args->best_rd, so the rest of the plane is not
// evaluated.
static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
                          TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct encode_b_args encode_args = {x, NULL};
  int64_t rd1, rd2, rd;

  if (args->skip)
    return;

  if (!is_inter_block(&xd->mi_8x8[0]->mbmi))
    vp9_encode_block_intra(plane, block, plane_bsize, tx_size, &encode_args);
  else
    vp9_xform_quant(plane, block, plane_bsize, tx_size, &encode_args);

  dist_block(plane, block, tx_size, args);
  rate_block(plane, block, plane_bsize, tx_size, args);
  // rd1: cost of coding the coefficients; rd2: cost of zeroing the block
  // (no rate, distortion equal to the SSE).
  rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);

  // TODO(jingning): temporarily enabled only for luma component
  rd = MIN(rd1, rd2);
  if (plane == 0)
    x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
                                    (rd1 > rd2 && !xd->lossless);

  args->this_rate += args->rate;
  args->this_dist += args->dist;
  args->this_sse  += args->sse;
  args->this_rd += rd;

  if (args->this_rd > args->best_rd)
    args->skip = 1;
}

662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
// Returns 1 if any of the first n entries of ctx is non-zero, else 0.
static int entropy_ctx_any_nonzero(const ENTROPY_CONTEXT *ctx, int n) {
  int k;
  for (k = 0; k < n; ++k) {
    if (ctx[k])
      return 1;
  }
  return 0;
}

// Derives per-transform-block entropy contexts from the per-4x4 contexts.
//   t_above, t_left - outputs; only the entry at the first 4x4 position of
//                     each transform block is written (every entry for
//                     TX_4X4)
//   above, left     - per-4x4 contexts of the plane
//   num_4x4_w/h     - plane block size in 4x4 units
// A transform block's context is 1 if any of the 4x4 contexts it covers is
// non-zero. The entries are scanned one by one instead of being loaded
// through a wider integer pointer, which violated strict aliasing and
// alignment rules.
// NOTE(review): this matches the previous 16/32/64-bit loads provided
// ENTROPY_CONTEXT is one byte wide - confirm against its typedef.
void vp9_get_entropy_contexts(TX_SIZE tx_size,
    ENTROPY_CONTEXT t_above[16], ENTROPY_CONTEXT t_left[16],
    const ENTROPY_CONTEXT *above, const ENTROPY_CONTEXT *left,
    int num_4x4_w, int num_4x4_h) {
  int i;
  switch (tx_size) {
    case TX_4X4:
      vpx_memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      vpx_memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      break;
    case TX_8X8:
      for (i = 0; i < num_4x4_w; i += 2)
        t_above[i] = entropy_ctx_any_nonzero(&above[i], 2);
      for (i = 0; i < num_4x4_h; i += 2)
        t_left[i] = entropy_ctx_any_nonzero(&left[i], 2);
      break;
    case TX_16X16:
      for (i = 0; i < num_4x4_w; i += 4)
        t_above[i] = entropy_ctx_any_nonzero(&above[i], 4);
      for (i = 0; i < num_4x4_h; i += 4)
        t_left[i] = entropy_ctx_any_nonzero(&left[i], 4);
      break;
    case TX_32X32:
      for (i = 0; i < num_4x4_w; i += 8)
        t_above[i] = entropy_ctx_any_nonzero(&above[i], 8);
      for (i = 0; i < num_4x4_h; i += 8)
        t_left[i] = entropy_ctx_any_nonzero(&left[i], 8);
      break;
    default:
      assert(0 && "Invalid transform size.");
      break;
  }
}

695
696
697
698
699
700
701
702
703
704
705
706
// Resets *arg to all zeroes, then records the macroblock, the transform
// size, the plane dimensions in 4x4 units and the reference RD cost that
// the per-block accumulation is compared against.
static void init_rdcost_stack(MACROBLOCK *x, TX_SIZE tx_size,
                              const int num_4x4_w, const int num_4x4_h,
                              const int64_t ref_rdcost,
                              struct rdcost_block_args *arg) {
  vpx_memset(arg, 0, sizeof(*arg));

  arg->best_rd = ref_rdcost;
  arg->bh = num_4x4_h;
  arg->bw = num_4x4_w;
  arg->tx_size = tx_size;
  arg->x = x;
}

707
static void txfm_rd_in_plane(MACROBLOCK *x,
708
                             struct rdcost_block_args *rd_stack,
709
710
711
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
712
                             BLOCK_SIZE bsize, TX_SIZE tx_size) {
Deb Mukherjee's avatar
Deb Mukherjee committed
713
  MACROBLOCKD *const xd = &x->e_mbd;
714
  struct macroblockd_plane *const pd = &xd->plane[plane];
715
  const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
716
717
  const int num_4x4_w = num_4x4_blocks_wide_lookup[bs];
  const int num_4x4_h = num_4x4_blocks_high_lookup[bs];
718
  const scan_order *so;
719

720
721
  init_rdcost_stack(x, tx_size, num_4x4_w, num_4x4_h,
                    ref_best_rd, rd_stack);
722
  if (plane == 0)
723
    xd->mi_8x8[0]->mbmi.tx_size = tx_size;
724

725
  vp9_get_entropy_contexts(tx_size, rd_stack->t_above, rd_stack->t_left,
726
727
                           pd->above_context, pd->left_context,
                           num_4x4_w, num_4x4_h);
728

729
730
731
  so = get_scan(xd, tx_size, pd->plane_type, 0);
  rd_stack->scan = so->scan;
  rd_stack->nb = so->neighbors;
Deb Mukherjee's avatar
Deb Mukherjee committed
732

733
  foreach_transformed_block_in_plane(xd, bsize, plane,
734
                                     block_rd_txfm, rd_stack);
735
  if (rd_stack->skip) {
736
737
738
739
740
    *rate       = INT_MAX;
    *distortion = INT64_MAX;
    *sse        = INT64_MAX;
    *skippable  = 0;
  } else {
741
742
743
    *distortion = rd_stack->this_dist;
    *rate       = rd_stack->this_rate;
    *sse        = rd_stack->this_sse;
744
    *skippable  = vp9_is_skippable_in_plane(x, bsize, plane);
745
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
746
747
748
749
750
}

static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
                                     int *rate, int64_t *distortion,
                                     int *skip, int64_t *sse,
751
                                     int64_t ref_best_rd,
752
                                     BLOCK_SIZE bs) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
753
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
Deb Mukherjee's avatar
Deb Mukherjee committed
754
  VP9_COMMON *const cm = &cpi->common;
Yaowu Xu's avatar
Yaowu Xu committed
755
  const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
Deb Mukherjee's avatar
Deb Mukherjee committed
756
  MACROBLOCKD *const xd = &x->e_mbd;
757
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
Yaowu Xu's avatar
Yaowu Xu committed
758
759
760

  mbmi->tx_size = MIN(max_tx_size, largest_tx_size);

761
  txfm_rd_in_plane(x, &cpi->rdcost_stack, rate, distortion, skip,
762
763
                   &sse[mbmi->tx_size], ref_best_rd, 0, bs,
                   mbmi->tx_size);
Dmitry Kovalev's avatar
Dmitry Kovalev committed
764
  cpi->tx_stepdown_count[0]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
765
766
}

767
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
768
                                     int (*r)[2], int *rate,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
769
                                     int64_t *d, int64_t *distortion,
770
                                     int *s, int *skip,
771
                                     int64_t tx_cache[TX_MODES],
772
                                     BLOCK_SIZE bs) {
773
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
774
775
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
776
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
777
  vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
778
  int64_t rd[TX_SIZES][2];
779
  int n, m;
780
  int s0, s1;
781
782
783
  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  int64_t best_rd = INT64_MAX;
  TX_SIZE best_tx = TX_4X4;
784

785
  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
786
787
788
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
789

790
  for (n = TX_4X4; n <= max_tx_size; n++) {
791
792
793
794
795
796
797
798
799
    r[n][1] = r[n][0];
    if (r[n][0] < INT_MAX) {
      for (m = 0; m <= n - (n == max_tx_size); m++) {
        if (m == n)
          r[n][1] += vp9_cost_zero(tx_probs[m]);
        else
          r[n][1] += vp9_cost_one(tx_probs[m]);
      }
    }
800
801
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
802
    } else if (s[n]) {
803
804
805
806
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
807
    }
808
809
810
811
812

    if (rd[n][1] < best_rd) {
      best_tx = n;
      best_rd = rd[n][1];
    }
813
  }
814
815
  mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
                      best_tx : MIN(max_tx_size, max_mode_tx_size);
816
817


818
819
820
  *distortion = d[mbmi->tx_size];
  *rate       = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->tx_size];
821

822
823
824
825
  tx_cache[ONLY_4X4] = rd[TX_4X4][0];
  tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
  tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
  tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
826

827
828
  if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
    tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
829
    cpi->tx_stepdown_count[0]++;
830
831
  } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
    tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
832
    cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
833
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
834
    tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
835
    cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
836
  } else {
837
    tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
838
    cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
839
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
840
}
841

Deb Mukherjee's avatar
Deb Mukherjee committed
842
843
844
845
static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
                                          int (*r)[2], int *rate,
                                          int64_t *d, int64_t *distortion,
                                          int *s, int *skip, int64_t *sse,
846
                                          int64_t ref_best_rd,
847
                                          BLOCK_SIZE bs) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
848
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
Deb Mukherjee's avatar
Deb Mukherjee committed
849
850
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
851
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
852
  vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
853
  int64_t rd[TX_SIZES][2];
Deb Mukherjee's avatar
Deb Mukherjee committed
854
855
  int n, m;
  int s0, s1;
856
  double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00};
857
858
859
  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  int64_t best_rd = INT64_MAX;
  TX_SIZE best_tx = TX_4X4;
860

861
  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
862
863
864
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
865

Dmitry Kovalev's avatar
Dmitry Kovalev committed
866
  for (n = TX_4X4; n <= max_tx_size; n++) {
867
    double scale = scale_rd[n];
Deb Mukherjee's avatar
Deb Mukherjee committed
868
    r[n][1] = r[n][0];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
869
    for (m = 0; m <= n - (n == max_tx_size); m++) {
Deb Mukherjee's avatar
Deb Mukherjee committed
870
871
872
873
874
875
      if (m == n)
        r[n][1] += vp9_cost_zero(tx_probs[m]);
      else
        r[n][1] += vp9_cost_one(tx_probs[m]);
    }
    if (s[n]) {
876
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]) * scale;
Deb Mukherjee's avatar
Deb Mukherjee committed
877
    } else {
878
879
880
881
882
883
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]) * scale;
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]) * scale;
    }
    if (rd[n][1] < best_rd) {
      best_rd = rd[n][1];
      best_tx = n;
Deb Mukherjee's avatar
Deb Mukherjee committed
884
885
    }
  }
886

887
888
  mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
                      best_tx : MIN(max_tx_size, max_mode_tx_size);
889

890
891
  // Actually encode using the chosen mode if a model was used, but do not
  // update the r, d costs
892
893
  txfm_rd_in_plane(x, &cpi->rdcost_stack, rate, distortion, skip,
                   &sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size);
894

895
  if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
896
    cpi->tx_stepdown_count[0]++;
897
  } else if (max_tx_size >= TX_16X16 &&  best_tx == TX_16X16) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
898
    cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
899
  } else if (rd[TX_8X8][1] <= rd[TX_4X4][1]) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
900
    cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
Deb Mukherjee's avatar
Deb Mukherjee committed