vp9_rdopt.c 153 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
15

16
#include "vp9/common/vp9_pragmas.h"
17
18
19
20
21
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
22
23
24
25
26
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
27
28
29
30
31
32
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
33
#include "vpx_mem/vpx_mem.h"
34
35
36
37
38
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
39
#include "vp9_rtcd.h"
40
#include "vp9/common/vp9_mvref_common.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
41
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
42

43
44
#define INVALID_MV 0x80008000

45
46
47
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

48
49
50
DECLARE_ALIGNED(16, extern const uint8_t,
                vp9_pt_energy_class[MAX_ENTROPY_TOKENS]);

Ronald S. Bultje's avatar
Ronald S. Bultje committed
51
#define I4X4_PRED 0x8000
Ronald S. Bultje's avatar
Ronald S. Bultje committed
52
#define SPLITMV 0x10000
Ronald S. Bultje's avatar
Ronald S. Bultje committed
53

54
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
55
  {NEARESTMV, LAST_FRAME,   NONE},
56
57
  {DC_PRED,   INTRA_FRAME,  NONE},

Paul Wilkins's avatar
Paul Wilkins committed
58
59
60
61
  {NEARESTMV, ALTREF_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},
  {NEWMV,     LAST_FRAME,   NONE},
  {NEARESTMV, LAST_FRAME,   ALTREF_FRAME},
62
  {NEARMV,    LAST_FRAME,   NONE},
Paul Wilkins's avatar
Paul Wilkins committed
63
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
John Koleszar's avatar
John Koleszar committed
64

Paul Wilkins's avatar
Paul Wilkins committed
65
66
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
67
  {NEARMV,    ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
68

69
70
  {TM_PRED,   INTRA_FRAME,  NONE},

Paul Wilkins's avatar
Paul Wilkins committed
71
72
73
74
75
  {NEARMV,    LAST_FRAME,   ALTREF_FRAME},
  {NEWMV,     LAST_FRAME,   ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},
John Koleszar's avatar
John Koleszar committed
76

77
78
79
  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},
Paul Wilkins's avatar
Paul Wilkins committed
80
81
  {SPLITMV,   LAST_FRAME,   ALTREF_FRAME},
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},
82

Paul Wilkins's avatar
Paul Wilkins committed
83
84
85
  {ZEROMV,    LAST_FRAME,   NONE},
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {ZEROMV,    ALTREF_FRAME, NONE},
Ronald S. Bultje's avatar
Ronald S. Bultje committed
86
  {ZEROMV,    LAST_FRAME,   ALTREF_FRAME},
John Koleszar's avatar
John Koleszar committed
87
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
88

Paul Wilkins's avatar
Paul Wilkins committed
89
90
91
92
93
94
95
96
97
  {I4X4_PRED, INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {V_PRED,    INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
98
99
};

100
101
102
103
// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for blocks size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
104
static int rd_thresh_block_size_factor[BLOCK_SIZES] =
105
106
107
108
109
110
  {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};

#define BASE_RD_THRESH_FREQ_FACT 16
#define MAX_RD_THRESH_FREQ_FACT 32
#define MAX_RD_THRESH_FREQ_INC 1

111
static void fill_token_costs(vp9_coeff_cost *c,
112
                             vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
113
  int i, j, k, l;
114
115
116
117
118
119
120
121
  TX_SIZE t;
  for (t = TX_4X4; t <= TX_32X32; t++)
    for (i = 0; i < BLOCK_TYPES; i++)
      for (j = 0; j < REF_TYPES; j++)
        for (k = 0; k < COEF_BANDS; k++)
          for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
            vp9_prob probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
122
            vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
123
                            vp9_coef_tree);
124
            vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
125
                                 vp9_coef_tree);
126
127
            assert(c[t][i][j][k][0][l][DCT_EOB_TOKEN] ==
                   c[t][i][j][k][1][l][DCT_EOB_TOKEN]);
128
          }
129
130
}

131
132
133
134
135
136
// Extra rd multiplier weight (in 1/16ths) indexed by the two-pass
// next_iiratio value, which the user clamps to the last entry. Only the
// first five entries are non-zero; the remaining 27 are zero-initialized by
// the language rules for partially initialized arrays.
static const int rd_iifactor[32] = { 4, 4, 3, 2, 1 };
John Koleszar's avatar
John Koleszar committed
137

138
// 3* dc_qlookup[Q]*dc_qlookup[Q];
139

140
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
141
142
143
// SAD-per-bit lookup tables indexed by quantizer index. They are filled by
// vp9_init_me_luts() and read by vp9_initialize_me_consts().
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

// Populates both SAD-per-bit tables for every quantizer index.
void vp9_init_me_luts(void) {
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now.
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    // Convert once per index; both tables are linear in the same q value.
    const double q = vp9_convert_qindex_to_q(i);

    sad_per_bit16lut[i] = (int)((0.0418 * q) + 2.4107);
    sad_per_bit4lut[i] = (int)(0.063 * q + 2.742);
  }
}
John Koleszar's avatar
John Koleszar committed
156

157
// Returns the rd multiplier for `qindex`: 11/4 times the square of the DC
// quantizer value returned by vp9_dc_quant(qindex, 0).
static int compute_rd_mult(int qindex) {
  const int dc_q = vp9_dc_quant(qindex, 0);
  const int scaled_sq = 11 * dc_q * dc_q;

  return scaled_sq >> 2;
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
162
163
164
// Copies the SAD-per-bit constants for `qindex` from the lookup tables into
// the encoder's macroblock state. `qindex` is used as a table index without
// any range check here.
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  const int per_bit16 = sad_per_bit16lut[qindex];
  const int per_bit4 = sad_per_bit4lut[qindex];

  cpi->mb.sadperbit16 = per_bit16;
  cpi->mb.sadperbit4 = per_bit4;
}

167

Dmitry Kovalev's avatar
Dmitry Kovalev committed
168
// Derives the rate-distortion constants for the current frame from `qindex`
// (clamped to [0, MAXQ]) and stores them in `cpi`:
//   - RDMULT / RDDIV and the per-macroblock errorperbit,
//   - the per-block-size, per-mode rd thresholds and their frequency factors,
//   - coefficient token costs, partition costs and mode costs,
//   - for non-key frames, the mv cost tables and inter mode costs.
void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
  int q, i, bsize;
  int thresh_div;  // divisor applied to thresh_mult * q * block size factor

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  qindex = clamp(qindex, 0, MAXQ);

  cpi->RDMULT = compute_rd_mult(qindex);
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    // rd_iifactor has 32 entries; larger ratios reuse the last one.
    if (cpi->twopass.next_iiratio > 31)
      cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
    else
      cpi->RDMULT +=
          (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
  }
  cpi->mb.errorperbit = cpi->RDMULT >> 6;
  // Never let errorperbit be zero.
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);

  vp9_set_speed_features(cpi);

  q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25);
  q <<= 2;
  if (q < 8)
    q = 8;

  // Large multipliers are rescaled by 100, with RDDIV and the threshold
  // divisor adjusted to match. Apart from that divisor, the threshold
  // computation below is the same in both regimes.
  if (cpi->RDMULT > 1000) {
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;
    thresh_div = 4 * 100;
  } else {
    cpi->RDDIV = 100;
    thresh_div = 4;
  }

  for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
    // Threshold here seems unnecessarily harsh but fine given the actual
    // range of values used for cpi->sf.thresh_mult[].
    const int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);

    for (i = 0; i < MAX_MODES; ++i) {
      // The factor of 4 in thresh_div relates to the scaling of
      // rd_thresh_block_size_factor[].
      if ((int64_t)cpi->sf.thresh_mult[i] < thresh_max) {
        cpi->rd_threshes[bsize][i] =
            cpi->sf.thresh_mult[i] * q *
            rd_thresh_block_size_factor[bsize] / thresh_div;
      } else {
        // The product would overflow: saturate.
        cpi->rd_threshes[bsize][i] = INT_MAX;
      }
      cpi->rd_baseline_thresh[bsize][i] = cpi->rd_threshes[bsize][i];

      if (cpi->sf.adaptive_rd_thresh)
        cpi->rd_thresh_freq_fact[bsize][i] = MAX_RD_THRESH_FREQ_FACT;
      else
        cpi->rd_thresh_freq_fact[bsize][i] = BASE_RD_THRESH_FREQ_FACT;
    }
  }

  fill_token_costs(cpi->mb.token_costs, cpi->common.fc.coef_probs);

  for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
    vp9_cost_tokens(cpi->mb.partition_cost[i],
                    cpi->common.fc.partition_prob[cpi->common.frame_type][i],
                    vp9_partition_tree);

  /*rough estimate for costing*/
  vp9_init_mode_costs(cpi);

  if (cpi->common.frame_type != KEY_FRAME) {
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cpi->mb.e_mbd.allow_high_precision_mv ?
        cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cpi->common.fc.nmvc,
        cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);

    for (i = 0; i < INTER_MODE_CONTEXTS; i++) {
      MB_PREDICTION_MODE m;

      for (m = NEARESTMV; m < MB_MODE_COUNT; m++)
        cpi->mb.inter_mode_cost[i][m - NEARESTMV] =
            cost_token(vp9_inter_mode_tree,
                       cpi->common.fc.inter_mode_probs[i],
                       vp9_inter_mode_encodings - NEARESTMV + m);
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
279
// Linearly interpolates two tables of `ntab` entries at the same position.
//   x        - lookup position, in units of 1 / inv_step table entries
//   ntab     - number of entries in each table
//   inv_step - number of table entries per unit of x
//   tab1/2   - the two tables
//   v1/v2    - receive the interpolated values from tab1 and tab2
// Positions at or beyond the last entry (and NaN) yield the last entry;
// negative positions yield the first entry. The range check is done in
// floating point before the conversion to int, so the conversion is always
// in range and the index is always inside the tables.
static INLINE void linear_interpolate2(double x, int ntab, int inv_step,
                                       const double *tab1, const double *tab2,
                                       double *v1, double *v2) {
  const double y = x * inv_step;
  const int last = ntab - 1;

  if (!(y < last)) {
    // Also taken for NaN, for which every comparison is false.
    *v1 = tab1[last];
    *v2 = tab2[last];
  } else if (y <= 0) {
    *v1 = tab1[0];
    *v2 = tab2[0];
  } else {
    const int d = (int)y;      // 0 <= d < last, so d + 1 is a valid index
    const double a = y - d;    // fractional position between d and d + 1

    *v1 = tab1[d] * (1 - a) + tab1[d + 1] * a;
    *v2 = tab2[d] * (1 - a) + tab2[d + 1] * a;
  }
}

294
295
296
297
298
// Looks up the normalized rate *R and normalized distortion *D for the
// normalized quantizer step x (x >= 0) by linear interpolation in two
// tables sampled every 1/8 in x.
static void model_rd_norm(double x, double *R, double *D) {
  static const int inv_tab_step = 8;
  // NOTE: The tables below must be of the same size
  //
  // Normalized rate
  // This table models the rate for a Laplacian source with given variance
  // when quantized with a uniform quantizer with given stepsize. The closed
  // form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const double rate_tab[] = {
    64.00, 4.944, 3.949, 3.372, 2.966, 2.655, 2.403, 2.194,
    2.014, 1.858, 1.720, 1.596, 1.485, 1.384, 1.291, 1.206,
    1.127, 1.054, 0.986, 0.923, 0.863, 0.808, 0.756, 0.708,
    0.662, 0.619, 0.579, 0.541, 0.506, 0.473, 0.442, 0.412,
    0.385, 0.359, 0.335, 0.313, 0.291, 0.272, 0.253, 0.236,
    0.220, 0.204, 0.190, 0.177, 0.165, 0.153, 0.142, 0.132,
    0.123, 0.114, 0.106, 0.099, 0.091, 0.085, 0.079, 0.073,
    0.068, 0.063, 0.058, 0.054, 0.050, 0.047, 0.043, 0.040,
    0.037, 0.034, 0.032, 0.029, 0.027, 0.025, 0.023, 0.022,
    0.020, 0.019, 0.017, 0.016, 0.015, 0.014, 0.013, 0.012,
    0.011, 0.010, 0.009, 0.008, 0.008, 0.007, 0.007, 0.006,
    0.006, 0.005, 0.005, 0.005, 0.004, 0.004, 0.004, 0.003,
    0.003, 0.003, 0.003, 0.002, 0.002, 0.002, 0.002, 0.002,
    0.002, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001,
    0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.000,
  };
  // Normalized distortion
  // This table models the normalized distortion for a Laplacian source with
  // given variance when quantized with a uniform quantizer with given
  // stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance)
  // Note the actual distortion is Dn * variance.
  static const double dist_tab[] = {
    0.000, 0.001, 0.005, 0.012, 0.021, 0.032, 0.045, 0.061,
    0.079, 0.098, 0.119, 0.142, 0.166, 0.190, 0.216, 0.242,
    0.269, 0.296, 0.324, 0.351, 0.378, 0.405, 0.432, 0.458,
    0.484, 0.509, 0.534, 0.557, 0.580, 0.603, 0.624, 0.645,
    0.664, 0.683, 0.702, 0.719, 0.735, 0.751, 0.766, 0.780,
    0.794, 0.807, 0.819, 0.830, 0.841, 0.851, 0.861, 0.870,
    0.878, 0.886, 0.894, 0.901, 0.907, 0.913, 0.919, 0.925,
    0.930, 0.935, 0.939, 0.943, 0.947, 0.951, 0.954, 0.957,
    0.960, 0.963, 0.966, 0.968, 0.971, 0.973, 0.975, 0.976,
    0.978, 0.980, 0.981, 0.982, 0.984, 0.985, 0.986, 0.987,
    0.988, 0.989, 0.990, 0.990, 0.991, 0.992, 0.992, 0.993,
    0.993, 0.994, 0.994, 0.995, 0.995, 0.996, 0.996, 0.996,
    0.996, 0.997, 0.997, 0.997, 0.997, 0.998, 0.998, 0.998,
    0.998, 0.998, 0.998, 0.999, 0.999, 0.999, 0.999, 0.999,
    0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 1.000,
  };
  // The table length is derived from the table itself so it cannot drift
  // out of sync with the data.
  static const int tab_size = sizeof(rate_tab) / sizeof(rate_tab[0]);

  assert(sizeof(rate_tab) == sizeof(dist_tab));
  assert(x >= 0.0);
  linear_interpolate2(x, tab_size, inv_tab_step,
                      rate_tab, dist_tab, R, D);
}

// Models the rate and distortion for a Laplacian source with total variance
// `var` over `n` samples when quantized with a uniform quantizer of step
// `qstep`. The closed form expressions are in:
// Hang and Chen, "Source Model for transform video coder and its
// application - Part I: Fundamental Theory", IEEE Trans. Circ.
// Sys. for Video Tech., April 1997.
// Writes 0 to both outputs when `var` or `n` is zero; otherwise *rate is the
// normalized rate scaled by n << 8 and *dist the normalized distortion
// scaled by var, both rounded to nearest.
static void model_rd_from_var_lapndz(int var, int n, int qstep,
                                     int *rate, int64_t *dist) {
  vp9_clear_system_state();
  if (var != 0 && n != 0) {
    const double per_sample_var = (double)var / n;
    const double norm_step = qstep / sqrt(per_sample_var);
    double norm_rate, norm_dist;

    model_rd_norm(norm_step, &norm_rate, &norm_dist);
    *rate = (int)((n << 8) * norm_rate + 0.5);
    *dist = (int64_t)(var * norm_dist + 0.5);
  } else {
    *rate = 0;
    *dist = 0;
  }
  vp9_clear_system_state();
}

// Estimates rate and distortion for a whole block from the prediction error
// of each plane, without transforming or quantizing anything. The sse between
// source and prediction of every plane is fed to the Laplacian model; the
// per-plane results are summed. *out_dist_sum is the summed distortion
// shifted left by 4.
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  int i, rate_sum = 0;
  // Accumulate in 64 bits: the per-plane values are int64_t and the final
  // << 4 can exceed the range of int.
  int64_t dist_sum = 0;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
    const BLOCK_SIZE_TYPE bs = get_plane_block_size(bsize, pd);
    unsigned int sse;
    int rate;
    int64_t dist;

    (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
                              pd->dst.buf, pd->dst.stride, &sse);
    // sse works better than var, since there is no dc prediction used
    model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
                             pd->dequant[1] >> 3, &rate, &dist);

    rate_sum += rate;
    dist_sum += dist;
  }

  *out_rate_sum = rate_sum;
  *out_dist_sum = dist_sum << 4;
}

// Model-based rate/distortion estimate for the luma plane at a given
// transform size. The block is walked in transform-sized tiles; for each
// tile the sse between source and prediction is fed to the Laplacian model.
//   out_rate_sum - sum of the modelled tile rates
//   out_dist_sum - sum of the modelled tile distortions, shifted left by 4
//   out_skip     - 1 only if every tile's modelled rate is below 1024
static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
                                 TX_SIZE tx_size,
                                 MACROBLOCK *x, MACROBLOCKD *xd,
                                 int *out_rate_sum, int64_t *out_dist_sum,
                                 int *out_skip) {
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const int width = plane_block_width(bsize, pd);
  const int height = plane_block_height(bsize, pd);
  BLOCK_SIZE_TYPE tile_bs = BLOCK_4X4;
  int tile = 4;  // tile edge length in pixels
  int total_rate = 0;
  int64_t total_dist = 0;
  int row, col;

  switch (tx_size) {
    case TX_4X4:
      tile_bs = BLOCK_4X4;
      tile = 4;
      break;
    case TX_8X8:
      tile_bs = BLOCK_8X8;
      tile = 8;
      break;
    case TX_16X16:
      tile_bs = BLOCK_16X16;
      tile = 16;
      break;
    case TX_32X32:
      tile_bs = BLOCK_32X32;
      tile = 32;
      break;
    default:
      assert(0);
      break;
  }

  *out_skip = 1;
  for (row = 0; row < height; row += tile) {
    for (col = 0; col < width; col += tile) {
      unsigned int sse;
      int tile_rate;
      int64_t tile_dist;

      (void) cpi->fn_ptr[tile_bs].vf(p->src.buf + row * p->src.stride + col,
                                     p->src.stride,
                                     pd->dst.buf + row * pd->dst.stride + col,
                                     pd->dst.stride, &sse);
      // sse works better than var, since there is no dc prediction used
      model_rd_from_var_lapndz(sse, tile * tile, pd->dequant[1] >> 3,
                               &tile_rate, &tile_dist);
      total_rate += tile_rate;
      total_dist += tile_dist;
      *out_skip &= (tile_rate < 1024);
    }
  }

  *out_rate_sum = total_rate;
  *out_dist_sum = (total_dist << 4);
}

Ronald S. Bultje's avatar
Ronald S. Bultje committed
460
// Computes two sums of squares over the first `block_size` coefficients.
//   coeff      - original transform coefficients
//   dqcoeff    - dequantized coefficients
//   block_size - number of coefficients to process
//   ssz        - receives the sum of squared original coefficients
// Returns the sum of squared differences between coeff and dqcoeff.
int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  // The index has the same type as block_size so the comparison never mixes
  // widths; all squares are taken in 64 bits.
  intptr_t i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int64_t orig = coeff[i];
    const int64_t diff = orig - dqcoeff[i];

    error += diff * diff;
    sqcoeff += orig * orig;
  }

  *ssz = sqcoeff;
  return error;
}

475
476
477
478
479
/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). */
480
static const int16_t band_counts[TX_SIZES][8] = {
481
482
483
484
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
485
486
};

487
static INLINE int cost_coeffs(MACROBLOCK *mb,
488
                              int plane, int block, PLANE_TYPE type,
489
                              ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
490
                              TX_SIZE tx_size,
491
                              const int16_t *scan, const int16_t *nb) {
492
  MACROBLOCKD *const xd = &mb->e_mbd;
493
  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
494
  int pt, c, cost;
495
  const int16_t *band_count = &band_counts[tx_size][1];
496
  const int eob = xd->plane[plane].eobs[block];
497
  const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, block);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
498
  const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
499
  unsigned int (*token_costs)[2][PREV_COEF_CONTEXTS]
500
                    [MAX_ENTROPY_TOKENS] = mb->token_costs[tx_size][type][ref];
501
  ENTROPY_CONTEXT above_ec = !!*A, left_ec = !!*L;
502
  uint8_t token_cache[1024];
503
504

  // Check for consistency of tx_size with mode info
505
  assert((!type && !plane) || (type && plane));
506
507
508
  if (type == PLANE_TYPE_Y_WITH_DC) {
    assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
  } else {
509
    assert(tx_size == get_uv_tx_size(mbmi));
510
511
  }

512
  pt = combine_entropy_contexts(above_ec, left_ec);
513

514
515
  if (eob == 0) {
    // single eob token
516
517
    cost = token_costs[0][0][pt][DCT_EOB_TOKEN];
    c = 0;
518
  } else {
519
    int v, prev_t, band_left = *band_count++;
520
521
522

    // dc token
    v = qcoeff_ptr[0];
523
    prev_t = vp9_dct_value_tokens_ptr[v].token;
524
    cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
525
    token_cache[0] = vp9_pt_energy_class[prev_t];
526
    ++token_costs;
527
528
529
530

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
531
      int t;
532
533
534

      v = qcoeff_ptr[rc];
      t = vp9_dct_value_tokens_ptr[v].token;
535
      pt = get_coef_context(nb, token_cache, c);
536
      cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
537
      token_cache[rc] = vp9_pt_energy_class[t];
538
      prev_t = t;
539
      if (!--band_left) {
540
541
        band_left = *band_count++;
        ++token_costs;
542
      }
543
    }
544
545

    // eob token
546
    if (band_left) {
547
      pt = get_coef_context(nb, token_cache, c);
548
      cost += (*token_costs)[0][pt][DCT_EOB_TOKEN];
549
    }
550
551
  }

552
  // is eob first coefficient;
553
  *A = *L = c > 0;
554

555
556
557
  return cost;
}

Deb Mukherjee's avatar
Deb Mukherjee committed
558
559
560
561
562
563
564
565
566
567
568
569
570
// State shared by the per-transform-block rd callbacks in this file.
// Field order matters: rdcost_plane() fills this struct with a positional
// initializer.
struct rdcost_block_args {
  VP9_COMMON *cm;
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[16];  // working copy of the above entropy contexts
  ENTROPY_CONTEXT t_left[16];   // working copy of the left entropy contexts
  TX_SIZE tx_size;
  int bw;
  int bh;
  int rate;         // accumulated by rate_block()
  int64_t dist;     // accumulated by dist_block()
  int64_t sse;      // accumulated by dist_block()
  int64_t best_rd;  // block_yrd_txfm() gives up once this is exceeded
  int skip;         // set once block_yrd_txfm() has given up
  const int16_t *scan;  // scan order passed to cost_coeffs()
  const int16_t *nb;    // neighbor table passed to cost_coeffs()
};

// Per-transform-block callback: adds this block's distortion and sse to the
// running totals in `arg` (a struct rdcost_block_args).
//   plane         - plane the block belongs to
//   block         - block index used to locate the coefficients
//   bsize         - unused
//   ss_txfrm_size - the block holds 16 << ss_txfrm_size coefficients
// Both values are scaled down by 4 (>> 2) except for 32x32 transforms.
static void dist_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
                       int ss_txfrm_size, void *arg) {
  struct rdcost_block_args* args = arg;
  MACROBLOCK* const x = args->x;
  MACROBLOCKD* const xd = &x->e_mbd;
  // Use the plane this callback was invoked for, as rate_block() does.
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  int64_t this_sse;
  int shift = args->tx_size == TX_32X32 ? 0 : 2;
  int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);

  args->dist += vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                                &this_sse) >> shift;
  args->sse += this_sse >> shift;

  if (x->skip_encode &&
      xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME) {
    // TODO(jingning): tune the model to better capture the distortion.
    // Extra term added to both totals: the squared quantizer step scaled
    // with the block size.
    const int64_t qstep = pd->dequant[1];
    const int64_t penalty = (qstep * qstep * (1 << ss_txfrm_size)) >> shift;

    args->dist += penalty;
    args->sse  += penalty;
  }
}

// Per-transform-block callback: adds the coefficient cost of this block to
// the running rate in `arg` (a struct rdcost_block_args). The block's
// position within the plane selects which entries of the working above/left
// context arrays cost_coeffs() reads and updates.
static void rate_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
                       int ss_txfrm_size, void *arg) {
  struct rdcost_block_args *const args = arg;
  MACROBLOCK *const mb = args->x;
  MACROBLOCKD *const xd = &mb->e_mbd;
  ENTROPY_CONTEXT *above;
  ENTROPY_CONTEXT *left;
  int col, row;

  txfrm_block_to_raster_xy(xd, bsize, plane, block, args->tx_size,
                           &col, &row);
  above = args->t_above + col;
  left = args->t_left + row;

  args->rate += cost_coeffs(mb, plane, block, xd->plane[plane].plane_type,
                            above, left, args->tx_size,
                            args->scan, args->nb);
}

613
614
// FIXME(jingning): need to make the rd test of chroma components consistent
// with that of luma component. this function should be deprecated afterwards.
Deb Mukherjee's avatar
Deb Mukherjee committed
615
616
static int rdcost_plane(VP9_COMMON * const cm, MACROBLOCK *x, int plane,
                        BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
617
618
619
620
621
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblockd_plane *pd = &xd->plane[plane];
  const BLOCK_SIZE_TYPE bs = get_plane_block_size(bsize, pd);
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bs];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bs];
622
  int i;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
623
624
625
  struct rdcost_block_args args = { cm, x, { 0 }, { 0 }, tx_size,
                                    num_4x4_blocks_wide, num_4x4_blocks_high,
                                    0, 0, 0, INT64_MAX, 0 };
Deb Mukherjee's avatar
Deb Mukherjee committed
626

627
628
  switch (tx_size) {
    case TX_4X4:
Dmitry Kovalev's avatar
Dmitry Kovalev committed
629
630
631
632
      vpx_memcpy(&args.t_above, pd->above_context,
                 sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide);
      vpx_memcpy(&args.t_left, pd->left_context,
                 sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high);
633
634
635
636
      args.scan = vp9_default_scan_4x4;
      args.nb = vp9_default_scan_4x4_neighbors;
      break;
    case TX_8X8:
Dmitry Kovalev's avatar
Dmitry Kovalev committed
637
638
639
640
      for (i = 0; i < num_4x4_blocks_wide; i += 2)
        args.t_above[i] = !!*(uint16_t *)&pd->above_context[i];
      for (i = 0; i < num_4x4_blocks_high; i += 2)
        args.t_left[i] = !!*(uint16_t *)&pd->left_context[i];
641
642
643
644
      args.scan = vp9_default_scan_8x8;
      args.nb = vp9_default_scan_8x8_neighbors;
      break;
    case TX_16X16:
Dmitry Kovalev's avatar
Dmitry Kovalev committed
645
646
647
648
      for (i = 0; i < num_4x4_blocks_wide; i += 4)
        args.t_above[i] = !!*(uint32_t *)&pd->above_context[i];
      for (i = 0; i < num_4x4_blocks_high; i += 4)
        args.t_left[i] = !!*(uint32_t *)&pd->left_context[i];
649
650
651
652
      args.scan = vp9_default_scan_16x16;
      args.nb = vp9_default_scan_16x16_neighbors;
      break;
    case TX_32X32:
Dmitry Kovalev's avatar
Dmitry Kovalev committed
653
654
655
656
      for (i = 0; i < num_4x4_blocks_wide; i += 8)
        args.t_above[i] = !!*(uint64_t *)&pd->above_context[i];
      for (i = 0; i < num_4x4_blocks_high; i += 8)
        args.t_left[i] = !!*(uint64_t *)&pd->left_context[i];
657
658
659
660
661
662
      args.scan = vp9_default_scan_32x32;
      args.nb = vp9_default_scan_32x32_neighbors;
      break;
    default:
      assert(0);
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684

  foreach_transformed_block_in_plane(xd, bsize, plane, rate_block, &args);
  return args.rate;
}

// Returns the summed coefficient rate of every plane after the first
// (planes 1 .. MAX_MB_PLANE - 1) at transform size `tx_size`.
static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x,
                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  int total_rate = 0;
  int plane = 1;

  while (plane < MAX_MB_PLANE) {
    total_rate += rdcost_plane(cm, x, plane, bsize, tx_size);
    ++plane;
  }
  return total_rate;
}

/* Accumulates vp9_block_error() over every plane with index >= 1.
 *
 * For each such plane the source coefficients (x->plane[p].coeff) are
 * compared against the plane's dqcoeff buffer over
 * (1 << num_pels_log2_lookup[plane block size]) entries.
 *
 * On return *sse holds the summed per-plane SSE shifted right by `shift`;
 * the return value is the summed block error shifted right by `shift`. */
static int64_t block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize,
                                int shift, int64_t *sse) {
  int64_t total_error = 0;
  int64_t total_sse = 0;
  int p;

  for (p = 1; p < MAX_MB_PLANE; ++p) {
    struct macroblockd_plane *pd = &x->e_mbd.plane[p];
    const BLOCK_SIZE_TYPE plane_bs = get_plane_block_size(bsize, pd);
    int64_t plane_sse;

    total_error += vp9_block_error(x->plane[p].coeff, pd->dqcoeff,
                                   1 << num_pels_log2_lookup[plane_bs],
                                   &plane_sse);
    total_sse += plane_sse;
  }

  *sse = total_sse >> shift;
  return total_error >> shift;
}

/* Per-transform-block callback; `arg` must point to a
 * struct rdcost_block_args carrying the running rate/dist/sse totals.
 *
 * Does nothing once args->skip is set. Otherwise, if the cheaper of
 * "running rate + running dist" and "zero rate + running sse" already
 * exceeds args->best_rd, the search is abandoned: skip is set and
 * rate/dist/sse are saturated to INT_MAX / INT64_MAX. If not, the block is
 * encoded (intra path when ref_frame[0] is INTRA_FRAME, xform_quant
 * otherwise) and dist_block() / rate_block() are called on the same args. */
static void block_yrd_txfm(int plane, int block, BLOCK_SIZE_TYPE bsize,
                           int ss_txfrm_size, void *arg) {
  struct rdcost_block_args *const rd_args = arg;
  MACROBLOCK *const x = rd_args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct encode_b_args encode_args = {rd_args->cm, x, NULL};
  int64_t coded_rd, uncoded_rd;

  // A previous block already triggered the early exit.
  if (rd_args->skip)
    return;

  coded_rd = RDCOST(x->rdmult, x->rddiv, rd_args->rate, rd_args->dist);
  uncoded_rd = RDCOST(x->rdmult, x->rddiv, 0, rd_args->sse);
  if (MIN(coded_rd, uncoded_rd) > rd_args->best_rd) {
    // Cannot beat the reference cost any more: mark the result invalid.
    rd_args->skip = 1;
    rd_args->rate = INT_MAX;
    rd_args->dist = INT64_MAX;
    rd_args->sse  = INT64_MAX;
    return;
  }

  if (xd->mode_info_context->mbmi.ref_frame[0] != INTRA_FRAME)
    xform_quant(plane, block, bsize, ss_txfrm_size, &encode_args);
  else
    encode_block_intra(plane, block, bsize, ss_txfrm_size, &encode_args);

  dist_block(plane, block, bsize, ss_txfrm_size, rd_args);
  rate_block(plane, block, bsize, ss_txfrm_size, rd_args);
}

static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
                                     int *rate, int64_t *distortion,
                                     int *skippable, int64_t *sse,
728
                                     int64_t ref_best_rd,
Deb Mukherjee's avatar
Deb Mukherjee committed
729
730
731
                                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblockd_plane *const pd = &xd->plane[0];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
732
733
734
  const BLOCK_SIZE_TYPE bs = get_plane_block_size(bsize, pd);
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bs];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bs];
735
  int i;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
736
737
  struct rdcost_block_args args = { cm, x, { 0 }, { 0 }, tx_size,
                                    num_4x4_blocks_wide, num_4x4_blocks_high,
738
                                    0, 0, 0, ref_best_rd, 0 };
Deb Mukherjee's avatar
Deb Mukherjee committed
739
  xd->mode_info_context->mbmi.txfm_size = tx_size;
740
741
742
  switch (tx_size) {
    case TX_4X4:
      vpx_memcpy(&args.t_above, pd->above_context,
Dmitry Kovalev's avatar
Dmitry Kovalev committed
743
                 sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide);
744
      vpx_memcpy(&args.t_left, pd->left_context,
Dmitry Kovalev's avatar
Dmitry Kovalev committed
745
                 sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high);
746
747
      get_scan_nb_4x4(get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, 0),
                      &args.scan, &args.nb);
748
749
      break;
    case TX_8X8:
Dmitry Kovalev's avatar
Dmitry Kovalev committed
750
      for (i = 0; i < num_4x4_blocks_wide; i += 2)
751
        args.t_above[i] = !!*(uint16_t *)&pd->above_context[i];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
752
      for (i = 0; i < num_4x4_blocks_high; i += 2)
753
        args.t_left[i] = !!*(uint16_t *)&pd->left_context[i];
754
755
      get_scan_nb_8x8(get_tx_type_8x8(PLANE_TYPE_Y_WITH_DC, xd),
                      &args.scan, &args.nb);
756
757
      break;
    case TX_16X16:
Dmitry Kovalev's avatar
Dmitry Kovalev committed
758
      for (i = 0; i < num_4x4_blocks_wide; i += 4)
759
        args.t_above[i] = !!*(uint32_t *)&pd->above_context[i];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
760
      for (i = 0; i < num_4x4_blocks_high; i += 4)
761
        args.t_left[i] = !!*(uint32_t *)&pd->left_context[i];
762
763
      get_scan_nb_16x16(get_tx_type_16x16(PLANE_TYPE_Y_WITH_DC, xd),
                        &args.scan, &args.nb);
764
765
      break;
    case TX_32X32:
Dmitry Kovalev's avatar
Dmitry Kovalev committed
766
      for (i = 0; i < num_4x4_blocks_wide; i += 8)
767
        args.t_above[i] = !!*(uint64_t *)&pd->above_context[i];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
768
      for (i = 0; i < num_4x4_blocks_high; i += 8)
769
770
771
772
773
774
775
        args.t_left[i] = !!*(uint64_t *)&pd->left_context[i];
      args.scan = vp9_default_scan_32x32;
      args.nb = vp9_default_scan_32x32_neighbors;
      break;
    default:
      assert(0);
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
776
777
778
779
780

  foreach_transformed_block_in_plane(xd, bsize, 0, block_yrd_txfm, &args);
  *distortion = args.dist;
  *rate       = args.rate;
  *sse        = args.sse;
781
  *skippable  = vp9_sby_is_skippable(xd, bsize) && (!args.skip);
Deb Mukherjee's avatar
Deb Mukherjee committed
782
783
784
785
786
}

static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
                                     int *rate, int64_t *distortion,
                                     int *skip, int64_t *sse,
787
                                     int64_t ref_best_rd,
Deb Mukherjee's avatar
Deb Mukherjee committed
788
                                     BLOCK_SIZE_TYPE bs) {
789
  const TX_SIZE max_txfm_size = max_txsize_lookup[bs];
Deb Mukherjee's avatar
Deb Mukherjee committed
790
791
792
793
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  if (max_txfm_size == TX_32X32 &&
794
795
      (cm->tx_mode == ALLOW_32X32 ||
       cm->tx_mode == TX_MODE_SELECT)) {
Deb Mukherjee's avatar
Deb Mukherjee committed
796
797
    mbmi->txfm_size = TX_32X32;
  } else if (max_txfm_size >= TX_16X16 &&
798
799
800
             (cm->tx_mode == ALLOW_16X16 ||
              cm->tx_mode == ALLOW_32X32 ||
              cm->tx_mode == TX_MODE_SELECT)) {
Deb Mukherjee's avatar
Deb Mukherjee committed
801
    mbmi->txfm_size = TX_16X16;
802
  } else if (cm->tx_mode != ONLY_4X4) {
Deb Mukherjee's avatar
Deb Mukherjee committed
803
804
805
806
807
    mbmi->txfm_size = TX_8X8;
  } else {
    mbmi->txfm_size = TX_4X4;
  }
  super_block_yrd_for_txfm(cm, x, rate, distortion, skip,
808
                           &sse[mbmi->txfm_size], ref_best_rd, bs,
Deb Mukherjee's avatar
Deb Mukherjee committed
809
810
811
812
                           mbmi->txfm_size);
  cpi->txfm_stepdown_count[0]++;
}

813
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
814
                                     int (*r)[2], int *rate,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
815
                                     int64_t *d, int64_t *distortion,
816
                                     int *s, int *skip,
817
                                     int64_t tx_cache[TX_MODES],
Deb Mukherjee's avatar
Deb Mukherjee committed
818
                                     BLOCK_SIZE_TYPE bs) {
819
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
820
821
822
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
823
  vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
824
  int64_t rd[TX_SIZES][2];
825
  int n, m;
826
  int s0, s1;
827

828
  const vp9_prob *tx_probs = get_tx_probs2(xd, &cm->fc.tx_probs);
829

830
  for (n = TX_4X4; n <= max_tx_size; n++) {
831
    r[n][1] = r[n][0];
832
833
    if (r[n][0] == INT_MAX)
      continue;
834
    for (m = 0; m <= n - (n == max_tx_size); m++) {
835
      if (m == n)
836
        r[n][1] += vp9_cost_zero(tx_probs[m]);
837
      else
838
        r[n][1] += vp9_cost_one(tx_probs[m]);
839
840
    }
  }
841

842
843
844
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
845

846
  for (n = TX_4X4; n <= max_tx_size; n++) {
847
848
849
850
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
      continue;
    }
851
852
853
854
855
    if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
856
857
858
    }
  }

859
  if (max_tx_size == TX_32X32 &&
860
861
      (cm->tx_mode == ALLOW_32X32 ||
       (cm->tx_mode == TX_MODE_SELECT &&
862
863
864
        rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
        rd[TX_32X32][1] < rd[TX_4X4][1]))) {
    mbmi->txfm_size = TX_32X32;
865
  } else if (max_tx_size >= TX_16X16 &&
866
867
868
             (cm->tx_mode == ALLOW_16X16 ||
              cm->tx_mode == ALLOW_32X32 ||
              (cm->tx_mode == TX_MODE_SELECT &&
869
870
               rd[TX_16X16][1] < rd[TX_8X8][1] &&
               rd[TX_16X16][1] < rd[TX_4X4][1]))) {
871
    mbmi->txfm_size = TX_16X16;
872
873
874
875
  } else if (cm->tx_mode == ALLOW_8X8 ||
             cm->tx_mode == ALLOW_16X16 ||
             cm->tx_mode == ALLOW_32X32 ||
           (cm->tx_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) {
876
877
878
879
880
    mbmi->txfm_size = TX_8X8;
  } else {
    mbmi->txfm_size = TX_4X4;
  }

881
  *distortion = d[mbmi->txfm_size];
882
  *rate       = r[mbmi->txfm_size][cm->tx_mode == TX_MODE_SELECT];
883
884
  *skip       = s[mbmi->txfm_size];

885
886
887
888
889
  tx_cache[ONLY_4X4] = rd[TX_4X4][0];
  tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
  tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
  tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
  if (max_tx_size == TX_32X32 &&
890
891
      rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1])
892
893
    tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  else if (max_tx_size >= TX_16X16 &&
894
           rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])
895
    tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
896
  else
897
    tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
898
                                 rd[TX_4X4][1] : rd[TX_8X8][1];
899

900
  if (max_tx_size == TX_32X32 &&
Deb Mukherjee's avatar
Deb Mukherjee committed
901
902
903
904
      rd[TX_32X32][1] < rd[TX_16X16][1] &&
      rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1]) {
    cpi->txfm_stepdown_count[0]++;
905
  } else if (max_tx_size >= TX_16X16 &&
Deb Mukherjee's avatar
Deb Mukherjee committed
906
907
             rd[TX_16X16][1] < rd[TX_8X8][1] &&
             rd[TX_16X16][1] < rd[TX_4X4][1]) {
908
    cpi->txfm_stepdown_count[max_tx_size - TX_16X16]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
909
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
910
    cpi->txfm_stepdown_count[max_tx_size - TX_8X8]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
911
  } else {
912
    cpi->txfm_stepdown_count[max_tx_size - TX_4X4]++;
913
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
914
}
915

Deb Mukherjee's avatar
Deb Mukherjee committed
916
917
918
919
static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
                                          int (*r)[2], int *rate,
                                          int64_t *d, int64_t *distortion,
                                          int *s, int *skip, int64_t *sse,
920
                                          int64_t ref_best_rd,
921
                                          BLOCK_SIZE_TYPE bs) {
922
  const TX_SIZE max_txfm_size = max_txsize_lookup[bs];
Deb Mukherjee's avatar
Deb Mukherjee committed
923
924
925
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
926
  vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
927
  int64_t rd[TX_SIZES][2];
Deb Mukherjee's avatar
Deb Mukherjee committed
928
929
  int n, m;
  int s0, s1;
930
931
  double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00};
  // double scale_r[TX_SIZES] = {2.82, 2.00, 1.41, 1.00};
932

933
  const vp9_prob *tx_probs = get_tx_probs2(xd, &cm->fc.tx_probs);
934

Deb Mukherjee's avatar
Deb Mukherjee committed
935
936
  // for (n = TX_4X4; n <= max_txfm_size; n++)
  //   r[n][0] = (r[n][0] * scale_r[n]);
937

Deb Mukherjee's avatar
Deb Mukherjee committed
938
939
940
941
942
943
944
945
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    r[n][1] = r[n][0];
    for (m = 0; m <= n - (n == max_txfm_size); m++) {
      if (m == n)
        r[n][1] += vp9_cost_zero(tx_probs[m]);
      else
        r[n][1] += vp9_cost_one(tx_probs[m]);
    }
946
  }
947

Deb Mukherjee's avatar
Deb Mukherjee committed
948
949
950
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
951

Deb Mukherjee's avatar
Deb Mukherjee committed
952
953
954
955
956
957
958
959
960
961
962
963
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }
  }
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    rd[n][0] = (scale_rd[n] * rd[n][0]);
    rd[n][1] = (scale_rd[n] * rd[n][1]);
  }
964

Deb Mukherjee's avatar
Deb Mukherjee committed
965
  if (max_txfm_size == TX_32X32 &&