/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
15

16
#include "vp9/common/vp9_pragmas.h"
17
18
19
20
21
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
22
23
24
25
26
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
27
28
29
30
31
32
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
33
#include "vpx_mem/vpx_mem.h"
34
35
36
37
38
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
39
#include "vp9_rtcd.h"
40
#include "vp9/common/vp9_mvref_common.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
41
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
42

43
44
#define INVALID_MV 0x80008000

45
46
47
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

48
49
50
DECLARE_ALIGNED(16, extern const uint8_t,
                vp9_pt_energy_class[MAX_ENTROPY_TOKENS]);

51
52
53
54
#define LAST_FRAME_MODE_MASK    0xFFDADCD60
#define GOLDEN_FRAME_MODE_MASK  0xFFB5A3BB0
#define ALT_REF_MODE_MASK       0xFF8C648D0

55
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
56
57
58
  {RD_NEARESTMV, LAST_FRAME,   NONE},
  {RD_NEARESTMV, ALTREF_FRAME, NONE},
  {RD_NEARESTMV, GOLDEN_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
59

60
  {RD_DC_PRED,   INTRA_FRAME,  NONE},
61

Jim Bankoski's avatar
Jim Bankoski committed
62
  {RD_NEWMV,     LAST_FRAME,   NONE},
Paul Wilkins's avatar
Paul Wilkins committed
63
  {RD_NEWMV,     ALTREF_FRAME, NONE},
64
65
66
  {RD_NEWMV,     GOLDEN_FRAME, NONE},

  {RD_NEARMV,    LAST_FRAME,   NONE},
Paul Wilkins's avatar
Paul Wilkins committed
67
  {RD_NEARMV,    ALTREF_FRAME, NONE},
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
  {RD_NEARESTMV, LAST_FRAME,   ALTREF_FRAME},
  {RD_NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},

  {RD_TM_PRED,   INTRA_FRAME,  NONE},

  {RD_NEARMV,    LAST_FRAME,   ALTREF_FRAME},
  {RD_NEWMV,     LAST_FRAME,   ALTREF_FRAME},
  {RD_NEARMV,    GOLDEN_FRAME, NONE},
  {RD_NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {RD_NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},

  {RD_SPLITMV,   LAST_FRAME,   NONE},
  {RD_SPLITMV,   GOLDEN_FRAME, NONE},
  {RD_SPLITMV,   ALTREF_FRAME, NONE},
  {RD_SPLITMV,   LAST_FRAME,   ALTREF_FRAME},
  {RD_SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},

  {RD_ZEROMV,    LAST_FRAME,   NONE},
  {RD_ZEROMV,    GOLDEN_FRAME, NONE},
  {RD_ZEROMV,    ALTREF_FRAME, NONE},
  {RD_ZEROMV,    LAST_FRAME,   ALTREF_FRAME},
  {RD_ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},

  {RD_I4X4_PRED, INTRA_FRAME,  NONE},
  {RD_H_PRED,    INTRA_FRAME,  NONE},
  {RD_V_PRED,    INTRA_FRAME,  NONE},
  {RD_D135_PRED, INTRA_FRAME,  NONE},
  {RD_D207_PRED, INTRA_FRAME,  NONE},
  {RD_D153_PRED, INTRA_FRAME,  NONE},
  {RD_D63_PRED,  INTRA_FRAME,  NONE},
  {RD_D117_PRED, INTRA_FRAME,  NONE},
  {RD_D45_PRED,  INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
100
101
};

102
103
104
105
// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for blocks size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
106
static int rd_thresh_block_size_factor[BLOCK_SIZES] =
107
108
  {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};

109
110
#define MAX_RD_THRESH_FACT 64
#define RD_THRESH_INC 1
111

112
static void fill_token_costs(vp9_coeff_cost *c,
113
                             vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
114
  int i, j, k, l;
115
116
117
118
119
120
121
122
  TX_SIZE t;
  for (t = TX_4X4; t <= TX_32X32; t++)
    for (i = 0; i < BLOCK_TYPES; i++)
      for (j = 0; j < REF_TYPES; j++)
        for (k = 0; k < COEF_BANDS; k++)
          for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
            vp9_prob probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
123
            vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
124
                            vp9_coef_tree);
125
            vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
126
                                 vp9_coef_tree);
127
128
            assert(c[t][i][j][k][0][l][DCT_EOB_TOKEN] ==
                   c[t][i][j][k][1][l][DCT_EOB_TOKEN]);
129
          }
130
131
}

132
133
134
135
136
137
// Scaling factors indexed by a ratio in [0, 31]. Only the first five
// entries are non-zero; the remaining 27 entries are zero-initialized.
static const int rd_iifactor[32] = { 4, 4, 3, 2, 1 };
John Koleszar's avatar
John Koleszar committed
138

139
// 3* dc_qlookup[Q]*dc_qlookup[Q];
140

141
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
142
143
144
// Lookup tables indexed by quantizer index, filled by vp9_init_me_luts()
// and read by vp9_initialize_me_consts().
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

// Fills both SAD-per-bit lookup tables for every quantizer index.
void vp9_init_me_luts(void) {
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    sad_per_bit16lut[i] =
      (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107);
    sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742);
  }
}
John Koleszar's avatar
John Koleszar committed
157

158
// Returns the rate-distortion multiplier for a quantizer index:
// 11/4 times the square of the DC quantizer value for that index.
static int compute_rd_mult(int qindex) {
  const int q = vp9_dc_quant(qindex, 0);
  return (11 * q * q) >> 2;
}

163
164
165
166
167
168
169
170
171
// Converts an RD prediction mode to the matching MB_PREDICTION_MODE by a
// direct cast. RD_SPLITMV and RD_I4X4_PRED have no counterpart: they trip
// an assertion and, when asserts are compiled out, yield MB_MODE_COUNT.
static MB_PREDICTION_MODE rd_mode_to_mode(RD_PREDICTION_MODE rd_mode) {
  const int has_no_counterpart =
      (rd_mode == RD_I4X4_PRED) || (rd_mode == RD_SPLITMV);

  if (!has_no_counterpart) {
    assert((int)rd_mode < (int)MB_MODE_COUNT);
    return (MB_PREDICTION_MODE)rd_mode;
  }

  assert(!"Invalid rd_mode");
  return MB_MODE_COUNT;
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
172
173
174
// Copies the SAD-per-bit values for qindex from the lookup tables into the
// encoder's macroblock state. qindex is used as a table index without a
// range check, so the caller must pass a value in [0, QINDEX_RANGE).
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
  cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
}

177

Dmitry Kovalev's avatar
Dmitry Kovalev committed
178
// Initializes the rate-distortion constants of the encoder for one
// quantizer index: RDDIV/RDMULT, errorperbit, the per-block-size and
// per-mode rd thresholds, and the token, partition, mode and (for non-key
// frames) motion vector and inter mode cost tables.
//
// qindex is clamped to [0, MAXQ] before use.
void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
  int q, i, bsize;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  qindex = clamp(qindex, 0, MAXQ);

  cpi->RDDIV = 100;
  cpi->RDMULT = compute_rd_mult(qindex);
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    // rd_iifactor[] has 32 entries: ratios above 31 use the last entry.
    // A negative ratio is not expected, but is mapped to entry 0 rather
    // than being allowed to index before the start of the table.
    int iiratio = 31;
    if (cpi->twopass.next_iiratio <= 31)
      iiratio = cpi->twopass.next_iiratio < 0
                    ? 0 : (int)cpi->twopass.next_iiratio;
    cpi->RDMULT += (cpi->RDMULT * rd_iifactor[iiratio]) >> 4;
  }
  cpi->mb.errorperbit = cpi->RDMULT >> 6;
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);  // never zero

  vp9_set_speed_features(cpi);

  q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25);
  q <<= 2;
  if (q < 8)
    q = 8;

  for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
    const int size_factor = rd_thresh_block_size_factor[bsize];
    // Largest multiplier that cannot overflow the product below. It depends
    // only on the block size, so it is computed once per block size.
    // Threshold here seems unnecessarily harsh but fine given actual
    // range of values used for cpi->sf.thresh_mult[]
    const int thresh_max = INT_MAX / (q * size_factor);

    for (i = 0; i < MAX_MODES; i++) {
      if (cpi->sf.thresh_mult[i] < thresh_max) {
        cpi->rd_threshes[bsize][i] =
            cpi->sf.thresh_mult[i] * q * size_factor / 4;
      } else {
        cpi->rd_threshes[bsize][i] = INT_MAX;
      }
    }
  }

  fill_token_costs(cpi->mb.token_costs, cpi->common.fc.coef_probs);

  for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
    vp9_cost_tokens(cpi->mb.partition_cost[i],
                    cpi->common.fc.partition_prob[cpi->common.frame_type][i],
                    vp9_partition_tree);

  /*rough estimate for costing*/
  vp9_init_mode_costs(cpi);

  if (cpi->common.frame_type != KEY_FRAME) {
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cpi->mb.e_mbd.allow_high_precision_mv ?
        cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cpi->common.fc.nmvc,
        cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);

    for (i = 0; i < INTER_MODE_CONTEXTS; i++) {
      MB_PREDICTION_MODE m;

      for (m = NEARESTMV; m < MB_MODE_COUNT; m++)
        cpi->mb.inter_mode_cost[i][m - NEARESTMV] =
            cost_token(vp9_inter_mode_tree,
                       cpi->common.fc.inter_mode_probs[i],
                       vp9_inter_mode_encodings + (m - NEARESTMV));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
254
// Linearly interpolates two equally sized tables at the same position.
//
//   x         position to evaluate; expected to be non-negative
//   ntab      number of entries in each of tab1 and tab2
//   inv_step  number of table entries per unit of x
//   tab1/tab2 the tables
//   v1/v2     receive the interpolated values from tab1 and tab2
//
// Positions at or beyond the last entry return the last entry.
static INLINE void linear_interpolate2(double x, int ntab, int inv_step,
                                       const double *tab1, const double *tab2,
                                       double *v1, double *v2) {
  const double y = x * inv_step;

  // Range-check in floating point first: converting a double that does not
  // fit in an int is undefined, so the cast is done only once y is known
  // to lie below ntab - 1.
  if (y >= ntab - 1) {
    *v1 = tab1[ntab - 1];
    *v2 = tab2[ntab - 1];
  } else {
    const int d = (int) y;
    const double a = y - d;
    *v1 = tab1[d] * (1 - a) + tab1[d + 1] * a;
    *v2 = tab2[d] * (1 - a) + tab2[d + 1] * a;
  }
}

269
270
271
272
273
// Looks up the normalized rate (*R) and normalized distortion (*D) for a
// normalized quantizer step x >= 0 by linear interpolation in two tables
// sampled at 8 entries per unit of x.
static void model_rd_norm(double x, double *R, double *D) {
  static const int inv_tab_step = 8;
  static const int tab_size = 120;
  // NOTE: The tables below must be of the same size
  //
  // Normalized rate
  // This table models the rate for a Laplacian source
  // with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const double rate_tab[] = {
    64.00, 4.944, 3.949, 3.372, 2.966, 2.655, 2.403, 2.194,
    2.014, 1.858, 1.720, 1.596, 1.485, 1.384, 1.291, 1.206,
    1.127, 1.054, 0.986, 0.923, 0.863, 0.808, 0.756, 0.708,
    0.662, 0.619, 0.579, 0.541, 0.506, 0.473, 0.442, 0.412,
    0.385, 0.359, 0.335, 0.313, 0.291, 0.272, 0.253, 0.236,
    0.220, 0.204, 0.190, 0.177, 0.165, 0.153, 0.142, 0.132,
    0.123, 0.114, 0.106, 0.099, 0.091, 0.085, 0.079, 0.073,
    0.068, 0.063, 0.058, 0.054, 0.050, 0.047, 0.043, 0.040,
    0.037, 0.034, 0.032, 0.029, 0.027, 0.025, 0.023, 0.022,
    0.020, 0.019, 0.017, 0.016, 0.015, 0.014, 0.013, 0.012,
    0.011, 0.010, 0.009, 0.008, 0.008, 0.007, 0.007, 0.006,
    0.006, 0.005, 0.005, 0.005, 0.004, 0.004, 0.004, 0.003,
    0.003, 0.003, 0.003, 0.002, 0.002, 0.002, 0.002, 0.002,
    0.002, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001,
    0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.000,
  };
  // Normalized distortion
  // This table models the normalized distortion for a Laplacian source
  // with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance)
  // Note the actual distortion is Dn * variance.
  static const double dist_tab[] = {
    0.000, 0.001, 0.005, 0.012, 0.021, 0.032, 0.045, 0.061,
    0.079, 0.098, 0.119, 0.142, 0.166, 0.190, 0.216, 0.242,
    0.269, 0.296, 0.324, 0.351, 0.378, 0.405, 0.432, 0.458,
    0.484, 0.509, 0.534, 0.557, 0.580, 0.603, 0.624, 0.645,
    0.664, 0.683, 0.702, 0.719, 0.735, 0.751, 0.766, 0.780,
    0.794, 0.807, 0.819, 0.830, 0.841, 0.851, 0.861, 0.870,
    0.878, 0.886, 0.894, 0.901, 0.907, 0.913, 0.919, 0.925,
    0.930, 0.935, 0.939, 0.943, 0.947, 0.951, 0.954, 0.957,
    0.960, 0.963, 0.966, 0.968, 0.971, 0.973, 0.975, 0.976,
    0.978, 0.980, 0.981, 0.982, 0.984, 0.985, 0.986, 0.987,
    0.988, 0.989, 0.990, 0.990, 0.991, 0.992, 0.992, 0.993,
    0.993, 0.994, 0.994, 0.995, 0.995, 0.996, 0.996, 0.996,
    0.996, 0.997, 0.997, 0.997, 0.997, 0.998, 0.998, 0.998,
    0.998, 0.998, 0.998, 0.999, 0.999, 0.999, 0.999, 0.999,
    0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 1.000,
  };

  // Both tables must hold exactly tab_size entries, because the
  // interpolation below reads both with the same index.
  assert(sizeof(rate_tab) == tab_size * sizeof(rate_tab[0]));
  assert(sizeof(dist_tab) == tab_size * sizeof(dist_tab[0]));
  assert(sizeof(rate_tab) == sizeof(dist_tab));
  assert(x >= 0.0);
  linear_interpolate2(x, tab_size, inv_tab_step,
                      rate_tab, dist_tab, R, D);
}

// Models rate and distortion from a variance (or sum of squared error).
//
//   var    summed squared error over the block
//   n      number of samples the sum was taken over
//   qstep  quantizer step size
//   rate   receives n * 256 * normalized rate, rounded to nearest
//   dist   receives var * normalized distortion, rounded to nearest
//
// Both outputs are zero when var or n is zero.
static void model_rd_from_var_lapndz(int var, int n, int qstep,
                                     int *rate, int64_t *dist) {
  // This function models the rate and distortion for a Laplacian
  // source with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expressions are in:
  // Hang and Chen, "Source Model for transform video coder and its
  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
  // Sys. for Video Tech., April 1997.
  vp9_clear_system_state();
  if (var == 0 || n == 0) {
    *rate = 0;
    *dist = 0;
  } else {
    double D, R;
    double s2 = (double) var / n;  // per-sample variance
    double x = qstep / sqrt(s2);   // step size normalized by std deviation
    model_rd_norm(x, &R, &D);
    *rate = (int)((n << 8) * R + 0.5);
    *dist = (int)(var * D + 0.5);
  }
  vp9_clear_system_state();
}

355
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
Deb Mukherjee's avatar
Deb Mukherjee committed
356
357
358
359
360
361
362
363
364
365
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  int i, rate_sum = 0, dist_sum = 0;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
366
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
Deb Mukherjee's avatar
Deb Mukherjee committed
367
368
369
370
371
372
    unsigned int sse;
    int rate;
    int64_t dist;
    (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
                              pd->dst.buf, pd->dst.stride, &sse);
    // sse works better than var, since there is no dc prediction used
Dmitry Kovalev's avatar
Dmitry Kovalev committed
373
    model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
374
                             pd->dequant[1] >> 3, &rate, &dist);
Deb Mukherjee's avatar
Deb Mukherjee committed
375
376

    rate_sum += rate;
Yaowu Xu's avatar
Yaowu Xu committed
377
    dist_sum += (int)dist;
Deb Mukherjee's avatar
Deb Mukherjee committed
378
379
380
381
382
383
  }

  *out_rate_sum = rate_sum;
  *out_dist_sum = dist_sum << 4;
}

384
static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize,
Deb Mukherjee's avatar
Deb Mukherjee committed
385
386
387
388
                                 TX_SIZE tx_size,
                                 MACROBLOCK *x, MACROBLOCKD *xd,
                                 int *out_rate_sum, int64_t *out_dist_sum,
                                 int *out_skip) {
389
  int j, k;
390
  BLOCK_SIZE bs;
Deb Mukherjee's avatar
Deb Mukherjee committed
391
392
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
393
394
  const int width = 4 << num_4x4_blocks_wide_lookup[bsize];
  const int height = 4 << num_4x4_blocks_high_lookup[bsize];
Deb Mukherjee's avatar
Deb Mukherjee committed
395
396
  int rate_sum = 0;
  int64_t dist_sum = 0;
397
  const int t = 4 << tx_size;
Deb Mukherjee's avatar
Deb Mukherjee committed
398
399
400
401
402
403
404
405
406
407
408
409

  if (tx_size == TX_4X4) {
    bs = BLOCK_4X4;
  } else if (tx_size == TX_8X8) {
    bs = BLOCK_8X8;
  } else if (tx_size == TX_16X16) {
    bs = BLOCK_16X16;
  } else if (tx_size == TX_32X32) {
    bs = BLOCK_32X32;
  } else {
    assert(0);
  }
410

Deb Mukherjee's avatar
Deb Mukherjee committed
411
  *out_skip = 1;
Jim Bankoski's avatar
Jim Bankoski committed
412
413
  for (j = 0; j < height; j += t) {
    for (k = 0; k < width; k += t) {
Deb Mukherjee's avatar
Deb Mukherjee committed
414
415
416
      int rate;
      int64_t dist;
      unsigned int sse;
417
418
419
      cpi->fn_ptr[bs].vf(&p->src.buf[j * p->src.stride + k], p->src.stride,
                         &pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride,
                         &sse);
Deb Mukherjee's avatar
Deb Mukherjee committed
420
      // sse works better than var, since there is no dc prediction used
421
      model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3, &rate, &dist);
Deb Mukherjee's avatar
Deb Mukherjee committed
422
423
424
425
426
      rate_sum += rate;
      dist_sum += dist;
      *out_skip &= (rate < 1024);
    }
  }
427

Deb Mukherjee's avatar
Deb Mukherjee committed
428
  *out_rate_sum = rate_sum;
429
  *out_dist_sum = dist_sum << 4;
Deb Mukherjee's avatar
Deb Mukherjee committed
430
431
}

Ronald S. Bultje's avatar
Ronald S. Bultje committed
432
// Computes the squared error between original and dequantized coefficients.
//
//   coeff       original transform coefficients
//   dqcoeff     dequantized coefficients
//   block_size  number of coefficients in both arrays
//   ssz         receives the sum of squares of coeff[]
//
// Returns the sum of squared differences coeff[i] - dqcoeff[i].
int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  intptr_t i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    // The difference of two int16_t values can reach +/-65535, whose square
    // does not fit in a signed 32-bit int, so the products are formed in
    // 64 bits.
    const int64_t this_diff = (int64_t)coeff[i] - dqcoeff[i];
    const int64_t this_coeff = coeff[i];
    error += this_diff * this_diff;
    sqcoeff += this_coeff * this_coeff;
  }

  *ssz = sqcoeff;
  return error;
}

447
448
449
450
451
/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). */
452
// Number of coefficients in each band, one row per transform size. Each
// row sums to the coefficient count of its transform (16, 64, 256, 1024)
// and ends with a 0 terminator; the unused eighth slot is also 0.
static const int16_t band_counts[TX_SIZES][8] = {
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
};

459
static INLINE int cost_coeffs(MACROBLOCK *mb,
460
                              int plane, int block,
461
                              ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
462
                              TX_SIZE tx_size,
463
                              const int16_t *scan, const int16_t *nb) {
464
  MACROBLOCKD *const xd = &mb->e_mbd;
465
  MB_MODE_INFO *mbmi = &xd->this_mi->mbmi;
466
467
  struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
468
  const int16_t *band_count = &band_counts[tx_size][1];
469
470
  const int eob = pd->eobs[block];
  const int16_t *const qcoeff_ptr = BLOCK_OFFSET(pd->qcoeff, block);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
471
  const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
472
473
474
  unsigned int (*token_costs)[2][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
                   mb->token_costs[tx_size][type][ref];
  const ENTROPY_CONTEXT above_ec = !!*A, left_ec = !!*L;
475
  uint8_t token_cache[1024];
476
477
  int pt = combine_entropy_contexts(above_ec, left_ec);
  int c, cost;
478
479

  // Check for consistency of tx_size with mode info
480
  assert(type == PLANE_TYPE_Y_WITH_DC ? mbmi->tx_size == tx_size
481
                                      : get_uv_tx_size(mbmi) == tx_size);
482

483
484
  if (eob == 0) {
    // single eob token
485
486
    cost = token_costs[0][0][pt][DCT_EOB_TOKEN];
    c = 0;
487
  } else {
488
    int band_left = *band_count++;
489
490

    // dc token
491
492
    int v = qcoeff_ptr[0];
    int prev_t = vp9_dct_value_tokens_ptr[v].token;
493
    cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
494
    token_cache[0] = vp9_pt_energy_class[prev_t];
495
    ++token_costs;
496
497
498
499

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
500
      int t;
501
502
503

      v = qcoeff_ptr[rc];
      t = vp9_dct_value_tokens_ptr[v].token;
504
      pt = get_coef_context(nb, token_cache, c);
505
      cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
506
      token_cache[rc] = vp9_pt_energy_class[t];
507
      prev_t = t;
508
      if (!--band_left) {
509
510
        band_left = *band_count++;
        ++token_costs;
511
      }
512
    }
513
514

    // eob token
515
    if (band_left) {
516
      pt = get_coef_context(nb, token_cache, c);
517
      cost += (*token_costs)[0][pt][DCT_EOB_TOKEN];
518
    }
519
520
  }

521
  // is eob first coefficient;
522
  *A = *L = (c > 0);
523

524
525
526
  return cost;
}

Deb Mukherjee's avatar
Deb Mukherjee committed
527
528
529
530
531
532
533
// State shared by the per-transform-block rd callbacks of one plane.
// NOTE: this struct is initialized positionally, so the member order must
// not be changed.
struct rdcost_block_args {
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[16];  // working copy of the above contexts
  ENTROPY_CONTEXT t_left[16];   // working copy of the left contexts
  TX_SIZE tx_size;
  int bw;                       // plane block width in 4x4 units
  int bh;                       // plane block height in 4x4 units
  int rate[256];                // per-block rate
  int64_t dist[256];            // per-block distortion
  int64_t sse[256];             // per-block sum of squared error
  int this_rate;                // running totals over the blocks visited
  int64_t this_dist;
  int64_t this_sse;
  int64_t this_rd;
  int64_t best_rd;              // rd cost to beat; search stops beyond it
  int skip;                     // set once this_rd exceeds best_rd
  const int16_t *scan, *nb;     // scan order and its neighbor table
};

546
// Computes distortion and SSE of one transform block and stores them in
// args->dist[block] and args->sse[block].
//
// Normally both come from vp9_block_error() on the block's coefficients,
// scaled down by 4 except for 32x32 transforms. When x->skip_encode is set
// and the block is intra coded, both are replaced by a model value derived
// from the dequantizer step.
static void dist_block(int plane, int block, TX_SIZE tx_size, void *arg) {
  const int ss_txfrm_size = tx_size << 1;
  struct rdcost_block_args* args = arg;
  MACROBLOCK* const x = args->x;
  MACROBLOCKD* const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  int64_t this_sse;
  int shift = args->tx_size == TX_32X32 ? 0 : 2;
  int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  args->dist[block] = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                                &this_sse) >> shift;
  args->sse[block]  = this_sse >> shift;

  if (x->skip_encode &&
      xd->this_mi->mbmi.ref_frame[0] == INTRA_FRAME) {
    // TODO(jingning): tune the model to better capture the distortion.
    // Named so that it does not shadow the plane pointer p, and computed
    // in 64 bits so the product cannot overflow an int.
    const int64_t modeled_dist =
        ((int64_t)pd->dequant[1] * pd->dequant[1] *
         (1 << ss_txfrm_size)) >> shift;
    args->dist[block] = modeled_dist;
    args->sse[block]  = modeled_dist;
  }
}

571
static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
572
                       TX_SIZE tx_size, void *arg) {
Deb Mukherjee's avatar
Deb Mukherjee committed
573
  struct rdcost_block_args* args = arg;
574

Deb Mukherjee's avatar
Deb Mukherjee committed
575
  int x_idx, y_idx;
576
  txfrm_block_to_raster_xy(plane_bsize, args->tx_size, block, &x_idx, &y_idx);
Deb Mukherjee's avatar
Deb Mukherjee committed
577

578
579
580
581
  args->rate[block] = cost_coeffs(args->x, plane, block,
                                  args->t_above + x_idx,
                                  args->t_left + y_idx, args->tx_size,
                                  args->scan, args->nb);
Deb Mukherjee's avatar
Deb Mukherjee committed
582
583
}

584
static void block_yrd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
585
                           TX_SIZE tx_size, void *arg) {
Deb Mukherjee's avatar
Deb Mukherjee committed
586
587
588
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
589
  struct encode_b_args encode_args = {x, NULL};
590
  int64_t rd1, rd2, rd;
Deb Mukherjee's avatar
Deb Mukherjee committed
591

592
593
594
  if (args->skip)
    return;

595
  if (!is_inter_block(&xd->this_mi->mbmi))
596
    vp9_encode_block_intra(plane, block, plane_bsize, tx_size, &encode_args);
Deb Mukherjee's avatar
Deb Mukherjee committed
597
  else
598
    vp9_xform_quant(plane, block, plane_bsize, tx_size, &encode_args);
Deb Mukherjee's avatar
Deb Mukherjee committed
599

600
601
  dist_block(plane, block, tx_size, args);
  rate_block(plane, block, plane_bsize, tx_size, args);
602
603
  rd1 = RDCOST(x->rdmult, x->rddiv, args->rate[block], args->dist[block]);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse[block]);
604
605

  // TODO(jingning): temporarily enabled only for luma component
606
  rd = MIN(rd1, rd2);
607
608
609
  if (plane == 0)
    x->zcoeff_blk[tx_size][block] = rd1 > rd2;

610
611
612
613
614
615
616
617
618
  args->this_rate += args->rate[block];
  args->this_dist += args->dist[block];
  args->this_sse  += args->sse[block];
  args->this_rd += rd;

  if (args->this_rd > args->best_rd) {
    args->skip = 1;
    return;
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
619
620
}

621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
// Returns 1 if any of the num_bytes bytes starting at ctx is non-zero,
// else 0. Reading through unsigned char is valid for any object and has no
// alignment requirement.
static int any_entropy_context_set(const ENTROPY_CONTEXT *ctx,
                                   int num_bytes) {
  const unsigned char *const bytes = (const unsigned char *)ctx;
  int k;
  for (k = 0; k < num_bytes; ++k) {
    if (bytes[k])
      return 1;
  }
  return 0;
}

// Builds working above/left entropy contexts for a given transform size.
//
//   tx_size          transform size
//   t_above, t_left  output arrays (16 entries each)
//   above, left      current contexts at 4x4 granularity
//   num_4x4_w/_h     block width/height in 4x4 units
//
// For TX_4X4 the contexts are copied as they are. For larger transforms
// one entry is written per transform block, at the index of its first 4x4
// column/row: 1 if any of the contexts it covers is non-zero, else 0.
// The other entries of t_above/t_left are left untouched.
void vp9_get_entropy_contexts(TX_SIZE tx_size,
    ENTROPY_CONTEXT t_above[16], ENTROPY_CONTEXT t_left[16],
    const ENTROPY_CONTEXT *above, const ENTROPY_CONTEXT *left,
    int num_4x4_w, int num_4x4_h) {
  int i;
  // The grouped cases scan the same number of bytes that a uint16_t,
  // uint32_t or uint64_t load would read, but byte by byte, so no
  // alignment or aliasing assumption is made about the context arrays.
  switch (tx_size) {
    case TX_4X4:
      vpx_memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      vpx_memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      break;
    case TX_8X8:
      for (i = 0; i < num_4x4_w; i += 2)
        t_above[i] = any_entropy_context_set(&above[i],
                                             (int)sizeof(uint16_t));
      for (i = 0; i < num_4x4_h; i += 2)
        t_left[i] = any_entropy_context_set(&left[i],
                                            (int)sizeof(uint16_t));
      break;
    case TX_16X16:
      for (i = 0; i < num_4x4_w; i += 4)
        t_above[i] = any_entropy_context_set(&above[i],
                                             (int)sizeof(uint32_t));
      for (i = 0; i < num_4x4_h; i += 4)
        t_left[i] = any_entropy_context_set(&left[i],
                                            (int)sizeof(uint32_t));
      break;
    case TX_32X32:
      for (i = 0; i < num_4x4_w; i += 8)
        t_above[i] = any_entropy_context_set(&above[i],
                                             (int)sizeof(uint64_t));
      for (i = 0; i < num_4x4_h; i += 8)
        t_left[i] = any_entropy_context_set(&left[i],
                                            (int)sizeof(uint64_t));
      break;
    default:
      assert(!"Invalid transform size.");
      break;
  }
}

654
static void txfm_rd_in_plane(MACROBLOCK *x,
655
656
657
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
658
                             BLOCK_SIZE bsize, TX_SIZE tx_size) {
Deb Mukherjee's avatar
Deb Mukherjee committed
659
  MACROBLOCKD *const xd = &x->e_mbd;
660
  struct macroblockd_plane *const pd = &xd->plane[plane];
661
  const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
662
663
664
  const int num_4x4_w = num_4x4_blocks_wide_lookup[bs];
  const int num_4x4_h = num_4x4_blocks_high_lookup[bs];

665
  struct rdcost_block_args args = { x, { 0 }, { 0 }, tx_size,
666
                                    num_4x4_w, num_4x4_h,
667
668
                                    { 0 }, { 0 }, { 0 },
                                    0, 0, 0, 0, ref_best_rd, 0 };
669
  if (plane == 0)
670
    xd->this_mi->mbmi.tx_size = tx_size;
671

672
673
674
  vp9_get_entropy_contexts(tx_size, args.t_above, args.t_left,
                           pd->above_context, pd->left_context,
                           num_4x4_w, num_4x4_h);
675
676
  switch (tx_size) {
    case TX_4X4:
677
      get_scan_nb_4x4(get_tx_type_4x4(pd->plane_type, xd, 0),
678
                      &args.scan, &args.nb);
679
680
      break;
    case TX_8X8:
681
      get_scan_nb_8x8(get_tx_type_8x8(pd->plane_type, xd),
682
                      &args.scan, &args.nb);
683
684
      break;
    case TX_16X16:
685
      get_scan_nb_16x16(get_tx_type_16x16(pd->plane_type, xd),
686
                        &args.scan, &args.nb);
687
688
689
690
691
692
693
694
      break;
    case TX_32X32:
      args.scan = vp9_default_scan_32x32;
      args.nb = vp9_default_scan_32x32_neighbors;
      break;
    default:
      assert(0);
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
695

696
  foreach_transformed_block_in_plane(xd, bsize, plane, block_yrd_txfm, &args);
697
698
699
700
701
702
703
704
705
706
707
  if (args.skip) {
    *rate       = INT_MAX;
    *distortion = INT64_MAX;
    *sse        = INT64_MAX;
    *skippable  = 0;
  } else {
    *distortion = args.this_dist;
    *rate       = args.this_rate;
    *sse        = args.this_sse;
    *skippable  = vp9_is_skippable_in_plane(xd, bsize, plane);
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
708
709
710
711
712
}

static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
                                     int *rate, int64_t *distortion,
                                     int *skip, int64_t *sse,
713
                                     int64_t ref_best_rd,
714
                                     BLOCK_SIZE bs) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
715
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
Deb Mukherjee's avatar
Deb Mukherjee committed
716
717
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
718
  MB_MODE_INFO *const mbmi = &xd->this_mi->mbmi;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
719
  if (max_tx_size == TX_32X32 &&
720
721
      (cm->tx_mode == ALLOW_32X32 ||
       cm->tx_mode == TX_MODE_SELECT)) {
722
    mbmi->tx_size = TX_32X32;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
723
  } else if (max_tx_size >= TX_16X16 &&
724
725
726
             (cm->tx_mode == ALLOW_16X16 ||
              cm->tx_mode == ALLOW_32X32 ||
              cm->tx_mode == TX_MODE_SELECT)) {
727
    mbmi->tx_size = TX_16X16;
728
  } else if (cm->tx_mode != ONLY_4X4) {
729
    mbmi->tx_size = TX_8X8;
Deb Mukherjee's avatar
Deb Mukherjee committed
730
  } else {
731
    mbmi->tx_size = TX_4X4;
Deb Mukherjee's avatar
Deb Mukherjee committed
732
  }
733
  txfm_rd_in_plane(x, rate, distortion, skip,
734
735
                   &sse[mbmi->tx_size], ref_best_rd, 0, bs,
                   mbmi->tx_size);
Dmitry Kovalev's avatar
Dmitry Kovalev committed
736
  cpi->tx_stepdown_count[0]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
737
738
}

739
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
740
                                     int (*r)[2], int *rate,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
741
                                     int64_t *d, int64_t *distortion,
742
                                     int *s, int *skip,
743
                                     int64_t tx_cache[TX_MODES],
744
                                     BLOCK_SIZE bs) {
745
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
746
747
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
748
  MB_MODE_INFO *const mbmi = &xd->this_mi->mbmi;
749
  vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
750
  int64_t rd[TX_SIZES][2];
751
  int n, m;
752
  int s0, s1;
753

754
  const vp9_prob *tx_probs = get_tx_probs2(xd, &cm->fc.tx_probs, xd->this_mi);
755

756
  for (n = TX_4X4; n <= max_tx_size; n++) {
757
    r[n][1] = r[n][0];
758
759
    if (r[n][0] == INT_MAX)
      continue;
760
    for (m = 0; m <= n - (n == max_tx_size); m++) {
761
      if (m == n)
762
        r[n][1] += vp9_cost_zero(tx_probs[m]);
763
      else
764
        r[n][1] += vp9_cost_one(tx_probs[m]);
765
766
    }
  }
767

768
769
770
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
771

772
  for (n = TX_4X4; n <= max_tx_size; n++) {
773
774
775
776
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
      continue;
    }
777
778
779
780
781
    if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
782
783
784
    }
  }

785
  if (max_tx_size == TX_32X32 &&
786
787
      (cm->tx_mode == ALLOW_32X32 ||
       (cm->tx_mode == TX_MODE_SELECT &&
788
789
        rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
        rd[TX_32X32][1] < rd[TX_4X4][1]))) {
790
    mbmi->tx_size = TX_32X32;
791
  } else if (max_tx_size >= TX_16X16 &&
792
793
794
             (cm->tx_mode == ALLOW_16X16 ||
              cm->tx_mode == ALLOW_32X32 ||
              (cm->tx_mode == TX_MODE_SELECT &&
795
796
               rd[TX_16X16][1] < rd[TX_8X8][1] &&
               rd[TX_16X16][1] < rd[TX_4X4][1]))) {
797
    mbmi->tx_size = TX_16X16;
798
799
800
801
  } else if (cm->tx_mode == ALLOW_8X8 ||
             cm->tx_mode == ALLOW_16X16 ||
             cm->tx_mode == ALLOW_32X32 ||
           (cm->tx_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) {
802
    mbmi->tx_size = TX_8X8;
803
  } else {
804
    mbmi->tx_size = TX_4X4;
805
806
  }

807
808
809
  *distortion = d[mbmi->tx_size];
  *rate       = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->tx_size];
810

811
812
813
814
815
  tx_cache[ONLY_4X4] = rd[TX_4X4][0];
  tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
  tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
  tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
  if (max_tx_size == TX_32X32 &&
816
817
      rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1])
818
819
    tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  else if (max_tx_size >= TX_16X16 &&
820
           rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])
821
    tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
822
  else
823
    tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
824
                                 rd[TX_4X4][1] : rd[TX_8X8][1];
825

826
  if (max_tx_size == TX_32X32 &&
Deb Mukherjee's avatar
Deb Mukherjee committed
827
828
829
      rd[TX_32X32][1] < rd[TX_16X16][1] &&
      rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1]) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
830
    cpi->tx_stepdown_count[0]++;
831
  } else if (max_tx_size >= TX_16X16 &&
Deb Mukherjee's avatar
Deb Mukherjee committed
832
833
             rd[TX_16X16][1] < rd[TX_8X8][1] &&
             rd[TX_16X16][1] < rd[TX_4X4][1]) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
834
    cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
835
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
836
    cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
837
  } else {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
838
    cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
839
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
840
}
841

Deb Mukherjee's avatar
Deb Mukherjee committed
842
843
844
845
static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
                                          int (*r)[2], int *rate,
                                          int64_t *d, int64_t *distortion,
                                          int *s, int *skip, int64_t *sse,
846
                                          int64_t ref_best_rd,
847
                                          BLOCK_SIZE bs) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
848
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
Deb Mukherjee's avatar
Deb Mukherjee committed
849
850
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
851
  MB_MODE_INFO *const mbmi = &xd->this_mi->mbmi;
852
  vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
853
  int64_t rd[TX_SIZES][2];
Deb Mukherjee's avatar
Deb Mukherjee committed
854
855
  int n, m;
  int s0, s1;
856
857
  double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00};
  // double scale_r[TX_SIZES] = {2.82, 2.00, 1.41, 1.00};
858

859
  const vp9_prob *tx_probs = get_tx_probs2(xd, &cm->fc.tx_probs,  xd->this_mi);
860

Deb Mukherjee's avatar
Deb Mukherjee committed
861
862
  // for (n = TX_4X4; n <= max_txfm_size; n++)
  //   r[n][0] = (r[n][0] * scale_r[n]);
863

Dmitry Kovalev's avatar
Dmitry Kovalev committed
864
  for (n = TX_4X4; n <= max_tx_size; n++) {
Deb Mukherjee's avatar
Deb Mukherjee committed
865
    r[n][1] = r[n][0];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
866
    for (m = 0; m <= n - (n == max_tx_size); m++) {
Deb Mukherjee's avatar
Deb Mukherjee committed
867
868
869
870
871
      if (m == n)
        r[n][1] += vp9_cost_zero(tx_probs[m]);
      else
        r[n][1] += vp9_cost_one(tx_probs[m]);
    }
872
  }
873

Deb Mukherjee's avatar
Deb Mukherjee committed
874
875
876
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
877

Dmitry Kovalev's avatar
Dmitry Kovalev committed
878
  for (n = TX_4X4; n <= max_tx_size; n++) {
Deb Mukherjee's avatar
Deb Mukherjee committed
879
880
881
882
883
884
885
    if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }
  }
Dmitry Kovalev's avatar
Dmitry Kovalev committed
886
  for (n = TX_4X4; n <= max_tx_size; n++) {
Yaowu Xu's avatar
Yaowu Xu committed
887
888
    rd[n][0] = (int64_t)(scale_rd[n] * rd[n][0]);
    rd[n][1] = (int64_t)(scale_rd[n] * rd[n][1]);
Deb Mukherjee's avatar
Deb Mukherjee committed
889
  }
890

Dmitry Kovalev's avatar
Dmitry Kovalev committed
891
  if (max_tx_size == TX_32X32 &&