vp9_rdopt.c 164 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
15

16
#include "vp9/common/vp9_pragmas.h"
17
18
19
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
20
21
22
23
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_quant_common.h"
24
25
26
27
28
29
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
30
#include "vpx_mem/vpx_mem.h"
31
32
33
34
35
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
36
#include "./vp9_rtcd.h"
37
#include "vp9/common/vp9_mvref_common.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
38
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
39

40
41
#define INVALID_MV 0x80008000

42
43
44
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

45
46
47
#define LAST_FRAME_MODE_MASK    0xFFEDCD60
#define GOLDEN_FRAME_MODE_MASK  0xFFDA3BB0
#define ALT_REF_MODE_MASK       0xFFC648D0
48

Paul Wilkins's avatar
Paul Wilkins committed
49
50
#define MIN_EARLY_TERM_INDEX    3

51
52
53
54
55
56
57
58
59
// Pairs a prediction mode with two reference-frame slots; this is the
// element type of vp9_mode_order[].
typedef struct {
  MB_PREDICTION_MODE mode;
  // Second slot is NONE for the single-reference entries of vp9_mode_order[].
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

// A pair of reference-frame slots without a prediction mode; this is the
// element type of vp9_ref_order[].
typedef struct {
  // Second slot is NONE for the single-reference entries of vp9_ref_order[].
  MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;

60
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
  {NEARESTMV, {LAST_FRAME,   NONE}},
  {NEARESTMV, {ALTREF_FRAME, NONE}},
  {NEARESTMV, {GOLDEN_FRAME, NONE}},

  {DC_PRED,   {INTRA_FRAME,  NONE}},

  {NEWMV,     {LAST_FRAME,   NONE}},
  {NEWMV,     {ALTREF_FRAME, NONE}},
  {NEWMV,     {GOLDEN_FRAME, NONE}},

  {NEARMV,    {LAST_FRAME,   NONE}},
  {NEARMV,    {ALTREF_FRAME, NONE}},
  {NEARESTMV, {LAST_FRAME,   ALTREF_FRAME}},
  {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},

  {TM_PRED,   {INTRA_FRAME,  NONE}},

  {NEARMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {NEWMV,     {LAST_FRAME,   ALTREF_FRAME}},
  {NEARMV,    {GOLDEN_FRAME, NONE}},
  {NEARMV,    {GOLDEN_FRAME, ALTREF_FRAME}},
  {NEWMV,     {GOLDEN_FRAME, ALTREF_FRAME}},

  {ZEROMV,    {LAST_FRAME,   NONE}},
  {ZEROMV,    {GOLDEN_FRAME, NONE}},
  {ZEROMV,    {ALTREF_FRAME, NONE}},
  {ZEROMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {ZEROMV,    {GOLDEN_FRAME, ALTREF_FRAME}},

  {H_PRED,    {INTRA_FRAME,  NONE}},
  {V_PRED,    {INTRA_FRAME,  NONE}},
  {D135_PRED, {INTRA_FRAME,  NONE}},
  {D207_PRED, {INTRA_FRAME,  NONE}},
  {D153_PRED, {INTRA_FRAME,  NONE}},
  {D63_PRED,  {INTRA_FRAME,  NONE}},
  {D117_PRED, {INTRA_FRAME,  NONE}},
  {D45_PRED,  {INTRA_FRAME,  NONE}},
98
99
100
};

const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
101
102
103
104
105
106
  {{LAST_FRAME,   NONE}},
  {{GOLDEN_FRAME, NONE}},
  {{ALTREF_FRAME, NONE}},
  {{LAST_FRAME,   ALTREF_FRAME}},
  {{GOLDEN_FRAME, ALTREF_FRAME}},
  {{INTRA_FRAME,  NONE}},
John Koleszar's avatar
John Koleszar committed
107
108
};

109
110
111
112
// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for blocks size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
113
static int rd_thresh_block_size_factor[BLOCK_SIZES] =
114
115
  {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};

Deb Mukherjee's avatar
Deb Mukherjee committed
116
117
118
#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC      1
#define RD_THRESH_POW      1.25
119
#define RD_MULT_EPB_RATIO  64
Deb Mukherjee's avatar
Deb Mukherjee committed
120

121
122
#define MV_COST_WEIGHT      108
#define MV_COST_WEIGHT_SUB  120
123

124
125
126
127
128
129
130
131
132
// Converts a raster-order block index inside a plane block into a sample
// offset for a buffer with the given stride. Each block step is 4 samples
// in either direction.
// NOTE(review): presumably b_width_log2() is log2 of the width in 4x4
// units -- confirm against its definition.
static int raster_block_offset(BLOCK_SIZE plane_bsize,
                               int raster_block, int stride) {
  const int width_log2 = b_width_log2(plane_bsize);
  const int col_mask = (1 << width_log2) - 1;
  const int row_offset = (raster_block >> width_log2) * 4;
  const int col_offset = (raster_block & col_mask) * 4;

  return row_offset * stride + col_offset;
}
// Returns a pointer into a densely packed int16_t buffer (stride equal to
// 4 * num_4x4_blocks_wide_lookup[plane_bsize]) at the position of the given
// raster-order block.
static int16_t *raster_block_offset_int16(BLOCK_SIZE plane_bsize,
                                          int raster_block, int16_t *base) {
  const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  return base + raster_block_offset(plane_bsize, raster_block, stride);
}

137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
// Populates the mode cost tables in c->mb (key-frame y modes, y modes,
// intra uv modes and switchable interpolation filters) by converting the
// corresponding probability tables with vp9_cost_tokens().
static void fill_mode_costs(VP9_COMP *c) {
  VP9_COMMON *const cm = &c->common;
  int above, left, ctx;

  // Key-frame luma mode costs, one table per pair of context modes.
  for (above = 0; above < INTRA_MODES; above++) {
    for (left = 0; left < INTRA_MODES; left++) {
      vp9_cost_tokens((int *)c->mb.y_mode_costs[above][left],
                      vp9_kf_y_mode_prob[above][left],
                      vp9_intra_mode_tree);
    }
  }

  // TODO(rbultje) separate tables for superblock costing?
  vp9_cost_tokens(c->mb.mbmode_cost, cm->fc.y_mode_prob[1],
                  vp9_intra_mode_tree);

  // Index 1 is filled from the frame context, index 0 from the key-frame
  // defaults.
  vp9_cost_tokens(c->mb.intra_uv_mode_cost[1],
                  cm->fc.uv_mode_prob[INTRA_MODES - 1],
                  vp9_intra_mode_tree);
  vp9_cost_tokens(c->mb.intra_uv_mode_cost[0],
                  vp9_kf_uv_mode_prob[INTRA_MODES - 1],
                  vp9_intra_mode_tree);

  for (ctx = 0; ctx < SWITCHABLE_FILTER_CONTEXTS; ++ctx) {
    vp9_cost_tokens((int *)c->mb.switchable_interp_costs[ctx],
                    cm->fc.switchable_interp_prob[ctx],
                    vp9_switchable_interp_tree);
  }
}

161
static void fill_token_costs(vp9_coeff_cost *c,
162
                             vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
163
  int i, j, k, l;
164
  TX_SIZE t;
165
  for (t = TX_4X4; t <= TX_32X32; ++t)
166
    for (i = 0; i < PLANE_TYPES; ++i)
167
168
169
      for (j = 0; j < REF_TYPES; ++j)
        for (k = 0; k < COEF_BANDS; ++k)
          for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
170
171
            vp9_prob probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
172
            vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
173
                            vp9_coef_tree);
174
            vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
175
                                 vp9_coef_tree);
176
177
            assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
                   c[t][i][j][k][1][l][EOB_TOKEN]);
178
          }
179
180
}

181
182
183
184
185
186
// Boost applied to rdmult in vp9_compute_rd_mult(), in units of 1/16
// (the product is shifted right by 4). Indexed by twopass.next_iiratio,
// with values above 31 mapped to the last entry; only indices 0-4 are
// non-zero.
static const int rd_iifactor[32] = {
  4, 4, 3, 2, 1, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
};
John Koleszar's avatar
John Koleszar committed
187

188
// 3* dc_qlookup[Q]*dc_qlookup[Q];
189

190
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
191
192
193
// Per-qindex SAD-per-bit tables. Filled by vp9_init_me_luts() and read by
// vp9_initialize_me_consts().
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

194
void vp9_init_me_luts() {
John Koleszar's avatar
John Koleszar committed
195
196
197
198
199
200
201
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    sad_per_bit16lut[i] =
202
      (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107);
203
    sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742);
John Koleszar's avatar
John Koleszar committed
204
  }
Paul Wilkins's avatar
Paul Wilkins committed
205
}
John Koleszar's avatar
John Koleszar committed
206

207
// Computes the rate-distortion multiplier for the given qindex as
// 88 * q^2 / 25, where q = vp9_dc_quant(qindex, 0). In the second pass of
// two-pass encoding, on non-key frames, the result is boosted by
// rd_iifactor[] / 16 according to twopass.next_iiratio.
int vp9_compute_rd_mult(VP9_COMP *cpi, int qindex) {
  const int q = vp9_dc_quant(qindex, 0);
  // TODO(debargha): Adjust the function below
  int rdmult = 88 * q * q / 25;
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    // Ratios beyond the end of the table use its last entry.
    if (cpi->twopass.next_iiratio > 31)
      rdmult += (rdmult * rd_iifactor[31]) >> 4;
    else
      rdmult += (rdmult * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
  }
  return rdmult;
}

// Returns the rd threshold scale factor for a qindex:
// (dc_quant / 4) ^ RD_THRESH_POW * 5.12, truncated to int and floored at 8.
static int compute_rd_thresh_factor(int qindex) {
  int q;
  // TODO(debargha): Adjust the function below
  q = (int)(pow(vp9_dc_quant(qindex, 0) / 4.0, RD_THRESH_POW) * 5.12);
  if (q < 8)
    q = 8;
  return q;
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
229
230
231
// Copies the SAD-per-bit values for `qindex` from the lookup tables into
// the macroblock state. `qindex` is used as a direct table index, so it
// must be in [0, QINDEX_RANGE).
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
  cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
}

234
235
236
237
238
239
240
241
242
// Fills cpi->rd_threshes and cpi->rd_thresh_sub8x8 for every segment and
// block size. Each threshold is the speed-feature multiplier scaled by the
// segment's q factor and the block-size factor (which is << 2, hence the
// division by 4), saturating at INT_MAX.
static void set_block_thresholds(VP9_COMP *cpi) {
  int i, bsize, segment_id;
  VP9_COMMON *cm = &cpi->common;

  for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
    int q;
    int segment_qindex = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
    segment_qindex = clamp(segment_qindex + cm->y_dc_delta_q, 0, MAXQ);
    q = compute_rd_thresh_factor(segment_qindex);

    for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
      // Thresholds here seem unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[]
      // Multipliers at or above thresh_max would overflow int when scaled,
      // so they saturate to INT_MAX instead.
      int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);

      for (i = 0; i < MAX_MODES; ++i) {
        if (cpi->sf.thresh_mult[i] < thresh_max) {
          cpi->rd_threshes[segment_id][bsize][i] =
              cpi->sf.thresh_mult[i] * q *
              rd_thresh_block_size_factor[bsize] / 4;
        } else {
          cpi->rd_threshes[segment_id][bsize][i] = INT_MAX;
        }
      }

      for (i = 0; i < MAX_REFS; ++i) {
        if (cpi->sf.thresh_mult_sub8x8[i] < thresh_max) {
          cpi->rd_thresh_sub8x8[segment_id][bsize][i] =
              cpi->sf.thresh_mult_sub8x8[i] * q *
              rd_thresh_block_size_factor[bsize] / 4;
        } else {
          cpi->rd_thresh_sub8x8[segment_id][bsize][i] = INT_MAX;
        }
      }
    }
  }
}

272
273
274
// Sets up the per-frame rate-distortion constants: RDDIV/RDMULT, the
// error-per-bit value, speed features, block thresholds, and the token,
// partition, mode and (for non-intra-only frames) motion vector and inter
// mode cost tables.
void vp9_initialize_rd_consts(VP9_COMP *cpi) {
  VP9_COMMON *cm = &cpi->common;
  int qindex, i;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  qindex = clamp(cm->base_qindex + cm->y_dc_delta_q, 0, MAXQ);

  cpi->RDDIV = RDDIV_BITS;  // in bits (to multiply D by 128)
  cpi->RDMULT = vp9_compute_rd_mult(cpi, qindex);

  cpi->mb.errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO;
  // Keep errorperbit at least 1.
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);

  vp9_set_speed_features(cpi);

  // Transform size selection is disabled only when the largest transform is
  // always used and this is not a key frame.
  cpi->mb.select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
                              cm->frame_type != KEY_FRAME) ?
                              0 : 1;

  set_block_thresholds(cpi);

  fill_token_costs(cpi->mb.token_costs, cm->fc.coef_probs);

  for (i = 0; i < PARTITION_CONTEXTS; i++)
    vp9_cost_tokens(cpi->mb.partition_cost[i], get_partition_probs(cm, i),
                    vp9_partition_tree);

  /*rough estimate for costing*/
  fill_mode_costs(cpi);

  if (!frame_is_intra_only(cm)) {
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cm->allow_high_precision_mv ? cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cm->fc.nmvc,
        cm->allow_high_precision_mv, 1, 1);

    for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
      vp9_cost_tokens((int *)cpi->mb.inter_mode_cost[i],
                      cm->fc.inter_mode_probs[i], vp9_inter_mode_tree);
  }
}

320
// Upper clamp applied to xsq_q10 in model_rd_from_var_lapndz() before the
// table lookup in model_rd_norm(). It is one less than the last entry of
// the xsq_iq_q10[] table (245728) in model_rd_norm().
static const int MAX_XSQ_Q10 = 245727;
Deb Mukherjee's avatar
Deb Mukherjee committed
321

322
// Looks up the normalized rate (*r_q10) and normalized distortion (*d_q10)
// for a given xsq_q10 by linear interpolation between adjacent table
// entries. All values carry 10 fractional bits (one_q10 == 1 << 10).
// The lookup reads entries xq and xq + 1, so xsq_q10 must not exceed
// MAX_XSQ_Q10; the caller in this file clamps it.
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
  // NOTE: The tables below must be of the same size

  // The functions described below are sampled at the four most significant
  // bits of x^2 + 8 / 256

  // Normalized rate
  // This table models the rate for a Laplacian source
  // with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const int rate_tab_q10[] = {
    65536,  6086,  5574,  5275,  5063,  4899,  4764,  4651,
     4553,  4389,  4255,  4142,  4044,  3958,  3881,  3811,
     3748,  3635,  3538,  3453,  3376,  3307,  3244,  3186,
     3133,  3037,  2952,  2877,  2809,  2747,  2690,  2638,
     2589,  2501,  2423,  2353,  2290,  2232,  2179,  2130,
     2084,  2001,  1928,  1862,  1802,  1748,  1698,  1651,
     1608,  1530,  1460,  1398,  1342,  1290,  1243,  1199,
     1159,  1086,  1021,   963,   911,   864,   821,   781,
      745,   680,   623,   574,   530,   490,   455,   424,
      395,   345,   304,   269,   239,   213,   190,   171,
      154,   126,   104,    87,    73,    61,    52,    44,
       38,    28,    21,    16,    12,    10,     8,     6,
        5,     3,     2,     1,     1,     1,     0,     0,
  };
  // Normalized distortion
  // This table models the normalized distortion for a Laplacian source
  // with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance)
  // Note the actual distortion is Dn * variance.
  static const int dist_tab_q10[] = {
       0,     0,     1,     1,     1,     2,     2,     2,
       3,     3,     4,     5,     5,     6,     7,     7,
       8,     9,    11,    12,    13,    15,    16,    17,
      18,    21,    24,    26,    29,    31,    34,    36,
      39,    44,    49,    54,    59,    64,    69,    73,
      78,    88,    97,   106,   115,   124,   133,   142,
     151,   167,   184,   200,   215,   231,   245,   260,
     274,   301,   327,   351,   375,   397,   418,   439,
     458,   495,   528,   559,   587,   613,   637,   659,
     680,   717,   749,   777,   801,   823,   842,   859,
     874,   899,   919,   936,   949,   960,   969,   977,
     983,   994,  1001,  1006,  1010,  1013,  1015,  1017,
    1018,  1020,  1022,  1022,  1023,  1023,  1023,  1024,
  };
  // Sample points (xsq in Q10) at which the two tables above are defined.
  static const int xsq_iq_q10[] = {
         0,      4,      8,     12,     16,     20,     24,     28,
        32,     40,     48,     56,     64,     72,     80,     88,
        96,    112,    128,    144,    160,    176,    192,    208,
       224,    256,    288,    320,    352,    384,    416,    448,
       480,    544,    608,    672,    736,    800,    864,    928,
       992,   1120,   1248,   1376,   1504,   1632,   1760,   1888,
      2016,   2272,   2528,   2784,   3040,   3296,   3552,   3808,
      4064,   4576,   5088,   5600,   6112,   6624,   7136,   7648,
      8160,   9184,  10208,  11232,  12256,  13280,  14304,  15328,
     16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,
     32736,  36832,  40928,  45024,  49120,  53216,  57312,  61408,
     65504,  73696,  81888,  90080,  98272, 106464, 114656, 122848,
    131040, 147424, 163808, 180192, 196576, 212960, 229344, 245728,
  };
  /*
  static const int tab_size = sizeof(rate_tab_q10) / sizeof(rate_tab_q10[0]);
  assert(sizeof(dist_tab_q10) / sizeof(dist_tab_q10[0]) == tab_size);
  assert(sizeof(xsq_iq_q10) / sizeof(xsq_iq_q10[0]) == tab_size);
  assert(MAX_XSQ_Q10 + 1 == xsq_iq_q10[tab_size - 1]);
  */
  // k selects the group of 8 table entries (from the position of the top
  // set bit); the next three bits below it select the entry in the group.
  int tmp = (xsq_q10 >> 2) + 8;
  int k = get_msb(tmp) - 3;
  int xq = (k << 3) + ((tmp >> k) & 0x7);
  const int one_q10 = 1 << 10;
  // Interpolation weights: a_q10 towards entry xq + 1, b_q10 towards xq.
  const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
  const int b_q10 = one_q10 - a_q10;
  *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
  *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
}

403
404
405
// Estimates rate and distortion for `n` samples with total variance `var`
// quantized with step `qstep`. A zero variance yields zero rate and zero
// distortion.
static void model_rd_from_var_lapndz(unsigned int var, unsigned int n,
                                     unsigned int qstep, int *rate,
                                     int64_t *dist) {
  // This function models the rate and distortion for a Laplacian
  // source with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expressions are in:
  // Hang and Chen, "Source Model for transform video coder and its
  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
  // Sys. for Video Tech., April 1997.
  if (var == 0) {
    *rate = 0;
    *dist = 0;
  } else {
    int d_q10, r_q10;
    // xsq = qstep^2 * n / var in Q10, rounded to nearest; computed in
    // 64 bits and then clamped to the range covered by the lookup tables.
    uint64_t xsq_q10_64 =
        ((((uint64_t)qstep * qstep * n) << 10) + (var >> 1)) / var;
    int xsq_q10 = xsq_q10_64 > MAX_XSQ_Q10 ? MAX_XSQ_Q10 : xsq_q10_64;
    model_rd_norm(xsq_q10, &r_q10, &d_q10);
    *rate = (n * r_q10 + 2) >> 2;
    *dist = (var * (int64_t)d_q10 + 512) >> 10;
  }
}

426
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
Deb Mukherjee's avatar
Deb Mukherjee committed
427
428
429
430
431
432
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  int i, rate_sum = 0, dist_sum = 0;
433
434
  int ref = xd->mi_8x8[0]->mbmi.ref_frame[0];
  unsigned int sse;
Deb Mukherjee's avatar
Deb Mukherjee committed
435
436
437
438

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
439
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
Deb Mukherjee's avatar
Deb Mukherjee committed
440
441
442
    int rate;
    int64_t dist;
    (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
443
444
445
                              pd->dst.buf, pd->dst.stride, &sse);
    if (i == 0)
      x->pred_sse[ref] = sse;
Deb Mukherjee's avatar
Deb Mukherjee committed
446
    // sse works better than var, since there is no dc prediction used
447
    model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
448
                             pd->dequant[1] >> 3, &rate, &dist);
Deb Mukherjee's avatar
Deb Mukherjee committed
449
450

    rate_sum += rate;
Yaowu Xu's avatar
Yaowu Xu committed
451
    dist_sum += (int)dist;
Deb Mukherjee's avatar
Deb Mukherjee committed
452
453
454
455
456
457
  }

  *out_rate_sum = rate_sum;
  *out_dist_sum = dist_sum << 4;
}

458
static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize,
Deb Mukherjee's avatar
Deb Mukherjee committed
459
460
461
462
                                 TX_SIZE tx_size,
                                 MACROBLOCK *x, MACROBLOCKD *xd,
                                 int *out_rate_sum, int64_t *out_dist_sum,
                                 int *out_skip) {
463
  int j, k;
464
  BLOCK_SIZE bs;
Deb Mukherjee's avatar
Deb Mukherjee committed
465
466
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
467
468
  const int width = 4 << num_4x4_blocks_wide_lookup[bsize];
  const int height = 4 << num_4x4_blocks_high_lookup[bsize];
Deb Mukherjee's avatar
Deb Mukherjee committed
469
470
  int rate_sum = 0;
  int64_t dist_sum = 0;
471
  const int t = 4 << tx_size;
Deb Mukherjee's avatar
Deb Mukherjee committed
472
473
474
475
476
477
478
479
480
481
482
483

  if (tx_size == TX_4X4) {
    bs = BLOCK_4X4;
  } else if (tx_size == TX_8X8) {
    bs = BLOCK_8X8;
  } else if (tx_size == TX_16X16) {
    bs = BLOCK_16X16;
  } else if (tx_size == TX_32X32) {
    bs = BLOCK_32X32;
  } else {
    assert(0);
  }
484

Deb Mukherjee's avatar
Deb Mukherjee committed
485
  *out_skip = 1;
Jim Bankoski's avatar
Jim Bankoski committed
486
487
  for (j = 0; j < height; j += t) {
    for (k = 0; k < width; k += t) {
Deb Mukherjee's avatar
Deb Mukherjee committed
488
489
490
      int rate;
      int64_t dist;
      unsigned int sse;
491
492
493
      cpi->fn_ptr[bs].vf(&p->src.buf[j * p->src.stride + k], p->src.stride,
                         &pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride,
                         &sse);
Deb Mukherjee's avatar
Deb Mukherjee committed
494
      // sse works better than var, since there is no dc prediction used
495
      model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3, &rate, &dist);
Deb Mukherjee's avatar
Deb Mukherjee committed
496
497
498
499
500
      rate_sum += rate;
      dist_sum += dist;
      *out_skip &= (rate < 1024);
    }
  }
501

Deb Mukherjee's avatar
Deb Mukherjee committed
502
  *out_rate_sum = rate_sum;
503
  *out_dist_sum = dist_sum << 4;
Deb Mukherjee's avatar
Deb Mukherjee committed
504
505
}

Ronald S. Bultje's avatar
Ronald S. Bultje committed
506
// Returns the sum of squared differences between `coeff` and `dqcoeff`
// over `block_size` entries, and stores the sum of squared `coeff` values
// in *ssz.
int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  intptr_t i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int this_diff = coeff[i] - dqcoeff[i];
    // Widen before squaring: a difference of two int16_t values can reach
    // +/-65535, whose square does not fit in a signed 32-bit int.
    error += (int64_t)this_diff * this_diff;
    sqcoeff += (int64_t)coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}

521
522
523
524
525
/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). */
526
static const int16_t band_counts[TX_SIZES][8] = {
527
528
529
530
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
531
532
};

533
static INLINE int cost_coeffs(MACROBLOCK *x,
534
                              int plane, int block,
535
                              ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
536
                              TX_SIZE tx_size,
537
                              const int16_t *scan, const int16_t *nb) {
538
  MACROBLOCKD *const xd = &x->e_mbd;
539
  MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
540
  struct macroblock_plane *p = &x->plane[plane];
541
542
  struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
543
  const int16_t *band_count = &band_counts[tx_size][1];
544
  const int eob = p->eobs[block];
545
  const int16_t *const qcoeff_ptr = BLOCK_OFFSET(p->qcoeff, block);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
546
  const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
547
  unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
548
                   x->token_costs[tx_size][type][ref];
549
  const ENTROPY_CONTEXT above_ec = !!*A, left_ec = !!*L;
550
  uint8_t *p_tok = x->token_cache;
551
552
  int pt = combine_entropy_contexts(above_ec, left_ec);
  int c, cost;
553
554

  // Check for consistency of tx_size with mode info
555
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
556
                                      : get_uv_tx_size(mbmi) == tx_size);
557

558
559
  if (eob == 0) {
    // single eob token
560
    cost = token_costs[0][0][pt][EOB_TOKEN];
561
    c = 0;
562
  } else {
563
    int band_left = *band_count++;
564
565

    // dc token
566
567
    int v = qcoeff_ptr[0];
    int prev_t = vp9_dct_value_tokens_ptr[v].token;
568
    cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
569
    p_tok[0] = vp9_pt_energy_class[prev_t];
570
    ++token_costs;
571
572
573
574

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
575
      int t;
576
577
578

      v = qcoeff_ptr[rc];
      t = vp9_dct_value_tokens_ptr[v].token;
579
      pt = get_coef_context(nb, p_tok, c);
580
      cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
581
      p_tok[rc] = vp9_pt_energy_class[t];
582
      prev_t = t;
583
      if (!--band_left) {
584
585
        band_left = *band_count++;
        ++token_costs;
586
      }
587
    }
588
589

    // eob token
590
    if (band_left) {
591
      pt = get_coef_context(nb, p_tok, c);
592
      cost += (*token_costs)[0][pt][EOB_TOKEN];
593
    }
594
595
  }

596
  // is eob first coefficient;
597
  *A = *L = (c > 0);
598

599
600
601
  return cost;
}

602
// Computes the distortion (args->dist) and the zero-coefficient SSE
// (args->sse) of one transform block from its transform coefficients and
// dequantized coefficients. Both are shifted right by 2 except for 32x32
// transforms. When x->skip_encode is set on an intra block, a
// quantizer-dependent term is added to both values.
static void dist_block(int plane, int block, TX_SIZE tx_size, void *arg) {
  const int ss_txfrm_size = tx_size << 1;
  struct rdcost_block_args* args = arg;
  MACROBLOCK* const x = args->x;
  MACROBLOCKD* const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  int64_t this_sse;
  int shift = args->tx_size == TX_32X32 ? 0 : 2;
  int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  // 16 << ss_txfrm_size is the number of coefficients in the transform.
  args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                               &this_sse) >> shift;
  args->sse  = this_sse >> shift;

  if (x->skip_encode && !is_inter_block(&xd->mi_8x8[0]->mbmi)) {
    // TODO(jingning): tune the model to better capture the distortion.
    // Widened to 64 bits before multiplying; named so that it does not
    // shadow the plane pointer `p` above.
    const int64_t quant_term =
        ((int64_t)pd->dequant[1] * pd->dequant[1] *
         (1 << ss_txfrm_size)) >> (shift + 2);
    args->dist += (quant_term >> 4);
    args->sse  += quant_term;
  }
}

626
static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
627
                       TX_SIZE tx_size, void *arg) {
Deb Mukherjee's avatar
Deb Mukherjee committed
628
  struct rdcost_block_args* args = arg;
629

Deb Mukherjee's avatar
Deb Mukherjee committed
630
  int x_idx, y_idx;
631
  txfrm_block_to_raster_xy(plane_bsize, args->tx_size, block, &x_idx, &y_idx);
Deb Mukherjee's avatar
Deb Mukherjee committed
632

633
634
635
  args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
                           args->t_left + y_idx, args->tx_size,
                           args->scan, args->nb);
Deb Mukherjee's avatar
Deb Mukherjee committed
636
637
}

638
639
// Per-transform-block callback: encodes (intra) or transforms and quantizes
// (inter) the block, computes its rate and distortion, and accumulates them
// into the running totals in `arg`. Once the accumulated rd cost exceeds
// args->best_rd, args->skip is set and later calls return immediately.
static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
                          TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct encode_b_args encode_args = {x, NULL};
  int64_t rd1, rd2, rd;

  if (args->skip)
    return;

  if (!is_inter_block(&xd->mi_8x8[0]->mbmi))
    vp9_encode_block_intra(plane, block, plane_bsize, tx_size, &encode_args);
  else
    vp9_xform_quant(plane, block, plane_bsize, tx_size, &encode_args);

  dist_block(plane, block, tx_size, args);
  rate_block(plane, block, plane_bsize, tx_size, args);
  // rd1: cost of coding the coefficients; rd2: cost of coding none
  // (zero rate, distortion equal to the block's sse).
  rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);

  // TODO(jingning): temporarily enabled only for luma component
  rd = MIN(rd1, rd2);
  if (plane == 0)
    x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
                                    (rd1 > rd2 && !xd->lossless);

  args->this_rate += args->rate;
  args->this_dist += args->dist;
  args->this_sse  += args->sse;
  args->this_rd += rd;

  // Early termination: this candidate can no longer beat the best one.
  if (args->this_rd > args->best_rd)
    args->skip = 1;
}

676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
// Returns 1 if any of the `count` consecutive entropy contexts starting at
// `ctx` is non-zero, 0 otherwise.
static int any_nonzero_context(const ENTROPY_CONTEXT *ctx, int count) {
  int i;
  for (i = 0; i < count; ++i) {
    if (ctx[i])
      return 1;
  }
  return 0;
}

// Derives the per-transform-block entropy contexts t_above/t_left from the
// per-4x4 contexts `above`/`left`. For TX_4X4 the contexts are copied as
// is. For larger transforms, the entry at the first 4x4 position of each
// transform block is set to 1 if any of the 4x4 contexts it covers is
// non-zero, else 0; the other entries are left untouched.
// The contexts are examined byte by byte rather than through wider integer
// pointer casts, which would violate C's aliasing and alignment rules.
void vp9_get_entropy_contexts(TX_SIZE tx_size,
    ENTROPY_CONTEXT t_above[16], ENTROPY_CONTEXT t_left[16],
    const ENTROPY_CONTEXT *above, const ENTROPY_CONTEXT *left,
    int num_4x4_w, int num_4x4_h) {
  int i;
  switch (tx_size) {
    case TX_4X4:
      vpx_memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      vpx_memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      break;
    case TX_8X8:
      for (i = 0; i < num_4x4_w; i += 2)
        t_above[i] = any_nonzero_context(&above[i], 2);
      for (i = 0; i < num_4x4_h; i += 2)
        t_left[i] = any_nonzero_context(&left[i], 2);
      break;
    case TX_16X16:
      for (i = 0; i < num_4x4_w; i += 4)
        t_above[i] = any_nonzero_context(&above[i], 4);
      for (i = 0; i < num_4x4_h; i += 4)
        t_left[i] = any_nonzero_context(&left[i], 4);
      break;
    case TX_32X32:
      for (i = 0; i < num_4x4_w; i += 8)
        t_above[i] = any_nonzero_context(&above[i], 8);
      for (i = 0; i < num_4x4_h; i += 8)
        t_left[i] = any_nonzero_context(&left[i], 8);
      break;
    default:
      assert(0 && "Invalid transform size.");
  }
}

709
710
711
712
713
714
715
716
717
718
719
720
// Clears *arg and then fills in the fields that describe the block about to
// be evaluated: the owning macroblock, the transform size, the block
// dimensions in 4x4 units and the RD cost recorded as best_rd.
static void init_rdcost_stack(MACROBLOCK *x, TX_SIZE tx_size,
                              const int num_4x4_w, const int num_4x4_h,
                              const int64_t ref_rdcost,
                              struct rdcost_block_args *arg) {
  // Every field not assigned below starts out as zero.
  vpx_memset(arg, 0, sizeof(*arg));

  arg->best_rd = ref_rdcost;
  arg->bh = num_4x4_h;
  arg->bw = num_4x4_w;
  arg->tx_size = tx_size;
  arg->x = x;
}

721
static void txfm_rd_in_plane(MACROBLOCK *x,
722
                             struct rdcost_block_args *rd_stack,
723
724
725
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
726
                             BLOCK_SIZE bsize, TX_SIZE tx_size) {
Deb Mukherjee's avatar
Deb Mukherjee committed
727
  MACROBLOCKD *const xd = &x->e_mbd;
728
  struct macroblockd_plane *const pd = &xd->plane[plane];
729
  const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
730
731
  const int num_4x4_w = num_4x4_blocks_wide_lookup[bs];
  const int num_4x4_h = num_4x4_blocks_high_lookup[bs];
732
  const scan_order *so;
733

734
735
  init_rdcost_stack(x, tx_size, num_4x4_w, num_4x4_h,
                    ref_best_rd, rd_stack);
736
  if (plane == 0)
737
    xd->mi_8x8[0]->mbmi.tx_size = tx_size;
738

739
  vp9_get_entropy_contexts(tx_size, rd_stack->t_above, rd_stack->t_left,
740
741
                           pd->above_context, pd->left_context,
                           num_4x4_w, num_4x4_h);
742

743
744
745
  so = get_scan(xd, tx_size, pd->plane_type, 0);
  rd_stack->scan = so->scan;
  rd_stack->nb = so->neighbors;
Deb Mukherjee's avatar
Deb Mukherjee committed
746

747
  foreach_transformed_block_in_plane(xd, bsize, plane,
748
                                     block_rd_txfm, rd_stack);
749
  if (rd_stack->skip) {
750
751
752
753
754
    *rate       = INT_MAX;
    *distortion = INT64_MAX;
    *sse        = INT64_MAX;
    *skippable  = 0;
  } else {
755
756
757
    *distortion = rd_stack->this_dist;
    *rate       = rd_stack->this_rate;
    *sse        = rd_stack->this_sse;
758
    *skippable  = vp9_is_skippable_in_plane(x, bsize, plane);
759
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
760
761
762
763
764
}

static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
                                     int *rate, int64_t *distortion,
                                     int *skip, int64_t *sse,
765
                                     int64_t ref_best_rd,
766
                                     BLOCK_SIZE bs) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
767
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
Deb Mukherjee's avatar
Deb Mukherjee committed
768
  VP9_COMMON *const cm = &cpi->common;
Yaowu Xu's avatar
Yaowu Xu committed
769
  const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
Deb Mukherjee's avatar
Deb Mukherjee committed
770
  MACROBLOCKD *const xd = &x->e_mbd;
771
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
Yaowu Xu's avatar
Yaowu Xu committed
772
773
774

  mbmi->tx_size = MIN(max_tx_size, largest_tx_size);

775
  txfm_rd_in_plane(x, &cpi->rdcost_stack, rate, distortion, skip,
776
777
                   &sse[mbmi->tx_size], ref_best_rd, 0, bs,
                   mbmi->tx_size);
Dmitry Kovalev's avatar
Dmitry Kovalev committed
778
  cpi->tx_stepdown_count[0]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
779
780
}

781
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
782
                                     int (*r)[2], int *rate,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
783
                                     int64_t *d, int64_t *distortion,
784
                                     int *s, int *skip,
785
                                     int64_t tx_cache[TX_MODES],
786
                                     BLOCK_SIZE bs) {
787
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
788
789
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
790
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
791
  vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
792
  int64_t rd[TX_SIZES][2];
793
  int n, m;
794
  int s0, s1;
795
796
797
  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  int64_t best_rd = INT64_MAX;
  TX_SIZE best_tx = TX_4X4;
798

799
  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
800
801
802
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
803

804
  for (n = TX_4X4; n <= max_tx_size; n++) {
805
806
807
808
809
810
811
812
813
    r[n][1] = r[n][0];
    if (r[n][0] < INT_MAX) {
      for (m = 0; m <= n - (n == max_tx_size); m++) {
        if (m == n)
          r[n][1] += vp9_cost_zero(tx_probs[m]);
        else
          r[n][1] += vp9_cost_one(tx_probs[m]);
      }
    }
814
815
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
816
    } else if (s[n]) {
817
818
819
820
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
821
    }
822
823
824
825
826

    if (rd[n][1] < best_rd) {
      best_tx = n;
      best_rd = rd[n][1];
    }
827
  }
828
829
  mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
                      best_tx : MIN(max_tx_size, max_mode_tx_size);
830
831


832
833
834
  *distortion = d[mbmi->tx_size];
  *rate       = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->tx_size];
835

836
837
838
839
  tx_cache[ONLY_4X4] = rd[TX_4X4][0];
  tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
  tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
  tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
840

841
842
  if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
    tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
843
    cpi->tx_stepdown_count[0]++;
844
845
  } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
    tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
846
    cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
847
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
848
    tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
849
    cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
850
  } else {
851
    tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
852
    cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
853
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
854
}
855

Deb Mukherjee's avatar
Deb Mukherjee committed
856
857
858
859
static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
                                          int (*r)[2], int *rate,
                                          int64_t *d, int64_t *distortion,
                                          int *s, int *skip, int64_t *sse,
860
                                          int64_t ref_best_rd,
861
                                          BLOCK_SIZE bs) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
862
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
Deb Mukherjee's avatar
Deb Mukherjee committed
863
864
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
865
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
866
  vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
867
  int64_t rd[TX_SIZES][2];
Deb Mukherjee's avatar
Deb Mukherjee committed
868
869
  int n, m;
  int s0, s1;
870
  double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00};
871
872
873
  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  int64_t best_rd = INT64_MAX;
  TX_SIZE best_tx = TX_4X4;
874

875
  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
876
877
878
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
879

Dmitry Kovalev's avatar
Dmitry Kovalev committed
880
  for (n = TX_4X4; n <= max_tx_size; n++) {
881
    double scale = scale_rd[n];
Deb Mukherjee's avatar
Deb Mukherjee committed
882
    r[n][1] = r[n][0];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
883
    for (m = 0; m <= n - (n == max_tx_size); m++) {
Deb Mukherjee's avatar
Deb Mukherjee committed
884
885
886
887
888
889
      if (m == n)
        r[n][1] += vp9_cost_zero(tx_probs[m]);
      else
        r[n][1] += vp9_cost_one(tx_probs[m]);
    }
    if (s[n]) {
890
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]) * scale;
Deb Mukherjee's avatar
Deb Mukherjee committed
891
    } else {
892
893
894
895
896
897
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]) * scale;
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]) * scale;
    }
    if (rd[n][1] < best_rd) {
      best_rd = rd[n][1];
      best_tx = n;
Deb Mukherjee's avatar
Deb Mukherjee committed
898
899
    }
  }
900

901
902
  mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
                      best_tx : MIN(max_tx_size, max_mode_tx_size);
903

904
905
  // Actually encode using the chosen mode if a model was used, but do not
  // update the r, d costs
906
907
  txfm_rd_in_plane(x, &cpi->rdcost_stack, rate, distortion, skip,
                   &sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size);
908

909
  if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
910
    cpi->tx_stepdown_count[0]++;
911
  } else if (max_tx_size >= TX_16X16 &&  best_tx == TX_16X16) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
912
    cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
913
  } else if (rd[TX_8X8][1] <= rd[TX_4X4][1]) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
914
    cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
915
  } else {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
916
    cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
917
  }
918
919
}

920
static void super_block_yrd(VP9_COMP *cpi,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
921
                            MACROBLOCK *x, int *rate, int64_t *distortion,
922
                            int *skip, int64_t *psse, BLOCK_SIZE bs,
923
                            int64_t txfm_cache[TX_MODES],
924
                            int64_t ref_best_rd) {
925
926
  int r[TX_SIZES][2], s[TX_SIZES];
  int64_t d[TX_SIZES], sse[TX_SIZES];
Jim Bankoski's avatar
Jim Bankoski committed
927
  MACROBLOCKD *xd = &x->e_mbd;