vp9_rdopt.c 159 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
John Koleszar's avatar
John Koleszar committed
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
6
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
John Koleszar's avatar
John Koleszar committed
9
10
11
 */

#include <assert.h>
12
13
#include <math.h>
#include <stdio.h>
14

15
16
17
18
19
20
#include "./vp9_rtcd.h"

#include "vpx_mem/vpx_mem.h"

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
21
#include "vp9/common/vp9_entropymode.h"
22
23
24
25
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
26
27
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
28
29
30
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_systemdependent.h"

Dmitry Kovalev's avatar
Dmitry Kovalev committed
31
#include "vp9/encoder/vp9_cost.h"
32
#include "vp9/encoder/vp9_encodemb.h"
33
#include "vp9/encoder/vp9_encodemv.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
34
#include "vp9/encoder/vp9_encoder.h"
35
#include "vp9/encoder/vp9_mcomp.h"
36
#include "vp9/encoder/vp9_quantize.h"
37
#include "vp9/encoder/vp9_ratectrl.h"
38
39
40
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_variance.h"
Paul Wilkins's avatar
Paul Wilkins committed
41

42
43
44
45
46
// Tuning constants for the rate-distortion thresholds.
#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC      1
// Exponent applied to the DC quantizer in compute_rd_thresh_factor().
#define RD_THRESH_POW      1.25
// x->errorperbit is derived as RDMULT / RD_MULT_EPB_RATIO.
#define RD_MULT_EPB_RATIO  64

/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

// NOTE(review): these look like per-reference bit masks indexed by position
// in vp9_mode_order[]; their consumers are not visible in this part of the
// file -- confirm before changing either the masks or the mode order.
#define LAST_FRAME_MODE_MASK    0xFFEDCD60
#define GOLDEN_FRAME_MODE_MASK  0xFFDA3BB0
#define ALT_REF_MODE_MASK       0xFFC648D0

#define MIN_EARLY_TERM_INDEX    3

56
typedef struct {
57
  PREDICTION_MODE mode;
58
59
60
61
62
63
64
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

typedef struct {
  MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;

Alex Converse's avatar
Alex Converse committed
65
66
67
68
69
70
71
72
73
74
75
76
77
struct rdcost_block_args {
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[16];
  ENTROPY_CONTEXT t_left[16];
  int rate;
  int64_t dist;
  int64_t sse;
  int this_rate;
  int64_t this_dist;
  int64_t this_sse;
  int64_t this_rd;
  int64_t best_rd;
  int skip;
78
  int use_fast_coef_costing;
79
  const scan_order *so;
Alex Converse's avatar
Alex Converse committed
80
81
};

82
static const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
  {NEARESTMV, {LAST_FRAME,   NONE}},
  {NEARESTMV, {ALTREF_FRAME, NONE}},
  {NEARESTMV, {GOLDEN_FRAME, NONE}},

  {DC_PRED,   {INTRA_FRAME,  NONE}},

  {NEWMV,     {LAST_FRAME,   NONE}},
  {NEWMV,     {ALTREF_FRAME, NONE}},
  {NEWMV,     {GOLDEN_FRAME, NONE}},

  {NEARMV,    {LAST_FRAME,   NONE}},
  {NEARMV,    {ALTREF_FRAME, NONE}},
  {NEARESTMV, {LAST_FRAME,   ALTREF_FRAME}},
  {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},

  {TM_PRED,   {INTRA_FRAME,  NONE}},

  {NEARMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {NEWMV,     {LAST_FRAME,   ALTREF_FRAME}},
  {NEARMV,    {GOLDEN_FRAME, NONE}},
  {NEARMV,    {GOLDEN_FRAME, ALTREF_FRAME}},
  {NEWMV,     {GOLDEN_FRAME, ALTREF_FRAME}},

  {ZEROMV,    {LAST_FRAME,   NONE}},
  {ZEROMV,    {GOLDEN_FRAME, NONE}},
  {ZEROMV,    {ALTREF_FRAME, NONE}},
  {ZEROMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {ZEROMV,    {GOLDEN_FRAME, ALTREF_FRAME}},

  {H_PRED,    {INTRA_FRAME,  NONE}},
  {V_PRED,    {INTRA_FRAME,  NONE}},
  {D135_PRED, {INTRA_FRAME,  NONE}},
  {D207_PRED, {INTRA_FRAME,  NONE}},
  {D153_PRED, {INTRA_FRAME,  NONE}},
  {D63_PRED,  {INTRA_FRAME,  NONE}},
  {D117_PRED, {INTRA_FRAME,  NONE}},
  {D45_PRED,  {INTRA_FRAME,  NONE}},
120
121
};

122
static const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
123
124
125
126
127
128
  {{LAST_FRAME,   NONE}},
  {{GOLDEN_FRAME, NONE}},
  {{ALTREF_FRAME, NONE}},
  {{LAST_FRAME,   ALTREF_FRAME}},
  {{GOLDEN_FRAME, ALTREF_FRAME}},
  {{INTRA_FRAME,  NONE}},
John Koleszar's avatar
John Koleszar committed
129
130
};

131
132
133
134
// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for blocks size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
135
136
137
static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
  2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32
};
138

139
140
141
142
143
144
145
146
147
// Converts a raster-order 4x4 block index within a plane block into a sample
// offset for a buffer with the given stride. The row is the index divided by
// the block width (in 4x4 units), the column is the remainder; each unit is
// 4 samples wide and 4 samples high.
static int raster_block_offset(BLOCK_SIZE plane_bsize,
                               int raster_block, int stride) {
  const int width_log2 = b_width_log2(plane_bsize);
  const int col_mask = (1 << width_log2) - 1;
  const int row_px = 4 * (raster_block >> width_log2);
  const int col_px = 4 * (raster_block & col_mask);
  return row_px * stride + col_px;
}
// Returns a pointer to the first sample of 4x4 block `raster_block` inside
// `base`, treating `base` as a buffer whose stride equals the width of
// `plane_bsize` in samples.
static int16_t *raster_block_offset_int16(BLOCK_SIZE plane_bsize,
                                          int raster_block, int16_t *base) {
  const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  return base + raster_block_offset(plane_bsize, raster_block, stride);
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
152
// Fills the mode-related cost tables in `cpi` from the current frame
// context:
//  - y_mode_costs from the key-frame luma mode probabilities,
//  - mbmode_cost from fc->y_mode_prob[1],
//  - intra_uv_mode_cost for key and inter frames (TM_PRED row),
//  - switchable_interp_costs for every switchable-filter context.
static void fill_mode_costs(VP9_COMP *cpi) {
  const FRAME_CONTEXT *const fc = &cpi->common.fc;
  int i, j;

  for (i = 0; i < INTRA_MODES; i++) {
    for (j = 0; j < INTRA_MODES; j++) {
      vp9_cost_tokens(cpi->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j],
                      vp9_intra_mode_tree);
    }
  }

  // TODO(rbultje) separate tables for superblock costing?
  vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);
  vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME],
                  vp9_kf_uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
  vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME],
                  fc->uv_mode_prob[TM_PRED], vp9_intra_mode_tree);

  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
    vp9_cost_tokens(cpi->switchable_interp_costs[i],
                    fc->switchable_interp_prob[i], vp9_switchable_interp_tree);
  }
}

173
static void fill_token_costs(vp9_coeff_cost *c,
174
                             vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
175
  int i, j, k, l;
176
  TX_SIZE t;
177
  for (t = TX_4X4; t <= TX_32X32; ++t)
178
    for (i = 0; i < PLANE_TYPES; ++i)
179
180
181
      for (j = 0; j < REF_TYPES; ++j)
        for (k = 0; k < COEF_BANDS; ++k)
          for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
182
183
            vp9_prob probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
184
            vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
185
                            vp9_coef_tree);
186
            vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
187
                                 vp9_coef_tree);
188
189
            assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
                   c[t][i][j][k][1][l][EOB_TOKEN]);
190
          }
191
192
}

193
// Boost (in 1/16ths of rdmult) applied by vp9_compute_rd_mult() in the second
// pass, indexed by twopass.next_iiratio clamped to 31.
static const uint8_t rd_iifactor[32] = {
  4, 4, 3, 2, 1, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
};
John Koleszar's avatar
John Koleszar committed
199

200
// 3* dc_qlookup[Q]*dc_qlookup[Q];
201

202
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
203
204
205
// Per-qindex SAD-per-bit values, filled once by vp9_init_me_luts() and read
// by vp9_initialize_me_consts().
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

// Populates the SAD-per-bit lookup tables for every quantizer index.
void vp9_init_me_luts(void) {
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    const double q = vp9_convert_qindex_to_q(i);
    sad_per_bit16lut[i] = (int)(0.0418 * q + 2.4107);
    sad_per_bit4lut[i] = (int)(0.063 * q + 2.742);
  }
}
John Koleszar's avatar
John Koleszar committed
218

219
// Computes the rate-distortion multiplier for quantizer index `qindex`.
// The base value is 88 * q^2 / 25, where q is the DC quantizer step. In the
// second pass, on non-key frames, it is boosted by rd_iifactor[] / 16 using
// twopass.next_iiratio (values above 31 use the last table entry).
int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
  const int q = vp9_dc_quant(qindex, 0);
  // TODO(debargha): Adjust the function below
  int rdmult = 88 * q * q / 25;
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    const int iiratio = MIN(cpi->twopass.next_iiratio, 31);
    rdmult += (rdmult * rd_iifactor[iiratio]) >> 4;
  }
  return rdmult;
}

static int compute_rd_thresh_factor(int qindex) {
  // TODO(debargha): Adjust the function below
234
235
  const int q = (int)(pow(vp9_dc_quant(qindex, 0) / 4.0, RD_THRESH_POW) * 5.12);
  return MAX(q, 8);
236
237
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
238
239
240
// Loads the SAD-per-bit constants for `qindex` into the encoder's macroblock
// state. The lookup tables must already have been filled by
// vp9_init_me_luts().
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
  cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
}

243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
// Exchanges the coefficient buffer pointers held in slots `m` and `n` of
// `ctx` for planes [min_plane, max_plane). As a side effect the live plane
// pointers in `x` (coeff, qcoeff, eobs) and `x->e_mbd` (dqcoeff) are left
// pointing at the buffers that were in slot `m` before the call, i.e. the
// ones stored in slot `n` afterwards.
static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
                           int m, int n, int min_plane, int max_plane) {
  int plane;

  for (plane = min_plane; plane < max_plane; ++plane) {
    struct macroblock_plane *const mb_plane = &x->plane[plane];
    struct macroblockd_plane *const mbd_plane = &x->e_mbd.plane[plane];

    // Each buffer kind is rotated independently, using the live plane
    // pointer as the temporary.
    mb_plane->coeff = ctx->coeff_pbuf[plane][m];
    ctx->coeff_pbuf[plane][m] = ctx->coeff_pbuf[plane][n];
    ctx->coeff_pbuf[plane][n] = mb_plane->coeff;

    mb_plane->qcoeff = ctx->qcoeff_pbuf[plane][m];
    ctx->qcoeff_pbuf[plane][m] = ctx->qcoeff_pbuf[plane][n];
    ctx->qcoeff_pbuf[plane][n] = mb_plane->qcoeff;

    mbd_plane->dqcoeff = ctx->dqcoeff_pbuf[plane][m];
    ctx->dqcoeff_pbuf[plane][m] = ctx->dqcoeff_pbuf[plane][n];
    ctx->dqcoeff_pbuf[plane][n] = mbd_plane->dqcoeff;

    mb_plane->eobs = ctx->eobs_pbuf[plane][m];
    ctx->eobs_pbuf[plane][m] = ctx->eobs_pbuf[plane][n];
    ctx->eobs_pbuf[plane][n] = mb_plane->eobs;
  }
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
268
// Fills rd->threshes[segment][bsize][] for every segment and block size.
// Each threshold is thresh_mult * t / 4, where t is the quantizer-derived
// factor for the segment scaled by rd_thresh_block_size_factor[bsize];
// multipliers that would overflow an int saturate to INT_MAX.
// Block sizes of 8x8 and above use rd->thresh_mult[] (MAX_MODES entries);
// smaller sizes use rd->thresh_mult_sub8x8[] (MAX_REFS entries).
static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
  int i, bsize, segment_id;

  for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
    const int qindex = clamp(vp9_get_qindex(&cm->seg, segment_id,
                                            cm->base_qindex) + cm->y_dc_delta_q,
                             0, MAXQ);
    const int q = compute_rd_thresh_factor(qindex);

    for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[].
      // t is never zero: compute_rd_thresh_factor() returns at least 8 and
      // every block size factor is non-zero.
      const int t = q * rd_thresh_block_size_factor[bsize];
      const int thresh_max = INT_MAX / t;

      if (bsize >= BLOCK_8X8) {
        for (i = 0; i < MAX_MODES; ++i) {
          rd->threshes[segment_id][bsize][i] =
              rd->thresh_mult[i] < thresh_max
                  ? rd->thresh_mult[i] * t / 4
                  : INT_MAX;
        }
      } else {
        for (i = 0; i < MAX_REFS; ++i) {
          rd->threshes[segment_id][bsize][i] =
              rd->thresh_mult_sub8x8[i] < thresh_max
                  ? rd->thresh_mult_sub8x8[i] * t / 4
                  : INT_MAX;
        }
      }
    }
  }
}

300
// Initializes the per-frame rate-distortion constants and cost tables:
//  - rd->RDDIV / rd->RDMULT and the derived x->errorperbit (at least 1),
//  - x->select_txfm_size,
//  - the per-segment / per-block-size RD thresholds,
//  - token and partition costs (skipped for non-RD mode picking on
//    non-key frames),
//  - mode, motion vector and inter-mode costs (for non-RD mode picking these
//    are refreshed only on key frames and when the frame counter modulo 8
//    equals 1).
void vp9_initialize_rd_consts(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->mb;
  RD_OPT *const rd = &cpi->rd;
  int i;

  vp9_clear_system_state();

  rd->RDDIV = RDDIV_BITS;  // in bits (to multiply D by 128)
  rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);

  x->errorperbit = rd->RDMULT / RD_MULT_EPB_RATIO;
  // Never let errorperbit be zero.
  x->errorperbit += (x->errorperbit == 0);

  x->select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
                         cm->frame_type != KEY_FRAME) ? 0 : 1;

  set_block_thresholds(cm, rd);

  if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME) {
    fill_token_costs(x->token_costs, cm->fc.coef_probs);

    for (i = 0; i < PARTITION_CONTEXTS; i++) {
      vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(cm, i),
                      vp9_partition_tree);
    }
  }

  if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
      cm->frame_type == KEY_FRAME) {
    fill_mode_costs(cpi);

    if (!frame_is_intra_only(cm)) {
      vp9_build_nmv_cost_table(x->nmvjointcost,
                               cm->allow_high_precision_mv ? x->nmvcost_hp
                                                           : x->nmvcost,
                               &cm->fc.nmvc, cm->allow_high_precision_mv);

      for (i = 0; i < INTER_MODE_CONTEXTS; ++i) {
        vp9_cost_tokens((int *)cpi->inter_mode_cost[i],
                        cm->fc.inter_mode_probs[i], vp9_inter_mode_tree);
      }
    }
  }
}

344
// Largest x^2 (Q10) accepted by model_rd_norm(): one less than the last
// sample point of its lookup tables, so that interpolating towards entry
// xq + 1 never reads past the end.
static const int MAX_XSQ_Q10 = 245727;

// Looks up the normalized rate (*r_q10) and normalized distortion (*d_q10),
// both in Q10, for the normalized squared step size xsq_q10 (Q10) by linear
// interpolation between tabulated sample points.
// xsq_q10 must lie in [0, MAX_XSQ_Q10].
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
  // NOTE: The tables below must be of the same size

  // The functions described below are sampled at the four most significant
  // bits of x^2 + 8 / 256

  // Normalized rate
  // This table models the rate for a Laplacian source with given variance
  // when quantized with a uniform quantizer with given stepsize. The closed
  // form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const int rate_tab_q10[] = {
    65536,  6086,  5574,  5275,  5063,  4899,  4764,  4651,
     4553,  4389,  4255,  4142,  4044,  3958,  3881,  3811,
     3748,  3635,  3538,  3453,  3376,  3307,  3244,  3186,
     3133,  3037,  2952,  2877,  2809,  2747,  2690,  2638,
     2589,  2501,  2423,  2353,  2290,  2232,  2179,  2130,
     2084,  2001,  1928,  1862,  1802,  1748,  1698,  1651,
     1608,  1530,  1460,  1398,  1342,  1290,  1243,  1199,
     1159,  1086,  1021,   963,   911,   864,   821,   781,
      745,   680,   623,   574,   530,   490,   455,   424,
      395,   345,   304,   269,   239,   213,   190,   171,
      154,   126,   104,    87,    73,    61,    52,    44,
       38,    28,    21,    16,    12,    10,     8,     6,
        5,     3,     2,     1,     1,     1,     0,     0,
  };
  // Normalized distortion
  // This table models the normalized distortion for a Laplacian source with
  // given variance when quantized with a uniform quantizer with given
  // stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance)
  // Note the actual distortion is Dn * variance.
  static const int dist_tab_q10[] = {
       0,     0,     1,     1,     1,     2,     2,     2,
       3,     3,     4,     5,     5,     6,     7,     7,
       8,     9,    11,    12,    13,    15,    16,    17,
      18,    21,    24,    26,    29,    31,    34,    36,
      39,    44,    49,    54,    59,    64,    69,    73,
      78,    88,    97,   106,   115,   124,   133,   142,
     151,   167,   184,   200,   215,   231,   245,   260,
     274,   301,   327,   351,   375,   397,   418,   439,
     458,   495,   528,   559,   587,   613,   637,   659,
     680,   717,   749,   777,   801,   823,   842,   859,
     874,   899,   919,   936,   949,   960,   969,   977,
     983,   994,  1001,  1006,  1010,  1013,  1015,  1017,
    1018,  1020,  1022,  1022,  1023,  1023,  1023,  1024,
  };
  // x^2 (Q10) at each sample point: eight samples per power-of-two interval,
  // with the spacing doubling from one row to the next.
  static const int xsq_iq_q10[] = {
         0,      4,      8,     12,     16,     20,     24,     28,
        32,     40,     48,     56,     64,     72,     80,     88,
        96,    112,    128,    144,    160,    176,    192,    208,
       224,    256,    288,    320,    352,    384,    416,    448,
       480,    544,    608,    672,    736,    800,    864,    928,
       992,   1120,   1248,   1376,   1504,   1632,   1760,   1888,
      2016,   2272,   2528,   2784,   3040,   3296,   3552,   3808,
      4064,   4576,   5088,   5600,   6112,   6624,   7136,   7648,
      8160,   9184,  10208,  11232,  12256,  13280,  14304,  15328,
     16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,
     32736,  36832,  40928,  45024,  49120,  53216,  57312,  61408,
     65504,  73696,  81888,  90080,  98272, 106464, 114656, 122848,
    131040, 147424, 163808, 180192, 196576, 212960, 229344, 245728,
  };
  const int one_q10 = 1 << 10;
  int tmp, k, xq, a_q10, b_q10;

  // All three tables hold ints, so equal byte sizes mean equal lengths.
  assert(sizeof(dist_tab_q10) == sizeof(rate_tab_q10));
  assert(sizeof(xsq_iq_q10) == sizeof(rate_tab_q10));
  assert(MAX_XSQ_Q10 + 1 ==
         xsq_iq_q10[sizeof(xsq_iq_q10) / sizeof(xsq_iq_q10[0]) - 1]);
  // Outside this range the index computation below leaves the tables.
  assert(xsq_q10 >= 0 && xsq_q10 <= MAX_XSQ_Q10);

  // k selects the power-of-two interval, the next three bits the sample
  // within it.
  tmp = (xsq_q10 >> 2) + 8;
  k = get_msb(tmp) - 3;
  xq = (k << 3) + ((tmp >> k) & 0x7);
  // a_q10 is the fractional position (Q10) between samples xq and xq + 1.
  a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
  b_q10 = one_q10 - a_q10;
  *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
  *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
}

427
428
429
void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n,
                                  unsigned int qstep, int *rate,
                                  int64_t *dist) {
Deb Mukherjee's avatar
Deb Mukherjee committed
430
431
432
433
434
435
  // This function models the rate and distortion for a Laplacian
  // source with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expressions are in:
  // Hang and Chen, "Source Model for transform video coder and its
  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
  // Sys. for Video Tech., April 1997.
436
  if (var == 0) {
Deb Mukherjee's avatar
Deb Mukherjee committed
437
438
439
    *rate = 0;
    *dist = 0;
  } else {
440
    int d_q10, r_q10;
441
    const uint64_t xsq_q10_64 =
442
        ((((uint64_t)qstep * qstep * n) << 10) + (var >> 1)) / var;
443
444
    const int xsq_q10 = xsq_q10_64 > MAX_XSQ_Q10 ?
                        MAX_XSQ_Q10 : (int)xsq_q10_64;
445
446
447
    model_rd_norm(xsq_q10, &r_q10, &d_q10);
    *rate = (n * r_q10 + 2) >> 2;
    *dist = (var * (int64_t)d_q10 + 512) >> 10;
Deb Mukherjee's avatar
Deb Mukherjee committed
448
449
450
  }
}

451
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
Deb Mukherjee's avatar
Deb Mukherjee committed
452
453
454
455
456
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
457
458
459
  int i;
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
460
  const int ref = xd->mi[0]->mbmi.ref_frame[0];
461
  unsigned int sse;
Deb Mukherjee's avatar
Deb Mukherjee committed
462
463
464
465

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
466
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
467

Deb Mukherjee's avatar
Deb Mukherjee committed
468
    (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
469
                              pd->dst.buf, pd->dst.stride, &sse);
470

471
472
    if (i == 0)
      x->pred_sse[ref] = sse;
473
474

    // Fast approximate the modelling function.
475
    if (cpi->oxcf.speed > 4) {
476
      int64_t rate;
477
478
479
480
      int64_t dist;
      int64_t square_error = sse;
      int quantizer = (pd->dequant[1] >> 3);

481
482
      if (quantizer < 120)
        rate = (square_error * (280 - quantizer)) >> 8;
483
484
485
486
487
      else
        rate = 0;
      dist = (square_error * quantizer) >> 8;
      rate_sum += rate;
      dist_sum += dist;
488
489
490
    } else {
      int rate;
      int64_t dist;
491
492
      vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
                                   pd->dequant[1] >> 3, &rate, &dist);
493
      rate_sum += rate;
494
      dist_sum += dist;
495
    }
Deb Mukherjee's avatar
Deb Mukherjee committed
496
497
  }

498
499
  *out_rate_sum = (int)rate_sum;
  *out_dist_sum = dist_sum << 4;
Deb Mukherjee's avatar
Deb Mukherjee committed
500
501
}

502
static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize,
Deb Mukherjee's avatar
Deb Mukherjee committed
503
504
505
506
                                 TX_SIZE tx_size,
                                 MACROBLOCK *x, MACROBLOCKD *xd,
                                 int *out_rate_sum, int64_t *out_dist_sum,
                                 int *out_skip) {
507
  int j, k;
508
  BLOCK_SIZE bs;
509
510
  const struct macroblock_plane *const p = &x->plane[0];
  const struct macroblockd_plane *const pd = &xd->plane[0];
511
512
  const int width = 4 * num_4x4_blocks_wide_lookup[bsize];
  const int height = 4 * num_4x4_blocks_high_lookup[bsize];
Deb Mukherjee's avatar
Deb Mukherjee committed
513
514
  int rate_sum = 0;
  int64_t dist_sum = 0;
515
  const int t = 4 << tx_size;
Deb Mukherjee's avatar
Deb Mukherjee committed
516
517
518
519
520
521
522
523
524
525
526
527

  if (tx_size == TX_4X4) {
    bs = BLOCK_4X4;
  } else if (tx_size == TX_8X8) {
    bs = BLOCK_8X8;
  } else if (tx_size == TX_16X16) {
    bs = BLOCK_16X16;
  } else if (tx_size == TX_32X32) {
    bs = BLOCK_32X32;
  } else {
    assert(0);
  }
528

Deb Mukherjee's avatar
Deb Mukherjee committed
529
  *out_skip = 1;
Jim Bankoski's avatar
Jim Bankoski committed
530
531
  for (j = 0; j < height; j += t) {
    for (k = 0; k < width; k += t) {
Deb Mukherjee's avatar
Deb Mukherjee committed
532
533
534
      int rate;
      int64_t dist;
      unsigned int sse;
535
536
537
      cpi->fn_ptr[bs].vf(&p->src.buf[j * p->src.stride + k], p->src.stride,
                         &pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride,
                         &sse);
Deb Mukherjee's avatar
Deb Mukherjee committed
538
      // sse works better than var, since there is no dc prediction used
539
540
      vp9_model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3,
                                   &rate, &dist);
Deb Mukherjee's avatar
Deb Mukherjee committed
541
542
543
544
545
      rate_sum += rate;
      dist_sum += dist;
      *out_skip &= (rate < 1024);
    }
  }
546

Deb Mukherjee's avatar
Deb Mukherjee committed
547
  *out_rate_sum = rate_sum;
548
  *out_dist_sum = dist_sum << 4;
Deb Mukherjee's avatar
Deb Mukherjee committed
549
550
}

551
// Returns the sum of squared differences between `coeff` and `dqcoeff` over
// `block_size` coefficients, and stores the sum of squared `coeff` values in
// *ssz.
int64_t vp9_block_error_c(const int16_t *coeff, const int16_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  intptr_t i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    // The difference of two int16_t values can reach +/-65535, whose square
    // does not fit in a 32-bit int; do the multiplications in 64 bits.
    const int64_t diff = (int64_t)coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}

566
567
568
569
570
/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). */
571
static const int16_t band_counts[TX_SIZES][8] = {
572
573
574
575
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
576
};
577
static INLINE int cost_coeffs(MACROBLOCK *x,
578
                              int plane, int block,
579
                              ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
580
                              TX_SIZE tx_size,
581
582
                              const int16_t *scan, const int16_t *nb,
                              int use_fast_coef_costing) {
583
  MACROBLOCKD *const xd = &x->e_mbd;
584
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
585
586
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
587
  const PLANE_TYPE type = pd->plane_type;
588
  const int16_t *band_count = &band_counts[tx_size][1];
589
  const int eob = p->eobs[block];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
590
  const int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
591
  unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
Dmitry Kovalev's avatar
Dmitry Kovalev committed
592
                   x->token_costs[tx_size][type][is_inter_block(mbmi)];
593
  uint8_t token_cache[32 * 32];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
594
  int pt = combine_entropy_contexts(*A, *L);
595
  int c, cost;
596
  // Check for consistency of tx_size with mode info
597
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
Dmitry Kovalev's avatar
Dmitry Kovalev committed
598
                              : get_uv_tx_size(mbmi) == tx_size);
599

600
601
  if (eob == 0) {
    // single eob token
602
    cost = token_costs[0][0][pt][EOB_TOKEN];
603
    c = 0;
604
  } else {
605
    int band_left = *band_count++;
606
607

    // dc token
Dmitry Kovalev's avatar
Dmitry Kovalev committed
608
    int v = qcoeff[0];
609
    int prev_t = vp9_dct_value_tokens_ptr[v].token;
610
    cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
611
    token_cache[0] = vp9_pt_energy_class[prev_t];
612
    ++token_costs;
613
614
615
616

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
617
      int t;
618

Dmitry Kovalev's avatar
Dmitry Kovalev committed
619
      v = qcoeff[rc];
620
      t = vp9_dct_value_tokens_ptr[v].token;
621
622
623
      if (use_fast_coef_costing) {
        cost += (*token_costs)[!prev_t][!prev_t][t] + vp9_dct_value_cost_ptr[v];
      } else {
624
        pt = get_coef_context(nb, token_cache, c);
625
        cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
626
        token_cache[rc] = vp9_pt_energy_class[t];
627
      }
628
      prev_t = t;
629
      if (!--band_left) {
630
631
        band_left = *band_count++;
        ++token_costs;
632
      }
633
    }
634
635

    // eob token
636
    if (band_left) {
637
638
639
      if (use_fast_coef_costing) {
        cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
      } else {
640
        pt = get_coef_context(nb, token_cache, c);
641
642
        cost += (*token_costs)[0][pt][EOB_TOKEN];
      }
643
    }
644
645
  }

646
  // is eob first coefficient;
647
  *A = *L = (c > 0);
648

649
650
  return cost;
}
Alex Converse's avatar
Alex Converse committed
651
652
// Computes the distortion and SSE of one transform block in the coefficient
// domain and stores them in args->dist / args->sse.
// The raw sums are shifted right by 2 for every transform size except 32x32.
// When x->skip_encode is set and the block is intra, a quantizer-based
// correction term is added to both values.
static void dist_block(int plane, int block, TX_SIZE tx_size,
                       struct rdcost_block_args *args) {
  const int ss_txfrm_size = tx_size << 1;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  int64_t this_sse;
  int shift = tx_size == TX_32X32 ? 0 : 2;
  int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  // 16 << ss_txfrm_size is the number of coefficients in the block.
  args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                               &this_sse) >> shift;
  args->sse  = this_sse >> shift;

  if (x->skip_encode && !is_inter_block(&xd->mi[0]->mbmi)) {
    // TODO(jingning): tune the model to better capture the distortion.
    // Evaluated in 64 bits; named so that it does not shadow the plane
    // pointer `p` above.
    const int64_t model_err =
        ((int64_t)pd->dequant[1] * pd->dequant[1] *
         (1 << ss_txfrm_size)) >> (shift + 2);
    args->dist += (model_err >> 4);
    args->sse  += model_err;
  }
}

675
static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
Alex Converse's avatar
Alex Converse committed
676
                       TX_SIZE tx_size, struct rdcost_block_args* args) {
Deb Mukherjee's avatar
Deb Mukherjee committed
677
  int x_idx, y_idx;
Alex Converse's avatar
Alex Converse committed
678
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx);
Deb Mukherjee's avatar
Deb Mukherjee committed
679

680
  args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
Alex Converse's avatar
Alex Converse committed
681
                           args->t_left + y_idx, tx_size,
682
683
                           args->so->scan, args->so->neighbors,
                           args->use_fast_coef_costing);
Deb Mukherjee's avatar
Deb Mukherjee committed
684
685
}

686
687
// Per-transform-block callback (passed to
// vp9_foreach_transformed_block_in_plane() by txfm_rd_in_plane()).
//
// `arg` points to a struct rdcost_block_args. For each block this:
//   1. transforms/quantizes the block (intra blocks go through
//      vp9_encode_block_intra(), inter blocks through vp9_xform_quant()),
//   2. fills args->dist / args->sse via dist_block() and args->rate via
//      rate_block(),
//   3. accumulates rate, distortion, sse and the smaller of the two RDCOST
//      values into the running totals in `args`.
// Once the running rd total exceeds args->best_rd, args->skip is set and every
// later invocation returns immediately.
static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
                          TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *const args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int64_t rd_coded;    // RDCOST using the measured rate and distortion.
  int64_t rd_zeroed;   // RDCOST using zero rate and the sse as distortion.

  // A previous block already pushed the total past best_rd; nothing to do.
  if (args->skip)
    return;

  if (is_inter_block(mbmi))
    vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
  else
    vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip);

  dist_block(plane, block, tx_size, args);
  rate_block(plane, block, plane_bsize, tx_size, args);

  rd_coded = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
  rd_zeroed = RDCOST(x->rdmult, x->rddiv, 0, args->sse);

  // TODO(jingning): temporarily enabled only for luma component
  if (plane == 0) {
    const int no_coeffs = !x->plane[plane].eobs[block];
    const int zeroing_is_cheaper = rd_coded > rd_zeroed && !xd->lossless;

    x->zcoeff_blk[tx_size][block] = no_coeffs || zeroing_is_cheaper;
  }

  args->this_rate += args->rate;
  args->this_dist += args->dist;
  args->this_sse  += args->sse;
  args->this_rd   += MIN(rd_coded, rd_zeroed);

  // Flag early termination for the remaining blocks of this plane.
  if (args->this_rd > args->best_rd)
    args->skip = 1;
}

724
725
726
727
728
729
730
731
732
733
// Returns 1 if any of the `count` consecutive entropy contexts starting at
// `ctx` is non-zero, and 0 otherwise.
//
// Reads the contexts one element at a time instead of through a wider integer
// pointer, so it neither violates the effective-type (strict aliasing) rule
// nor depends on `ctx` being aligned for a wider type.
static int ctx_span_nonzero(const ENTROPY_CONTEXT *ctx, int count) {
  int acc = 0;
  int i;
  for (i = 0; i < count; ++i)
    acc |= ctx[i];
  return acc != 0;
}

// Builds the above/left entropy contexts used when costing transform blocks
// of size `tx_size` in plane `pd` of a block of size `bsize`.
//
// For TX_4X4 the plane's above/left contexts are copied as-is. For larger
// transforms each group of 2 (8x8), 4 (16x16) or 8 (32x32) 4x4 contexts is
// collapsed to a single 0/1 flag ("any context in the group is non-zero"),
// which is written to the first entry of the group in t_above / t_left. The
// remaining entries of each group are left untouched.
void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[16],
                              ENTROPY_CONTEXT t_left[16]) {
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
  const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
  const ENTROPY_CONTEXT *const above = pd->above_context;
  const ENTROPY_CONTEXT *const left = pd->left_context;

  int i;
  switch (tx_size) {
    case TX_4X4:
      vpx_memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      vpx_memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      break;
    case TX_8X8:
      for (i = 0; i < num_4x4_w; i += 2)
        t_above[i] = ctx_span_nonzero(&above[i], 2);
      for (i = 0; i < num_4x4_h; i += 2)
        t_left[i] = ctx_span_nonzero(&left[i], 2);
      break;
    case TX_16X16:
      for (i = 0; i < num_4x4_w; i += 4)
        t_above[i] = ctx_span_nonzero(&above[i], 4);
      for (i = 0; i < num_4x4_h; i += 4)
        t_left[i] = ctx_span_nonzero(&left[i], 4);
      break;
    case TX_32X32:
      for (i = 0; i < num_4x4_w; i += 8)
        t_above[i] = ctx_span_nonzero(&above[i], 8);
      for (i = 0; i < num_4x4_h; i += 8)
        t_left[i] = ctx_span_nonzero(&left[i], 8);
      break;
    default:
      assert(0 && "Invalid transform size.");
      break;
  }
}

763
static void txfm_rd_in_plane(MACROBLOCK *x,
764
765
766
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
767
768
                             BLOCK_SIZE bsize, TX_SIZE tx_size,
                             int use_fast_coef_casting) {
Deb Mukherjee's avatar
Deb Mukherjee committed
769
  MACROBLOCKD *const xd = &x->e_mbd;
770
  const struct macroblockd_plane *const pd = &xd->plane[plane];
771
772
  struct rdcost_block_args args;
  vp9_zero(args);
773
774
  args.x = x;
  args.best_rd = ref_best_rd;
775
  args.use_fast_coef_costing = use_fast_coef_casting;
776

777
  if (plane == 0)
778
    xd->mi[0]->mbmi.tx_size = tx_size;
779

780
  vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
781

782
  args.so = get_scan(xd, tx_size, pd->plane_type, 0);
Deb Mukherjee's avatar
Deb Mukherjee committed
783

784
  vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
785
786
                                         block_rd_txfm, &args);
  if (args.skip) {
787
788
789
790
791
    *rate       = INT_MAX;
    *distortion = INT64_MAX;
    *sse        = INT64_MAX;
    *skippable  = 0;
  } else {
792
793
794
    *distortion = args.this_dist;
    *rate       = args.this_rate;
    *sse        = args.this_sse;
795
    *skippable  = vp9_is_skippable_in_plane(x, bsize, plane);
796
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
797
798
799
800
801
}

static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
                                     int *rate, int64_t *distortion,
                                     int *skip, int64_t *sse,
802
                                     int64_t ref_best_rd,
803
                                     BLOCK_SIZE bs) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
804
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
Deb Mukherjee's avatar
Deb Mukherjee committed
805
  VP9_COMMON *const cm = &cpi->common;
Yaowu Xu's avatar
Yaowu Xu committed
806
  const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
Deb Mukherjee's avatar
Deb Mukherjee committed
807
  MACROBLOCKD *const xd = &x->e_mbd;
808
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
Yaowu Xu's avatar
Yaowu Xu committed
809
810
811

  mbmi->tx_size = MIN(max_tx_size, largest_tx_size);

Alex Converse's avatar
Alex Converse committed
812
  txfm_rd_in_plane(x, rate, distortion, skip,
813
                   &sse[mbmi->tx_size], ref_best_rd, 0, bs,
814
                   mbmi->tx_size, cpi->sf.use_fast_coef_costing);
Dmitry Kovalev's avatar
Dmitry Kovalev committed
815
  cpi->tx_stepdown_count[0]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
816
817
}

818
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
819
                                     int (*r)[2], int *rate,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
820
                                     int64_t *d, int64_t *distortion,
821
                                     int *s, int *skip,
822
                                     int64_t tx_cache[TX_MODES],
823
                                     BLOCK_SIZE bs) {
824
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
825
826
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
827
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
828
  vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
829
830
831
832
  int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX}};
Yaowu Xu's avatar
Yaowu Xu committed
833
  TX_SIZE n, m;
834
  int s0, s1;
835
836
837
  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  int64_t best_rd = INT64_MAX;
  TX_SIZE best_tx = TX_4X4;
838

839
  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
840
841
842
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
843

844
  for (n = TX_4X4; n <= max_tx_size; n++) {
845
846
847
848
849
850
851
852
853
    r[n][1] = r[n][0];
    if (r[n][0] < INT_MAX) {
      for (m = 0; m <= n - (n == max_tx_size); m++) {
        if (m == n)
          r[n][1] += vp9_cost_zero(tx_probs[m]);
        else
          r[n][1] += vp9_cost_one(tx_probs[m]);
      }
    }
854
855
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
856
    } else if (s[n]) {
857
858
859
860
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
861
    }
862
863
864
865
866

    if (rd[n][1] < best_rd) {
      best_tx = n;
      best_rd = rd[n][1];
    }
867
  }
868
869
  mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
                      best_tx : MIN(max_tx_size, max_mode_tx_size);
870
871


872
873
874
  *distortion = d[mbmi->tx_size];
  *rate       = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->tx_size];
875

876
877
878
879
  tx_cache[ONLY_4X4] = rd[TX_4X4][0];
  tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
  tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
  tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
880

881
882
  if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
    tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
883
    cpi->tx_stepdown_count[0]++;
884
885
  } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
    tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
886
    cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
887
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
888
    tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
889
    cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
890
  } else {
891
    tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
892
    cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
893
  }
Deb Mukherjee's avatar