/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
15

16
#include "vp9/common/vp9_pragmas.h"
17
18
19
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
20
21
22
23
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_quant_common.h"
24
25
26
27
28
29
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
30
#include "vpx_mem/vpx_mem.h"
31
32
33
34
35
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
36
#include "./vp9_rtcd.h"
37
#include "vp9/common/vp9_mvref_common.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
38
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
39

40
41
42
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

43
44
45
#define LAST_FRAME_MODE_MASK    0xFFEDCD60
#define GOLDEN_FRAME_MODE_MASK  0xFFDA3BB0
#define ALT_REF_MODE_MASK       0xFFC648D0
46

Paul Wilkins's avatar
Paul Wilkins committed
47
48
#define MIN_EARLY_TERM_INDEX    3

49
50
51
52
53
54
55
56
57
// Element type of vp9_mode_order: a prediction mode together with up to two
// reference frames. The table stores NONE in ref_frame[1] for entries that
// use a single reference.
typedef struct {
  MB_PREDICTION_MODE mode;
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

// Element type of vp9_ref_order: a pair of reference frames. The table
// stores NONE in ref_frame[1] for entries that use a single reference.
typedef struct {
  MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;

58
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
  {NEARESTMV, {LAST_FRAME,   NONE}},
  {NEARESTMV, {ALTREF_FRAME, NONE}},
  {NEARESTMV, {GOLDEN_FRAME, NONE}},

  {DC_PRED,   {INTRA_FRAME,  NONE}},

  {NEWMV,     {LAST_FRAME,   NONE}},
  {NEWMV,     {ALTREF_FRAME, NONE}},
  {NEWMV,     {GOLDEN_FRAME, NONE}},

  {NEARMV,    {LAST_FRAME,   NONE}},
  {NEARMV,    {ALTREF_FRAME, NONE}},
  {NEARESTMV, {LAST_FRAME,   ALTREF_FRAME}},
  {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},

  {TM_PRED,   {INTRA_FRAME,  NONE}},

  {NEARMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {NEWMV,     {LAST_FRAME,   ALTREF_FRAME}},
  {NEARMV,    {GOLDEN_FRAME, NONE}},
  {NEARMV,    {GOLDEN_FRAME, ALTREF_FRAME}},
  {NEWMV,     {GOLDEN_FRAME, ALTREF_FRAME}},

  {ZEROMV,    {LAST_FRAME,   NONE}},
  {ZEROMV,    {GOLDEN_FRAME, NONE}},
  {ZEROMV,    {ALTREF_FRAME, NONE}},
  {ZEROMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {ZEROMV,    {GOLDEN_FRAME, ALTREF_FRAME}},

  {H_PRED,    {INTRA_FRAME,  NONE}},
  {V_PRED,    {INTRA_FRAME,  NONE}},
  {D135_PRED, {INTRA_FRAME,  NONE}},
  {D207_PRED, {INTRA_FRAME,  NONE}},
  {D153_PRED, {INTRA_FRAME,  NONE}},
  {D63_PRED,  {INTRA_FRAME,  NONE}},
  {D117_PRED, {INTRA_FRAME,  NONE}},
  {D45_PRED,  {INTRA_FRAME,  NONE}},
96
97
98
};

const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
99
100
101
102
103
104
  {{LAST_FRAME,   NONE}},
  {{GOLDEN_FRAME, NONE}},
  {{ALTREF_FRAME, NONE}},
  {{LAST_FRAME,   ALTREF_FRAME}},
  {{GOLDEN_FRAME, ALTREF_FRAME}},
  {{INTRA_FRAME,  NONE}},
John Koleszar's avatar
John Koleszar committed
105
106
};

107
108
109
110
// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block size.
// The factors here are scaled by 4, i.e. << 2 (2 = x0.5, 32 = x8 etc).
111
// Per-block-size threshold scale factors in units of 1/4 (2 = x0.5,
// 4 = x1, 32 = x8), indexed by block size. The table is only ever read, so
// it is declared const.
static const int rd_thresh_block_size_factor[BLOCK_SIZES] =
  {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};

114
115
116
117
118
119
120
121
122
// Converts a raster-order 4x4 block index within a plane block of size
// plane_bsize into a sample offset for a buffer with the given stride.
static int raster_block_offset(BLOCK_SIZE plane_bsize,
                               int raster_block, int stride) {
  const int log2_w = b_width_log2(plane_bsize);
  // Column index is the low log2_w bits, row index is what remains.
  const int col_4x4 = raster_block & ((1 << log2_w) - 1);
  const int row_4x4 = raster_block >> log2_w;
  return (4 * row_4x4) * stride + 4 * col_4x4;
}
// Returns a pointer to the 4x4 block `raster_block` inside the int16_t
// buffer `base`, whose row pitch is the pixel width of plane_bsize.
static int16_t* raster_block_offset_int16(BLOCK_SIZE plane_bsize,
                                          int raster_block, int16_t *base) {
  const int pitch = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int offset = raster_block_offset(plane_bsize, raster_block, pitch);
  return base + offset;
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
127
128
129
130
// Fills the mode-signalling cost tables of cpi->mb (intra luma/chroma mode
// costs and switchable interpolation filter costs) from the key-frame
// default probabilities and the current frame context.
static void fill_mode_costs(VP9_COMP *cpi) {
  MACROBLOCK *const mb = &cpi->mb;
  FRAME_CONTEXT *const frame_ctx = &cpi->common.fc;
  int m0, m1, ctx;

  // Key-frame luma mode costs: one cost vector per pair of indices into
  // vp9_kf_y_mode_prob.
  for (m0 = 0; m0 < INTRA_MODES; m0++) {
    for (m1 = 0; m1 < INTRA_MODES; m1++) {
      vp9_cost_tokens((int *)mb->y_mode_costs[m0][m1],
                      vp9_kf_y_mode_prob[m0][m1], vp9_intra_mode_tree);
    }
  }

  // TODO(rbultje) separate tables for superblock costing?
  vp9_cost_tokens(mb->mbmode_cost, frame_ctx->y_mode_prob[1],
                  vp9_intra_mode_tree);

  // Chroma mode costs: slot 1 from the frame context, slot 0 from the
  // key-frame defaults; both use the last row of their probability table.
  vp9_cost_tokens(mb->intra_uv_mode_cost[1],
                  frame_ctx->uv_mode_prob[INTRA_MODES - 1],
                  vp9_intra_mode_tree);
  vp9_cost_tokens(mb->intra_uv_mode_cost[0],
                  vp9_kf_uv_mode_prob[INTRA_MODES - 1], vp9_intra_mode_tree);

  for (ctx = 0; ctx < SWITCHABLE_FILTER_CONTEXTS; ++ctx) {
    vp9_cost_tokens((int *)mb->switchable_interp_costs[ctx],
                    frame_ctx->switchable_interp_prob[ctx],
                    vp9_switchable_interp_tree);
  }
}

151
static void fill_token_costs(vp9_coeff_cost *c,
152
                             vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
153
  int i, j, k, l;
154
  TX_SIZE t;
155
  for (t = TX_4X4; t <= TX_32X32; ++t)
156
    for (i = 0; i < PLANE_TYPES; ++i)
157
158
159
      for (j = 0; j < REF_TYPES; ++j)
        for (k = 0; k < COEF_BANDS; ++k)
          for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
160
161
            vp9_prob probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
162
            vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
163
                            vp9_coef_tree);
164
            vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
165
                                 vp9_coef_tree);
166
167
            assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
                   c[t][i][j][k][1][l][EOB_TOKEN]);
168
          }
169
170
}

171
172
173
174
175
176
// Extra rdmult boost in sixteenths of rdmult, looked up by
// cpi->twopass.next_iiratio in vp9_compute_rd_mult() (values above 31 use
// entry 31). Only the first five entries are non-zero.
static const int rd_iifactor[32] = {
  4, 4, 3, 2, 1, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
};
John Koleszar's avatar
John Koleszar committed
177

178
// 3* dc_qlookup[Q]*dc_qlookup[Q];
179

180
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
181
182
183
// Per-qindex lookup tables, filled by vp9_init_me_luts() and read by
// vp9_initialize_me_consts() to set mb.sadperbit16 and mb.sadperbit4.
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

184
// Fills sad_per_bit16lut[] and sad_per_bit4lut[] for every qindex.
// Declared with (void) so the definition is a proper prototype: an empty
// parameter list in C leaves the arguments unchecked.
void vp9_init_me_luts(void) {
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now.
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    const double q = vp9_convert_qindex_to_q(i);
    sad_per_bit16lut[i] = (int)(0.0418 * q + 2.4107);
    sad_per_bit4lut[i] = (int)(0.063 * q + 2.742);
  }
}
John Koleszar's avatar
John Koleszar committed
196

197
// Returns the rate-distortion multiplier for `qindex`: 88 * q^2 / 25 with
// q = vp9_dc_quant(qindex, 0). In pass 2 on non-key frames the value is
// additionally boosted by rd_iifactor[min(next_iiratio, 31)] / 16.
int vp9_compute_rd_mult(VP9_COMP *cpi, int qindex) {
  const int dc_q = vp9_dc_quant(qindex, 0);
  // TODO(debargha): Adjust the function below
  int rdmult = 88 * dc_q * dc_q / 25;

  if (cpi->pass == 2 && cpi->common.frame_type != KEY_FRAME) {
    // Ratios beyond the end of the table share its last entry.
    const int boost = rd_iifactor[cpi->twopass.next_iiratio > 31
                                      ? 31
                                      : cpi->twopass.next_iiratio];
    rdmult += (rdmult * boost) >> 4;
  }
  return rdmult;
}

// Returns the threshold scale factor for `qindex`:
// (dc_quant / 4) ^ RD_THRESH_POW * 5.12, truncated to int and floored at 8.
static int compute_rd_thresh_factor(int qindex) {
  // TODO(debargha): Adjust the function below
  const double quarter_q = vp9_dc_quant(qindex, 0) / 4.0;
  const int factor = (int)(pow(quarter_q, RD_THRESH_POW) * 5.12);
  return factor < 8 ? 8 : factor;
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
219
220
221
// Copies the sad-per-bit values for `qindex` from the lookup tables into
// cpi->mb.
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  MACROBLOCK *const mb = &cpi->mb;

  mb->sadperbit16 = sad_per_bit16lut[qindex];
  mb->sadperbit4 = sad_per_bit4lut[qindex];
}

224
225
226
// Fills cpi->rd_threshes[][][] and cpi->rd_thresh_sub8x8[][][] for every
// segment and block size. Each entry is the speed-feature multiplier scaled
// by (q factor * block size factor) / 4, saturating to INT_MAX when the
// product could overflow.
static void set_block_thresholds(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  SPEED_FEATURES *const sf = &cpi->sf;
  int seg, bs, idx;

  for (seg = 0; seg < MAX_SEGMENTS; ++seg) {
    const int seg_qindex = vp9_get_qindex(&cm->seg, seg, cm->base_qindex);
    const int qindex = clamp(seg_qindex + cm->y_dc_delta_q, 0, MAXQ);
    const int q = compute_rd_thresh_factor(qindex);

    for (bs = 0; bs < BLOCK_SIZES; ++bs) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[].
      const int scale = q * rd_thresh_block_size_factor[bs];
      const int mult_limit = INT_MAX / scale;

      for (idx = 0; idx < MAX_MODES; ++idx) {
        if (sf->thresh_mult[idx] < mult_limit)
          cpi->rd_threshes[seg][bs][idx] = sf->thresh_mult[idx] * scale / 4;
        else
          cpi->rd_threshes[seg][bs][idx] = INT_MAX;
      }

      for (idx = 0; idx < MAX_REFS; ++idx) {
        if (sf->thresh_mult_sub8x8[idx] < mult_limit)
          cpi->rd_thresh_sub8x8[seg][bs][idx] =
              sf->thresh_mult_sub8x8[idx] * scale / 4;
        else
          cpi->rd_thresh_sub8x8[seg][bs][idx] = INT_MAX;
      }
    }
  }
}

256
257
// Per-frame initialisation of the rate-distortion constants: RDDIV/RDMULT,
// errorperbit, speed features, block thresholds and all token, partition,
// mode and (for non-intra-only frames) motion vector / inter mode cost
// tables.
void vp9_initialize_rd_consts(VP9_COMP *cpi) {
  VP9_COMMON *cm = &cpi->common;
  MACROBLOCK *x = &cpi->mb;
  int qindex, i;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  qindex = clamp(cm->base_qindex + cm->y_dc_delta_q, 0, MAXQ);

  cpi->RDDIV = RDDIV_BITS;  // in bits (to multiply D by 128)
  cpi->RDMULT = vp9_compute_rd_mult(cpi, qindex);

  // errorperbit must never be zero. The check has to look at the freshly
  // computed value: folding it into the assignment would test the value
  // left over from the previous call instead.
  x->errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO;
  x->errorperbit += (x->errorperbit == 0);

  vp9_set_speed_features(cpi);

  // Transform size selection is disabled only when the largest transform is
  // forced on a non-key frame.
  x->select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
                         cm->frame_type != KEY_FRAME) ? 0 : 1;

  set_block_thresholds(cpi);

  fill_token_costs(x->token_costs, cm->fc.coef_probs);

  for (i = 0; i < PARTITION_CONTEXTS; i++)
    vp9_cost_tokens(x->partition_cost[i], get_partition_probs(cm, i),
                    vp9_partition_tree);

  fill_mode_costs(cpi);

  if (!frame_is_intra_only(cm)) {
    vp9_build_nmv_cost_table(x->nmvjointcost,
                             cm->allow_high_precision_mv ? x->nmvcost_hp
                                                         : x->nmvcost,
                             &cm->fc.nmvc,
                             cm->allow_high_precision_mv, 1, 1);

    for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
      vp9_cost_tokens((int *)x->inter_mode_cost[i],
                      cm->fc.inter_mode_probs[i], vp9_inter_mode_tree);
  }
}

302
// Upper clamp applied by model_rd_from_var_lapndz() to the Q10 x^2 value it
// passes to model_rd_norm(); one less than the last entry (245728) of that
// function's xsq_iq_q10 table.
static const int MAX_XSQ_Q10 = 245727;
Deb Mukherjee's avatar
Deb Mukherjee committed
303

304
// Looks up the normalized rate (*r_q10) and normalized distortion (*d_q10),
// both in Q10, for the Q10 value xsq_q10 of x^2, by linear interpolation
// between the two neighbouring table samples.
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
  // NOTE: The tables below must be of the same size.

  // The functions described below are sampled at the four most significant
  // bits of x^2 + 8 / 256.

  // Normalized rate:
  // This table models the rate for a Laplacian source with given variance
  // when quantized with a uniform quantizer with given stepsize. The closed
  // form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const int rate_tab_q10[] = {
    65536,  6086,  5574,  5275,  5063,  4899,  4764,  4651,
     4553,  4389,  4255,  4142,  4044,  3958,  3881,  3811,
     3748,  3635,  3538,  3453,  3376,  3307,  3244,  3186,
     3133,  3037,  2952,  2877,  2809,  2747,  2690,  2638,
     2589,  2501,  2423,  2353,  2290,  2232,  2179,  2130,
     2084,  2001,  1928,  1862,  1802,  1748,  1698,  1651,
     1608,  1530,  1460,  1398,  1342,  1290,  1243,  1199,
     1159,  1086,  1021,   963,   911,   864,   821,   781,
      745,   680,   623,   574,   530,   490,   455,   424,
      395,   345,   304,   269,   239,   213,   190,   171,
      154,   126,   104,    87,    73,    61,    52,    44,
       38,    28,    21,    16,    12,    10,     8,     6,
        5,     3,     2,     1,     1,     1,     0,     0,
  };
  // Normalized distortion:
  // This table models the normalized distortion for a Laplacian source
  // with given variance when quantized with a uniform quantizer with given
  // stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance).
  // Note the actual distortion is Dn * variance.
  static const int dist_tab_q10[] = {
       0,     0,     1,     1,     1,     2,     2,     2,
       3,     3,     4,     5,     5,     6,     7,     7,
       8,     9,    11,    12,    13,    15,    16,    17,
      18,    21,    24,    26,    29,    31,    34,    36,
      39,    44,    49,    54,    59,    64,    69,    73,
      78,    88,    97,   106,   115,   124,   133,   142,
     151,   167,   184,   200,   215,   231,   245,   260,
     274,   301,   327,   351,   375,   397,   418,   439,
     458,   495,   528,   559,   587,   613,   637,   659,
     680,   717,   749,   777,   801,   823,   842,   859,
     874,   899,   919,   936,   949,   960,   969,   977,
     983,   994,  1001,  1006,  1010,  1013,  1015,  1017,
    1018,  1020,  1022,  1022,  1023,  1023,  1023,  1024,
  };
  // Q10 x^2 value at which each table entry above was sampled.
  static const int xsq_iq_q10[] = {
         0,      4,      8,     12,     16,     20,     24,     28,
        32,     40,     48,     56,     64,     72,     80,     88,
        96,    112,    128,    144,    160,    176,    192,    208,
       224,    256,    288,    320,    352,    384,    416,    448,
       480,    544,    608,    672,    736,    800,    864,    928,
       992,   1120,   1248,   1376,   1504,   1632,   1760,   1888,
      2016,   2272,   2528,   2784,   3040,   3296,   3552,   3808,
      4064,   4576,   5088,   5600,   6112,   6624,   7136,   7648,
      8160,   9184,  10208,  11232,  12256,  13280,  14304,  15328,
     16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,
     32736,  36832,  40928,  45024,  49120,  53216,  57312,  61408,
     65504,  73696,  81888,  90080,  98272, 106464, 114656, 122848,
    131040, 147424, 163808, 180192, 196576, 212960, 229344, 245728,
  };
  // Invariants (their checks are currently disabled): the three tables have
  // the same number of entries, and MAX_XSQ_Q10 + 1 equals the last entry of
  // xsq_iq_q10.

  // Table index: 8 entries per power of two of (xsq_q10 / 4 + 8), selected
  // by the three bits below the leading one.
  const int biased = (xsq_q10 >> 2) + 8;
  const int octave = get_msb(biased) - 3;
  const int idx = (octave << 3) + ((biased >> octave) & 0x7);
  // Q10 interpolation weights of the upper and lower neighbouring samples.
  const int unit_q10 = 1 << 10;
  const int upper_w_q10 =
      ((xsq_q10 - xsq_iq_q10[idx]) << 10) >> (2 + octave);
  const int lower_w_q10 = unit_q10 - upper_w_q10;

  *r_q10 = (rate_tab_q10[idx] * lower_w_q10 +
            rate_tab_q10[idx + 1] * upper_w_q10) >> 10;
  *d_q10 = (dist_tab_q10[idx] * lower_w_q10 +
            dist_tab_q10[idx + 1] * upper_w_q10) >> 10;
}

385
386
387
// Models the rate and distortion for a Laplacian source with given variance
// when quantized with a uniform quantizer with given stepsize. The closed
// form expressions are in:
// Hang and Chen, "Source Model for transform video coder and its
// application - Part I: Fundamental Theory", IEEE Trans. Circ.
// Sys. for Video Tech., April 1997.
//
// A zero `var` yields zero rate and distortion. Otherwise x^2 =
// qstep^2 * n / var is formed in Q10 (rounded, clamped to MAX_XSQ_Q10) and
// the normalized values from model_rd_norm() are scaled by n and var.
static void model_rd_from_var_lapndz(unsigned int var, unsigned int n,
                                     unsigned int qstep, int *rate,
                                     int64_t *dist) {
  int norm_rate_q10, norm_dist_q10;
  uint64_t ratio_q10;
  int xsq_q10;

  if (var == 0) {
    *rate = 0;
    *dist = 0;
    return;
  }

  // 64-bit intermediate: qstep * qstep * n << 10 does not fit in 32 bits.
  ratio_q10 = ((((uint64_t)qstep * qstep * n) << 10) + (var >> 1)) / var;
  xsq_q10 = ratio_q10 > MAX_XSQ_Q10 ? MAX_XSQ_Q10 : ratio_q10;
  model_rd_norm(xsq_q10, &norm_rate_q10, &norm_dist_q10);

  *rate = (n * norm_rate_q10 + 2) >> 2;
  *dist = (var * (int64_t)norm_dist_q10 + 512) >> 10;
}

408
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
Deb Mukherjee's avatar
Deb Mukherjee committed
409
410
411
412
413
414
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  int i, rate_sum = 0, dist_sum = 0;
415
416
  int ref = xd->mi_8x8[0]->mbmi.ref_frame[0];
  unsigned int sse;
Deb Mukherjee's avatar
Deb Mukherjee committed
417
418
419
420

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
421
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
Deb Mukherjee's avatar
Deb Mukherjee committed
422
    (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
423
424
425
                              pd->dst.buf, pd->dst.stride, &sse);
    if (i == 0)
      x->pred_sse[ref] = sse;
Deb Mukherjee's avatar
Deb Mukherjee committed
426

Jim Bankoski's avatar
Jim Bankoski committed
427
    dist_sum += (int)sse;
Deb Mukherjee's avatar
Deb Mukherjee committed
428
429
430
431
432
433
  }

  *out_rate_sum = rate_sum;
  *out_dist_sum = dist_sum << 4;
}

434
static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize,
Deb Mukherjee's avatar
Deb Mukherjee committed
435
436
437
438
                                 TX_SIZE tx_size,
                                 MACROBLOCK *x, MACROBLOCKD *xd,
                                 int *out_rate_sum, int64_t *out_dist_sum,
                                 int *out_skip) {
439
  int j, k;
440
  BLOCK_SIZE bs;
Deb Mukherjee's avatar
Deb Mukherjee committed
441
442
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
443
444
  const int width = 4 * num_4x4_blocks_wide_lookup[bsize];
  const int height = 4 * num_4x4_blocks_high_lookup[bsize];
Deb Mukherjee's avatar
Deb Mukherjee committed
445
446
  int rate_sum = 0;
  int64_t dist_sum = 0;
447
  const int t = 4 << tx_size;
Deb Mukherjee's avatar
Deb Mukherjee committed
448
449
450
451
452
453
454
455
456
457
458
459

  if (tx_size == TX_4X4) {
    bs = BLOCK_4X4;
  } else if (tx_size == TX_8X8) {
    bs = BLOCK_8X8;
  } else if (tx_size == TX_16X16) {
    bs = BLOCK_16X16;
  } else if (tx_size == TX_32X32) {
    bs = BLOCK_32X32;
  } else {
    assert(0);
  }
460

Deb Mukherjee's avatar
Deb Mukherjee committed
461
  *out_skip = 1;
Jim Bankoski's avatar
Jim Bankoski committed
462
463
  for (j = 0; j < height; j += t) {
    for (k = 0; k < width; k += t) {
Deb Mukherjee's avatar
Deb Mukherjee committed
464
465
466
      int rate;
      int64_t dist;
      unsigned int sse;
467
468
469
      cpi->fn_ptr[bs].vf(&p->src.buf[j * p->src.stride + k], p->src.stride,
                         &pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride,
                         &sse);
Deb Mukherjee's avatar
Deb Mukherjee committed
470
      // sse works better than var, since there is no dc prediction used
471
      model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3, &rate, &dist);
Deb Mukherjee's avatar
Deb Mukherjee committed
472
473
474
475
476
      rate_sum += rate;
      dist_sum += dist;
      *out_skip &= (rate < 1024);
    }
  }
477

Deb Mukherjee's avatar
Deb Mukherjee committed
478
  *out_rate_sum = rate_sum;
479
  *out_dist_sum = dist_sum << 4;
Deb Mukherjee's avatar
Deb Mukherjee committed
480
481
}

Ronald S. Bultje's avatar
Ronald S. Bultje committed
482
// Returns the sum of squared differences between coeff[] and dqcoeff[] over
// block_size entries, and stores the sum of squared coeff[] values in *ssz.
int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int64_t sum_sq_diff = 0;
  int64_t sum_sq_coeff = 0;
  int idx;

  for (idx = 0; idx < block_size; idx++) {
    const int orig = coeff[idx];
    const int diff = orig - dqcoeff[idx];

    // Squares are formed in unsigned arithmetic: 65535^2 does not fit in a
    // signed 32-bit int but does fit in an unsigned one.
    sum_sq_diff += (unsigned)diff * diff;
    sum_sq_coeff += (unsigned)orig * orig;
  }

  *ssz = sum_sq_coeff;
  return sum_sq_diff;
}

497
498
499
500
501
/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). */
502
// Number of coefficients in each band, one row per transform size (16, 64,
// 256 and 1024 coefficients respectively). Each row ends with a 0
// terminator; the unlisted eighth element is zero-initialized as well.
static const int16_t band_counts[TX_SIZES][8] = {
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
};

509
static INLINE int cost_coeffs(MACROBLOCK *x,
510
                              int plane, int block,
511
                              ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
512
                              TX_SIZE tx_size,
513
                              const int16_t *scan, const int16_t *nb) {
514
  MACROBLOCKD *const xd = &x->e_mbd;
515
  MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
516
  struct macroblock_plane *p = &x->plane[plane];
517
518
  struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
519
  const int16_t *band_count = &band_counts[tx_size][1];
520
  const int eob = p->eobs[block];
521
  const int16_t *const qcoeff_ptr = BLOCK_OFFSET(p->qcoeff, block);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
522
  const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
523
  unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
524
                   x->token_costs[tx_size][type][ref];
525
  const ENTROPY_CONTEXT above_ec = !!*A, left_ec = !!*L;
526
  uint8_t *p_tok = x->token_cache;
527
528
  int pt = combine_entropy_contexts(above_ec, left_ec);
  int c, cost;
529
530

  // Check for consistency of tx_size with mode info
531
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
532
                                      : get_uv_tx_size(mbmi) == tx_size);
533

534
535
  if (eob == 0) {
    // single eob token
536
    cost = token_costs[0][0][pt][EOB_TOKEN];
537
    c = 0;
538
  } else {
539
    int band_left = *band_count++;
540
541

    // dc token
542
543
    int v = qcoeff_ptr[0];
    int prev_t = vp9_dct_value_tokens_ptr[v].token;
544
    cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
545
    p_tok[0] = vp9_pt_energy_class[prev_t];
546
    ++token_costs;
547
548
549
550

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
551
      int t;
552
553
554

      v = qcoeff_ptr[rc];
      t = vp9_dct_value_tokens_ptr[v].token;
555
      pt = get_coef_context(nb, p_tok, c);
556
      cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
557
      p_tok[rc] = vp9_pt_energy_class[t];
558
      prev_t = t;
559
      if (!--band_left) {
560
561
        band_left = *band_count++;
        ++token_costs;
562
      }
563
    }
564
565

    // eob token
566
    if (band_left) {
567
      pt = get_coef_context(nb, p_tok, c);
568
      cost += (*token_costs)[0][pt][EOB_TOKEN];
569
    }
570
571
  }

572
  // is eob first coefficient;
573
  *A = *L = (c > 0);
574

575
576
577
  return cost;
}

578
// Computes the distortion (args->dist) and the energy of the original
// coefficients (args->sse) for one transform block from its coefficient and
// dequantized-coefficient buffers. Both are right-shifted by 2 except for
// 32x32 transforms. When x->skip_encode is set on a non-inter block, a
// penalty derived from the squared AC dequantizer is added to both.
static void dist_block(int plane, int block, TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *const args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ss_txfrm_size = tx_size << 1;
  int shift = args->tx_size == TX_32X32 ? 0 : 2;
  int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  int64_t block_sse;

  // 16 << ss_txfrm_size is the number of coefficients in the transform.
  args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                               &block_sse) >> shift;
  args->sse  = block_sse >> shift;

  if (x->skip_encode && !is_inter_block(&xd->mi_8x8[0]->mbmi)) {
    // TODO(jingning): tune the model to better capture the distortion.
    const int64_t penalty = (pd->dequant[1] * pd->dequant[1] *
                                (1 << ss_txfrm_size)) >> (shift + 2);
    args->dist += (penalty >> 4);
    args->sse  += penalty;
  }
}

602
static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
603
                       TX_SIZE tx_size, void *arg) {
Deb Mukherjee's avatar
Deb Mukherjee committed
604
  struct rdcost_block_args* args = arg;
605

Deb Mukherjee's avatar
Deb Mukherjee committed
606
  int x_idx, y_idx;
607
  txfrm_block_to_raster_xy(plane_bsize, args->tx_size, block, &x_idx, &y_idx);
Deb Mukherjee's avatar
Deb Mukherjee committed
608

609
610
611
  args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
                           args->t_left + y_idx, args->tx_size,
                           args->scan, args->nb);
Deb Mukherjee's avatar
Deb Mukherjee committed
612
613
}

614
615
// Per-transform-block callback: encodes the block (intra reconstruction or
// inter transform + quantization), measures its rate and distortion, and
// accumulates them into the running totals in `arg`. Sets args->skip once
// the running RD cost exceeds args->best_rd; later calls then return
// immediately.
static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
                          TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *const args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
  struct encode_b_args encode_args = {x, NULL, &mbmi->skip_coeff};
  int64_t rd_coded, rd_zeroed, rd;

  if (args->skip)
    return;

  if (is_inter_block(mbmi))
    vp9_xform_quant(plane, block, plane_bsize, tx_size, &encode_args);
  else
    vp9_encode_block_intra(plane, block, plane_bsize, tx_size, &encode_args);

  dist_block(plane, block, tx_size, args);
  rate_block(plane, block, plane_bsize, tx_size, args);

  // Cost of coding the coefficients versus spending no rate and taking the
  // full sse as distortion.
  rd_coded = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
  rd_zeroed = RDCOST(x->rdmult, x->rddiv, 0, args->sse);
  rd = MIN(rd_coded, rd_zeroed);

  // TODO(jingning): temporarily enabled only for luma component
  if (plane == 0) {
    x->zcoeff_blk[tx_size][block] =
        !x->plane[plane].eobs[block] ||
        (rd_coded > rd_zeroed && !xd->lossless);
  }

  args->this_rate += args->rate;
  args->this_dist += args->dist;
  args->this_sse  += args->sse;
  args->this_rd   += rd;

  if (args->this_rd > args->best_rd)
    args->skip = 1;
}

654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
// Returns 1 when any of the `count` entries starting at `ctx` is non-zero.
static int any_nonzero_entropy_context(const ENTROPY_CONTEXT *ctx,
                                       int count) {
  int i;
  for (i = 0; i < count; ++i) {
    if (ctx[i])
      return 1;
  }
  return 0;
}

// Derives per-transform entropy contexts from the per-4x4 `above` / `left`
// contexts. For TX_4X4 the contexts are copied unchanged. For larger
// transforms, the entry at the first 4x4 position of each transform is set
// to 1 if any of the 4x4 contexts the transform covers is non-zero, else 0;
// the other entries of t_above / t_left are not written.
//
// The covered entries are inspected one by one instead of through a
// uint16_t/uint32_t/uint64_t pointer cast, which violated strict aliasing
// and could perform a misaligned load.
void vp9_get_entropy_contexts(TX_SIZE tx_size,
    ENTROPY_CONTEXT t_above[16], ENTROPY_CONTEXT t_left[16],
    const ENTROPY_CONTEXT *above, const ENTROPY_CONTEXT *left,
    int num_4x4_w, int num_4x4_h) {
  int i;
  int step;  // number of 4x4 units covered by one transform edge

  switch (tx_size) {
    case TX_4X4:
      vpx_memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      vpx_memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      return;
    case TX_8X8:
      step = 2;
      break;
    case TX_16X16:
      step = 4;
      break;
    case TX_32X32:
      step = 8;
      break;
    default:
      assert(0 && "Invalid transform size.");
      return;
  }

  for (i = 0; i < num_4x4_w; i += step)
    t_above[i] = any_nonzero_entropy_context(&above[i], step);
  for (i = 0; i < num_4x4_h; i += step)
    t_left[i] = any_nonzero_entropy_context(&left[i], step);
}

687
688
689
690
691
692
693
694
695
696
697
698
// Clears |arg| and then records the macroblock, the transform size, the
// block dimensions (num_4x4_w x num_4x4_h) and the reference rd cost
// (stored as best_rd). Every other field of |arg| is left zeroed.
static void init_rdcost_stack(MACROBLOCK *x, TX_SIZE tx_size,
                              const int num_4x4_w, const int num_4x4_h,
                              const int64_t ref_rdcost,
                              struct rdcost_block_args *arg) {
  vpx_memset(arg, 0, sizeof(*arg));

  arg->best_rd = ref_rdcost;
  arg->bh = num_4x4_h;
  arg->bw = num_4x4_w;
  arg->tx_size = tx_size;
  arg->x = x;
}

699
static void txfm_rd_in_plane(MACROBLOCK *x,
700
                             struct rdcost_block_args *rd_stack,
701
702
703
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
704
                             BLOCK_SIZE bsize, TX_SIZE tx_size) {
Deb Mukherjee's avatar
Deb Mukherjee committed
705
  MACROBLOCKD *const xd = &x->e_mbd;
706
  struct macroblockd_plane *const pd = &xd->plane[plane];
707
  const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
708
709
  const int num_4x4_w = num_4x4_blocks_wide_lookup[bs];
  const int num_4x4_h = num_4x4_blocks_high_lookup[bs];
710
  const scan_order *so;
711

712
713
  init_rdcost_stack(x, tx_size, num_4x4_w, num_4x4_h,
                    ref_best_rd, rd_stack);
714
  if (plane == 0)
715
    xd->mi_8x8[0]->mbmi.tx_size = tx_size;
716

717
  vp9_get_entropy_contexts(tx_size, rd_stack->t_above, rd_stack->t_left,
718
719
                           pd->above_context, pd->left_context,
                           num_4x4_w, num_4x4_h);
720

721
722
723
  so = get_scan(xd, tx_size, pd->plane_type, 0);
  rd_stack->scan = so->scan;
  rd_stack->nb = so->neighbors;
Deb Mukherjee's avatar
Deb Mukherjee committed
724

725
  foreach_transformed_block_in_plane(xd, bsize, plane,
726
                                     block_rd_txfm, rd_stack);
727
  if (rd_stack->skip) {
728
729
730
731
732
    *rate       = INT_MAX;
    *distortion = INT64_MAX;
    *sse        = INT64_MAX;
    *skippable  = 0;
  } else {
733
734
735
    *distortion = rd_stack->this_dist;
    *rate       = rd_stack->this_rate;
    *sse        = rd_stack->this_sse;
736
    *skippable  = vp9_is_skippable_in_plane(x, bsize, plane);
737
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
738
739
740
741
742
}

static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
                                     int *rate, int64_t *distortion,
                                     int *skip, int64_t *sse,
743
                                     int64_t ref_best_rd,
744
                                     BLOCK_SIZE bs) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
745
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
Deb Mukherjee's avatar
Deb Mukherjee committed
746
  VP9_COMMON *const cm = &cpi->common;
Yaowu Xu's avatar
Yaowu Xu committed
747
  const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
Deb Mukherjee's avatar
Deb Mukherjee committed
748
  MACROBLOCKD *const xd = &x->e_mbd;
749
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
Yaowu Xu's avatar
Yaowu Xu committed
750
751
752

  mbmi->tx_size = MIN(max_tx_size, largest_tx_size);

753
  txfm_rd_in_plane(x, &cpi->rdcost_stack, rate, distortion, skip,
754
755
                   &sse[mbmi->tx_size], ref_best_rd, 0, bs,
                   mbmi->tx_size);
Dmitry Kovalev's avatar
Dmitry Kovalev committed
756
  cpi->tx_stepdown_count[0]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
757
758
}

759
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
760
                                     int (*r)[2], int *rate,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
761
                                     int64_t *d, int64_t *distortion,
762
                                     int *s, int *skip,
763
                                     int64_t tx_cache[TX_MODES],
764
                                     BLOCK_SIZE bs) {
765
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
766
767
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
768
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
769
  vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
770
  int64_t rd[TX_SIZES][2];
771
  int n, m;
772
  int s0, s1;
773
774
775
  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  int64_t best_rd = INT64_MAX;
  TX_SIZE best_tx = TX_4X4;
776

777
  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
778
779
780
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
781

782
  for (n = TX_4X4; n <= max_tx_size; n++) {
783
784
785
786
787
788
789
790
791
    r[n][1] = r[n][0];
    if (r[n][0] < INT_MAX) {
      for (m = 0; m <= n - (n == max_tx_size); m++) {
        if (m == n)
          r[n][1] += vp9_cost_zero(tx_probs[m]);
        else
          r[n][1] += vp9_cost_one(tx_probs[m]);
      }
    }
792
793
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
794
    } else if (s[n]) {
795
796
797
798
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
799
    }
800
801
802
803
804

    if (rd[n][1] < best_rd) {
      best_tx = n;
      best_rd = rd[n][1];
    }
805
  }
806
807
  mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
                      best_tx : MIN(max_tx_size, max_mode_tx_size);
808
809


810
811
812
  *distortion = d[mbmi->tx_size];
  *rate       = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->tx_size];
813

814
815
816
817
  tx_cache[ONLY_4X4] = rd[TX_4X4][0];
  tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
  tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
  tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
818

819
820
  if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
    tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
821
    cpi->tx_stepdown_count[0]++;
822
823
  } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
    tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
824
    cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
825
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
826
    tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
827
    cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
828
  } else {
829
    tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
830
    cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
831
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
832
}
833

Deb Mukherjee's avatar
Deb Mukherjee committed
834
835
836
837
static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
                                          int (*r)[2], int *rate,
                                          int64_t *d, int64_t *distortion,
                                          int *s, int *skip, int64_t *sse,
838
                                          int64_t ref_best_rd,
839
                                          BLOCK_SIZE bs) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
840
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
Deb Mukherjee's avatar
Deb Mukherjee committed
841
842
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
843
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
844
  vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
845
  int64_t rd[TX_SIZES][2];
Deb Mukherjee's avatar
Deb Mukherjee committed
846
847
  int n, m;
  int s0, s1;
848
  double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00};
849
850
851
  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  int64_t best_rd = INT64_MAX;
  TX_SIZE best_tx = TX_4X4;
852

853
  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
854
855
856
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
857

Dmitry Kovalev's avatar
Dmitry Kovalev committed
858
  for (n = TX_4X4; n <= max_tx_size; n++) {
859
    double scale = scale_rd[n];
Deb Mukherjee's avatar
Deb Mukherjee committed
860
    r[n][1] = r[n][0];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
861
    for (m = 0; m <= n - (n == max_tx_size); m++) {
Deb Mukherjee's avatar
Deb Mukherjee committed
862
863
864
865
866
867
      if (m == n)
        r[n][1] += vp9_cost_zero(tx_probs[m]);
      else
        r[n][1] += vp9_cost_one(tx_probs[m]);
    }
    if (s[n]) {
868
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]) * scale;
Deb Mukherjee's avatar
Deb Mukherjee committed
869
    } else {
870
871
872
873
874
875
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]) * scale;
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]) * scale;
    }
    if (rd[n][1] < best_rd) {
      best_rd = rd[n][1];
      best_tx = n;
Deb Mukherjee's avatar
Deb Mukherjee committed
876
877
    }
  }
878

879
880
  mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
                      best_tx : MIN(max_tx_size, max_mode_tx_size);
881

882
883
  // Actually encode using the chosen mode if a model was used, but do not
  // update the r, d costs
884
885
  txfm_rd_in_plane(x, &cpi->rdcost_stack, rate, distortion, skip,
                   &sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size);
886

887
  if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
888
    cpi->tx_stepdown_count[0]++;
889
  } else if (max_tx_size >= TX_16X16 &&  best_tx == TX_16X16) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
890
    cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
891
  } else if (rd[TX_8X8][1] <= rd[TX_4X4][1]) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
892
    cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
893
  } else {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
894
    cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
895
  }
896
897
}

898
static void super_block_yrd(VP9_COMP *cpi,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
899
                            MACROBLOCK *x, int *rate, int64_t *distortion,
900
                            int *skip, int64_t *psse, BLOCK_SIZE bs,
901
                            int64_t txfm_cache[TX_MODES],
902
                            int64_t ref_best_rd) {
903
904
  int r[TX_SIZES][2], s[TX_SIZES];
  int64_t d[TX_SIZES], sse[TX_SIZES];
Jim Bankoski's avatar
Jim Bankoski committed
905
  MACROBLOCKD *xd = &x->e_mbd;
906
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
907
  struct rdcost_block_args *rdcost_stack = &cpi->rdcost_stack;
908
  const int b_inter_mode = is_inter_block(mbmi);
Yaowu Xu's avatar
Yaowu Xu committed
909
910
911
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  TX_SIZE tx_size