/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
15

16
#include "vp9/common/vp9_pragmas.h"
17
18
19
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
20
21
22
23
24
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
25
26
27
28
29
30
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
31
#include "vpx_mem/vpx_mem.h"
32
33
34
35
36
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
37
#include "./vp9_rtcd.h"
38
#include "vp9/common/vp9_mvref_common.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
39
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
40

41
42
#define INVALID_MV 0x80008000

43
44
45
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

46
47
48
#define LAST_FRAME_MODE_MASK    0xFFEDCD60
#define GOLDEN_FRAME_MODE_MASK  0xFFDA3BB0
#define ALT_REF_MODE_MASK       0xFFC648D0
49

Paul Wilkins's avatar
Paul Wilkins committed
50
51
#define MIN_EARLY_TERM_INDEX    3

52
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},

  {DC_PRED,   INTRA_FRAME,  NONE},

  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},

  {NEARMV,    LAST_FRAME,   NONE},
  {NEARMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, LAST_FRAME,   ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},

  {TM_PRED,   INTRA_FRAME,  NONE},

  {NEARMV,    LAST_FRAME,   ALTREF_FRAME},
  {NEWMV,     LAST_FRAME,   ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},

  {ZEROMV,    LAST_FRAME,   NONE},
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {ZEROMV,    ALTREF_FRAME, NONE},
  {ZEROMV,    LAST_FRAME,   ALTREF_FRAME},
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},

  {H_PRED,    INTRA_FRAME,  NONE},
  {V_PRED,    INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D207_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
};

// Table of MAX_REFS reference-frame combinations, each written as
// {first reference, second reference}.  NONE as the second reference marks
// a single-reference entry; the intra entry comes last.
// NOTE(review): presumably the index matches cpi->sf.thresh_mult_sub8x8[] -
// confirm against the sub-8x8 search loop.
const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
  // Single reference.
  {LAST_FRAME,   NONE},
  {GOLDEN_FRAME, NONE},
  {ALTREF_FRAME, NONE},
  // Two references.
  {LAST_FRAME,   ALTREF_FRAME},
  {GOLDEN_FRAME, ALTREF_FRAME},
  // Intra.
  {INTRA_FRAME,  NONE},
};

101
102
103
104
// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for blocks size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
105
static int rd_thresh_block_size_factor[BLOCK_SIZES] =
106
107
  {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};

Deb Mukherjee's avatar
Deb Mukherjee committed
108
109
110
#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC      1
#define RD_THRESH_POW      1.25
111
#define RD_MULT_EPB_RATIO  64
Deb Mukherjee's avatar
Deb Mukherjee committed
112

113
114
#define MV_COST_WEIGHT      108
#define MV_COST_WEIGHT_SUB  120
115

116
117
118
119
120
121
122
123
124
125
126
127
128
// Converts a raster-order block index inside a plane block into a sample
// offset.  The index is split into a row and a column using the block width
// (taken as a log2 from b_width_log2()); each step is 4 samples, and rows are
// `stride` samples apart.
static int raster_block_offset(BLOCK_SIZE plane_bsize,
                               int raster_block, int stride) {
  const int width_log2 = b_width_log2(plane_bsize);
  const int col = raster_block & ((1 << width_log2) - 1);
  const int row = raster_block >> width_log2;
  return (4 * row) * stride + (4 * col);
}
// Returns a pointer into the int16_t buffer `base` for raster-order block
// `raster_block`.  The buffer is addressed with a row stride of
// 4 << b_width_log2(plane_bsize) elements.
static int16_t* raster_block_offset_int16(BLOCK_SIZE plane_bsize,
                                          int raster_block, int16_t *base) {
  const int row_stride = 4 << b_width_log2(plane_bsize);
  const int offset = raster_block_offset(plane_bsize, raster_block,
                                         row_stride);
  return &base[offset];
}

129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
// Converts mode probabilities into cost tables stored in c->mb:
//  - y_mode_costs[][]       from the key-frame table vp9_kf_y_mode_prob,
//  - mbmode_cost            from the frame context y_mode_prob[1],
//  - intra_uv_mode_cost[1]  from the frame context uv_mode_prob,
//  - intra_uv_mode_cost[0]  from the key-frame table vp9_kf_uv_mode_prob,
//  - switchable_interp_costs[] from the frame context, one per filter context.
static void fill_mode_costs(VP9_COMP *c) {
  VP9_COMMON *const cm = &c->common;
  int row, col, ctx;

  for (row = 0; row < INTRA_MODES; ++row) {
    for (col = 0; col < INTRA_MODES; ++col) {
      vp9_cost_tokens((int *)c->mb.y_mode_costs[row][col],
                      vp9_kf_y_mode_prob[row][col],
                      vp9_intra_mode_tree);
    }
  }

  // TODO(rbultje) separate tables for superblock costing?
  vp9_cost_tokens(c->mb.mbmode_cost,
                  cm->fc.y_mode_prob[1],
                  vp9_intra_mode_tree);
  vp9_cost_tokens(c->mb.intra_uv_mode_cost[1],
                  cm->fc.uv_mode_prob[INTRA_MODES - 1],
                  vp9_intra_mode_tree);
  vp9_cost_tokens(c->mb.intra_uv_mode_cost[0],
                  vp9_kf_uv_mode_prob[INTRA_MODES - 1],
                  vp9_intra_mode_tree);

  for (ctx = 0; ctx < SWITCHABLE_FILTER_CONTEXTS; ++ctx) {
    vp9_cost_tokens((int *)c->mb.switchable_interp_costs[ctx],
                    cm->fc.switchable_interp_prob[ctx],
                    vp9_switchable_interp_tree);
  }
}

153
static void fill_token_costs(vp9_coeff_cost *c,
154
                             vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
155
  int i, j, k, l;
156
157
158
159
160
161
162
163
  TX_SIZE t;
  for (t = TX_4X4; t <= TX_32X32; t++)
    for (i = 0; i < BLOCK_TYPES; i++)
      for (j = 0; j < REF_TYPES; j++)
        for (k = 0; k < COEF_BANDS; k++)
          for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
            vp9_prob probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
164
            vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
165
                            vp9_coef_tree);
166
            vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
167
                                 vp9_coef_tree);
168
169
            assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
                   c[t][i][j][k][1][l][EOB_TOKEN]);
170
          }
171
172
}

173
174
175
176
177
178
// Boost factors applied to rdmult in vp9_compute_rd_mult(), indexed by
// cpi->twopass.next_iiratio (values above 31 use entry 31).  Each factor is
// in units of 1/16 of rdmult.  Only the first five entries are non-zero; the
// remaining entries of the 32-element table are zero-initialised.
static const int rd_iifactor[32] = { 4, 4, 3, 2, 1 };
John Koleszar's avatar
John Koleszar committed
179

180
// 3* dc_qlookup[Q]*dc_qlookup[Q];
181

182
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
183
184
185
// SAD-per-bit lookup tables with one entry per quantizer index.  They are
// filled by vp9_init_me_luts() and read by vp9_initialize_me_consts().
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

// Fills both SAD-per-bit tables for every quantizer index.
//
// The tables are initialised with a formulaic calculation (a linear function
// of the real quantizer value, truncated to int) for now.  This is to make it
// easier to resolve the impact of experimental changes to the quantizer
// tables.
void vp9_init_me_luts(void) {
  int qindex;

  for (qindex = 0; qindex < QINDEX_RANGE; ++qindex) {
    // Both formulas use the same quantizer value, so convert it once.
    const double q = vp9_convert_qindex_to_q(qindex);

    sad_per_bit16lut[qindex] = (int)((0.0418 * q) + 2.4107);
    sad_per_bit4lut[qindex] = (int)(0.063 * q + 2.742);
  }
}
John Koleszar's avatar
John Koleszar committed
198

199
// Computes the rate-distortion multiplier for quantizer index `qindex`.
//
// The base value is 88 * q * q / 25, where q is vp9_dc_quant(qindex, 0).
// In the second pass of two-pass encoding, on non-key frames, the value is
// increased by rdmult * rd_iifactor[n] / 16, where n is
// cpi->twopass.next_iiratio limited to 31.
int vp9_compute_rd_mult(VP9_COMP *cpi, int qindex) {
  const int dc_q = vp9_dc_quant(qindex, 0);
  // TODO(debargha): Adjust the function below
  int rdmult = 88 * dc_q * dc_q / 25;
  int boost_idx;

  if (cpi->pass != 2 || cpi->common.frame_type == KEY_FRAME)
    return rdmult;

  boost_idx = (cpi->twopass.next_iiratio > 31) ? 31
                                               : cpi->twopass.next_iiratio;
  rdmult += (rdmult * rd_iifactor[boost_idx]) >> 4;
  return rdmult;
}

// Returns the quantizer-dependent scale used for the RD thresholds:
// (dc_quant / 4) ^ RD_THRESH_POW * 5.12, truncated to int and never less
// than 8.
static int compute_rd_thresh_factor(int qindex) {
  // TODO(debargha): Adjust the function below
  const int factor =
      (int)(pow(vp9_dc_quant(qindex, 0) / 4.0, RD_THRESH_POW) * 5.12);
  return (factor < 8) ? 8 : factor;
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
221
222
223
// Loads the motion-estimation SAD-per-bit constants for quantizer index
// `qindex` from the tables built by vp9_init_me_luts() into cpi->mb.
// `qindex` is used directly as the table index and is not range-checked here.
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  const int per_bit16 = sad_per_bit16lut[qindex];
  const int per_bit4 = sad_per_bit4lut[qindex];

  cpi->mb.sadperbit16 = per_bit16;
  cpi->mb.sadperbit4 = per_bit4;
}

226
227
228
229
230
231
232
233
234
// Fills cpi->rd_threshes and cpi->rd_thresh_sub8x8 for every segment and
// block size.
//
// Each threshold is thresh_mult * q * block_size_factor / 4, where q comes
// from compute_rd_thresh_factor() for the segment's clamped quantizer index.
// A multiplier large enough to overflow that product is stored as INT_MAX
// instead.
static void set_block_thresholds(VP9_COMP *cpi) {
  VP9_COMMON *cm = &cpi->common;
  int seg, bs, idx;

  for (seg = 0; seg < MAX_SEGMENTS; ++seg) {
    const int seg_qindex =
        clamp(vp9_get_qindex(&cm->seg, seg, cm->base_qindex) +
                  cm->y_dc_delta_q,
              0, MAXQ);
    const int q = compute_rd_thresh_factor(seg_qindex);

    for (bs = 0; bs < BLOCK_SIZES; ++bs) {
      // The threshold here seems unnecessarily harsh but is fine given the
      // actual range of values used for cpi->sf.thresh_mult[].
      const int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bs]);

      for (idx = 0; idx < MAX_MODES; ++idx) {
        cpi->rd_threshes[seg][bs][idx] =
            (cpi->sf.thresh_mult[idx] < thresh_max)
                ? cpi->sf.thresh_mult[idx] * q *
                      rd_thresh_block_size_factor[bs] / 4
                : INT_MAX;
      }

      for (idx = 0; idx < MAX_REFS; ++idx) {
        cpi->rd_thresh_sub8x8[seg][bs][idx] =
            (cpi->sf.thresh_mult_sub8x8[idx] < thresh_max)
                ? cpi->sf.thresh_mult_sub8x8[idx] * q *
                      rd_thresh_block_size_factor[bs] / 4
                : INT_MAX;
      }
    }
  }
}

264
265
266
// Sets up the per-frame rate-distortion constants and cost tables in `cpi`.
//
// In order: RDDIV / RDMULT and the derived errorperbit (forced to at least
// 1), speed features, the transform-size selection flag, the RD thresholds,
// then the token, partition and mode cost tables.  For frames that are not
// intra-only, the motion vector and inter-mode cost tables are built as well.
void vp9_initialize_rd_consts(VP9_COMP *cpi) {
  VP9_COMMON *cm = &cpi->common;
  int qindex, i;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // (Previously considered: cpi->common.refresh_golden_frame ||
  //  cpi->common.refresh_alt_ref_frame.)
  qindex = clamp(cm->base_qindex + cm->y_dc_delta_q, 0, MAXQ);

  cpi->RDDIV = RDDIV_BITS;  // in bits (to multiply D by 128)
  cpi->RDMULT = vp9_compute_rd_mult(cpi, qindex);

  // errorperbit must never be zero.
  cpi->mb.errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO;
  if (cpi->mb.errorperbit == 0)
    cpi->mb.errorperbit = 1;

  vp9_set_speed_features(cpi);

  // Transform size selection is disabled only when the largest size is
  // always used and this is not a key frame.
  cpi->mb.select_txfm_size =
      !(cpi->sf.tx_size_search_method == USE_LARGESTALL &&
        cm->frame_type != KEY_FRAME);

  set_block_thresholds(cpi);

  fill_token_costs(cpi->mb.token_costs, cm->fc.coef_probs);

  for (i = 0; i < PARTITION_CONTEXTS; ++i) {
    vp9_cost_tokens(cpi->mb.partition_cost[i], get_partition_probs(cm, i),
                    vp9_partition_tree);
  }

  /* rough estimate for costing */
  fill_mode_costs(cpi);

  // The remaining tables are only built for frames that can use inter
  // prediction.
  if (frame_is_intra_only(cm))
    return;

  vp9_build_nmv_cost_table(
      cpi->mb.nmvjointcost,
      cm->allow_high_precision_mv ? cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
      &cm->fc.nmvc,
      cm->allow_high_precision_mv, 1, 1);

  for (i = 0; i < INTER_MODE_CONTEXTS; ++i) {
    vp9_cost_tokens((int *)cpi->mb.inter_mode_cost[i],
                    cm->fc.inter_mode_probs[i], vp9_inter_mode_tree);
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
312
// Linearly interpolates two tables at the same position.
//
// The position is x * inv_step; its integer part selects the table entry and
// its fractional part is the blend weight towards the next entry.  Positions
// at or beyond the last entry return the last entry of each table.
// `tab1` and `tab2` must both hold `ntab` entries; results go to *v1 and *v2.
static INLINE void linear_interpolate2(double x, int ntab, int inv_step,
                                       const double *tab1, const double *tab2,
                                       double *v1, double *v2) {
  const double pos = x * inv_step;
  const int idx = (int)pos;

  if (idx < ntab - 1) {
    const double frac = pos - idx;
    *v1 = tab1[idx] * (1 - frac) + tab1[idx + 1] * frac;
    *v2 = tab2[idx] * (1 - frac) + tab2[idx + 1] * frac;
  } else {
    *v1 = tab1[ntab - 1];
    *v2 = tab2[ntab - 1];
  }
}

327
328
329
330
331
// Looks up the normalized rate *R and normalized distortion *D for the
// non-negative ratio x = qpstep / sqrt(variance), by linear interpolation in
// two tables sampled at steps of 1/8.
static void model_rd_norm(double x, double *R, double *D) {
  static const int inv_tab_step = 8;
  static const int tab_size = 120;
  // NOTE: rate_tab and dist_tab must be the same size, and that size must be
  // tab_size entries.

  // Normalized rate.
  // This table models the rate for a Laplacian source with given variance
  // when quantized with a uniform quantizer with given stepsize.  The closed
  // form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const double rate_tab[] = {
    64.00, 4.944, 3.949, 3.372, 2.966, 2.655, 2.403, 2.194,
    2.014, 1.858, 1.720, 1.596, 1.485, 1.384, 1.291, 1.206,
    1.127, 1.054, 0.986, 0.923, 0.863, 0.808, 0.756, 0.708,
    0.662, 0.619, 0.579, 0.541, 0.506, 0.473, 0.442, 0.412,
    0.385, 0.359, 0.335, 0.313, 0.291, 0.272, 0.253, 0.236,
    0.220, 0.204, 0.190, 0.177, 0.165, 0.153, 0.142, 0.132,
    0.123, 0.114, 0.106, 0.099, 0.091, 0.085, 0.079, 0.073,
    0.068, 0.063, 0.058, 0.054, 0.050, 0.047, 0.043, 0.040,
    0.037, 0.034, 0.032, 0.029, 0.027, 0.025, 0.023, 0.022,
    0.020, 0.019, 0.017, 0.016, 0.015, 0.014, 0.013, 0.012,
    0.011, 0.010, 0.009, 0.008, 0.008, 0.007, 0.007, 0.006,
    0.006, 0.005, 0.005, 0.005, 0.004, 0.004, 0.004, 0.003,
    0.003, 0.003, 0.003, 0.002, 0.002, 0.002, 0.002, 0.002,
    0.002, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001,
    0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.000,
  };

  // Normalized distortion.
  // This table models the normalized distortion for a Laplacian source with
  // given variance when quantized with a uniform quantizer with given
  // stepsize.  The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance)
  // Note the actual distortion is Dn * variance.
  static const double dist_tab[] = {
    0.000, 0.001, 0.005, 0.012, 0.021, 0.032, 0.045, 0.061,
    0.079, 0.098, 0.119, 0.142, 0.166, 0.190, 0.216, 0.242,
    0.269, 0.296, 0.324, 0.351, 0.378, 0.405, 0.432, 0.458,
    0.484, 0.509, 0.534, 0.557, 0.580, 0.603, 0.624, 0.645,
    0.664, 0.683, 0.702, 0.719, 0.735, 0.751, 0.766, 0.780,
    0.794, 0.807, 0.819, 0.830, 0.841, 0.851, 0.861, 0.870,
    0.878, 0.886, 0.894, 0.901, 0.907, 0.913, 0.919, 0.925,
    0.930, 0.935, 0.939, 0.943, 0.947, 0.951, 0.954, 0.957,
    0.960, 0.963, 0.966, 0.968, 0.971, 0.973, 0.975, 0.976,
    0.978, 0.980, 0.981, 0.982, 0.984, 0.985, 0.986, 0.987,
    0.988, 0.989, 0.990, 0.990, 0.991, 0.992, 0.992, 0.993,
    0.993, 0.994, 0.994, 0.995, 0.995, 0.996, 0.996, 0.996,
    0.996, 0.997, 0.997, 0.997, 0.997, 0.998, 0.998, 0.998,
    0.998, 0.998, 0.998, 0.999, 0.999, 0.999, 0.999, 0.999,
    0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 1.000,
  };

  // Size invariants (kept as documentation, not compiled):
  //   sizeof(rate_tab) == tab_size * sizeof(rate_tab[0])
  //   sizeof(dist_tab) == tab_size * sizeof(dist_tab[0])
  //   sizeof(rate_tab) == sizeof(dist_tab)
  assert(x >= 0.0);

  linear_interpolate2(x, tab_size, inv_tab_step, rate_tab, dist_tab, R, D);
}

// Models the rate and distortion for a Laplacian source with given variance
// when quantized with a uniform quantizer with given stepsize.  The closed
// form expressions are in:
// Hang and Chen, "Source Model for transform video coder and its
// application - Part I: Fundamental Theory", IEEE Trans. Circ.
// Sys. for Video Tech., April 1997.
//
// `var` is the summed error over `n` samples and `qstep` the quantizer step.
// *rate receives n * 256 * R and *dist receives var * D (both rounded), where
// R and D are the normalized values from model_rd_norm().  Both outputs are
// zero when `var` or `n` is zero.
static void model_rd_from_var_lapndz(int var, int n, int qstep,
                                     int *rate, int64_t *dist) {
  vp9_clear_system_state();
  if (var != 0 && n != 0) {
    const double per_sample_var = (double)var / n;
    const double norm_step = qstep / sqrt(per_sample_var);
    double norm_rate, norm_dist;

    model_rd_norm(norm_step, &norm_rate, &norm_dist);
    *rate = (int)((n << 8) * norm_rate + 0.5);
    *dist = (int)(var * norm_dist + 0.5);
  } else {
    *rate = 0;
    *dist = 0;
  }
  vp9_clear_system_state();
}

413
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
Deb Mukherjee's avatar
Deb Mukherjee committed
414
415
416
417
418
419
420
421
422
423
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  int i, rate_sum = 0, dist_sum = 0;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
424
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
Deb Mukherjee's avatar
Deb Mukherjee committed
425
426
427
428
429
430
    unsigned int sse;
    int rate;
    int64_t dist;
    (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
                              pd->dst.buf, pd->dst.stride, &sse);
    // sse works better than var, since there is no dc prediction used
Dmitry Kovalev's avatar
Dmitry Kovalev committed
431
    model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
432
                             pd->dequant[1] >> 3, &rate, &dist);
Deb Mukherjee's avatar
Deb Mukherjee committed
433
434

    rate_sum += rate;
Yaowu Xu's avatar
Yaowu Xu committed
435
    dist_sum += (int)dist;
Deb Mukherjee's avatar
Deb Mukherjee committed
436
437
438
439
440
441
  }

  *out_rate_sum = rate_sum;
  *out_dist_sum = dist_sum << 4;
}

442
static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize,
Deb Mukherjee's avatar
Deb Mukherjee committed
443
444
445
446
                                 TX_SIZE tx_size,
                                 MACROBLOCK *x, MACROBLOCKD *xd,
                                 int *out_rate_sum, int64_t *out_dist_sum,
                                 int *out_skip) {
447
  int j, k;
448
  BLOCK_SIZE bs;
Deb Mukherjee's avatar
Deb Mukherjee committed
449
450
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
451
452
  const int width = 4 << num_4x4_blocks_wide_lookup[bsize];
  const int height = 4 << num_4x4_blocks_high_lookup[bsize];
Deb Mukherjee's avatar
Deb Mukherjee committed
453
454
  int rate_sum = 0;
  int64_t dist_sum = 0;
455
  const int t = 4 << tx_size;
Deb Mukherjee's avatar
Deb Mukherjee committed
456
457
458
459
460
461
462
463
464
465
466
467

  if (tx_size == TX_4X4) {
    bs = BLOCK_4X4;
  } else if (tx_size == TX_8X8) {
    bs = BLOCK_8X8;
  } else if (tx_size == TX_16X16) {
    bs = BLOCK_16X16;
  } else if (tx_size == TX_32X32) {
    bs = BLOCK_32X32;
  } else {
    assert(0);
  }
468

Deb Mukherjee's avatar
Deb Mukherjee committed
469
  *out_skip = 1;
Jim Bankoski's avatar
Jim Bankoski committed
470
471
  for (j = 0; j < height; j += t) {
    for (k = 0; k < width; k += t) {
Deb Mukherjee's avatar
Deb Mukherjee committed
472
473
474
      int rate;
      int64_t dist;
      unsigned int sse;
475
476
477
      cpi->fn_ptr[bs].vf(&p->src.buf[j * p->src.stride + k], p->src.stride,
                         &pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride,
                         &sse);
Deb Mukherjee's avatar
Deb Mukherjee committed
478
      // sse works better than var, since there is no dc prediction used
479
      model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3, &rate, &dist);
Deb Mukherjee's avatar
Deb Mukherjee committed
480
481
482
483
484
      rate_sum += rate;
      dist_sum += dist;
      *out_skip &= (rate < 1024);
    }
  }
485

Deb Mukherjee's avatar
Deb Mukherjee committed
486
  *out_rate_sum = rate_sum;
487
  *out_dist_sum = dist_sum << 4;
Deb Mukherjee's avatar
Deb Mukherjee committed
488
489
}

Ronald S. Bultje's avatar
Ronald S. Bultje committed
490
// Returns the sum of squared differences between `coeff` and `dqcoeff` over
// `block_size` entries, and stores the sum of squared `coeff` values in *ssz.
//
// Each square is formed in unsigned arithmetic: the largest possible value
// (65535 * 65535) fits in 32 unsigned bits but would overflow a signed int.
int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int64_t sum_sq_diff = 0;
  int64_t sum_sq_coeff = 0;
  int idx;

  for (idx = 0; idx < block_size; ++idx) {
    const int c = coeff[idx];
    const int diff = c - dqcoeff[idx];

    sum_sq_diff += (unsigned)diff * diff;
    sum_sq_coeff += (unsigned)c * c;
  }

  *ssz = sum_sq_coeff;
  return sum_sq_diff;
}

505
506
507
508
509
/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). */
510
static const int16_t band_counts[TX_SIZES][8] = {
511
512
513
514
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
515
516
};

517
static INLINE int cost_coeffs(MACROBLOCK *x,
518
                              int plane, int block,
519
                              ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
520
                              TX_SIZE tx_size,
521
                              const int16_t *scan, const int16_t *nb) {
522
  MACROBLOCKD *const xd = &x->e_mbd;
523
  MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
524
  struct macroblock_plane *p = &x->plane[plane];
525
526
  struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
527
  const int16_t *band_count = &band_counts[tx_size][1];
528
  const int eob = p->eobs[block];
529
  const int16_t *const qcoeff_ptr = BLOCK_OFFSET(p->qcoeff, block);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
530
  const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
531
  unsigned int (*token_costs)[2][PREV_COEF_CONTEXTS][ENTROPY_TOKENS] =
532
                   x->token_costs[tx_size][type][ref];
533
  const ENTROPY_CONTEXT above_ec = !!*A, left_ec = !!*L;
534
  uint8_t *p_tok = x->token_cache;
535
536
  int pt = combine_entropy_contexts(above_ec, left_ec);
  int c, cost;
537
538

  // Check for consistency of tx_size with mode info
539
  assert(type == PLANE_TYPE_Y_WITH_DC ? mbmi->tx_size == tx_size
540
                                      : get_uv_tx_size(mbmi) == tx_size);
541

542
543
  if (eob == 0) {
    // single eob token
544
    cost = token_costs[0][0][pt][EOB_TOKEN];
545
    c = 0;
546
  } else {
547
    int band_left = *band_count++;
548
549

    // dc token
550
551
    int v = qcoeff_ptr[0];
    int prev_t = vp9_dct_value_tokens_ptr[v].token;
552
    cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
553
    p_tok[0] = vp9_pt_energy_class[prev_t];
554
    ++token_costs;
555
556
557
558

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
559
      int t;
560
561
562

      v = qcoeff_ptr[rc];
      t = vp9_dct_value_tokens_ptr[v].token;
563
      pt = get_coef_context(nb, p_tok, c);
564
      cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
565
      p_tok[rc] = vp9_pt_energy_class[t];
566
      prev_t = t;
567
      if (!--band_left) {
568
569
        band_left = *band_count++;
        ++token_costs;
570
      }
571
    }
572
573

    // eob token
574
    if (band_left) {
575
      pt = get_coef_context(nb, p_tok, c);
576
      cost += (*token_costs)[0][pt][EOB_TOKEN];
577
    }
578
579
  }

580
  // is eob first coefficient;
581
  *A = *L = (c > 0);
582

583
584
585
  return cost;
}

586
// Computes the distortion and SSE of one transform block and stores them in
// args->dist and args->sse (`arg` is a struct rdcost_block_args *).
//
// Both values come from vp9_block_error() on the block's coefficients and
// dequantized coefficients, shifted right by 2 unless args->tx_size is
// TX_32X32.  When x->skip_encode is set and the block is not inter coded, a
// term derived from the quantizer step is added to both.
static void dist_block(int plane, int block, TX_SIZE tx_size, void *arg) {
  const int ss_txfrm_size = tx_size << 1;
  struct rdcost_block_args* args = arg;
  MACROBLOCK* const x = args->x;
  MACROBLOCKD* const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  int shift = args->tx_size == TX_32X32 ? 0 : 2;
  int64_t this_sse;

  // 16 << ss_txfrm_size is the number of coefficients in the block.
  args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                               &this_sse) >> shift;
  args->sse  = this_sse >> shift;

  if (x->skip_encode && !is_inter_block(&xd->mi_8x8[0]->mbmi)) {
    // TODO(jingning): tune the model to better capture the distortion.
    int64_t quant_term = (pd->dequant[1] * pd->dequant[1] *
                             (1 << ss_txfrm_size)) >> (shift + 2);
    args->dist += (quant_term >> 4);
    args->sse  += quant_term;
  }
}

610
static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
611
                       TX_SIZE tx_size, void *arg) {
Deb Mukherjee's avatar
Deb Mukherjee committed
612
  struct rdcost_block_args* args = arg;
613

Deb Mukherjee's avatar
Deb Mukherjee committed
614
  int x_idx, y_idx;
615
  txfrm_block_to_raster_xy(plane_bsize, args->tx_size, block, &x_idx, &y_idx);
Deb Mukherjee's avatar
Deb Mukherjee committed
616

617
618
619
  args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
                           args->t_left + y_idx, args->tx_size,
                           args->scan, args->nb);
Deb Mukherjee's avatar
Deb Mukherjee committed
620
621
}

622
623
// Per-transform-block callback: encodes the block, measures its rate and
// distortion, and accumulates them into the running totals in `arg`
// (a struct rdcost_block_args *).
//
// Does nothing once args->skip is set.  After accumulating, args->skip is set
// if the running RD cost exceeds args->best_rd.
static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
                          TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct encode_b_args encode_args = {x, NULL};
  int64_t rd_coded, rd_zeroed;

  if (args->skip)
    return;

  if (is_inter_block(&xd->mi_8x8[0]->mbmi))
    vp9_xform_quant(plane, block, plane_bsize, tx_size, &encode_args);
  else
    vp9_encode_block_intra(plane, block, plane_bsize, tx_size, &encode_args);

  dist_block(plane, block, tx_size, args);
  rate_block(plane, block, plane_bsize, tx_size, args);

  // Cost of coding the coefficients versus the cost of dropping them all
  // (zero rate, distortion equal to the SSE).
  rd_coded = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
  rd_zeroed = RDCOST(x->rdmult, x->rddiv, 0, args->sse);

  // TODO(jingning): temporarily enabled only for luma component
  if (plane == 0)
    x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
                                    (rd_coded > rd_zeroed && !xd->lossless);

  args->this_rate += args->rate;
  args->this_dist += args->dist;
  args->this_sse  += args->sse;
  args->this_rd += MIN(rd_coded, rd_zeroed);

  // Stop early once this candidate can no longer beat the best so far.
  if (args->this_rd > args->best_rd)
    args->skip = 1;
}

660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
// Builds per-transform-block entropy contexts from per-4x4 contexts.
//
// For TX_4X4 the `above` / `left` arrays are copied as they are.  For larger
// transforms, each group of 2, 4 or 8 consecutive 4x4 contexts is reduced to
// a single 0/1 flag (1 if any byte of the group is non-zero), stored at the
// group's first index in t_above / t_left; other indices are left untouched.
// num_4x4_w / num_4x4_h give the number of 4x4 columns / rows to cover.
//
// NOTE(review): the grouped reads go through uint16_t / uint32_t / uint64_t
// pointer casts, so they assume `above` and `left` are suitably aligned.
void vp9_get_entropy_contexts(TX_SIZE tx_size,
    ENTROPY_CONTEXT t_above[16], ENTROPY_CONTEXT t_left[16],
    const ENTROPY_CONTEXT *above, const ENTROPY_CONTEXT *left,
    int num_4x4_w, int num_4x4_h) {
  int pos;

  switch (tx_size) {
    case TX_4X4: {
      vpx_memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      vpx_memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      break;
    }
    case TX_8X8: {
      for (pos = 0; pos < num_4x4_w; pos += 2) {
        t_above[pos] = !!*(const uint16_t *)&above[pos];
      }
      for (pos = 0; pos < num_4x4_h; pos += 2) {
        t_left[pos] = !!*(const uint16_t *)&left[pos];
      }
      break;
    }
    case TX_16X16: {
      for (pos = 0; pos < num_4x4_w; pos += 4) {
        t_above[pos] = !!*(const uint32_t *)&above[pos];
      }
      for (pos = 0; pos < num_4x4_h; pos += 4) {
        t_left[pos] = !!*(const uint32_t *)&left[pos];
      }
      break;
    }
    case TX_32X32: {
      for (pos = 0; pos < num_4x4_w; pos += 8) {
        t_above[pos] = !!*(const uint64_t *)&above[pos];
      }
      for (pos = 0; pos < num_4x4_h; pos += 8) {
        t_left[pos] = !!*(const uint64_t *)&left[pos];
      }
      break;
    }
    default:
      assert(!"Invalid transform size.");
  }
}

693
694
695
696
697
698
699
700
701
702
703
704
// Resets `arg` to all zeros and then records the block state `x`, the
// transform size, the block dimensions in 4x4 units, and the RD cost to beat
// (stored as best_rd).
static void init_rdcost_stack(MACROBLOCK *x, TX_SIZE tx_size,
                              const int num_4x4_w, const int num_4x4_h,
                              const int64_t ref_rdcost,
                              struct rdcost_block_args *arg) {
  vpx_memset(arg, 0, sizeof(*arg));

  arg->x       = x;
  arg->tx_size = tx_size;
  arg->bw      = num_4x4_w;
  arg->bh      = num_4x4_h;
  arg->best_rd = ref_rdcost;
}

705
static void txfm_rd_in_plane(MACROBLOCK *x,
706
                             struct rdcost_block_args *rd_stack,
707
708
709
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
710
                             BLOCK_SIZE bsize, TX_SIZE tx_size) {
Deb Mukherjee's avatar
Deb Mukherjee committed
711
  MACROBLOCKD *const xd = &x->e_mbd;
712
  struct macroblockd_plane *const pd = &xd->plane[plane];
713
  const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
714
715
  const int num_4x4_w = num_4x4_blocks_wide_lookup[bs];
  const int num_4x4_h = num_4x4_blocks_high_lookup[bs];
716
  const scan_order *so;
717

718
719
  init_rdcost_stack(x, tx_size, num_4x4_w, num_4x4_h,
                    ref_best_rd, rd_stack);
720
  if (plane == 0)
721
    xd->mi_8x8[0]->mbmi.tx_size = tx_size;
722

723
  vp9_get_entropy_contexts(tx_size, rd_stack->t_above, rd_stack->t_left,
724
725
                           pd->above_context, pd->left_context,
                           num_4x4_w, num_4x4_h);
726

727
728
729
  so = get_scan(xd, tx_size, pd->plane_type, 0);
  rd_stack->scan = so->scan;
  rd_stack->nb = so->neighbors;
Deb Mukherjee's avatar
Deb Mukherjee committed
730

731
  foreach_transformed_block_in_plane(xd, bsize, plane,
732
                                     block_rd_txfm, rd_stack);
733
  if (rd_stack->skip) {
734
735
736
737
738
    *rate       = INT_MAX;
    *distortion = INT64_MAX;
    *sse        = INT64_MAX;
    *skippable  = 0;
  } else {
739
740
741
    *distortion = rd_stack->this_dist;
    *rate       = rd_stack->this_rate;
    *sse        = rd_stack->this_sse;
742
    *skippable  = vp9_is_skippable_in_plane(x, bsize, plane);
743
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
744
745
746
747
748
}

static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
                                     int *rate, int64_t *distortion,
                                     int *skip, int64_t *sse,
749
                                     int64_t ref_best_rd,
750
                                     BLOCK_SIZE bs) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
751
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
Deb Mukherjee's avatar
Deb Mukherjee committed
752
  VP9_COMMON *const cm = &cpi->common;
Yaowu Xu's avatar
Yaowu Xu committed
753
  const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
Deb Mukherjee's avatar
Deb Mukherjee committed
754
  MACROBLOCKD *const xd = &x->e_mbd;
755
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
Yaowu Xu's avatar
Yaowu Xu committed
756
757
758

  mbmi->tx_size = MIN(max_tx_size, largest_tx_size);

759
  txfm_rd_in_plane(x, &cpi->rdcost_stack, rate, distortion, skip,
760
761
                   &sse[mbmi->tx_size], ref_best_rd, 0, bs,
                   mbmi->tx_size);
Dmitry Kovalev's avatar
Dmitry Kovalev committed
762
  cpi->tx_stepdown_count[0]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
763
764
}

765
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
766
                                     int (*r)[2], int *rate,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
767
                                     int64_t *d, int64_t *distortion,
768
                                     int *s, int *skip,
769
                                     int64_t tx_cache[TX_MODES],
770
                                     BLOCK_SIZE bs) {
771
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
772
773
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
774
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
775
  vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
776
  int64_t rd[TX_SIZES][2];
777
  int n, m;
778
  int s0, s1;
779
780
781
  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  int64_t best_rd = INT64_MAX;
  TX_SIZE best_tx = TX_4X4;
782

783
  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
784
785
786
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
787

788
  for (n = TX_4X4; n <= max_tx_size; n++) {
789
790
791
792
793
794
795
796
797
    r[n][1] = r[n][0];
    if (r[n][0] < INT_MAX) {
      for (m = 0; m <= n - (n == max_tx_size); m++) {
        if (m == n)
          r[n][1] += vp9_cost_zero(tx_probs[m]);
        else
          r[n][1] += vp9_cost_one(tx_probs[m]);
      }
    }
798
799
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
800
    } else if (s[n]) {
801
802
803
804
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
805
    }
806
807
808
809
810

    if (rd[n][1] < best_rd) {
      best_tx = n;
      best_rd = rd[n][1];
    }
811
  }
812
813
  mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
                      best_tx : MIN(max_tx_size, max_mode_tx_size);
814
815


816
817
818
  *distortion = d[mbmi->tx_size];
  *rate       = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->tx_size];
819

820
821
822
823
  tx_cache[ONLY_4X4] = rd[TX_4X4][0];
  tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
  tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
  tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
824

825
826
  if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
    tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
827
    cpi->tx_stepdown_count[0]++;
828
829
  } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
    tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
830
    cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
831
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
832
    tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
833
    cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
834
  } else {
835
    tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
836
    cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
837
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
838
}
839

Deb Mukherjee's avatar
Deb Mukherjee committed
840
841
842
843
static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
                                          int (*r)[2], int *rate,
                                          int64_t *d, int64_t *distortion,
                                          int *s, int *skip, int64_t *sse,
844
                                          int64_t ref_best_rd,
845
                                          BLOCK_SIZE bs) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
846
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
Deb Mukherjee's avatar
Deb Mukherjee committed
847
848
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
849
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
850
  vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
851
  int64_t rd[TX_SIZES][2];
Deb Mukherjee's avatar
Deb Mukherjee committed
852
853
  int n, m;
  int s0, s1;
854
  double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00};
855
856
857
  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  int64_t best_rd = INT64_MAX;
  TX_SIZE best_tx = TX_4X4;
858

859
  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
860
861
862
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
863

Dmitry Kovalev's avatar
Dmitry Kovalev committed
864
  for (n = TX_4X4; n <= max_tx_size; n++) {
865
    double scale = scale_rd[n];
Deb Mukherjee's avatar
Deb Mukherjee committed
866
    r[n][1] = r[n][0];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
867
    for (m = 0; m <= n - (n == max_tx_size); m++) {
Deb Mukherjee's avatar
Deb Mukherjee committed
868
869
870
871
872
873
      if (m == n)
        r[n][1] += vp9_cost_zero(tx_probs[m]);
      else
        r[n][1] += vp9_cost_one(tx_probs[m]);
    }
    if (s[n]) {
874
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]) * scale;
Deb Mukherjee's avatar
Deb Mukherjee committed
875
    } else {
876
877
878
879
880
881
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]) * scale;
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]) * scale;
    }
    if (rd[n][1] < best_rd) {
      best_rd = rd[n][1];
      best_tx = n;
Deb Mukherjee's avatar
Deb Mukherjee committed
882
883
    }
  }
884

885
886
  mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
                      best_tx : MIN(max_tx_size, max_mode_tx_size);
887

888
889
  // Actually encode using the chosen mode if a model was used, but do not
  // update the r, d costs
890
891
  txfm_rd_in_plane(x, &cpi->rdcost_stack, rate, distortion, skip,
                   &sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size);
892

893
  if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
894
    cpi->tx_stepdown_count[0]++;
895
  } else if (max_tx_size >= TX_16X16 &&  best_tx == TX_16X16) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
896
    cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
897
  } else if (rd[TX_8X8][1] <= rd[TX_4X4][1]) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
898
    cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
899
  } else {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
900
    cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
901
  }
902
903
}

904
static void super_block_yrd(VP9_COMP *cpi,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
905
                            MACROBLOCK *x, int *rate, int64_t *distortion,
906
                            int *skip, int64_t *psse, BLOCK_SIZE bs,
907
                            int64_t txfm_cache[TX_MODES],
908
                            int64_t ref_best_rd) {
909
910
  int r[TX_SIZES][2], s[TX_SIZES];
  int64_t d[TX_SIZES], sse[TX_SIZES];
Jim Bankoski's avatar
Jim Bankoski committed
911
  MACROBLOCKD *xd = &x->e_mbd;
912
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
913
  struct rdcost_block_args *rdcost_stack = &cpi->rdcost_stack;
914
  const int b_inter_mode = is_inter_block(mbmi);
Yaowu Xu's avatar
Yaowu Xu committed
915
916
917
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  TX_SIZE tx_size;

Ronald S. Bultje's avatar
Ronald S. Bultje committed
918

919
  assert(bs == mbmi->sb_type);
920
  if (b_inter_mode)
921
    vp9_subtract_sby(x, bs);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
922

Deb Mukherjee's avatar
Deb Mukherjee committed
923
924
  if (cpi->sf.tx_size_search_method == USE_LARGESTALL ||
      (cpi->sf.tx_size_search_method != USE_FULL_RD &&
925
       !b_inter_mode)) {
926
    vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
927
928
    choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
                             ref_best_rd, bs);
929