vp9_rdopt.c 155 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
John Koleszar's avatar
John Koleszar committed
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
6
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
John Koleszar's avatar
John Koleszar committed
9
10
11
12
13
14
15
 */


#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
16

17
#include "vp9/common/vp9_pragmas.h"
18
19
20
21
22
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
23
24
25
26
27
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
28
29
30
31
32
33
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
34
#include "vpx_mem/vpx_mem.h"
35
36
37
38
39
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
40
#include "vp9_rtcd.h"
41
#include "vp9/common/vp9_mvref_common.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
42
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
43

44
45
#define INVALID_MV 0x80008000

46
47
48
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

49
50
51
DECLARE_ALIGNED(16, extern const uint8_t,
                vp9_pt_energy_class[MAX_ENTROPY_TOKENS]);

Ronald S. Bultje's avatar
Ronald S. Bultje committed
52
#define I4X4_PRED 0x8000
Ronald S. Bultje's avatar
Ronald S. Bultje committed
53
#define SPLITMV 0x10000
Ronald S. Bultje's avatar
Ronald S. Bultje committed
54

55
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
56
  {NEARESTMV, LAST_FRAME,   NONE},
Paul Wilkins's avatar
Paul Wilkins committed
57
58
59
60
  {NEARESTMV, ALTREF_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},
  {NEWMV,     LAST_FRAME,   NONE},
  {NEARESTMV, LAST_FRAME,   ALTREF_FRAME},
61
  {NEARMV,    LAST_FRAME,   NONE},
Paul Wilkins's avatar
Paul Wilkins committed
62
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
John Koleszar's avatar
John Koleszar committed
63

Yaowu Xu's avatar
Yaowu Xu committed
64
65
  {DC_PRED,   INTRA_FRAME,  NONE},

Paul Wilkins's avatar
Paul Wilkins committed
66
67
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
68
  {NEARMV,    ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
69

70
71
  {TM_PRED,   INTRA_FRAME,  NONE},

Paul Wilkins's avatar
Paul Wilkins committed
72
73
74
75
76
  {NEARMV,    LAST_FRAME,   ALTREF_FRAME},
  {NEWMV,     LAST_FRAME,   ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},
John Koleszar's avatar
John Koleszar committed
77

78
79
80
  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},
Paul Wilkins's avatar
Paul Wilkins committed
81
82
  {SPLITMV,   LAST_FRAME,   ALTREF_FRAME},
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},
83

Paul Wilkins's avatar
Paul Wilkins committed
84
85
86
  {ZEROMV,    LAST_FRAME,   NONE},
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {ZEROMV,    ALTREF_FRAME, NONE},
Ronald S. Bultje's avatar
Ronald S. Bultje committed
87
  {ZEROMV,    LAST_FRAME,   ALTREF_FRAME},
John Koleszar's avatar
John Koleszar committed
88
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
89

Paul Wilkins's avatar
Paul Wilkins committed
90
91
92
93
94
95
96
97
98
  {I4X4_PRED, INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {V_PRED,    INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
99
100
};

101
102
103
104
105
106
107
108
109
110
111
// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block size; it has one entry per
// BLOCK_SIZE_TYPES value.
// The factors here are pre-multiplied by 4, i.e. << 2 (2 = x0.5, 32 = x8
// etc); vp9_initialize_rd_consts() divides by 4 when applying them.
static int rd_thresh_block_size_factor[BLOCK_SIZE_TYPES] =
  {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};

#define BASE_RD_THRESH_FREQ_FACT 16
#define MAX_RD_THRESH_FREQ_FACT 32
#define MAX_RD_THRESH_FREQ_INC 1

112
static void fill_token_costs(vp9_coeff_count (*c)[BLOCK_TYPES][2],
113
                             vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
114
  int i, j, k, l;
115
116
117
118
119
120
121
122
  TX_SIZE t;
  for (t = TX_4X4; t <= TX_32X32; t++)
    for (i = 0; i < BLOCK_TYPES; i++)
      for (j = 0; j < REF_TYPES; j++)
        for (k = 0; k < COEF_BANDS; k++)
          for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
            vp9_prob probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
123
            vp9_cost_tokens((int *)c[t][i][j][0][k][l], probs,
124
                            vp9_coef_tree);
125
#if CONFIG_BALANCED_COEFTREE
126
127
128
            // Replace the eob node prob with a very small value so that the
            // cost approximately equals the cost without the eob node
            probs[1] = 1;
129
            vp9_cost_tokens((int *)c[t][i][j][1][k][l], probs, vp9_coef_tree);
130
#else
131
            vp9_cost_tokens_skip((int *)c[t][i][j][1][k][l], probs,
132
                                 vp9_coef_tree);
133
134
            assert(c[t][i][j][0][k][l][DCT_EOB_TOKEN] ==
                   c[t][i][j][1][k][l][DCT_EOB_TOKEN]);
135
#endif
136
          }
137
138
}

139
140
141
142
143
144
// Boost table applied to RDMULT by vp9_initialize_rd_consts() for non-key
// frames in pass 2:
//   RDMULT += (RDMULT * rd_iifactor[next_iiratio]) >> 4
// with the index capped at 31. Only ratios 0..4 yield a non-zero boost.
static const int rd_iifactor[32] = {
  4, 4, 3, 2, 1, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
};
John Koleszar's avatar
John Koleszar committed
145

146
// 3* dc_qlookup[Q]*dc_qlookup[Q];
147

148
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
149
150
151
// Per-qindex SAD-per-bit lookup tables. They are filled by
// vp9_init_me_luts() and read by vp9_initialize_me_consts().
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

152
void vp9_init_me_luts() {
John Koleszar's avatar
John Koleszar committed
153
154
155
156
157
158
159
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    sad_per_bit16lut[i] =
160
      (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107);
161
    sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742);
John Koleszar's avatar
John Koleszar committed
162
  }
Paul Wilkins's avatar
Paul Wilkins committed
163
}
John Koleszar's avatar
John Koleszar committed
164

165
// Returns the rate-distortion multiplier for |qindex|: 11/4 times the square
// of the value returned by vp9_dc_quant(qindex, 0), rounded down.
static int compute_rd_mult(int qindex) {
  const int dc_q = vp9_dc_quant(qindex, 0);
  const int scaled = 11 * dc_q * dc_q;

  return scaled >> 2;
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
170
171
172
// Loads the SAD-per-bit constants for |qindex| from the lookup tables into
// cpi->mb. |qindex| is used as a table index without range checking.
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  MACROBLOCK *const mb = &cpi->mb;

  mb->sadperbit16 = sad_per_bit16lut[qindex];
  mb->sadperbit4 = sad_per_bit4lut[qindex];
}

175

Dmitry Kovalev's avatar
Dmitry Kovalev committed
176
// Fills cpi->rd_threshes, cpi->rd_baseline_thresh and
// cpi->rd_thresh_freq_fact for every block size and mode.
//
// Each threshold is
//   sf.thresh_mult[mode] * q * rd_thresh_block_size_factor[bsize] / thresh_div
// saturated to INT_MAX when the product could overflow an int. |thresh_div|
// must include the factor 4 that undoes the scaling of
// rd_thresh_block_size_factor[].
static void set_rd_block_thresholds(VP9_COMP *cpi, int q, int thresh_div) {
  int bsize, i;

  for (bsize = 0; bsize < BLOCK_SIZE_TYPES; ++bsize) {
    // Threshold here seems unnecessarily harsh but fine given actual
    // range of values used for cpi->sf.thresh_mult[]
    const int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);

    for (i = 0; i < MAX_MODES; ++i) {
      if (cpi->sf.thresh_mult[i] < thresh_max) {
        cpi->rd_threshes[bsize][i] =
            cpi->sf.thresh_mult[i] * q *
            rd_thresh_block_size_factor[bsize] / thresh_div;
      } else {
        cpi->rd_threshes[bsize][i] = INT_MAX;
      }
      cpi->rd_baseline_thresh[bsize][i] = cpi->rd_threshes[bsize][i];

      if (cpi->sf.adaptive_rd_thresh)
        cpi->rd_thresh_freq_fact[bsize][i] = MAX_RD_THRESH_FREQ_FACT;
      else
        cpi->rd_thresh_freq_fact[bsize][i] = BASE_RD_THRESH_FREQ_FACT;
    }
  }
}

// Initializes the rate-distortion constants for a frame coded at |qindex|
// (clamped to [0, MAXQ]):
//  - cpi->RDMULT / cpi->RDDIV and cpi->mb.errorperbit,
//  - speed features (via vp9_set_speed_features),
//  - per block-size / per mode rd thresholds,
//  - coefficient token, partition and mode cost tables,
//  - for non-key frames, the motion vector and inter mode cost tables.
void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
  int q, i;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  qindex = clamp(qindex, 0, MAXQ);

  cpi->RDMULT = compute_rd_mult(qindex);
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    // rd_iifactor[] has 32 entries; keep the index inside the table on both
    // ends (a negative ratio would otherwise read before the table).
    const int iiratio = clamp(cpi->twopass.next_iiratio, 0, 31);
    cpi->RDMULT += (cpi->RDMULT * rd_iifactor[iiratio]) >> 4;
  }
  cpi->mb.errorperbit = cpi->RDMULT >> 6;
  // errorperbit must never be zero.
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);

  vp9_set_speed_features(cpi);

  q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25);
  q <<= 2;
  if (q < 8)
    q = 8;

  if (cpi->RDMULT > 1000) {
    // Large multipliers are rescaled by 100; RDDIV and the threshold divisor
    // change with them.
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;
    // *4 relates to the scaling of rd_thresh_block_size_factor[]
    set_rd_block_thresholds(cpi, q, 4 * 100);
  } else {
    cpi->RDDIV = 100;
    set_rd_block_thresholds(cpi, q, 4);
  }

  fill_token_costs(cpi->mb.token_costs, cpi->common.fc.coef_probs);

  for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
    vp9_cost_tokens(cpi->mb.partition_cost[i],
                    cpi->common.fc.partition_prob[cpi->common.frame_type][i],
                    vp9_partition_tree);

  /*rough estimate for costing*/
  vp9_init_mode_costs(cpi);

  if (cpi->common.frame_type != KEY_FRAME) {
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cpi->mb.e_mbd.allow_high_precision_mv ?
        cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cpi->common.fc.nmvc,
        cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);

    for (i = 0; i < INTER_MODE_CONTEXTS; i++) {
      MB_PREDICTION_MODE m;

      for (m = NEARESTMV; m < MB_MODE_COUNT; m++) {
        // Index relative to NEARESTMV first, so that no pointer before the
        // start of vp9_inter_mode_encodings is ever formed.
        const int mode_idx = m - NEARESTMV;
        cpi->mb.inter_mode_cost[i][mode_idx] =
            cost_token(vp9_inter_mode_tree,
                       cpi->common.fc.inter_mode_probs[i],
                       vp9_inter_mode_encodings + mode_idx);
      }
    }
  }
}

287
288
// Looks up the block size type for the given width / height indices in
// bsize_from_dim_lookup. The indices are not range checked.
static INLINE BLOCK_SIZE_TYPE get_block_size(int bwl, int bhl) {
  const BLOCK_SIZE_TYPE result = bsize_from_dim_lookup[bwl][bhl];
  return result;
}

291
292
293
294
// Returns the block size type that |bsize| corresponds to in plane |pd|,
// using the plane's log2-by-4 width and height.
static BLOCK_SIZE_TYPE get_plane_block_size(BLOCK_SIZE_TYPE bsize,
                                            struct macroblockd_plane *pd) {
  const int bwl = plane_block_width_log2by4(bsize, pd);
  const int bhl = plane_block_height_log2by4(bsize, pd);

  return get_block_size(bwl, bhl);
}

Yaowu Xu's avatar
Yaowu Xu committed
297
// Linearly interpolates two tables of |ntab| entries at the same abscissa.
//
//   x         position to evaluate, in units where one table step is
//             1 / inv_step
//   ntab      number of entries in each of tab1 and tab2
//   inv_step  table entries per unit of x
//   tab1/tab2 the tables to sample
//   v1/v2     receive the interpolated values from tab1 and tab2
//
// Positions at or beyond the last entry return the last entry and negative
// positions return the first entry. The range check is done on the
// floating-point position before it is converted to an index, so very large
// (or NaN) inputs never go through an out-of-range double-to-int conversion.
static INLINE void linear_interpolate2(double x, int ntab, int inv_step,
                                       const double *tab1, const double *tab2,
                                       double *v1, double *v2) {
  const int last = ntab - 1;
  const double y = x * inv_step;

  if (!(y < last)) {
    // Past the end of the table (this also catches NaN): clamp to last entry.
    *v1 = tab1[last];
    *v2 = tab2[last];
  } else if (y < 0.0) {
    // Before the start of the table: clamp to the first entry.
    *v1 = tab1[0];
    *v2 = tab2[0];
  } else {
    // 0 <= y < last, so the conversion is in range and d + 1 is a valid index.
    const int d = (int)y;
    const double a = y - d;
    *v1 = tab1[d] * (1 - a) + tab1[d + 1] * a;
    *v2 = tab2[d] * (1 - a) + tab2[d + 1] * a;
  }
}

312
313
314
315
316
// Evaluates the normalized rate and distortion model at |x| by linear
// interpolation in the two tables below, which are sampled at steps of
// 1 / inv_tab_step in x.
//
//   x  non-negative; per the table comments, x = qpstep / sqrt(variance)
//   R  receives the normalized rate
//   D  receives the normalized distortion
static void model_rd_norm(double x, double *R, double *D) {
  static const int inv_tab_step = 8;
  static const int tab_size = 120;
  // NOTE: The tables below must be of the same size
  //
  // Normalized rate
  // This table models the rate for a Laplacian source with given variance
  // when quantized with a uniform quantizer with given stepsize. The closed
  // form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const double rate_tab[] = {
    64.00, 4.944, 3.949, 3.372, 2.966, 2.655, 2.403, 2.194,
    2.014, 1.858, 1.720, 1.596, 1.485, 1.384, 1.291, 1.206,
    1.127, 1.054, 0.986, 0.923, 0.863, 0.808, 0.756, 0.708,
    0.662, 0.619, 0.579, 0.541, 0.506, 0.473, 0.442, 0.412,
    0.385, 0.359, 0.335, 0.313, 0.291, 0.272, 0.253, 0.236,
    0.220, 0.204, 0.190, 0.177, 0.165, 0.153, 0.142, 0.132,
    0.123, 0.114, 0.106, 0.099, 0.091, 0.085, 0.079, 0.073,
    0.068, 0.063, 0.058, 0.054, 0.050, 0.047, 0.043, 0.040,
    0.037, 0.034, 0.032, 0.029, 0.027, 0.025, 0.023, 0.022,
    0.020, 0.019, 0.017, 0.016, 0.015, 0.014, 0.013, 0.012,
    0.011, 0.010, 0.009, 0.008, 0.008, 0.007, 0.007, 0.006,
    0.006, 0.005, 0.005, 0.005, 0.004, 0.004, 0.004, 0.003,
    0.003, 0.003, 0.003, 0.002, 0.002, 0.002, 0.002, 0.002,
    0.002, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001,
    0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.000,
  };
  // Normalized distortion
  // This table models the normalized distortion for a Laplacian source with
  // given variance when quantized with a uniform quantizer with given
  // stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance)
  // Note the actual distortion is Dn * variance.
  static const double dist_tab[] = {
    0.000, 0.001, 0.005, 0.012, 0.021, 0.032, 0.045, 0.061,
    0.079, 0.098, 0.119, 0.142, 0.166, 0.190, 0.216, 0.242,
    0.269, 0.296, 0.324, 0.351, 0.378, 0.405, 0.432, 0.458,
    0.484, 0.509, 0.534, 0.557, 0.580, 0.603, 0.624, 0.645,
    0.664, 0.683, 0.702, 0.719, 0.735, 0.751, 0.766, 0.780,
    0.794, 0.807, 0.819, 0.830, 0.841, 0.851, 0.861, 0.870,
    0.878, 0.886, 0.894, 0.901, 0.907, 0.913, 0.919, 0.925,
    0.930, 0.935, 0.939, 0.943, 0.947, 0.951, 0.954, 0.957,
    0.960, 0.963, 0.966, 0.968, 0.971, 0.973, 0.975, 0.976,
    0.978, 0.980, 0.981, 0.982, 0.984, 0.985, 0.986, 0.987,
    0.988, 0.989, 0.990, 0.990, 0.991, 0.992, 0.992, 0.993,
    0.993, 0.994, 0.994, 0.995, 0.995, 0.996, 0.996, 0.996,
    0.996, 0.997, 0.997, 0.997, 0.997, 0.998, 0.998, 0.998,
    0.998, 0.998, 0.998, 0.999, 0.999, 0.999, 0.999, 0.999,
    0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 1.000,
  };

  // Guard against the tables and tab_size drifting apart: the interpolation
  // below indexes both tables with indices derived from tab_size.
  assert(sizeof(rate_tab) == tab_size * sizeof(rate_tab[0]));
  assert(sizeof(dist_tab) == tab_size * sizeof(dist_tab[0]));
  assert(sizeof(rate_tab) == sizeof(dist_tab));

  assert(x >= 0.0);
  linear_interpolate2(x, tab_size, inv_tab_step,
                      rate_tab, dist_tab, R, D);
}

// Models the rate and distortion for a Laplacian source with given variance
// when quantized with a uniform quantizer with given stepsize. The closed
// form expressions are in:
// Hang and Chen, "Source Model for transform video coder and its
// application - Part I: Fundamental Theory", IEEE Trans. Circ.
// Sys. for Video Tech., April 1997.
//
//   var    summed squared error (or variance sum) over the block
//   n      number of samples the sum covers
//   qstep  quantizer step size
//   rate   receives the modelled rate, n * 256 * normalized rate, rounded
//   dist   receives the modelled distortion, var * normalized distortion,
//          rounded
//
// Both outputs are 0 when |var| or |n| is 0.
static void model_rd_from_var_lapndz(int var, int n, int qstep,
                                     int *rate, int64_t *dist) {
  vp9_clear_system_state();
  if (var != 0 && n != 0) {
    double norm_rate, norm_dist;
    const double sample_var = (double) var / n;
    // Step size expressed in standard deviations of the source.
    const double step_in_sigmas = qstep / sqrt(sample_var);

    model_rd_norm(step_in_sigmas, &norm_rate, &norm_dist);
    *rate = (int)((n << 8) * norm_rate + 0.5);
    *dist = (int64_t)(var * norm_dist + 0.5);
  } else {
    *rate = 0;
    *dist = 0;
  }
  vp9_clear_system_state();
}

// Estimates rate and distortion for block |bsize| from the prediction error
// of every plane, without running the transform.
//
// For each plane the sse between source and destination buffers is fed to
// model_rd_from_var_lapndz(); the per-plane rates and distortions are summed.
//
//   out_rate_sum  receives the summed modelled rate
//   out_dist_sum  receives the summed modelled distortion, scaled by 16
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  int i;
  int rate_sum = 0;
  // 64-bit accumulator: the per-plane distortions are int64_t and the total
  // is shifted left by 4 below, which an int could not hold safely.
  int64_t dist_sum = 0;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];

    // TODO(dkovalev) the same code in get_plane_block_size
    const int bwl = plane_block_width_log2by4(bsize, pd);
    const int bhl = plane_block_height_log2by4(bsize, pd);
    const BLOCK_SIZE_TYPE bs = get_block_size(bwl, bhl);
    unsigned int sse;
    int rate;
    int64_t dist;
    // Only the sse output is used; the returned variance is discarded.
    (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
                              pd->dst.buf, pd->dst.stride, &sse);
    // sse works better than var, since there is no dc prediction used
    model_rd_from_var_lapndz(sse, 16 << (bwl + bhl),
                             pd->dequant[1] >> 3, &rate, &dist);

    rate_sum += rate;
    dist_sum += dist;
  }

  *out_rate_sum = rate_sum;
  *out_dist_sum = dist_sum << 4;
}

431
432
433
434
435
436
437
438
439
440
// Luma-only variant of the rate / distortion model: estimates rate and
// distortion for block |bsize| from the plane 0 prediction error.
//
//   out_rate_sum  receives the modelled rate
//   out_dist_sum  receives the modelled distortion, scaled by 16
static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
                              MACROBLOCK *x, MACROBLOCKD *xd,
                              int *out_rate_sum, int64_t *out_dist_sum) {
  struct macroblock_plane *const src_plane = &x->plane[0];
  struct macroblockd_plane *const dst_plane = &xd->plane[0];

  // TODO(dkovalev) the same code in get_plane_block_size
  const int bwl = plane_block_width_log2by4(bsize, dst_plane);
  const int bhl = plane_block_height_log2by4(bsize, dst_plane);
  const BLOCK_SIZE_TYPE plane_bsize = get_block_size(bwl, bhl);
  const int num_samples = 16 << (bwl + bhl);
  unsigned int sse;
  int model_rate;
  int64_t model_dist;

  // Only the sse output is used; the returned variance is discarded.
  (void) cpi->fn_ptr[plane_bsize].vf(src_plane->src.buf,
                                     src_plane->src.stride,
                                     dst_plane->dst.buf,
                                     dst_plane->dst.stride, &sse);

  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  // sse works better than var, since there is no dc prediction used
  model_rd_from_var_lapndz(sse, num_samples, dst_plane->dequant[1] >> 3,
                           &model_rate, &model_dist);

  *out_rate_sum = model_rate;
  *out_dist_sum = model_dist << 4;
}

Deb Mukherjee's avatar
Deb Mukherjee committed
457
458
459
460
461
// Models luma rate and distortion for block |bsize| one transform block at a
// time: the plane is tiled with squares of the transform's size, each tile's
// sse is run through model_rd_from_var_lapndz(), and the results are summed.
//
//   out_rate_sum  receives the summed modelled rate
//   out_dist_sum  receives the summed modelled distortion, scaled by 16
//   out_skip      set to 1 only if every tile's modelled rate is below 1024
static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
                                 TX_SIZE tx_size,
                                 MACROBLOCK *x, MACROBLOCKD *xd,
                                 int *out_rate_sum, int64_t *out_dist_sum,
                                 int *out_skip) {
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const int width = plane_block_width(bsize, pd);
  const int height = plane_block_height(bsize, pd);
  BLOCK_SIZE_TYPE tile_bs = BLOCK_SIZE_AB4X4;
  int tile = 4;
  int total_rate = 0;
  int64_t total_dist = 0;
  int row, col;

  // Pick the variance function and tile edge matching the transform size.
  switch (tx_size) {
    case TX_4X4:
      tile_bs = BLOCK_4X4;
      tile = 4;
      break;
    case TX_8X8:
      tile_bs = BLOCK_8X8;
      tile = 8;
      break;
    case TX_16X16:
      tile_bs = BLOCK_16X16;
      tile = 16;
      break;
    case TX_32X32:
      tile_bs = BLOCK_32X32;
      tile = 32;
      break;
    default:
      assert(0);
      break;
  }

  *out_skip = 1;
  for (row = 0; row < height; row += tile) {
    for (col = 0; col < width; col += tile) {
      int rate;
      int64_t dist;
      unsigned int sse;

      // Only the sse output is used; the returned variance is discarded.
      (void) cpi->fn_ptr[tile_bs].vf(p->src.buf + row * p->src.stride + col,
                                     p->src.stride,
                                     pd->dst.buf + row * pd->dst.stride + col,
                                     pd->dst.stride, &sse);
      // sse works better than var, since there is no dc prediction used
      model_rd_from_var_lapndz(sse, tile * tile, pd->dequant[1] >> 3,
                               &rate, &dist);
      total_rate += rate;
      total_dist += dist;
      *out_skip &= (rate < 1024);
    }
  }

  *out_rate_sum = total_rate;
  *out_dist_sum = (total_dist << 4);
}

Ronald S. Bultje's avatar
Ronald S. Bultje committed
508
// Computes the squared error between |coeff| and |dqcoeff| over the first
// |block_size| entries.
//
//   coeff       original coefficients
//   dqcoeff     dequantized coefficients
//   block_size  number of entries to compare
//   ssz         receives the sum of squares of |coeff| itself
//
// Returns the sum of squared differences.
int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int64_t sum_sq_diff = 0;
  int64_t sum_sq_coeff = 0;
  intptr_t idx;

  for (idx = 0; idx < block_size; ++idx) {
    // 16-bit inputs: every square fits comfortably in 64 bits.
    const int64_t value = coeff[idx];
    const int64_t diff = value - dqcoeff[idx];

    sum_sq_diff += diff * diff;
    sum_sq_coeff += value * value;
  }

  *ssz = sum_sq_coeff;
  return sum_sq_diff;
}

523
524
525
526
527
528
529
// Number of coefficients in each coefficient band, one row per transform
// size. The six listed entries of each row sum to the number of coefficients
// in that transform (16, 64, 256, 1024); the remaining two entries of each
// row are zero-initialized. cost_coeffs() walks a row to know when to advance
// to the next band.
static const int16_t band_counts[TX_SIZE_MAX_SB][8] = {
  { 1, 2, 3, 4,  3,   16 - 13 },
  { 1, 2, 3, 4, 11,   64 - 21 },
  { 1, 2, 3, 4, 11,  256 - 21 },
  { 1, 2, 3, 4, 11, 1024 - 21 },
};

530
static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
531
                              int plane, int block, PLANE_TYPE type,
532
                              ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
533
                              TX_SIZE tx_size,
534
                              const int16_t *scan, const int16_t *nb) {
535
  MACROBLOCKD *const xd = &mb->e_mbd;
536
  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
537
538
  int pt, c, cost;
  const int16_t *band_count = band_counts[tx_size];
539
  const int eob = xd->plane[plane].eobs[block];
540
  const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
541
  const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
542
543
  unsigned int (*token_costs)[COEF_BANDS][PREV_COEF_CONTEXTS]
                    [MAX_ENTROPY_TOKENS] = mb->token_costs[tx_size][type][ref];
544
  ENTROPY_CONTEXT above_ec = !!*A, left_ec = !!*L;
545
  uint8_t token_cache[1024];
546
547

  // Check for consistency of tx_size with mode info
548
  assert((!type && !plane) || (type && plane));
549
550
551
  if (type == PLANE_TYPE_Y_WITH_DC) {
    assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
  } else {
552
    assert(tx_size == get_uv_tx_size(mbmi));
553
554
  }

555
  pt = combine_entropy_contexts(above_ec, left_ec);
556

557
558
  if (eob == 0) {
    // single eob token
559
560
    cost = token_costs[0][0][pt][DCT_EOB_TOKEN];
    c = 0;
561
  } else {
562
    int v, prev_t, band = 1, band_left = band_count[1];
563
564
565

    // dc token
    v = qcoeff_ptr[0];
566
    prev_t = vp9_dct_value_tokens_ptr[v].token;
567
    cost = token_costs[0][0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
568
    token_cache[0] = vp9_pt_energy_class[prev_t];
569
570
571
572

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
573
      int t;
574
575
576

      v = qcoeff_ptr[rc];
      t = vp9_dct_value_tokens_ptr[v].token;
577
      pt = get_coef_context(nb, token_cache, c);
578
      cost += token_costs[!prev_t][band][pt][t] + vp9_dct_value_cost_ptr[v];
579
      token_cache[rc] = vp9_pt_energy_class[t];
580
      prev_t = t;
581
582
583
      if (!--band_left) {
        band_left = band_count[++band];
      }
584
    }
585
586

    // eob token
587
    if (band < 6) {
588
      pt = get_coef_context(nb, token_cache, c);
589
      cost += token_costs[0][band][pt][DCT_EOB_TOKEN];
590
    }
591
592
  }

593
  // is eob first coefficient;
594
  *A = *L = c > 0;
595

596
597
598
  return cost;
}

Deb Mukherjee's avatar
Deb Mukherjee committed
599
600
601
602
603
604
605
606
607
608
609
610
611
// State threaded through the per-transform-block rd callbacks (dist_block,
// rate_block, ...). Field order matters: rdcost_plane() initializes this
// struct positionally.
struct rdcost_block_args {
  VP9_COMMON *cm;
  MACROBLOCK *x;
  // Working copies of the above / left entropy contexts for the plane.
  ENTROPY_CONTEXT t_above[16];
  ENTROPY_CONTEXT t_left[16];
  TX_SIZE tx_size;
  // Plane dimensions; rdcost_plane() sets them to 1 << the log2-by-4 size.
  int bw;
  int bh;
  // Accumulated token cost (rate_block).
  int rate;
  // Accumulated distortion and sse (dist_block).
  int64_t dist;
  int64_t sse;
  int64_t best_rd;
  int skip;
  // Scan order and neighbor table handed to cost_coeffs().
  const int16_t *scan;
  const int16_t *nb;
};

// Per-transform-block callback: adds this block's distortion and sse to
// |arg| (a struct rdcost_block_args).
//
// The error is measured between the coefficients and the dequantized
// coefficients, and is scaled down by 4 for every transform size except
// 32x32. Coefficients are always taken from plane 0; |plane| and |bsize| are
// not used.
static void dist_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
                       int ss_txfrm_size, void *arg) {
  struct rdcost_block_args *const rd_args = arg;
  MACROBLOCK *const x = rd_args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const enc_plane = &x->plane[0];
  struct macroblockd_plane *const dec_plane = &xd->plane[0];
  const int shift = (rd_args->tx_size == TX_32X32) ? 0 : 2;
  int16_t *const coeff = BLOCK_OFFSET(enc_plane->coeff, block, 16);
  int16_t *const dqcoeff = BLOCK_OFFSET(dec_plane->dqcoeff, block, 16);
  int64_t block_sse;

  rd_args->dist += vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                                   &block_sse) >> shift;
  rd_args->sse += block_sse >> shift;

  if (x->skip_encode &&
      xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME) {
    // TODO(jingning): tune the model to better capture the distortion.
    // Extra term added to both totals, proportional to the squared
    // dequant[1] step.
    const int64_t penalty = (dec_plane->dequant[1] * dec_plane->dequant[1] *
                                (1 << ss_txfrm_size)) >> shift;
    rd_args->dist += penalty;
    rd_args->sse  += penalty;
  }
}

// Per-transform-block callback: adds the token cost of |block| to the rate
// total in |arg| (a struct rdcost_block_args). cost_coeffs() also updates
// the block's entries in the working above / left contexts.
static void rate_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
                       int ss_txfrm_size, void *arg) {
  struct rdcost_block_args *const rd_args = arg;
  MACROBLOCKD *const xd = &rd_args->x->e_mbd;
  ENTROPY_CONTEXT *above;
  ENTROPY_CONTEXT *left;
  int col, row;

  // Locate the block inside the plane to pick its context entries.
  txfrm_block_to_raster_xy(xd, bsize, plane, block, rd_args->tx_size * 2,
                           &col, &row);
  above = rd_args->t_above + col;
  left = rd_args->t_left + row;

  rd_args->rate += cost_coeffs(rd_args->cm, rd_args->x, plane, block,
                               xd->plane[plane].plane_type, above, left,
                               rd_args->tx_size,
                               rd_args->scan, rd_args->nb);
}

655
656
// FIXME(jingning): need to make the rd test of chroma components consistent
// with that of luma component. this function should be deprecated afterwards.
Deb Mukherjee's avatar
Deb Mukherjee committed
657
658
659
static int rdcost_plane(VP9_COMMON * const cm, MACROBLOCK *x, int plane,
                        BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  MACROBLOCKD * const xd = &x->e_mbd;
Jim Bankoski's avatar
Jim Bankoski committed
660
661
  const int bwl = plane_block_width_log2by4(bsize, &xd->plane[plane]);
  const int bhl = plane_block_height_log2by4(bsize, &xd->plane[plane]);
Deb Mukherjee's avatar
Deb Mukherjee committed
662
  const int bw = 1 << bwl, bh = 1 << bhl;
663
  int i;
Deb Mukherjee's avatar
Deb Mukherjee committed
664
  struct rdcost_block_args args = { cm, x, { 0 }, { 0 }, tx_size, bw, bh,
665
    0, 0, 0, INT64_MAX, 0 };
Deb Mukherjee's avatar
Deb Mukherjee committed
666

667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
  switch (tx_size) {
    case TX_4X4:
      vpx_memcpy(&args.t_above, xd->plane[plane].above_context,
                 sizeof(ENTROPY_CONTEXT) * bw);
      vpx_memcpy(&args.t_left, xd->plane[plane].left_context,
                 sizeof(ENTROPY_CONTEXT) * bh);
      args.scan = vp9_default_scan_4x4;
      args.nb = vp9_default_scan_4x4_neighbors;
      break;
    case TX_8X8:
      for (i = 0; i < bw; i += 2)
        args.t_above[i] = !!*(uint16_t *)&xd->plane[plane].above_context[i];
      for (i = 0; i < bh; i += 2)
        args.t_left[i] = !!*(uint16_t *)&xd->plane[plane].left_context[i];
      args.scan = vp9_default_scan_8x8;
      args.nb = vp9_default_scan_8x8_neighbors;
      break;
    case TX_16X16:
      for (i = 0; i < bw; i += 4)
        args.t_above[i] = !!*(uint32_t *)&xd->plane[plane].above_context[i];
      for (i = 0; i < bh; i += 4)
        args.t_left[i] = !!*(uint32_t *)&xd->plane[plane].left_context[i];
      args.scan = vp9_default_scan_16x16;
      args.nb = vp9_default_scan_16x16_neighbors;
      break;
    case TX_32X32:
      for (i = 0; i < bw; i += 8)
        args.t_above[i] = !!*(uint64_t *)&xd->plane[plane].above_context[i];
      for (i = 0; i < bh; i += 8)
        args.t_left[i] = !!*(uint64_t *)&xd->plane[plane].left_context[i];
      args.scan = vp9_default_scan_32x32;
      args.nb = vp9_default_scan_32x32_neighbors;
      break;
    default:
      assert(0);
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720

  foreach_transformed_block_in_plane(xd, bsize, plane, rate_block, &args);
  return args.rate;
}

// Returns the sum of rdcost_plane() over every plane from index 1 up to
// (but not including) MAX_MB_PLANE, i.e. all planes except plane 0, for the
// given block size and transform size.
static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x,
                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  int total_rate = 0;
  int plane = 1;  // plane 0 is deliberately excluded

  while (plane < MAX_MB_PLANE) {
    total_rate += rdcost_plane(cm, x, plane, bsize, tx_size);
    ++plane;
  }
  return total_rate;
}

// Computes the coefficient-domain error for plane 0 of the macroblock.
//
// vp9_block_error() is run over 16 << (bwl + bhl) coefficients, comparing the
// plane's source coefficients (x->plane[0].coeff) against its dequantized
// coefficients (dqcoeff); it also fills in *sse.
//
//   shift  right-shift applied to both the returned error and *sse.
//   sse    output; receives the value written by vp9_block_error(), shifted
//          right by |shift|.
//
// Returns the shifted error. The return type is int64_t (matching
// block_error_sbuv()) so the 64-bit intermediate is not narrowed to int on
// return, which would corrupt large error values.
static int64_t block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize,
                               int shift, int64_t *sse) {
  struct macroblockd_plane *p = &x->e_mbd.plane[0];
  const int bwl = plane_block_width_log2by4(bsize, p);
  const int bhl = plane_block_height_log2by4(bsize, p);
  const int64_t e = vp9_block_error(x->plane[0].coeff, p->dqcoeff,
                                    16 << (bwl + bhl), sse) >> shift;

  *sse >>= shift;
  return e;
}

// Computes the combined coefficient-domain error of planes
// 1..MAX_MB_PLANE-1 (every plane except plane 0).
//
// For each such plane vp9_block_error() is run over 16 << (bwl + bhl)
// coefficients, comparing the plane's source coefficients with its
// dequantized coefficients.
//
//   shift  right-shift applied to the summed error and the summed sse.
//   sse    output; receives the sum of the per-plane sse values, shifted
//          right by |shift|.
//
// Returns the summed error shifted right by |shift|.
static int64_t block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize,
                                int shift, int64_t *sse) {
  int64_t error_total = 0;
  int64_t sse_total = 0;
  int plane;

  for (plane = 1; plane < MAX_MB_PLANE; plane++) {
    struct macroblockd_plane *pd = &x->e_mbd.plane[plane];
    const int width_log2 = plane_block_width_log2by4(bsize, pd);
    const int height_log2 = plane_block_height_log2by4(bsize, pd);
    int64_t plane_sse;

    error_total += vp9_block_error(x->plane[plane].coeff,
                                   x->e_mbd.plane[plane].dqcoeff,
                                   16 << (width_log2 + height_log2),
                                   &plane_sse);
    sse_total += plane_sse;
  }

  // The shift is applied once to the totals, not per plane.
  *sse = sse_total >> shift;
  return error_total >> shift;
}

// Per-transform-block callback (it is passed to
// foreach_transformed_block_in_plane() by super_block_yrd_for_txfm()).
//
// |arg| is a struct rdcost_block_args that accumulates rate, distortion and
// sse across calls. Before doing any work for this block the running totals
// are checked against args->best_rd: if both the cost of the accumulated
// rate/dist and the cost of the accumulated sse at zero rate already exceed
// best_rd, the search is abandoned by setting args->skip and saturating
// rate/dist/sse. Once skip is set, subsequent calls return immediately.
//
// Otherwise the block is processed (encode_block_intra() when the first
// reference frame is INTRA_FRAME, xform_quant() otherwise) and then
// dist_block() and rate_block() are invoked with the shared args.
static void block_yrd_txfm(int plane, int block, BLOCK_SIZE_TYPE bsize,
                           int ss_txfrm_size, void *arg) {
  struct rdcost_block_args *const rd_args = arg;
  MACROBLOCK *const x = rd_args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct encode_b_args encode_args = {rd_args->cm, x, NULL};
  int64_t coded_rd, zero_rate_rd;

  // A previous block already pushed the total past best_rd.
  if (rd_args->skip)
    return;

  coded_rd = RDCOST(x->rdmult, x->rddiv, rd_args->rate, rd_args->dist);
  zero_rate_rd = RDCOST(x->rdmult, x->rddiv, 0, rd_args->sse);

  if (MIN(coded_rd, zero_rate_rd) > rd_args->best_rd) {
    // Give up: mark the result as unusable for the caller.
    rd_args->skip = 1;
    rd_args->rate = INT_MAX;
    rd_args->dist = INT64_MAX;
    rd_args->sse  = INT64_MAX;
    return;
  }

  if (xd->mode_info_context->mbmi.ref_frame[0] != INTRA_FRAME)
    xform_quant(plane, block, bsize, ss_txfrm_size, &encode_args);
  else
    encode_block_intra(plane, block, bsize, ss_txfrm_size, &encode_args);

  dist_block(plane, block, bsize, ss_txfrm_size, rd_args);
  rate_block(plane, block, bsize, ss_txfrm_size, rd_args);
}

static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
                                     int *rate, int64_t *distortion,
                                     int *skippable, int64_t *sse,
780
                                     int64_t ref_best_rd,
Deb Mukherjee's avatar
Deb Mukherjee committed
781
782
783
                                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblockd_plane *const pd = &xd->plane[0];
Jim Bankoski's avatar
Jim Bankoski committed
784
785
  const int bwl = plane_block_width_log2by4(bsize, pd);
  const int bhl = plane_block_height_log2by4(bsize, pd);
Deb Mukherjee's avatar
Deb Mukherjee committed
786
  const int bw = 1 << bwl, bh = 1 << bhl;
787
  int i;
Deb Mukherjee's avatar
Deb Mukherjee committed
788
  struct rdcost_block_args args = { cm, x, { 0 }, { 0 }, tx_size, bw, bh,
789
                                    0, 0, 0, ref_best_rd, 0 };
Deb Mukherjee's avatar
Deb Mukherjee committed
790
  xd->mode_info_context->mbmi.txfm_size = tx_size;
791
792
793
794
795
796
  switch (tx_size) {
    case TX_4X4:
      vpx_memcpy(&args.t_above, pd->above_context,
                 sizeof(ENTROPY_CONTEXT) * bw);
      vpx_memcpy(&args.t_left, pd->left_context,
                 sizeof(ENTROPY_CONTEXT) * bh);
797
798
      get_scan_nb_4x4(get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, 0),
                      &args.scan, &args.nb);
799
800
801
802
803
804
      break;
    case TX_8X8:
      for (i = 0; i < bw; i += 2)
        args.t_above[i] = !!*(uint16_t *)&pd->above_context[i];
      for (i = 0; i < bh; i += 2)
        args.t_left[i] = !!*(uint16_t *)&pd->left_context[i];
805
806
      get_scan_nb_8x8(get_tx_type_8x8(PLANE_TYPE_Y_WITH_DC, xd),
                      &args.scan, &args.nb);
807
808
809
810
811
812
      break;
    case TX_16X16:
      for (i = 0; i < bw; i += 4)
        args.t_above[i] = !!*(uint32_t *)&pd->above_context[i];
      for (i = 0; i < bh; i += 4)
        args.t_left[i] = !!*(uint32_t *)&pd->left_context[i];
813
814
      get_scan_nb_16x16(get_tx_type_16x16(PLANE_TYPE_Y_WITH_DC, xd),
                        &args.scan, &args.nb);
815
816
817
818
819
820
821
822
823
824
825
826
      break;
    case TX_32X32:
      for (i = 0; i < bw; i += 8)
        args.t_above[i] = !!*(uint64_t *)&pd->above_context[i];
      for (i = 0; i < bh; i += 8)
        args.t_left[i] = !!*(uint64_t *)&pd->left_context[i];
      args.scan = vp9_default_scan_32x32;
      args.nb = vp9_default_scan_32x32_neighbors;
      break;
    default:
      assert(0);
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
827
828
829
830
831

  foreach_transformed_block_in_plane(xd, bsize, 0, block_yrd_txfm, &args);
  *distortion = args.dist;
  *rate       = args.rate;
  *sse        = args.sse;
832
  *skippable  = vp9_sby_is_skippable(xd, bsize) && (!args.skip);
Deb Mukherjee's avatar
Deb Mukherjee committed
833
834
835
836
837
}

static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
                                     int *rate, int64_t *distortion,
                                     int *skip, int64_t *sse,
838
                                     int64_t ref_best_rd,
Deb Mukherjee's avatar
Deb Mukherjee committed
839
840
841
842
843
844
845
                                     BLOCK_SIZE_TYPE bs) {
  const TX_SIZE max_txfm_size = TX_32X32
      - (bs < BLOCK_SIZE_SB32X32) - (bs < BLOCK_SIZE_MB16X16);
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  if (max_txfm_size == TX_32X32 &&
846
847
      (cm->tx_mode == ALLOW_32X32 ||
       cm->tx_mode == TX_MODE_SELECT)) {
Deb Mukherjee's avatar
Deb Mukherjee committed
848
849
    mbmi->txfm_size = TX_32X32;
  } else if (max_txfm_size >= TX_16X16 &&
850
851
852
             (cm->tx_mode == ALLOW_16X16 ||
              cm->tx_mode == ALLOW_32X32 ||
              cm->tx_mode == TX_MODE_SELECT)) {
Deb Mukherjee's avatar
Deb Mukherjee committed
853
    mbmi->txfm_size = TX_16X16;
854
  } else if (cm->tx_mode != ONLY_4X4) {
Deb Mukherjee's avatar
Deb Mukherjee committed
855
856
857
858
859
    mbmi->txfm_size = TX_8X8;
  } else {
    mbmi->txfm_size = TX_4X4;
  }
  super_block_yrd_for_txfm(cm, x, rate, distortion, skip,
860
                           &sse[mbmi->txfm_size], ref_best_rd, bs,
Deb Mukherjee's avatar
Deb Mukherjee committed
861
862
863
864
                           mbmi->txfm_size);
  cpi->txfm_stepdown_count[0]++;
}

865
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
866
                                     int (*r)[2], int *rate,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
867
                                     int64_t *d, int64_t *distortion,
868
869
                                     int *s, int *skip,
                                     int64_t txfm_cache[NB_TXFM_MODES],
Deb Mukherjee's avatar
Deb Mukherjee committed
870
871
872
                                     BLOCK_SIZE_TYPE bs) {
  const TX_SIZE max_txfm_size = TX_32X32
      - (bs < BLOCK_SIZE_SB32X32) - (bs < BLOCK_SIZE_MB16X16);
873
874
875
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
876
  vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
877
878
  int64_t rd[TX_SIZE_MAX_SB][2];
  int n, m;
879
  int s0, s1;
880

881
  const vp9_prob *tx_probs = get_tx_probs2(xd, &cm->fc.tx_probs);
882

883
884
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    r[n][1] = r[n][0];
885
886
    if (r[n][0] == INT_MAX)
      continue;
887
888
    for (m = 0; m <= n - (n == max_txfm_size); m++) {
      if (m == n)
889
        r[n][1] += vp9_cost_zero(tx_probs[m]);
890
      else
891
        r[n][1] += vp9_cost_one(tx_probs[m]);
892
893
    }
  }
894

895
896
897
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
898

899
  for (n = TX_4X4; n <= max_txfm_size; n++) {
900
901
902
903
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
      continue;
    }
904
905
906
907
908
    if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
909
910
911
    }
  }

912
  if (max_txfm_size == TX_32X32 &&
913
914
      (cm->tx_mode == ALLOW_32X32 ||
       (cm->tx_mode == TX_MODE_SELECT &&
915
916
917
        rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
        rd[TX_32X32][1] < rd[TX_4X4][1]))) {
    mbmi->txfm_size = TX_32X32;
918
  } else if (max_txfm_size >= TX_16X16 &&
919
920
921
             (cm->tx_mode == ALLOW_16X16 ||
              cm->tx_mode == ALLOW_32X32 ||
              (cm->tx_mode == TX_MODE_SELECT &&
922
923
               rd[TX_16X16][1] < rd[TX_8X8][1] &&
               rd[TX_16X16][1] < rd[TX_4X4][1]))) {
924
    mbmi->txfm_size = TX_16X16;
925
926
927
928
  } else if (cm->tx_mode == ALLOW_8X8 ||
             cm->tx_mode == ALLOW_16X16 ||
             cm->tx_mode == ALLOW_32X32 ||
           (cm->tx_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) {
929
930
931
932
933
    mbmi->txfm_size = TX_8X8;
  } else {
    mbmi->txfm_size = TX_4X4;
  }

934
  *distortion = d[mbmi->txfm_size];
935
  *rate       = r[mbmi->txfm_size][cm->tx_mode == TX_MODE_SELECT];
936
937
  *skip       = s[mbmi->txfm_size];

938
939
  txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
  txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
940
941
  txfm_cache[ALLOW_16X16] = rd[MIN(max_txfm_size, TX_16X16)][0];
  txfm_cache[ALLOW_32X32] = rd[MIN(max_txfm_size, TX_32X32)][0];
942
943
944
945
  if (max_txfm_size == TX_32X32 &&
      rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1])
    txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
946
947
  else if (max_txfm_size >= TX_16X16 &&
           rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])
948
    txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
949
  else
950
951
    txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
                                 rd[TX_4X4][1] : rd[TX_8X8][1];
952

Deb Mukherjee's avatar
Deb Mukherjee committed
953
954
955
956
957
958
959
960
961
962
963
964
965
  if (max_txfm_size == TX_32X32 &&
      rd[TX_32X32][1] < rd[TX_16X16][1] &&
      rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1]) {
    cpi->txfm_stepdown_count[0]++;
  } else if (max_txfm_size >= TX_16X16 &&
             rd[TX_16X16][1] < rd[TX_8X8][1] &&
             rd[TX_16X16][1] < rd[TX_4X4][1]) {
    cpi->txfm_stepdown_count[max_txfm_size - TX_16X16]++;
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
    cpi->txfm_stepdown_count[max_txfm_size - TX_8X8]++;
  } else {
    cpi->txfm_stepdown_count[max_txfm_size - TX_4X4]++;
966
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
967
}
968

Deb Mukherjee's avatar
Deb Mukherjee committed
969
970
971
972
static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
                                          int (*r)[2], int *rate,
                                          int64_t *d, int64_t *distortion,
                                          int *s, int *skip, int64_t *sse,
973
                                          int64_t ref_best_rd,
Deb Mukherjee's avatar
Deb Mukherjee committed
974
975
976
977
978
979
980
                                          BLOCK_SIZE_TYPE bs,
                                          int *model_used) {
  const TX_SIZE max_txfm_size = TX_32X32
      - (bs < BLOCK_SIZE_SB32X32) - (bs < BLOCK_SIZE_MB16X16);
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
981
  vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
Deb Mukherjee's avatar
Deb Mukherjee committed
982
983
984
985
986
  int64_t rd[TX_SIZE_MAX_SB][2];
  int n, m;
  int s0, s1;
  double scale_rd[TX_SIZE_MAX_SB] = {1.73, 1.44, 1.20, 1.00};
  // double scale_r[TX_SIZE_MAX_SB] = {2.82, 2.00, 1.41, 1.00};
987

988
  const vp9_prob *tx_probs = get_tx_probs2(xd, &cm->fc.tx_probs);
989

Deb Mukherjee's avatar
Deb Mukherjee committed
990
991
  // for (n = TX_4X4; n <= max_txfm_size; n++)
  //   r[n][0] = (r[n][0] * scale_r[n]);
992

Deb Mukherjee's avatar
Deb Mukherjee committed
993
994
995
996
997
998
999
1000
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    r[n][1] = r[n][0];
    for (m = 0; m <= n - (n == max_txfm_size); m++) {
      if (m == n)
        r[n][1] += vp9_cost_zero(tx_probs[m]);
      else
        r[n][1] += vp9_cost_one(tx_probs[m]);
    }
1001
  }
1002

Deb Mukherjee's avatar
Deb Mukherjee committed
1003
1004
1005
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
1006

Deb Mukherjee's avatar
Deb Mukherjee committed
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }
  }
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    rd[n][0] = (scale_rd[n] * rd[n][0]);
    rd[n][1] = (scale_rd[n] * rd[n][1]);
  }
1019

Deb Mukherjee's avatar
Deb Mukherjee committed
1020
  if (max_txfm_size == TX_32X32 &&
1021
1022
      (cm->tx_mode == ALLOW_32X32 ||
       (cm->tx_mode == TX_MODE_SELECT &&
Deb Mukherjee's avatar
Deb Mukherjee committed
1023
1024
1025
1026
1027
        rd[TX_32X32][1] <= rd[TX_16X16][1] &&
        rd[TX_32X32][1] <= rd[TX_8X8][1] &&
        rd[TX_32X32][1] <= rd[TX_4X4][1]))) {
    mbmi->txfm_size = TX_32X32;
  } else if (max_txfm_size >= TX_16X16 &&
1028
1029
1030
             (cm->tx_mode == ALLOW_16X16 ||
              cm->tx_mode == ALLOW_32X32 ||
              (cm->tx_mode == TX_MODE_SELECT &&
Deb Mukherjee's avatar
Deb Mukherjee committed
1031
1032
1033
               rd[TX_16X16][1] <= rd[TX_8X8][1] &&
               rd[TX_16X16][1] <= rd[TX_4X4][1]))) {
    mbmi->txfm_size = TX_16X16;
1034
1035
1036
1037
  } else if (cm->tx_mode == ALLOW_8X8 ||
             cm->tx_mode == ALLOW_16X16 ||
             cm->tx_mode == ALLOW_32X32 ||
           (cm->tx_mode == TX_MODE_SELECT &&
Deb Mukherjee's avatar
Deb Mukherjee committed
1038
1039
1040
1041
1042
            rd[TX_8X8][1] <= rd[TX_4X4][1])) {
    mbmi->txfm_size = TX_8X8;
  } else {
    mbmi->txfm_size = TX_4X4;
  }
1043

Deb Mukherjee's avatar
Deb Mukherjee committed
1044
1045
1046
1047
  if (model_used[mbmi->txfm_size]) {
    // Actually encode using the chosen mode if a model was used, but do not
    // update the r, d costs
    super_block_yrd_for_txfm(cm, x, rate, distortion, skip,
1048
                             &sse[mbmi->txfm_size], ref_best_rd,
1049
                             bs, mbmi->txfm_size);
Deb Mukherjee's avatar
Deb Mukherjee committed
1050
1051
  } else {
    *distortion = d[mbmi->txfm_size];
1052
    *rate       = r[mbmi->txfm_size][cm->tx_mode == TX_MODE_SELECT];
Deb Mukherjee's avatar
Deb Mukherjee committed
1053
1054
    *skip       = s[mbmi->txfm_size];
  }